Quick Start
Get parallel execution working in under 5 minutes.
Installation
Add Blitz to your build.zig.zon:
.dependencies = .{ .blitz = .{ .url = "https://github.com/NerdMeNot/blitz/archive/refs/tags/v1.0.0-zig0.15.2.tar.gz", .hash = "...", // Get from error message on first build },},In build.zig:
const blitz = b.dependency("blitz", .{ .target = target, .optimize = optimize,});exe.root_module.addImport("blitz", blitz.module("blitz"));Hello Parallel World
Section titled “Hello Parallel World”const blitz = @import("blitz");
pub fn main() !void { var numbers = [_]i64{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
// Parallel sum - that's it! const sum = blitz.iter(i64, &numbers).sum(); // sum = 55}The Two APIs
Blitz provides two levels of abstraction:
| API | Best For | Example |
|---|---|---|
| Iterators (recommended) | Data processing, aggregations, transforms | blitz.iter(T, data).sum() |
| Fork-Join | Divide-and-conquer, recursive algorithms | blitz.join(.{...}) |
Iterator API (Recommended)
Section titled “Iterator API (Recommended)”Aggregations
Section titled “Aggregations”const data: []const i64 = &.{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
const sum = blitz.iter(i64, data).sum(); // 55const min = blitz.iter(i64, data).min(); // ?i64 = 1const max = blitz.iter(i64, data).max(); // ?i64 = 10const count = blitz.iter(i64, data).count(); // 10Search (with early exit)
Section titled “Search (with early exit)”// Find any match (fastest - non-deterministic order)const found = blitz.iter(i64, data).findAny(isNegative);
// Find first match (deterministic)const first = blitz.iter(i64, data).findFirst(isNegative);
// Check predicates (short-circuit on match)const has_negative = blitz.iter(i64, data).any(isNegative);const all_positive = blitz.iter(i64, data).all(isPositive);
fn isNegative(x: i64) bool { return x < 0; }fn isPositive(x: i64) bool { return x > 0; }Transform
Section titled “Transform”var data: [100]i64 = undefined;for (&data, 0..) |*v, i| v.* = @intCast(i);
// Transform in-placeblitz.iterMut(i64, &data).mapInPlace(double);
// Fill with valueblitz.iterMut(i64, &data).fill(0);
fn double(x: i64) i64 { return x * 2; }Custom Reduce
Section titled “Custom Reduce”const product = blitz.iter(i64, data).reduce(1, multiply);
fn multiply(a: i64, b: i64) i64 { return a * b; }Fork-Join API
Section titled “Fork-Join API”Two Tasks
Section titled “Two Tasks”const result = blitz.join(.{ .left = .{ computeLeft, left_data }, .right = .{ computeRight, right_data },});// Access: result.left, result.rightThree or More Tasks
Section titled “Three or More Tasks”const result = blitz.join(.{ .a = .{ taskA, arg_a }, .b = .{ taskB, arg_b }, .c = .{ taskC, arg_c },});// Access: result.a, result.b, result.cClassic Fibonacci Example
Section titled “Classic Fibonacci Example”fn parallelFib(n: u64) u64 { // Switch to sequential below threshold if (n < 20) return fibSequential(n);
const r = blitz.join(.{ .a = .{ parallelFib, n - 1 }, .b = .{ parallelFib, n - 2 }, }); return r.a + r.b;}
fn fibSequential(n: u64) u64 { if (n <= 1) return n; return fibSequential(n - 1) + fibSequential(n - 2);}Parallel Sort
Section titled “Parallel Sort”var numbers = [_]i64{ 5, 2, 8, 1, 9, 3, 7, 4, 6 };
// Sort ascending (in-place, no allocation needed)blitz.sortAsc(i64, &numbers);// numbers is now [1, 2, 3, 4, 5, 6, 7, 8, 9]
// Sort descendingblitz.sortDesc(i64, &numbers);
// Custom comparatorblitz.sort(i64, &numbers, lessThan);
fn lessThan(a: i64, b: i64) bool { return a < b;}
// Sort by keyblitz.sortByKey(Person, u32, &people, getAge);
fn getAge(p: Person) u32 { return p.age; }Range Iteration
Section titled “Range Iteration”// Process indices 0..999 in parallelblitz.range(0, 1000).forEach(processIndex);
fn processIndex(i: usize) void { // Process index i}
// Sum a rangeconst sum = blitz.range(0, 1000).sum(i64, identity);
fn identity(i: usize) i64 { return @intCast(i); }Low-Level API
For full control, use the low-level parallel primitives:
// Parallel for with contextconst Context = struct { input: []const f64, output: []f64, scale: f64,};
blitz.parallelFor(data.len, Context, ctx, struct { fn body(c: Context, start: usize, end: usize) void { for (c.input[start..end], c.output[start..end]) |in, *out| { out.* = in * c.scale; } }}.body);
// Parallel reduceconst sum = blitz.parallelReduce( i64, // Result type data.len, // Element count 0, // Identity value []const i64, // Context type data, // Context value struct { fn map(d: []const i64, i: usize) i64 { return d[i]; } }.map, struct { fn combine(a: i64, b: i64) i64 { return a + b; } }.combine,);Complete Example
Section titled “Complete Example”const std = @import("std");const blitz = @import("blitz");
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Generate data on the heap (an 8 MB array may overflow the stack)
    const data = try allocator.alloc(i64, 1_000_000);
    defer allocator.free(data);
    for (data, 0..) |*v, i| {
        v.* = @intCast(i);
    }

    // Parallel aggregations
    const sum = blitz.iter(i64, data).sum();
    const min = blitz.iter(i64, data).min();
    const max = blitz.iter(i64, data).max();
    std.debug.print("Sum: {}, Min: {?}, Max: {?}\n", .{ sum, min, max });

    // Parallel transform
    blitz.iterMut(i64, data).mapInPlace(square);

    // Parallel search
    const found = blitz.iter(i64, data).findAny(isLarge);
    std.debug.print("Found large value: {?}\n", .{found});
// Parallel sort var to_sort = [_]i64{ 5, 2, 8, 1, 9 }; blitz.sortAsc(i64, &to_sort); std.debug.print("Sorted: {any}\n", .{to_sort});
// Fork-join const result = blitz.join(.{ .a = .{ computeA, @as(u64, 10) }, .b = .{ computeB, @as(u64, 20) }, }); std.debug.print("Join result: a={}, b={}\n", .{ result.a, result.b });}
fn square(x: i64) i64 { return x * x; }fn isLarge(x: i64) bool { return x > 500_000_000; }fn computeA(n: u64) u64 { return n * n; }fn computeB(n: u64) u64 { return n + 100; }Real-World Taste
Here’s what Blitz looks like on a realistic 10-million-element workload:
const std = @import("std");const blitz = @import("blitz");
pub fn main() !void { const allocator = std.heap.page_allocator;
// 10 million elements const n = 10_000_000; const data = try allocator.alloc(f64, n); defer allocator.free(data);
for (data, 0..) |*v, i| { v.* = @as(f64, @floatFromInt(i)) * 0.001; }
// Parallel stats: compute sum and max simultaneously const stats = blitz.join(.{ .sum = .{ struct { fn compute(d: []const f64) f64 { return blitz.iter(f64, d).sum(); } }.compute, @as([]const f64, data) }, .max = .{ struct { fn compute(d: []const f64) ?f64 { return blitz.iter(f64, d).max(); } }.compute, @as([]const f64, data) }, });
std.debug.print("Sum: {d:.2}, Max: {?d:.2}\n", .{ stats.sum, stats.max });
    // Transform in-place: normalize all values by the max
    if (stats.max) |max_val| {
        // Named context type so the body function can refer to it
        // (@This() inside the function-holder struct would name that struct instead).
        const ScaleCtx = struct { d: []f64, s: f64 };
        const scale = 1.0 / max_val;
        blitz.parallelFor(data.len, ScaleCtx, .{ .d = data, .s = scale }, struct {
            fn body(ctx: ScaleCtx, start: usize, end: usize) void {
                for (ctx.d[start..end]) |*v| {
                    v.* *= ctx.s;
                }
            }
        }.body);
    }
}
Common Patterns
Section titled “Common Patterns”Pattern 1: Process and Aggregate
Section titled “Pattern 1: Process and Aggregate”// Process data, then aggregate resultsblitz.iterMut(f64, &data).mapInPlace(normalize);const total = blitz.iter(f64, &data).sum();Pattern 2: Search and Early Exit
// Find any invalid entry (stops early once one is found; use findFirst if you need the earliest match)
const invalid = blitz.iter(Record, &records).findAny(isInvalid);
if (invalid) |record| {
    std.debug.print("Invalid record found: {}\n", .{record});
}
Pattern 3: Divide and Conquer
Section titled “Pattern 3: Divide and Conquer”fn mergeSort(data: []i32) void { if (data.len <= 1024) { std.sort.insertion(i32, data, {}, std.sort.asc(i32)); return; } const mid = data.len / 2; _ = blitz.join(.{ .left = .{ mergeSort, data[0..mid] }, .right = .{ mergeSort, data[mid..] }, }); merge(data, mid); // Combine sorted halves}Performance Tips
- Use iterators for data processing - Optimized parallel aggregations
- Use fork-join for recursive algorithms - Optimal work distribution
- Set appropriate thresholds - Switch to sequential below ~1000 elements
- Avoid false sharing - Don’t write to adjacent memory from different threads
- Trust the defaults - Auto grain size works well for most cases
What’s Next
- Basic Concepts - Understand work stealing and fork-join
- Iterator API - Complete iterator reference
- Fork-Join - Divide and conquer patterns
- API Reference - Full API documentation