Skip to content
v1.0.0-zig0.15.2

Parallel Reduce

Map-reduce pattern for aggregating data in parallel.

const blitz = @import("blitz");
// Sum all elements
const data = [_]i64{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
const sum = blitz.parallelReduce(
i64, // Result type
data.len, // Element count
0, // Identity element
[]const i64, // Context type
&data, // Context value
struct {
fn map(d: []const i64, i: usize) i64 {
return d[i];
}
}.map,
struct {
fn combine(a: i64, b: i64) i64 {
return a + b;
}
}.combine,
);
// sum = 55
Data: [1, 2, 3, 4, 5, 6, 7, 8]
Step 1: Parallel map (each worker processes a chunk)
┌─────────────┬─────────────┬─────────────┬─────────────┐
│ Worker 0 │ Worker 1 │ Worker 2 │ Worker 3 │
│ map(0,1) │ map(2,3) │ map(4,5) │ map(6,7) │
│ 1+2=3 │ 3+4=7 │ 5+6=11 │ 7+8=15 │
└─────────────┴─────────────┴─────────────┴─────────────┘
Step 2: Parallel combine (tree reduction)
3 + 7 = 10 11 + 15 = 26
\ /
\ /
10 + 26 = 36 (final result)
ParameterPurpose
TResult type (must be copyable)
nNumber of elements to process
identityStarting value (0 for sum, 1 for product)
ContextType holding data/parameters
ctxContext instance
mapfn(Context, usize) -> T - Extract/transform element
combinefn(T, T) -> T - Merge two results
OperationIdentityWhy
Sum0x + 0 = x
Product1x * 1 = x
MinmaxIntmin(x, maxInt) = x
MaxminIntmax(x, minInt) = x
Andtruex && true = x
Orfalsex || false = x
const sum = blitz.parallelReduce(
i64, data.len, 0,
[]const i64, data,
struct { fn map(d: []const i64, i: usize) i64 { return d[i]; } }.map,
struct { fn add(a: i64, b: i64) i64 { return a + b; } }.add,
);
const max = blitz.parallelReduce(
i64, data.len, std.math.minInt(i64),
[]const i64, data,
struct { fn map(d: []const i64, i: usize) i64 { return d[i]; } }.map,
struct { fn max(a: i64, b: i64) i64 { return @max(a, b); } }.max,
);
const count = blitz.parallelReduce(
usize, data.len, 0,
[]const i64, data,
struct {
fn map(d: []const i64, i: usize) usize {
return if (d[i] > 0) 1 else 0;
}
}.map,
struct {
fn add(a: usize, b: usize) usize { return a + b; }
}.add,
);
const Context = struct { a: []const f64, b: []const f64 };
const dot = blitz.parallelReduce(
f64, a.len, 0.0,
Context, .{ .a = a, .b = b },
struct {
fn map(ctx: Context, i: usize) f64 {
return ctx.a[i] * ctx.b[i];
}
}.map,
struct {
fn add(x: f64, y: f64) f64 { return x + y; }
}.add,
);
const result = blitz.parallelReduceWithGrain(
T, n, identity,
Context, ctx,
mapFn, combineFn,
4096, // grain size
);

Process ranges instead of individual elements (optimized for SIMD-friendly inner loops):

const sum = blitz.parallelReduceChunked(
i64, // Result type
data.len, // Element count
0, // Identity
[]const i64, // Context type
data, // Context value
struct {
fn processChunk(d: []const i64, start: usize, end: usize) i64 {
var total: i64 = 0;
for (d[start..end]) |v| total += v;
return total;
}
}.processChunk,
struct {
fn combine(a: i64, b: i64) i64 { return a + b; }
}.combine,
4096, // Grain size (elements per chunk)
);

Important: The combine function must be associative:

combine(combine(a, b), c) == combine(a, combine(b, c))

Valid:

  • Addition: (1+2)+3 = 1+(2+3) = 6
  • Multiplication: (23)4 = 2(34) = 24
  • Min/Max: max(max(1,2),3) = max(1,max(2,3)) = 3

Invalid:

  • Subtraction: (5-3)-1 ≠ 5-(3-1)
  • Division: (8/4)/2 ≠ 8/(4/2)
// For small data, sequential may be faster
if (data.len < 10_000) {
// Sequential reduce
var sum: i64 = 0;
for (data) |v| sum += v;
return sum;
} else {
// Parallel reduce
return blitz.parallelReduce(...);
}

Use data.len >= blitz.DEFAULT_GRAIN_SIZE as a simple threshold for the parallel/sequential decision.