Skip to content
v1.0.0-zig0.15.2

Scope & Broadcast

Scope-based parallelism for spawning arbitrary tasks that must all complete before the scope exits, plus utilities for fire-and-forget background work and broadcasting to all worker threads.

A scope collects tasks during its body, then executes them all in parallel when the scope exits. This is the “collect and execute” model.

// Basic scope usage: tasks are *collected* inside the body and only
// executed (in parallel) when the scope exits.
const blitz = @import("blitz");
blitz.scope(struct {
fn run(s: *blitz.Scope) void {
// spawn() only records the task — nothing runs yet.
s.spawn(computeStatistics);
s.spawn(buildIndex);
s.spawn(validateData);
}
// All three tasks execute in parallel here, when scope exits
}.run);
// All tasks guaranteed complete at this point
blitz.scope(func):
┌─────────────────────────────────────────────────┐
│ 1. Create scope │
│ 2. Run func(scope) — collects tasks │
│ scope.spawn(A) → tasks = [A] │
│ scope.spawn(B) → tasks = [A, B] │
│ scope.spawn(C) → tasks = [A, B, C] │
│ │
│ 3. scope.wait() — execute all in parallel │
│ ┌─────┐ ┌─────┐ ┌─────┐ │
│ │ A │ │ B │ │ C │ │
│ │ W-0 │ │ W-1 │ │ W-2 │ │
│ └─────┘ └─────┘ └─────┘ │
│ │
│ 4. All complete → scope returns │
└─────────────────────────────────────────────────┘

Tasks are not started until the scope body returns (or wait() is called explicitly). This differs from Rayon’s immediate-spawn model but provides the same parallel execution semantics.

Pass data into the scope body using scopeWithContext:

// Context struct passed by value into the scope body.
const Config = struct {
data: []const f64,
threshold: f64,
};
const config = Config{
.data = sensor_data,
.threshold = 0.95,
};
// scopeWithContext takes (ContextType, context value, body fn); the body
// receives the context first, then the scope pointer.
blitz.scopeWithContext(Config, config, struct {
fn run(cfg: Config, s: *blitz.Scope) void {
// Access config inside the scope
// Which tasks get spawned can depend on the context.
if (cfg.data.len > 1000) {
s.spawn(heavyAnalysis);
s.spawn(buildReport);
} else {
s.spawn(quickSummary);
}
}
}.run);

A scope supports a maximum of 64 spawned tasks. This is a compile-time fixed limit for stack allocation efficiency.

// A scope holds at most 64 tasks (fixed at compile time for stack
// allocation efficiency). Exceeding the limit panics:
// This will panic at runtime:
blitz.scope(struct {
fn run(s: *blitz.Scope) void {
for (0..65) |_| {
s.spawn(someTask); // Panics on the 65th spawn!
}
}
}.run);

For larger workloads, use parallelFor or parallelForRange instead, which split work adaptively without a fixed task limit:

// Process 10,000 items — no task limit
// parallelFor splits the range adaptively instead of using fixed tasks.
blitz.parallelFor(10_000, Context, ctx, bodyFn);

You can call wait() explicitly to execute tasks mid-scope, then spawn more:

// Calling wait() mid-scope runs the tasks collected so far, creating
// sequential phases that are each internally parallel.
blitz.scope(struct {
fn run(s: *blitz.Scope) void {
// Phase 1: data loading
s.spawn(loadDataA);
s.spawn(loadDataB);
s.wait(); // Execute and wait for phase 1
// Phase 2: processing (runs after phase 1 completes)
s.spawn(processResults);
s.spawn(generateReport);
// wait() called automatically when scope exits
}
}.run);

Execute a function over each index in a range [start, end):

// Process indices 100..500
// The range is half-open [start, end): the callback is invoked once per
// index, with the end index excluded.
blitz.parallelForRange(100, 500, struct {
fn process(i: usize) void {
updatePixel(i);
}
}.process);

Unlike parallelFor, where the body receives chunk boundaries (start, end), parallelForRange calls the function once per index. This is simpler when per-element work is the natural unit.

// Shared read-only context for the per-index callback.
const ImageCtx = struct {
pixels: []Pixel,
brightness: f32,
};
// Arguments: ContextType, context value, start, end (exclusive), body fn.
blitz.parallelForRangeWithContext(
ImageCtx,
.{ .pixels = pixels, .brightness = 1.5 },
0,
pixels.len,
struct {
fn adjust(ctx: ImageCtx, i: usize) void {
// Scale each channel, clamping to the 255 ceiling.
ctx.pixels[i].r = @min(255, ctx.pixels[i].r * ctx.brightness);
ctx.pixels[i].g = @min(255, ctx.pixels[i].g * ctx.brightness);
ctx.pixels[i].b = @min(255, ctx.pixels[i].b * ctx.brightness);
}
}.adjust,
);
| Feature | `parallelForRange` | `parallelFor` |
|---|---|---|
| Callback receives | Single index `i` | Chunk (`start`, `end`) |
| Use when | Per-element work is natural | Batch processing is efficient |
| Overhead | Slightly higher (one call per element) | Lower (one call per chunk) |
| Context | `parallelForRangeWithContext` | Built into `parallelFor` |

Spawn a background task that runs asynchronously:

// Fire and forget — returns immediately
// The task runs asynchronously in the background; there is no handle to
// join on (use scope() if completion must be awaited).
blitz.spawn(struct {
fn run() void {
writeAuditLog();
}
}.run);

With context:

// Context captured by value for the background task.
const LogCtx = struct {
message: []const u8,
level: u8,
};
// spawnWithContext(ContextType, context value, body fn) — fire-and-forget
// with data passed in.
blitz.spawnWithContext(LogCtx, .{
.message = "operation completed",
.level = 2,
}, struct {
fn run(ctx: LogCtx) void {
appendToLog(ctx.message, ctx.level);
}
}.run);

Note: The spawned task must complete before program exit. If you need to wait for completion, use scope() instead.

Execute a function on every worker thread. Each invocation receives the worker index:

// broadcast runs the function once on *every* worker thread; each
// invocation receives that worker's index.
blitz.broadcast(struct {
fn run(worker_index: usize) void {
std.debug.print("Hello from worker {}\n", .{worker_index});
}
}.run);

Broadcast is ideal for initializing per-thread state:

// Per-worker scratch buffers (module-level storage)
// Sized 64 to match the documented maximum worker/task count.
var worker_buffers: [64][4096]u8 = undefined;
// Initialize all worker buffers in parallel
// Each worker zeroes only its own buffer — no sharing, no contention.
blitz.broadcast(struct {
fn init(worker_index: usize) void {
@memset(&worker_buffers[worker_index], 0);
}
}.init);
// Context variant: pass shared data into the per-worker initializer.
const SeedCtx = struct {
base_seed: u64,
};
blitz.broadcastWithContext(SeedCtx, .{ .base_seed = 12345 }, struct {
fn init(ctx: SeedCtx, worker_index: usize) void {
// Each worker gets a unique seed derived from base + index
initWorkerRng(ctx.base_seed + worker_index);
}
}.init);
| Pattern | Description |
|---|---|
| Thread-local init | Initialize per-worker buffers or RNG seeds |
| Cache warming | Pre-load data into each worker’s cache |
| Statistics reset | Clear per-worker counters before a benchmark |
| Health check | Verify each worker thread is responsive |
| Goal | API | Why |
|---|---|---|
| Run 2-8 heterogeneous tasks | `join(.{...})` | Named results, different return types |
| Run 2-64 homogeneous tasks | `scope()` + `spawn()` | Dynamic task count |
| Process every element in a range | `parallelForRange()` | Per-element callback |
| Process array in chunks | `parallelFor()` | Chunk-based, lower overhead |
| Background work | `spawn()` | Fire-and-forget |
| Run on every worker | `broadcast()` | Thread-local init |
| 65+ independent tasks | `parallelFor()` | No task limit |
// Complete example combining broadcast (per-worker init) with scope
// (heterogeneous parallel phases).
const std = @import("std");
const blitz = @import("blitz");
// Per-worker accumulators
// Zero-initialized at comptime via the labeled block below.
var worker_sums: [64]std.atomic.Value(i64) = init: {
var sums: [64]std.atomic.Value(i64) = undefined;
for (&sums) |*s| s.* = std.atomic.Value(i64).init(0);
break :init sums;
};
pub fn main() !void {
// Start the thread pool; deinit is deferred so shutdown happens on
// every exit path.
try blitz.init();
defer blitz.deinit();
// Step 1: Reset all worker accumulators
// Runtime re-zero on every worker (defensive — the comptime init
// above already zeroes; useful when main's steps are re-run).
blitz.broadcast(struct {
fn reset(worker_index: usize) void {
worker_sums[worker_index].store(0, .release);
}
}.reset);
// Step 2: Use scope to run analysis phases
// The three phases are collected here and executed in parallel when
// the scope exits.
blitz.scope(struct {
fn run(s: *blitz.Scope) void {
s.spawn(analyzePhaseA);
s.spawn(analyzePhaseB);
s.spawn(analyzePhaseC);
}
fn analyzePhaseA() void {
// ... heavy computation ...
}
fn analyzePhaseB() void {
// ... heavy computation ...
}
fn analyzePhaseC() void {
// ... heavy computation ...
}
}.run);
// Step 3: All phases complete here
std.debug.print("Analysis complete\n", .{});
}