Add to Cursor Add to Claude Copy for LLM View as MD

Batch Processing

Batch processing involves running the same operation across a large set of items, such as images, documents, records, or API calls. This guide shows how to structure batch workloads in Hatchet using fan-out, retries, and concurrency control.

At its core, batch processing is Fanout applied at scale. If your batch also has fixed stages (e.g., validate → transform → load), you can combine it with Pre-Determined Pipelines.

Step-by-step walkthrough

You’ll build a parent workflow that fans out to one child task per item and aggregates results.

Define the parent workflow

Create a parent workflow that receives a batch of item IDs and spawns one child per item.

examples/python/guides/batch_processing/worker.py

# Input payload for the BatchParent workflow.
class BatchInput(BaseModel):
    # IDs of the items to process; the parent spawns one child run per entry.
    items: list[str]


# Input payload for a single BatchChild run.
class ItemInput(BaseModel):
    # ID of the one item this child run processes.
    item_id: str


# Parent workflow: receives the whole batch (input validated as BatchInput).
parent_wf = hatchet.workflow(name="BatchParent", input_validator=BatchInput)
# Child workflow: processes a single item (input validated as ItemInput).
child_wf = hatchet.workflow(name="BatchChild", input_validator=ItemInput)


@parent_wf.durable_task()
async def spawn_children(input: BatchInput, ctx: Context) -> dict[str, Any]:
    """Run one BatchChild workflow per item ID and collect the results.

    Returns a dict with "processed" (the number of child results) and
    "results" (the child results as returned by the bulk run).
    """
    # Build one bulk-run entry per item so each item gets its own child run.
    bulk_items = [
        child_wf.create_bulk_run_item(input=ItemInput(item_id=item_id))
        for item_id in input.items
    ]
    child_results = await child_wf.aio_run_many(bulk_items)
    return {"processed": len(child_results), "results": child_results}

examples/typescript/guides/batch-processing/workflow.ts

const parentTask = hatchet.durableTask<BatchInput>({
  name: 'spawn-children',
  fn: async (input) => {
    // Fan out: start one child run per item ID.
    const childRuns = input.items.map((itemId) => childTask.run({ item_id: itemId }));
    // Wait for every child to finish before aggregating.
    const results = await Promise.all(childRuns);
    return { processed: results.length, results };
  },
});

examples/go/guides/batch-processing/main.go

parentTask := client.NewStandaloneDurableTask("spawn-children", func(ctx hatchet.DurableContext, input BatchInput) (map[string]interface{}, error) {
	// Build one bulk-run option per item so every item gets its own child run.
	opts := make([]hatchet.RunManyOpt, 0, len(input.Items))
	for _, id := range input.Items {
		opts = append(opts, hatchet.RunManyOpt{Input: ItemInput{ItemID: id}})
	}

	refs, err := childTask.RunMany(ctx, opts)
	if err != nil {
		return nil, err
	}

	// Collect each child's "process-item" output; the first error aborts the batch.
	outputs := make([]interface{}, 0, len(refs))
	for _, ref := range refs {
		res, err := ref.Result()
		if err != nil {
			return nil, err
		}
		var item map[string]interface{}
		if err := res.TaskOutput("process-item").Into(&item); err != nil {
			return nil, err
		}
		outputs = append(outputs, item)
	}

	return map[string]interface{}{"processed": len(outputs), "results": outputs}, nil
})

The Ruby SDK is in early access and may change. We'd love your feedback!

examples/ruby/guides/batch_processing/worker.rb

# Parent workflow: receives the whole batch.
BATCH_PARENT_WF = HATCHET.workflow(name: 'BatchParent')
# Child workflow: processes a single item.
BATCH_CHILD_WF = HATCHET.workflow(name: 'BatchChild')

BATCH_PARENT_WF.durable_task(:spawn_children) do |input, _ctx|
  # A missing 'items' key is treated as an empty batch.
  item_ids = input['items'] || []

  # Fan out: one bulk-run entry per item ID.
  bulk_items = item_ids.map do |item_id|
    BATCH_CHILD_WF.create_bulk_run_item(input: { 'item_id' => item_id })
  end

  child_results = BATCH_CHILD_WF.run_many(bulk_items)
  { 'processed' => child_results.size, 'results' => child_results }
end

Process each item

Each child task processes a single item independently. Failed items are retried according to the retry policy you configure on the child task; the example below does not set one, so it uses the SDK defaults.

examples/python/guides/batch_processing/worker.py

@child_wf.task()
async def process_item(input: ItemInput, ctx: Context) -> dict[str, str]:
    """Handle one batch item and report it as done.

    Returns a dict with "status" set to "done" and "item_id" copied from
    the input.
    """
    item_id = input.item_id
    return {"status": "done", "item_id": item_id}

Run the worker

Register and start the worker with both parent and child workflows. For large batches, use durable workflows so the parent does not hold a slot while waiting.

examples/python/guides/batch_processing/worker.py

# One worker registers both workflows, so it can run the parent task and the child tasks.
worker = hatchet.worker(
    "batch-worker",
    slots=20,  # presumably the number of runs this worker handles at once — confirm against the SDK docs
    workflows=[parent_wf, child_wf],
)
# Start the worker (presumably blocks while it waits for work — confirm).
worker.start()

⚠️

For batches with thousands of items, use durable workflows so the parent task doesn’t hold a worker slot while waiting for all children to complete. See Durable Workflows for details.

Common Patterns

Pattern	Description
Image processing	Resize, transcode, or analyze images in parallel across workers
Data enrichment	Enrich records by calling external APIs (geocoding, company info, email validation)
Report generation	Generate per-customer reports in parallel, then aggregate into a summary
Database migrations	Process and migrate records in batches with retry and progress tracking
Notification delivery	Send emails, SMS, or push notifications to a user list with rate limiting