Node.js runs JavaScript on a single thread — the event loop. For most I/O-bound work (database queries, HTTP calls, file reads) this is fine: the thread waits for I/O while other requests are processed. But CPU-intensive work (image processing, cryptography, complex data transformations, machine learning inference) blocks the event loop and prevents all other requests from being served. Worker Threads and the cluster module are the two complementary solutions: Worker Threads parallelize CPU-bound tasks within a single process; Cluster distributes incoming connections across multiple Node.js processes to utilise all CPU cores.
Worker Threads vs Cluster
| Feature | Worker Threads | Cluster |
|---|---|---|
| Use case | CPU-intensive computation within a request | Scale I/O-bound servers across CPU cores |
| Parallelism | Multiple threads within one process | Multiple processes, one per core |
| Memory | Shared memory via SharedArrayBuffer | Separate memory — communicate via IPC |
| Port sharing | No (still one server) | Yes — all workers share the same port |
| Crash isolation | Worker crash = process crash | Worker crash = respawn, primary survives |
| State sharing | Shared memory possible | No shared state (use Redis for sessions) |
Worker threads communicate with the main thread via message passing (postMessage / on('message')) or via SharedArrayBuffer for high-performance shared memory, with Atomics for synchronisation. The overhead of creating a worker (~20–50 ms) makes workers unsuitable for small tasks — use a worker pool (keep workers alive and send them work) rather than creating a new worker per request.

For production worker pool management, consider the Piscina npm package. It handles worker lifecycle (creation, destruction, reuse), task queueing, back-pressure, error propagation, and statistics — everything you would otherwise build yourself managing workers manually. piscina.run(data) returns a Promise that resolves when a worker completes the task. For an Express route that needs CPU-intensive work: const result = await piscina.run({ imageBuffer }).

Worker Threads do not allow multiple servers to share a port; port sharing across processes otherwise relies on OS-level mechanisms such as the SO_REUSEPORT socket option, which does not guarantee round-robin distribution. Use Cluster with round-robin scheduling by setting cluster.schedulingPolicy = cluster.SCHED_RR before the first worker is forked. In production, prefer running multiple Node.js processes managed by PM2 or Docker (each on its own port) behind a load balancer over the built-in Cluster module.

Complete Worker Thread and Cluster Examples
// ── Worker Threads — CPU-bound work ──────────────────────────────────────
// workers/image-processor.js — runs in worker thread
const { workerData, parentPort } = require('worker_threads');
const sharp = require('sharp');

/**
 * Resize an image and re-encode it as WebP.
 *
 * @param {Buffer} buffer - raw input image bytes
 * @param {{ width: number, height: number, quality?: number }} options
 * @returns {Promise<Buffer>} the encoded WebP image
 */
async function processImage(buffer, options) {
  return sharp(buffer)
    .resize(options.width, options.height)
    .webp({ quality: options.quality ?? 80 })
    .toBuffer();
}

// Worker receives work as messages from the parent thread.
parentPort.on('message', async ({ taskId, buffer, options }) => {
  try {
    const result = await processImage(Buffer.from(buffer), options);
    // A Node Buffer is often a *view* into a larger (possibly pooled)
    // ArrayBuffer. Transferring result.buffer directly would (a) detach that
    // whole ArrayBuffer — corrupting any other Buffers sharing it in this
    // thread — and (b) lose the view's byteOffset, so the receiver's
    // Buffer.from(arrayBuffer) would wrap the wrong bytes. Extract an
    // ArrayBuffer containing exactly the image bytes before transferring.
    const owned =
      result.byteOffset === 0 && result.byteLength === result.buffer.byteLength
        ? result.buffer // Buffer owns its entire ArrayBuffer — true zero-copy
        : result.buffer.slice(result.byteOffset, result.byteOffset + result.byteLength);
    parentPort.postMessage({ taskId, result: owned, error: null },
      [owned]); // transfer ownership (no copy on the postMessage itself)
  } catch (err) {
    parentPort.postMessage({ taskId, result: null, error: err.message });
  }
});
// ── Worker Pool — reuse workers across requests ────────────────────────────
// utils/worker-pool.js
const { Worker } = require('worker_threads');
const os = require('os');
/**
 * Fixed-size pool of Worker Threads that amortises worker startup cost.
 * Tasks are queued and dispatched to idle workers; each task resolves a
 * Promise returned by run().
 */
class WorkerPool {
  /**
   * @param {string} workerScript - path to the worker file (or inline source
   *   when workerOptions.eval is true)
   * @param {number} [poolSize] - number of workers; defaults to one per core
   * @param {object} [workerOptions] - extra options forwarded to `new Worker()`
   */
  constructor(workerScript, poolSize = os.cpus().length, workerOptions = {}) {
    this.workerScript = workerScript;
    this.poolSize = poolSize;
    this.workerOptions = workerOptions;
    this.workers = [];
    this.queue = [];
    this.callbacks = new Map(); // taskId -> { resolve, reject }
    this.taskId = 0;
    for (let i = 0; i < poolSize; i++) {
      this._createWorker();
    }
  }

  /** Spawn one worker and wire up its message/error handling. */
  _createWorker() {
    const worker = new Worker(this.workerScript, this.workerOptions);
    worker.idle = true;
    worker.currentTaskId = null; // taskId of the in-flight task, if any
    worker.on('message', ({ taskId, result, error }) => {
      const callback = this.callbacks.get(taskId);
      this.callbacks.delete(taskId);
      worker.idle = true;
      worker.currentTaskId = null;
      if (callback) {
        if (error) callback.reject(new Error(error));
        else callback.resolve(result);
      }
      this._processQueue(); // pick up next queued task
    });
    worker.on('error', err => {
      // An 'error' event kills the worker. Reject its in-flight task
      // (otherwise the caller's Promise would hang forever), remove the dead
      // worker from the pool, and spawn a replacement.
      const taskId = worker.currentTaskId;
      if (taskId !== null && this.callbacks.has(taskId)) {
        this.callbacks.get(taskId).reject(err);
        this.callbacks.delete(taskId);
      }
      this.workers = this.workers.filter(w => w !== worker);
      this._createWorker();
      this._processQueue();
    });
    this.workers.push(worker);
  }

  /**
   * Submit a task to the pool.
   * @param {object} data - payload spread into the worker message
   * @param {Array} [transferList] - ArrayBuffers to transfer (zero-copy)
   * @returns {Promise<*>} resolves with the worker's result
   */
  run(data, transferList = []) {
    return new Promise((resolve, reject) => {
      const taskId = ++this.taskId;
      this.callbacks.set(taskId, { resolve, reject });
      this.queue.push({ taskId, data, transferList });
      this._processQueue();
    });
  }

  /** Dispatch the next queued task to an idle worker, if both exist. */
  _processQueue() {
    if (this.queue.length === 0) return;
    const worker = this.workers.find(w => w.idle);
    if (!worker) return;
    const { taskId, data, transferList } = this.queue.shift();
    worker.idle = false;
    worker.currentTaskId = taskId;
    worker.postMessage({ taskId, ...data }, transferList);
  }

  /** Terminate all workers and reject anything still pending or queued. */
  destroy() {
    const err = new Error('WorkerPool destroyed');
    for (const { reject } of this.callbacks.values()) reject(err);
    this.callbacks.clear();
    this.queue.length = 0;
    this.workers.forEach(w => w.terminate());
    this.workers = [];
  }
}
// Usage in Express:
const imagePool = new WorkerPool('./workers/image-processor.js');

app.post('/api/v1/tasks/:id/thumbnail', upload.single('image'), async (req, res, next) => {
  try {
    const buffer = req.file.buffer;
    // req.file.buffer is typically a *view* into Node's shared Buffer pool.
    // Transferring buffer.buffer directly would detach the entire pooled
    // ArrayBuffer (corrupting unrelated Buffers in this process) and would
    // hand the worker surplus bytes at the wrong offset. Extract an
    // ArrayBuffer containing exactly the uploaded bytes first.
    const owned =
      buffer.byteOffset === 0 && buffer.byteLength === buffer.buffer.byteLength
        ? buffer.buffer // Buffer owns its whole ArrayBuffer — true zero-copy
        : buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
    const result = await imagePool.run(
      { buffer: owned, options: { width: 200, height: 200 } },
      [owned] // transfer ownership to the worker
    );
    res.set('Content-Type', 'image/webp').send(Buffer.from(result));
  } catch (err) {
    // Express 4 does not catch errors from async handlers — forward them
    // explicitly or the request hangs and the rejection goes unhandled.
    next(err);
  }
});
// ── Cluster — scale across CPU cores ─────────────────────────────────────
// src/cluster.js
const cluster = require('cluster');
const os = require('os');
const process = require('process');

if (cluster.isPrimary) {
  // Primary: fork one worker per core and supervise them. The primary holds
  // the listening socket and distributes connections to workers.
  const numCPUs = os.cpus().length;
  console.log(`Primary ${process.pid}: starting ${numCPUs} workers`);
  // Must be assigned before the first fork to take effect.
  cluster.schedulingPolicy = cluster.SCHED_RR; // round-robin load balancing
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }
  cluster.on('exit', (worker, code, signal) => {
    // Only replace crashed workers. A worker that exits after disconnect()
    // was shut down deliberately (e.g. graceful shutdown / rolling restart)
    // and must not be respawned, or shutdown would never complete.
    if (worker.exitedAfterDisconnect) return;
    console.warn(`Worker ${worker.process.pid} died (${signal || code}). Restarting...`);
    cluster.fork(); // auto-respawn
  });
  cluster.on('online', worker => {
    console.log(`Worker ${worker.process.pid} online`);
  });
} else {
  // Worker processes run the Express app; all workers share the same port.
  const app = require('./app');
  app.listen(process.env.PORT || 3000, () => {
    console.log(`Worker ${process.pid} listening`);
  });
}
How It Works
Step 1 — Worker Threads Have Separate V8 Isolates
Each worker thread gets its own V8 instance, event loop, and JavaScript heap. Code in the main thread and worker threads runs truly in parallel on separate OS threads, sidestepping the single-threaded nature of the event loop (JavaScript's rough analogue of a GIL). The price is that you cannot directly share JavaScript objects — data must be serialised via postMessage (the structured clone algorithm, like JSON but supporting more types) or transferred as an ArrayBuffer (zero-copy ownership transfer).
Step 2 — Zero-Copy Transfer via ArrayBuffer
Passing large binary data via postMessage would normally copy it — expensive for large buffers. Including the buffer in the transferList parameter transfers ownership instead of copying: the buffer becomes unavailable in the sender and accessible in the receiver. This makes image, audio, and binary data processing between threads efficient — no matter the buffer size, the transfer is O(1).
Step 3 — Cluster Forks Processes, Not Threads
The Cluster primary process forks child processes with cluster.fork(). Each child runs the same script but with cluster.isWorker === true. The primary holds the listening socket and distributes incoming connections to workers using the scheduling policy. Each worker is a full Node.js process — separate heap, separate event loop — providing true CPU utilisation across all cores.
Step 4 — Worker Crash Isolation in Cluster
The cluster.on('exit') handler detects when a worker process dies and calls cluster.fork() to spawn a replacement. This provides self-healing: an unhandled exception in one worker crashes that worker but does not affect the primary or other workers. The primary process handles zero application traffic (only connection distribution), making it very unlikely to crash.
Step 5 — Worker Pool Amortises Worker Startup Cost
Creating a Worker Thread takes 20–50ms — unacceptable for request latency if done per-request. A pool creates workers once at startup and reuses them across many requests. The pool maintains an idle worker list and a task queue. When a worker completes a task, it picks up the next queued item. This provides constant-time task dispatch and bounded memory (pool size × worker memory overhead).
Quick Reference
| Task | Code |
|---|---|
| Create worker | new Worker('./worker.js', { workerData: data }) |
| Send message to worker | worker.postMessage(data, [transferList]) |
| Receive in worker | parentPort.on('message', handler) |
| Send back from worker | parentPort.postMessage(result) |
| Zero-copy transfer | postMessage({ buf: buffer.buffer }, [buffer.buffer]) |
| Worker pool | Use piscina npm package |
| Fork cluster workers | if (cluster.isPrimary) { for (...) cluster.fork(); } |
| Round-robin scheduling | cluster.schedulingPolicy = cluster.SCHED_RR |
| Auto-respawn on crash | cluster.on('exit', () => cluster.fork()) |