I have an Express app that’s already deployed on AWS. We ran into a problem where, once a certain number of concurrent users were hitting a Python microservice that our Express app calls internally, the app crashed. The whole EC2 instance went down, not just the Node or Python container. Our Node app used a cluster of workers, and the Express app ran inside each worker. This was the code before I got rid of the workers:
require('dotenv').config(); // Environment variables.
const cluster = require('cluster');
const { availableParallelism } = require('os');

// If the current process is the master process
if (cluster.isMaster) {
  // Get the number of available CPUs
  const numCPUs = availableParallelism();
  // Create workers according to the number of CPUs
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }
  // Handle exit events of workers
  cluster.on('exit', (worker, code, signal) => {
    console.log(`Worker ${worker.process.pid} exited with code: ${code} and signal: ${signal}`);
    console.log('Creating a new worker...');
    cluster.fork(); // Create a new worker to replace the exited one
  });
  // Log the number of workers
  const numWorkers = Object.keys(cluster.workers).length;
  console.log(`Number of workers: ${numWorkers}`);
} else {
  // If the current process is a worker process
  const morgan = require('morgan');
  const express = require('express');
  const session = require('express-session');
  const compression = require('compression');
  const cors = require('cors');
  const hpp = require('hpp');
  const lusca = require('lusca');
  const helmet = require('helmet');
  const rateLimit = require('express-rate-limit');
  const config = require('./src/config');

  // ==== Route Imports ==== //
  // Health checker
  const healthRouter = require('./src/routes/healthCheck');

  // Initialize the app.
  const app = express();

  // Configure session and cookies.
  app.use(session(config.sessionConfig));

  // Global variables.
  app.set('port', process.env.PORT || 3001);

  // Global configurations.
  app.use(morgan('dev')); // Dev displays info in a semi-condensed manner.
  app.use(cors(config.corsOptions)); // Implement and configure CORS.
  app.use(express.urlencoded({ extended: false })); // Decode form data (application/x-www-form-urlencoded).
  app.use(express.json()); // Parse JSON formatted request bodies.
  app.use(hpp()); // HTTP parameter pollution protection.
  app.use(rateLimit(config.rateLimitConfig)); // Apply rate limit to all requests.
  app.use(lusca(config.luscaOptions)); // Apply Lusca configurations.
  app.use(helmet(config.helmetOptions)); // Apply Helmet configurations.
  app.use(compression(config.compressionOptions.default)); // Response compression.

  // ==== Routes ==== //
  // Enable proxy
  if (process.env.ENABLE_TRUST_PROXY) {
    app.enable('trust proxy');
  }

  // Start the server.
  app.listen(app.get('port'), () => {
    console.log(`Worker ${process.pid} running on port: ${app.get('port')}`);
  });
}
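For reference, getting rid of the clustering layer basically means running only the worker branch as a single process. A minimal sketch of what that entry point could look like, assuming the same middleware stack and routes as above (this is not the exact file we ended up with):

require('dotenv').config(); // Environment variables.
const express = require('express');

const app = express();
app.set('port', process.env.PORT || 3001);

// The same middleware stack as the worker branch above (session, morgan, cors,
// body parsers, hpp, express-rate-limit, lusca, helmet, compression) and the
// route mounting would be registered here, unchanged; omitted for brevity.

app.listen(app.get('port'), () => {
  console.log(`Process ${process.pid} running on port: ${app.get('port')}`);
});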
What I don’t understand is why the issue disappeared once we got rid of the workers: the app simply stopped crashing. After reading the documentation, I gathered that workers are inefficient for async I/O operations and that it’s better to let Node handle those tasks, since it’s optimized for them. Does this imply that the requests were triggering blocking operations? I’m still puzzled by this part.
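To make the blocking question concrete, here is a minimal sketch of what a non-blocking call to an internal service looks like from an Express route. The /report path, the PYTHON_SERVICE_URL variable, and the use of HTTP are just assumptions for the example, not my actual code:

const express = require('express');
const app = express();

// Hypothetical route that forwards work to an internal Python microservice.
// The await only suspends this particular request; the event loop keeps
// serving other requests, so the I/O itself does not block the process.
app.get('/report', async (req, res, next) => {
  try {
    const response = await fetch(`${process.env.PYTHON_SERVICE_URL}/report`); // global fetch (Node 18+)
    const data = await response.json();
    res.json(data);
  } catch (err) {
    next(err); // Hand upstream failures to the error handler instead of crashing
  }
});

app.listen(3001);

If our calls to the Python service really are non-blocking like this, I would expect a single event loop per worker to cope with them, which is part of why the crashes under clustering confuse me.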