I am developing a web application using Node.js, Express, Puppeteer, and MongoDB. My goal is to create a scraper that sends real-time updates to connected clients via Server-Sent Events (SSE). However, I am encountering an issue where the scraper script does not correctly recognize the connected clients, even though the clients are connected to the server.
Project Structure:
- server.js: Manages the Express server, client connections, and SSE.
- notifyClients.js: Handles client connections using a Map object.
- scraper.js: Runs Puppeteer to scrape data and notify clients.
notifyClients.js:
const clientResponses = new Map();
const loadClients = () => {
const clients = Array.from(clientResponses.keys());
console.log(`Loaded clients: ${JSON.stringify(clients)}`);
return clients;
};
const notifyClients = (message) => {
const clients = loadClients();
console.log('Notifying clients');
clients.forEach((clientId) => {
const client = clientResponses.get(clientId);
if (client) {
try {
console.log(`Sending message to client ${clientId}: ${JSON.stringify(message)}`);
client.write(`data: ${JSON.stringify(message)}nn`);
} catch (error) {
console.error(`Failed to send message to client ${clientId}:`, error);
}
}
});
};
const addClient = (res) => {
const clientId = Date.now().toString(); // Generate a unique ID for the client
clientResponses.set(clientId, res);
console.log(`Client connected with ID ${clientId}, total clients: ${clientResponses.size}`);
res.on('close', () => {
clientResponses.delete(clientId);
console.log(`Client with ID ${clientId} disconnected, total clients: ${clientResponses.size}`);
});
};
const getClientCount = () => {
const count = clientResponses.size;
console.log(`Getting client count: ${count}`);
return count;
};
module.exports = {
notifyClients,
addClient,
getClientCount,
clientResponses // Export clientResponses for use in server.js
};
server.js
const express = require('express');
const mongoose = require('mongoose');
const cors = require('cors');
const bcrypt = require('bcryptjs');
const jwt = require('jsonwebtoken');
const { v4: uuidv4 } = require('uuid');
require('dotenv').config();
const path = require('path');
const { addClient, notifyClients, getClientCount, clientResponses } = require('./src/hooks/notifyClients');
const app = express();
const port = process.env.PORT || 5000;
const SECRET_KEY = process.env.SECRET_KEY || 'your_secret_key';
app.use(cors({
origin: 'http://localhost:3000', // Frontend URL
credentials: true,
}));
app.use(express.json());
mongoose.connect(process.env.MONGODB_URI, {
useNewUrlParser: true,
useUnifiedTopology: true,
})
.then(() => console.log('MongoDB Connected'))
.catch((error) => console.error('MongoDB connection error:', error));
// Other schema definitions and middlewares
// SSE endpoint to send updates to the client
app.get('/api/products/updates', (req, res) => {
console.log('SSE endpoint hit');
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
res.flushHeaders();
addClient(res); // Use addClient to manage the client connections
});
app.get('/api/check-clients', (req, res) => {
const clients = Array.from(clientResponses.keys());
res.json({
clientCount: clients.length,
clients: clients.map((clientId, index) => `Client ${index + 1}: ${clientId}`)
});
});
// Other routes and server startup code
app.listen(port, () => {
console.log(`Server is running on port ${port}`);
});
scraper.js
const puppeteer = require('puppeteer');
const mongoose = require('mongoose');
const dotenv = require('dotenv');
const fs = require('fs');
const path = require('path');
const { notifyClients, getClientCount, clients } = require('../hooks/notifyClients');
const axios = require('axios');
dotenv.config();
// MongoDB connection and other configurations
// Add a helper function to wait until clients are connected
const waitForClients = async (timeout = 30000, interval = 1000) => {
const startTime = Date.now();
while (Date.now() - startTime < timeout) {
const clientCount = getClientCount();
console.log(`Waiting for clients. Current client count: ${clientCount}`);
console.log(`Current clients array: ${JSON.stringify(clients)}`); // Log the current clients array for debugging
// Perform the HTTP request to check client count
try {
const response = await axios.get('http://localhost:5000/api/check-clients');
console.log(`HTTP request response: ${JSON.stringify(response.data)}`);
} catch (error) {
console.error(`HTTP request error: ${error.message}`);
}
if (clientCount > 0) {
return true;
}
await new Promise(resolve => setTimeout(resolve, interval));
}
console.log('No clients connected within the timeout period.');
return false;
};
(async () => {
const browser = await puppeteer.launch();
try {
// Other scraping logic
console.log('Waiting for clients to connect...');
const clientsConnected = await waitForClients();
if (!clientsConnected) {
console.log('No clients connected within the timeout period.');
return;
}
// Proceed with scraping and notifying clients
} catch (error) {
console.error('An error occurred:', error);
} finally {
await browser.close();
mongoose.connection.close();
}
})();
Logs:
Scraper Logs:
Waiting for clients. Current client count: 0
Current clients array: undefined
HTTP request response: {"clientCount":2,"clients":["Client 1: 1719764974304","Client 2: 1719764991201"]}
Getting client count: 0
Server Logs:
Server is running on port 5000
SSE endpoint hit
Client connected with ID 1719764974304, total clients: 1
MongoDB Connected
Client Logs:
Setting up EventSource connection
EventSource connection established
What I Tried:
- Verified that clients are correctly added to clientResponses in notifyClients.js.
- Added logging in server.js and scraper.js to debug the client connection status.
- Ensured that the clientResponses map is correctly imported and used in both server.js and scraper.js.
Expected Behavior:
The scraper should recognize the connected clients and send updates to them.
Actual Behavior:
The scraper does not recognize any connected clients and logs a client count of 0.
Question:
How can I ensure that the scraper script correctly recognizes and interacts with the connected clients managed by the server?
Botond Till is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.