I have NGINX set up on a Windows Server (let’s call it server 1) load-balancing two APIs hosted on two other Windows Servers (let’s call them servers 2 and 3) through IIS.
When active connections climb past 60, NGINX starts throwing random Bad Gateway responses with this message in the error log: “upstream timed out (10060: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond) while reading response header from upstream”.
Both servers 2 and 3 look perfectly healthy when the errors start. If I skip NGINX and hit them directly, even under heavy load, I have no problems at all, which leads me to believe I’m missing something in the config file.
Here is my conf file:
worker_processes auto;

events {
    worker_connections 3000;
}

http {
    include mime.types;
    default_type application/octet-stream;
    sendfile on;
    access_log off;
    keepalive_timeout 65;
    #gzip on;

    upstream myapp {
        server (server 2 here);
        server (server 3 here);
    }

    server {
        listen 8080;

        location / {
            proxy_pass http://myapp;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
            proxy_connect_timeout 5s;
            proxy_read_timeout 5s;
            proxy_send_timeout 5s;
            proxy_buffering on;
            proxy_buffers 8 16k;
            proxy_buffer_size 32k;
        }

        error_page 500 502 503 504 /50x.html;
        location = /50x.html {
            root html;
        }

        location /nginx_status {
            stub_status;
            allow 127.0.0.1; # Only allow access from localhost
            deny all;        # Deny access from all other IPs
        }
    }
}
I tried increasing worker_connections and raising all the proxy timeouts to 30s, but nothing seems to work.
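To be concrete, that meant bumping the three proxy timeout directives shown above to something like:

proxy_connect_timeout 30s;
proxy_read_timeout 30s;
proxy_send_timeout 30s;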
I was able to reproduce it on my local machine by serving two HTML files with a Python server on ports 8081 and 8082 to mimic the Windows servers, while running NGINX on 8080. I used Apache JMeter for the load testing and got the error with 400 users looping for 30 seconds.
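The mock upstreams were just Python’s built-in HTTP server, started along these lines (one instance per directory containing an HTML file):

python -m http.server 8081
python -m http.server 8082

Here are the changes I made locally, which also didn’t make any difference: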
worker_processes auto;
worker_rlimit_nofile 10000;

events {
    worker_connections 10000;
    use poll;
    multi_accept on;
}

http {
    include mime.types;
    default_type application/octet-stream;
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    # error_log logs/error.log crit;
    keepalive_timeout 65;
    keepalive_requests 100000;
    gzip on;

    upstream myapp {
        server localhost:8081;
        server localhost:8082;
    }

    server {
        listen 8080;

        location / {
            proxy_pass http://myapp;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
            # proxy_connect_timeout 5s;
            # proxy_read_timeout 5s;
            # proxy_send_timeout 5s;
            proxy_buffer_size 16k;
            proxy_buffers 4 32k;
            proxy_busy_buffers_size 64k;
            proxy_temp_file_write_size 64k;
            add_header Cache-Control "no-store, no-cache, must-revalidate, proxy-revalidate, max-age=0";
            add_header Pragma "no-cache";
            expires -1;
        }

        error_page 500 502 503 504 /50x.html;
        location = /50x.html {
            root html;
        }

        location /nginx_status {
            stub_status;
            allow 127.0.0.1;
            deny all;
        }
    }
}