9

I recently gifted myself a new dedicated server and I am trying to squeeze maximum performance out of it, for fun and learning.

I am trying to achieve maximum possible requests per second this server can handle and aiming for 500K requests/sec as mentioned here - http://lowlatencyweb.wordpress.com/2012/03/20/500000-requestssec-modern-http-servers-are-fast/

Server Details

Intel® Xeon® E3-1270 4 Cores (8 HT) x 3.4 GHz

RAM 24 GB DDR3 ECC

Hard-disk space 2,000 GB (2 x 2,000 SATA) RAID Software RAID 1

Lan 100mbps

OS Centos 6.3 64 bit

Nginx

I am able to reach only 35K requests/sec for a static txt file. I am running the benchmark on the same machine. I am aware of NIC limits and network overhead

ab -n100000 -c200 http://localhost/test.txt

Update - 165K requests/sec

I tried another benchmarking tool called wrk and it gave me 165K requests/sec. So cool!

Update 2 - 250K requests/sec

nginx.conf

#######################################################################
#
# This is the main Nginx configuration file.
#
# More information about the configuration options is available on
#   * the English wiki - http://wiki.nginx.org/Main
#   * the Russian documentation - http://sysoev.ru/nginx/
#
#######################################################################

#----------------------------------------------------------------------
# Main Module - directives that cover basic functionality
#
#   http://wiki.nginx.org/NginxHttpMainModule
#
#----------------------------------------------------------------------

# Run worker processes as the unprivileged nginx user.
user              nginx;
# One worker per logical CPU (4 cores x 2 hyper-threads = 8).
worker_processes  8;
# Raise the per-worker open-file-descriptor limit so the events module's
# worker_connections setting is not capped by the default RLIMIT_NOFILE.
worker_rlimit_nofile 262144;

error_log  /var/log/nginx/error.log;
#error_log  /var/log/nginx/error.log  notice;
#error_log  /var/log/nginx/error.log  info;

pid        /var/run/nginx.pid;


#----------------------------------------------------------------------
# Events Module
#
#   http://wiki.nginx.org/NginxHttpEventsModule
#
#----------------------------------------------------------------------

events {
    # Per-worker connection cap; theoretical total capacity is
    # worker_processes * worker_connections (8 * 16384 = 131072),
    # which stays under worker_rlimit_nofile (262144).
    worker_connections  16384;
    # Accept as many pending connections as possible on each wakeup
    # instead of one at a time -- helps under benchmark-style bursts.
    multi_accept on;
    # epoll is the efficient event notification method on Linux 2.6+.
    use epoll;
}


#----------------------------------------------------------------------
# HTTP Core Module
#
#   http://wiki.nginx.org/NginxHttpCoreModule
#
#----------------------------------------------------------------------

http {
    include       /etc/nginx/mime.types;
    index         index.php index.html index.htm;

    default_type  application/octet-stream;

    log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
                      '$status $body_bytes_sent "$http_referer" '
                      '"$http_user_agent" "$http_x_forwarded_for"';

    # NOTE(review): under heavy benchmarking this access log becomes a
    # significant disk writer; disable it when measuring raw req/sec.
    access_log  /var/log/nginx/access.log  main;

    # Zero-copy static file serving; tcp_nopush coalesces headers with the
    # first data packet (only effective together with sendfile).
    sendfile        on;
    tcp_nopush      on;
    tcp_nodelay     on;
    # Hide the nginx version string in headers and error pages.
    server_tokens   off;
    client_max_body_size    24m;
    client_body_buffer_size 128k;
    #keepalive_timeout  0;
    keepalive_timeout  65;

    # Cache open file descriptors. min_uses 10 means a file must be requested
    # 10 times (within the inactivity window) before it is cached -- consider
    # lowering this for single-file benchmark workloads.
    open_file_cache          max=1000;
    open_file_cache_min_uses 10;
    open_file_cache_errors   on;

    gzip              on;
    gzip_static       on;
    gzip_comp_level   3;
    gzip_disable      "MSIE [1-6]\.";
    gzip_http_version 1.1;
    gzip_vary         on;
    gzip_proxied      any;
    # FIX: "app/vnd.ms-fontobject" was a typo -- the IANA-registered MIME
    # type for EOT fonts is "application/vnd.ms-fontobject"; with the typo
    # those responses were never compressed.
    gzip_types text/plain text/css text/xml text/javascript text/x-component text/cache-manifest application/json application/javascript application/x-javascript application/xml application/rss+xml application/xml+rss application/xhtml+xml application/atom+xml application/wlwmanifest+xml application/x-font-ttf image/svg+xml image/x-icon font/opentype application/vnd.ms-fontobject;
    gzip_min_length   1000;

    # FastCGI response cache. NOTE(review): /tmp is commonly cleared on
    # reboot; a dedicated path such as /var/cache/nginx would be safer.
    fastcgi_cache_path   /tmp  levels=1:2
                           keys_zone=NAME:10m
                           inactive=5m;

    fastcgi_cache_key "$scheme$request_method$host$request_uri";


    server {
        listen       80;
        server_name  _;
        root         /var/www/html;

        #charset koi8-r;

        #access_log  logs/host.access.log  main;

        location / {
            try_files $uri $uri/ /index.php?$args;
        }

        error_page  404              /404.html;
        location = /404.html {
            root   /var/www/error;
        }

        # Redirect server error pages to the static page /50x.html.
        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            root   /var/www/error;
        }

        # Pass PHP scripts to the FastCGI server listening on 127.0.0.1:9000.
        location ~ \.php$ {
            fastcgi_pass   127.0.0.1:9000;
            fastcgi_index  index.php;
            fastcgi_param  SCRIPT_FILENAME  $document_root$fastcgi_script_name;
            include        fastcgi_params;

            # Bypass the cache for visitors who are logged in, have
            # commented, or hold a post password (detected via cookies).
            set $nocache "";
            if ($http_cookie ~ (comment_author_.*|wordpress_logged_in.*|wp-postpass_.*)) {
                set $nocache "Y";
            }
            # Be sure these stay above all other fastcgi_cache directives.
            fastcgi_no_cache     $nocache;
            fastcgi_cache_bypass $nocache;

            fastcgi_cache           NAME;
            fastcgi_cache_valid     200 302  10m;
            fastcgi_cache_valid     301      1h;
            fastcgi_cache_valid     any      1m;
            fastcgi_cache_min_uses  10;
            fastcgi_cache_use_stale error timeout invalid_header http_500;
            fastcgi_buffers         256 16k;
        }

        location = /favicon.ico {
            log_not_found off;
            access_log    off;
        }

        location = /robots.txt {
            allow all;
            log_not_found off;
            access_log    off;
        }

        # Deny all attempts to access hidden files such as .htaccess,
        # .htpasswd, .DS_Store (Mac).
        location ~ /\. {
            deny all;
            access_log    off;
            log_not_found off;
        }

        # Deny access to any files with a .php extension in the uploads
        # directory. FIX: the original pattern ".*.php$" left the dot
        # unescaped, so any character before "php" matched; "\." restricts
        # the match to a literal dot.
        location ~* ^/wp-content/uploads/.*\.php$ {
            deny all;
            access_log    off;
            log_not_found off;
        }

        location ~* \.(jpg|jpeg|gif|png|flv|mp3|mpg|mpeg|js|css|ico)$ {
            expires       max;
            log_not_found off;
        }
    }

}
Arpit Tambi
  • 481
  • 3
  • 5
  • 11
  • 2
    sounds like you wanna do some hacking or brute attack on someone's else web server :P –  Jul 17 '12 at 16:07
  • 3
    Serving more than 10-15K requests/second (reliably/continuously) is not a job for ONE server, it is a job for SEVERAL servers, ideally distributed across multiple network links (lest you saturate your uplink, or worse incur the wrath of bandwidth overage charges). – voretaq7 Jul 17 '12 at 16:16
  • 1
    @TimeToThine Haha, not really. Just trying to learn new things. – Arpit Tambi Jul 17 '12 at 16:23
  • @voretaq7 yes I am aware of this but this guy did achieve 1M requests/sec - http://lowlatencyweb.wordpress.com/2012/03/26/500000-requestssec-piffle-1000000-is-better/ – Arpit Tambi Jul 17 '12 at 16:25
  • 2
    REALISTIC requests or bullhit requests? I mean, seriously - yes, this is doable, for SMALL files, but not "real" site requests. – TomTom Jul 17 '12 at 16:43
  • @tomtom - correct this isn't a "real" scenario, it just theoretical limit testing – Jim B Jul 17 '12 at 16:44
  • yes its not a real scenario. – Arpit Tambi Jul 17 '12 at 16:49
  • 1
    @ArpitTambi with scant description of his test methodology, no table of results / demonstration of reproducibility, etc.? Those results are *less than worthless*. A theoretical benchmark on an ideal system talking only to itself (or a twin through a back-to-back copper connection) in a vacuum has effectively ***ZERO*** relation to practical results in the real world with databases, switches, routing, SSL (multiple protocols to multiple clients). – voretaq7 Jul 17 '12 at 16:52
  • 1
    @voretaq7 I agree with you that's why I opened this question to find if its really possible or not. So far I know that bottleneck is with system configs. – Arpit Tambi Jul 17 '12 at 17:14
  • 1
    In terms of performance `gzip_comp_level 1;` would be much better. – VBart Jul 18 '12 at 11:15

4 Answers4

34

Arpit, if you imagine that the absolutely smallest likely web response, even a static text file, is one Ethernet packet (~1,500 bytes), then 500,000 of them works out at around 750,000,000 bytes, or roughly 7.5 gigabit. So unless your server has very easily offloaded 10Gb NICs (and it doesn't — the one you've got is one hundred times slower) and you have set up the drivers and kernel to allow you to almost completely flood one of those links, plus the latencies of load-balancers, firewalls, routers and onward connections at that rate, then you'll never be able to hit that kind of performance — even with a single-packet response, which is unlikely. So ultimately 35k sounds not far off your limit.

sysadmin1138
  • 133,124
  • 18
  • 176
  • 300
Chopper3
  • 101,299
  • 9
  • 108
  • 239
  • actually you'd need multiple nics rather than a single nic due to port collisions at high transfer rates. – Jim B Jul 17 '12 at 16:14
  • 2
    ? I do solid 10Gb out of single ZXTM NICs – Chopper3 Jul 17 '12 at 16:16
  • It's not impossible, but I've had decidedly bad experiences (one particular regatta website comes to mind) where the request traffic essentially DoS'd the NIC – Jim B Jul 17 '12 at 16:21
  • 3
    "port collisions" - 1GbE and faster can't have collisions, it's fully bi-directional by design. – Chris S Jul 17 '12 at 16:21
  • I am testing on the same machine, I am not sure if http://localhost/text.txt goes through NICs. Does it? – Arpit Tambi Jul 17 '12 at 16:21
  • @ChrisS - so by definition how many simultaneous requests or responses can be handled by 1 nic? – Jim B Jul 17 '12 at 16:23
  • Even so Arpit, I hope you can see now that even 50k over your NIC is around the limit. – Chopper3 Jul 17 '12 at 16:25
  • @Jim NIC can send and receive 1 thing at a time, the same number that can be transmitted to them on the wire, and the same number as their PCIe interface. A switching buffer overrun is not the same an an old school network collision; nor is it the same as a buffer overflow. – Chris S Jul 17 '12 at 16:25
  • So how is this guy achieving 1M requests/sec - http://lowlatencyweb.wordpress.com/2012/03/26/500000-requestssec-piffle-1000000-is-better/ – Arpit Tambi Jul 17 '12 at 16:27
  • 4
    @ArpitTambi: I recommend reading Chopper's answer. Your patience will be rewarded. – Scott Pack Jul 17 '12 at 16:35
  • I am benchmarking on the same machine, so its unclear to me why would network limits apply? – Arpit Tambi Jul 17 '12 at 16:36
  • @ChrisS Exactly - so in high volume situations the switch buffer gets over run AND packet get dropped. I think you are correct in that it's not an old school collision since the wire isn't shared. it also depends on how the system is wired up (eg switch in front). Note that in the OP there is no NIC issue since it's localhost. – Jim B Jul 17 '12 at 16:39
  • 2
    @ScottPack That answer is perfect for a real life use case, I am testing on the same machine so bottleneck cannot be network. Its either CPU, Memory or IO. – Arpit Tambi Jul 17 '12 at 16:46
  • Or much more likely code config, in situations like that it's always down to very specific config settings – Chopper3 Jul 17 '12 at 16:49
  • 1
    The post you link to doesn't mention the network card, but a 100mb card seems impossible. He also had 2 CPUs. – Zoredache Jul 17 '12 at 16:52
  • 6
    @ArpitTambi [`You should only ask practical, answerable questions based on actual problems that you face`](http://serverfault.com/faq#dontask) <- Part of a question being Practical is it being related to *Real Life Use Cases* -- There are lots of ways to rig a web server to be über fast. Many of them break (or become very fragile) in the real world, and you're probably close to your practical limit.. Aside hunting and killing obvious bottlenecks (see Jeff Ferland's answer) Chopper3 is *telling it like it is* -- Sometimes the correct answer is not what you want to hear. – voretaq7 Jul 17 '12 at 16:57
  • Good answer, but there's no need to make a joke out of the OP's name. Flagged as offensive. –  Dec 20 '13 at 12:07
  • 1
    @AndrewH - it was a genuine mistake I'd not spotted until just now - unfortunately I can't edit the comments after this long - I sincerely apologise to Armit. – Chopper3 Dec 20 '13 at 13:16
  • 1
    @Chopper3 didn't seem like your style! :) –  Dec 20 '13 at 16:49
13

First of all, you should use a better benchmarking tool. With `ab`, you're actually benchmarking `ab` itself, not nginx.

VBart
  • 8,309
  • 3
  • 25
  • 26
8

Let's identify the bottleneck. Since you're on the same machine, we can assume it's either CPU or disk activity. For the one text file, it shouldn't be disk activity, but at 35k connections, you may be generating 35MB of logging every second as well.

The examples you're showing don't run access logging, only errors. Your config, however, has much more going on, the logging in particular:

log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
                  '$status $body_bytes_sent "$http_referer" '
                  '"$http_user_agent" "$http_x_forwarded_for"';

access_log  /var/log/nginx/access.log  main;

Start by disabling that logging and then figure out where you're getting hung up next. Also consider that running the test client on the same machine can have a notable impact on the server daemon. Hyperthreading can also turn out to be harmful sometimes, so explore whether your load works better with it on or off.

Jeff Ferland
  • 20,547
  • 2
  • 62
  • 85
  • Thanks I disabled access logs totally. But same result. This time I carefully noted that disk activity remains zero. 20GB of ram is free and server load is less than 0.5. Looks like bottleneck is server configs. – Arpit Tambi Jul 17 '12 at 17:09
1

If you are just after the numbers (i.e. there's no real use case behind this test), make `ab` use the keep-alive feature of HTTP: execute a number of requests over an already-open TCP connection.

pQd
  • 29,981
  • 6
  • 66
  • 109
  • I am testing on the same machine, does http://localhost/test.txt goes through NICs? I will try your suggest and post the results. – Arpit Tambi Jul 17 '12 at 16:28
  • You are not testing on the same machine and accessing via localhost doesn't go through the nics. Plus accessing from the host system means you are wasting resources on making the request in addition to serving the response. – Zoredache Jul 17 '12 at 16:54
  • 2
    TCP Keepalive has nothing to do with NICs - It's a function of the TCP/IP (transport) layer, not the physical layer, and it affects connections to `localhost` as much as it would a remote system. For proper benchmarking I always suggest leaving keepalive off (benchmarks should be pessimistic and assume the **worst**-case scenario, where we have to open a new connection for every request), but having all your connections use keepalive can certainly pump up your benchmark numbers. – voretaq7 Jul 17 '12 at 17:03
  • @voretaq7 - as you can see it's not a real-life scenario at all. – pQd Jul 17 '12 at 22:32
  • 1
    @pQd ...which means the whole question is *Off Topic* and I'm being nice by not closing it, but more importantly when someone comes to the site with a limited amount of Clue it behooves us to impart sufficient Clue upon that person to keep them from shooting their own foot off. Arpit does not seem to quite grasp the point of benchmarking, so telling him how to inflate the benchmark numbers without regard for how that will work out in the Real World is irresponsible IMHO. – voretaq7 Jul 18 '12 at 14:58
  • 1
    @voretaq7 - i'm sorry but i don't agree with you and i dislike the close-it-down rage that seems to be on the rise. i think that even trying to achieve such a strange objective can lead to some learning about inner workings of the operating system / particular server. – pQd Jul 18 '12 at 15:30
  • @pQd You're free to disagree. In fact it's really your only option as you're not going to change my opinion on this matter :-) If you would like to discuss this disagreement further you're welcome to pop in to [The Comms Room](http://chat.stackexchange.com/rooms/127/the-comms-room) or [open a discussion on Meta](http://meta.serverfault.com/questions/ask). – voretaq7 Jul 18 '12 at 16:20