I'm implementing an HTTP server using epoll
in C. The server parses the request and responds accordingly.
Everything works when I issue simple curl
requests (ie. no concurrency).
However when using concurrent clients and issue multiple requests, even though ab
reports that all responses were successful (and in fact, they are, since the reported bytes transferred seem correct), the server keeps the last 1 or 2 client FDs open, and the loop to epoll_wait
indefinitely reports EPOLLIN
to the last client fd. If I try to recv
in that FD I always get 0
which means the client closed the connection.
I'm curious as to why this happens only some times and not always and why only on the last remaining client fd.
The code is located at https://github.com/agis-/rea/tree/epoll but I'm also pasting it here for completeness (I'll give specific steps to reproduce later on):
rea.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <err.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <signal.h>
#include <netdb.h>
#include <sys/epoll.h>
#include "rea.h"
fd_set rfds;
fd_set wfds;
Server *server;
Client *clients[MAX_CLIENTS];
int epfd;
int cheat;
int responds;
int requests;
int main(int argc, char *argv[])
{
int status, i, fd, added;
Client *c;
if (argc != 2) {
fprintf(stderr, "Usage: %s <port>\n", argv[0]);
exit(EXIT_FAILURE);
}
setupAndListen(argv[1]);
// epoll
//
struct epoll_event ev;
struct epoll_event evs[MAX_EP_EVENTS];
int nfds;
epfd = epoll_create1(0);
if (epfd == -1) {
err(EXIT_FAILURE, "epoll_create error");
}
//deleteme
int maxclients =0;
ev.events = EPOLLIN;
ev.data.ptr = server;
status = epoll_ctl(epfd, EPOLL_CTL_ADD, server->fd, &ev);
if (status == -1) {
printf("%d\n", server->fd);
err(EXIT_FAILURE, "epoll_ctl error");
}
while(1) {
nfds = epoll_wait(epfd, evs, MAX_EP_EVENTS, -1);
if (nfds == -1) {
err(EXIT_FAILURE, "epoll_wait error");
}
printf("nfds: %d | on_message_complete: %d | responds: %d | request reads: %d\n", nfds, cheat, responds, requests);
for (i = 0; i < nfds; i++) {
if (((Server *)evs[i].data.ptr)->fd == server->fd) {
fd = accept4(server->fd, server->addr->ai_addr,
&(server->addr->ai_addrlen), SOCK_NONBLOCK);
if (fd < 0) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
continue;
}
err(EXIT_FAILURE, "Socket accept error");
}
added = 0;
for (i = 0; i < MAX_CLIENTS; i++) {
if (clients[i] == 0) {
clients[i] = makeClient(fd);
added = 1;
break;
}
}
if (!added) {
fprintf(stderr, "Could not find room for client fd: %d\n", fd);
}
ev.events = EPOLLIN;
ev.data.ptr = clients[i];
status = epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
if (status == -1) {
printf("client fd: %d\n", fd);
err(EXIT_FAILURE, "epoll_ctl error");
}
printf("Accepted connection! (fd: %d)(%p)\n", clients[i]->fd, clients[i]);
printf("clients after accept: ");
for (i=0;i < MAX_CLIENTS; i++) {
if (clients[i]) {
printf("%d(%p) ", clients[i]->fd, clients[i]);
}
}
printf("\n");
} else {
c = (Client *)evs[i].data.ptr;
if ((evs[i].events & EPOLLIN) && c->cstate == CONNECTED) {
c->ev_delivered += 1;
if (c->ev_delivered > 1) {
printf("2 EVENTS!\n");
sleep(1);
//exit(1);
}
readRequest(c);
} else if ((evs[i].events & EPOLLOUT)) {
respond(c);
} else {
if (evs[i].events & EPOLLIN) {
printf("foo\n");
printf("client EPOLLIN %d(%p) %d\n", c->fd, c);
}
if (evs[i].events & EPOLLOUT) {
printf("client EPOLLOUT %d(%p)\n", c->fd, c);
}
if (evs[i].events & EPOLLERR) {
printf("client EPOLLERR %d(%p)\n", c->fd, c);
}
if (evs[i].events & EPOLLHUP) {
printf("client EPOLLHUP %d(%p)\n", c->fd, c);
}
c->ev_delivered += 1;
if (c->ev_delivered > 1) {
printf("2 EVENTS!\n");
sleep(1);
//exit(1);
}
for (i=0; i<MAX_CLIENTS;i++) {
if (clients[i]) {
maxclients += 1;
}
}
printf("nfds: %d | on_message_complete: %d | responds: %d | request reads: %d | maxclients: %d | c->replied: %d\n", nfds, cheat, responds, requests, maxclients, c->replied);
printf("clients after accept: ");
for (i=0;i < MAX_CLIENTS; i++) {
if (clients[i]) {
printf("%d(%p) ", clients[i]->fd, clients[i]);
}
}
printf("\n");
//exit(1);
}
}
}
}
}
void setupAndListen(char *port)
{
int status, fd;
struct addrinfo hints;
struct addrinfo *ai;
server = (Server *)malloc(sizeof(Server));
if (!server) {
fprintf(stderr, "Couldn't allocate memory for starting the server\n");
exit(EXIT_FAILURE);
}
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET; /* Only IPv4 for now */
hints.ai_socktype = SOCK_STREAM;
hints.ai_flags = AI_PASSIVE; /* Listen on all network addresses */
if ((status = getaddrinfo(NULL, port, &hints, &ai)) != 0) {
fprintf(stderr, "getaddrinfo error: %s\n", gai_strerror(status));
exit(EXIT_FAILURE);
}
/*
* Normally only a single protocol exists to support a particular
* socket [type, protocol family] combination, so we can skip specifying it
*/
fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
if (fd < 0) {
err(EXIT_FAILURE, "Socket creation error");
}
status = bind(fd, ai->ai_addr, ai->ai_addrlen);
if (status != 0) {
err(EXIT_FAILURE, "Socket bind error");
}
status = listen(fd, RECV_BACKLOG);
if (status != 0) {
err(EXIT_FAILURE, "Socket listen error");
}
server->fd = fd;
server->addr = ai;
setupSighandlers();
printf("Listening on 0.0.0.0:%s ...\n", port);
}
int on_message_complete_cb(http_parser *p)
{
struct epoll_event ev;
Client *c = p->data;
c->pstate = SUCCESS;
ev.events = EPOLLOUT;
ev.data.ptr = c;
cheat += 1;
if (epoll_ctl(epfd, EPOLL_CTL_MOD, c->fd, &ev) == -1) {
err(EXIT_FAILURE, "epoll_ctl error");
}
printf("on msg complete called for client %d(%p)\n", c->fd, c);
return 0;
}
void readRequest(Client *c)
{
int nparsed, status;
struct epoll_event ev;
// TODO: remove me
int i;
requests += 1;
status = recv(c->fd, c->buf, RECV_BUFFER, 0);
if (status < 0 && errno != EAGAIN && errno != EWOULDBLOCK ) {
// TODO: Leave this on until we work on the possible errors
// from recv. In the future we should handle them.
err(EXIT_FAILURE, "Message recv error (client: %d)\n", c->fd);
} else {
if (status == 0) {
printf("Client %d(%p) closed the connection.\n", c->fd, c);
printf("clients after readRequest: ");
for (i=0;i < MAX_CLIENTS; i++) {
if (clients[i]) {
printf("%d(%p) ", clients[i]->fd, clients[i]);
}
}
printf("\n");
c->cstate = DISCONNECTED;
}
nparsed = http_parser_execute(c->parser, c->parser_settings, c->buf, status);
if (nparsed != status) {
c->pstate = ERROR;
ev.events = EPOLLOUT;
ev.data.ptr = c;
if (epoll_ctl(epfd, EPOLL_CTL_MOD, c->fd, &ev) == -1) {
err(EXIT_FAILURE, "epoll_ctl error");
}
printf("Parse error (client %d): %s\n",
c->fd, http_errno_description(HTTP_PARSER_ERRNO(c->parser)));
}
}
}
void respond(Client *c)
{
int status;
int i;
char *resp = "HTTP/1.1 200 OK\r\nContent-Length: 4\r\n\r\nCool\r\n\r\n";
char *resp400 = "HTTP/1.1 400 Bad Request\r\n\r\n";
printf("respond's client: %d(%p)\n", c->fd, c);
responds += 1;
printf("clients after respond: ");
for (i=0;i < MAX_CLIENTS; i++) {
if (clients[i]) {
printf("%d(%p) ", clients[i]->fd, clients[i]);
}
}
printf("\n");
if (c->replied) {
printf("ERROR: already replied");
exit(1);
}
if (c->pstate == ERROR) {
printf("ERROR: %d\n", c->fd);
status = send(c->fd, resp400, strlen(resp400), 0);
if (status == -1) {
err(EXIT_FAILURE, "send error (client: %d)", c->fd);
}
c->replied = 1;
status = epoll_ctl(epfd, EPOLL_CTL_DEL, c->fd, NULL);
if (status == -1) {
printf("%d\n", c->fd);
err(EXIT_FAILURE, "epoll_ctl error");
}
closeClient(c);
} else if (c->pstate == SUCCESS) {
status = send(c->fd, resp, strlen(resp), 0);
if (status == -1) {
printf("ccc\n");
err(EXIT_FAILURE, "send error! (client: %d)", c->fd);
}
c->replied = 1;
status = epoll_ctl(epfd, EPOLL_CTL_DEL, c->fd, NULL);
if (status == -1) {
printf("%d\n", c->fd);
err(EXIT_FAILURE, "epoll_ctl error");
}
closeClient(c);
} else {
printf("didn't respond at all to this client fd: %p\n", c);
exit(1);
}
}
Client *makeClient(int fd)
{
http_parser_settings *settings = malloc(sizeof(http_parser_settings));
http_parser *parser = malloc(sizeof(http_parser));
http_parser_settings_init(settings);
http_parser_init(parser, HTTP_REQUEST);
settings->on_message_complete = on_message_complete_cb;
Client *c = malloc(sizeof(Client));
if (!c) {
fprintf(stderr, "Couldn't allocate memory for connection %d\n", fd);
exit(EXIT_FAILURE);
}
c->fd = fd;
c->cstate = CONNECTED;
c->pstate = IN_PROGRESS;
c->replied = 0;
c->ev_delivered = 0;
memset(c->buf, 0, RECV_BUFFER);
c->parser_settings = settings;
c->parser = parser;
c->parser->data = c;
return c;
}
void closeClient(Client *c)
{
int i, found;
if (close(c->fd) < 0) {
err(EXIT_FAILURE, "close(2) error");
}
found = 0;
for (i = 0; i < MAX_CLIENTS; i++) {
if (clients[i] && clients[i] == c) {
clients[i] = 0;
found = 1;
break;
}
}
if (found != 1) {
err(EXIT_FAILURE, "Couldn't find client fd to close");
}
printf("clients after close %d(%p):", c->fd, c);
for (i=0;i < MAX_CLIENTS; i++) {
if (clients[i]) {
printf("%d(%p)", clients[i]->fd, clients[i]);
}
}
printf("\n");
free(c);
}
void setupSighandlers(void)
{
struct sigaction act;
act.sa_handler = shutdownServer;
int status = sigaction(SIGINT, &act, NULL);
if (status != 0) {
err(EXIT_FAILURE, "Error setting up signal handler\n");
}
}
void shutdownServer(int sig)
{
printf("\nShutting down...\n");
int maxclients = 0;
int i;
int status = close(server->fd);
if (status != 0) {
err(EXIT_FAILURE, "Socket cleanup error");
}
freeaddrinfo(server->addr);
for (i=0; i<MAX_CLIENTS;i++) {
if (clients[i]) {
maxclients += 1;
}
}
printf("on_message_complete: %d | responds: %d | request reads: %d | connected clients: %d\n", cheat, responds, requests,maxclients);
printf("Goodbye!\n");
exit(EXIT_SUCCESS);
}
Steps for reproducing
First, download and run the server:
$ git clone https://github.com/agis-/rea.git $ cd rea && make && ./rea 8005
Then from another terminal try to issue multiple requests with no concurrency:
$ ab -n1000 -c1 http://0.0.0.0:8005/foo
Everything works fine until now, the server reports that there are no active client file descriptors:
clients after respond: 5(0x9e101e0) clients after close 5(0x9e101e0): ^^^^^ no clients
This also can be verified by running an
lsof
and see that there are no TCP sockets active other than the server's listening socket.Then try to issue multiple requests from ab:
$ ab -n1000 -c10 http://0.0.0.0:8005/foo
You'll see something like this endlessly printed:
clients after accept: 5(0x926aa40) 11(0x9271d50) # ^^^^^^^^ clients are still around nfds: 2 | on_message_complete: 6000 | responds: 6000 | request reads: 6002 client EPOLLIN 5(0x926aa40) 6000 2 EVENTS! # means we accepted 2 events for the same fd
(If you don't get a similar output and everything is fine, then try running the
ab
again until you get something similar). This means that the client FD 5 has received anEPOLLIN
more than 1 time. And it also means that even though we responded to 6000 requests in total (we did 6000send(2)
s), the totalread(2)
s where 6002. It also means there are still 2 client FDs kept active in the process.
Any ideas on why this happens? Thanks in advance!