I have a stress test for server setup and tear-down. The main thread is the server thread, and the other thread is a client thread.
In the client thread, I call socket(), connect(), send(), recv(), shutdown(), then close(), all in a tight loop. I am intentionally creating lots of little connections. This loop terminates when any of my socket calls error out.
In the main thread, I call listen(), then launch the client thread. I accept() a small number of connections, and on each one of those connections, I call recv() once, and send() once before calling shutdown() and close() on the accepted socket. I then shutdown() and close() the listening socket, and join with the client thread.
Roughly once every 1500 iterations, my client thread will get stuck in recv() and the main thread will be stuck in pthread_join(client_thread). "netstat -n -p tcp" shows two TCP/IP entries in ESTABLISHED, one for each side of the connection. My debug prints indicate that a successful client connection happened at almost the exact same time as the close() call on my listening socket. No call to accept() was made on the server for that client connection. If I then CTRL-C the program, the client side goes into FIN_WAIT_2, and the server side goes into CLOSE_WAIT. The FIN_WAIT_2 eventually goes away, but CLOSE_WAIT hangs around, even after a logout / login.
#include <sys/types.h>

#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/un.h>
#include <unistd.h>

#include <cstring>
#include <iostream>
// Address list filled in by getaddrinfo() in main(). Only the first entry is
// used: start_server() binds to it and short_connect() connects to it.
struct addrinfo *res = 0;
// Running number of connection attempts made by short_connect(); it is only
// used to label that function's "send"/"recv" log lines.
int count = 0;
// Scope guard for a socket descriptor: when the object goes out of scope the
// descriptor stored in `s` is shut down in both directions and closed.
//
// The struct is deliberately left as a plain aggregate so that
// `tSocketCloser c = {fd};` keeps working. It is copyable, so do not copy it:
// two copies would close the same descriptor twice.
struct tSocketCloser {
    // Descriptor to release. A negative value (what socket()/accept() return
    // on failure) means "nothing to release".
    int s;

    ~tSocketCloser() {
        printf("clientClosing: %d\n", s);
        // Only touch the descriptor if it is a real one; calling
        // shutdown()/close() on -1 just fails with EBADF and clobbers errno.
        if (s >= 0) {
            shutdown(s, SHUT_RDWR);
            close(s);
        }
        printf("clientClosed: %d\n", s);
    }
};
// Creates a listening socket for the first address in the global `res`.
//
// The socket is created, SO_REUSEADDR is enabled, and the socket is bound and
// put into the listening state with a backlog of 1024.
//
// Returns the listening descriptor. Any failure is reported on stdout and
// terminates the process with exit(-1), so the caller never sees an invalid
// descriptor.
int start_server() {
    const int listenSocket = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
    printf("listenSocket: %d\n", listenSocket);
    // Without this check a failed socket() was only noticed indirectly, as a
    // confusing "SO_REUSEADDR -1" from the setsockopt() call below.
    if (listenSocket < 0)
    {
        printf("socket %d (%s)\n", listenSocket, std::strerror(errno));
        exit(-1);
    }

    int one = 1;
    int ret = ::setsockopt(listenSocket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
    if (ret)
    {
        printf("SO_REUSEADDR %d\n", ret);
        exit(-1);
    }

    /* SO_LINGER {1,0} and TCP_NODELAY were here */
    // O_NONBLOCK code was here

    // ai_addrlen already has the socklen_t type bind() expects; no cast needed.
    ret = ::bind(listenSocket, res->ai_addr, res->ai_addrlen);
    if (ret)
    {
        printf("Bind %d\n", ret);
        exit(-1);
    }

    ret = ::listen(listenSocket, 1024);
    if (ret)
    {
        printf("listen %d\n", ret);
        exit(-1);
    }

    return listenSocket;
}
// Accepts and immediately closes every connection that is already waiting in
// the accept queue of `listenSocket`.
//
// The listener is switched to non-blocking mode so that accept() reports an
// empty queue (EAGAIN / EWOULDBLOCK) instead of blocking. The descriptor is
// about to be closed by the caller, so the mode is not restored.
static void drain_pending_connections(int listenSocket)
{
    const int flags = fcntl(listenSocket, F_GETFL, 0);
    if (flags == -1 || fcntl(listenSocket, F_SETFL, flags | O_NONBLOCK) == -1)
    {
        // A blocking accept() here could hang the server thread, so skip the
        // drain rather than risk that.
        printf("drain O_NONBLOCK failed (%s)\n", std::strerror(errno));
        return;
    }

    for (;;)
    {
        const int pending = ::accept(listenSocket, NULL, NULL);
        if (pending < 0)
        {
            if (errno == EINTR || errno == ECONNABORTED)
                continue;  // transient; the queue may still hold connections
            break;         // queue is empty (or accept failed for good)
        }
        close(pending);
    }
}

// Serves a small random number of clients (0 to 2) and then tears the
// listening socket down.
//
// Each served client is accepted, read from once, and sent back exactly the
// bytes that were read; its socket is released by tSocketCloser at the end of
// the loop iteration. Any failure while serving stops the serving loop early.
//
// After a random pause of up to 999 microseconds the listener is drained and
// then shut down and closed. The drain exists because a client can finish
// connecting without the server ever calling accept() for it; per the report
// accompanying this program, such a connection stayed ESTABLISHED and left
// the client blocked in recv() after the listener was closed. Explicitly
// accepting and closing queued connections gives them a normal teardown.
// NOTE(review): a connection that completes between the last accept() in the
// drain and the close() below can still be missed, so this narrows the window
// but cannot remove it.
void stop_server(int listenSocket) {
    const int iters = rand() % 3;
    for (int i = 0; i < iters; ++i)
    {
        struct sockaddr_storage clientAddress;
        // accept() takes a socklen_t*, which is not guaranteed to be an int.
        socklen_t size = sizeof(clientAddress);
        tSocketCloser otherSock;
        otherSock.s = ::accept(
            listenSocket,
            reinterpret_cast<struct sockaddr *>(&clientAddress),
            &size);
        printf("accept: %d\n", otherSock.s);
        if (otherSock.s < 0)
        {
            printf("accept failed (%s)\n", std::strerror(errno));
            break;
        }

        int one = 1;
        const int ret = setsockopt(otherSock.s, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(one));
        if (ret)
        {
            printf("SO_NOSIGPIPE %d\n", ret);
            break;
        }

        char buffer[2048] = {0};
        const ssize_t received = recv(otherSock.s, buffer, sizeof(buffer), 0);
        if (received == -1)
            break;

        // Echo only what was actually received instead of the whole 2048-byte
        // buffer; the client reads at most 1024 bytes per connection.
        const ssize_t sent = send(otherSock.s, buffer, static_cast<size_t>(received), 0);
        if (sent == -1)
            break;
    }

    // rand() is never negative, so no abs() is needed here.
    const int sleep_time = rand() % 1000;
    usleep(sleep_time);

    printf("serverClosing: %d\n", listenSocket);
    drain_pending_connections(listenSocket);
    shutdown(listenSocket, SHUT_RDWR);
    close(listenSocket);
    printf("serverClosed: %d\n", listenSocket);
}
void *short_connect(void *)
{
while(true) {
++count;
int connectSocket = -1;
int ret = 0;
int one = 1;
connectSocket = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
tSocketCloser closer = {connectSocket};
/* SO_LINGER {1,0} and TCP_NODELAY were here */
ret = setsockopt(connectSocket, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(one));
if(ret)
{
printf("client SO_NOSIGPIPE %d\n", ret);
return NULL;
}
// O_NONBLOCK code was here
ret = connect(connectSocket, res->ai_addr, static_cast<int>(res->ai_addrlen));
if(ret)
{
printf("bad connect %d\n", ret);
return NULL;
}
printf("good connect %d\n",connectSocket);
char buffer[1024] = {0};
ret = send(connectSocket, buffer, sizeof(buffer), 0);
printf("%d: send %d\n", count, ret);
if(ret == -1)
return NULL;
ret = recv(connectSocket, buffer, sizeof(buffer), 0);
printf("%d: recv %d\n", count, ret);
if(ret == -1)
return NULL;
printf("Success!\n");
}
}
// Stress-test driver: repeats server setup and tear-down 1000 times.
//
// The wildcard address for TCP port 9999 is resolved once into the global
// `res`. Each round starts a listener, launches the client thread, lets
// stop_server() serve a few connections and close the listener, and then
// waits for the client thread to finish.
//
// Returns 0 after a complete run and 1 if a client thread could not be
// started. A getaddrinfo() failure exits with that function's error code.
int main() {
    char port[sizeof("65536") + 1] = "9999";

    struct addrinfo hints;
    std::memset(&hints, 0, sizeof(hints));
    hints.ai_family = PF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;

    // Wildcard address
    const int error = getaddrinfo(NULL, port, &hints, &res);
    if (error) {
        printf("getaddrinfo %d\n", error);
        exit(error);
    }

    int status = 0;
    for(int i = 0; i < 1000; ++i)
    {
        const int sock = start_server();

        pthread_t clientThread;
        const int createError = pthread_create(&clientThread, NULL, short_connect, NULL);
        if (createError)
        {
            // With no client running, stop_server() could block in accept()
            // and there would be no valid thread to join, so stop here.
            printf("pthread_create %d (%s)\n", createError, std::strerror(createError));
            close(sock);
            status = 1;
            break;
        }

        stop_server(sock);
        pthread_join(clientThread, NULL);
    }

    // Release the address list now that no thread is using it any more.
    freeaddrinfo(res);
    res = 0;
    return status;
}
Here is some slightly annotated output:
listenSocket: 4 //what a good run looks like...
good connect 5
accept: 6
42: send 1024
clientClosing: 6
42: recv 1024
Success!
clientClosing: 5
clientClosed: 5
clientClosed: 6
good connect 5
accept: 6
43: send 1024
clientClosing: 6
43: recv 1024
clientClosed: 6
Success!
clientClosing: 5
clientClosed: 5
good connect 5 //client connects
44: send 1024
serverClosing: 4 //server starting close...
serverClosed: 4 //server done closing
44: recv -1 //recv errors out, as it should. Note the lack of accept() calls
clientClosing: 5 //client teardown
clientClosed: 5
listenSocket: 4 //what a bad run looks like...
good connect 5
accept: 6
45: send 1024
clientClosing: 6
45: recv 1024
clientClosed: 6
Success!
clientClosing: 5
clientClosed: 5
good connect 5
accept: 6
46: send 1024
clientClosing: 6
clientClosed: 6
46: recv 1024
Success!
clientClosing: 5
clientClosed: 5
serverClosing: 4 //server starting close...
good connect 5 //client connect
serverClosed: 4 //server done closing
47: send 1024 //successful send from client
//stuck in recv(), so we get no further prints
So the big question is... how can I close my listening socket without getting into this state? The CLOSE_WAIT state suggests that I need to close an accepted connection, but I don't have a socket / file descriptor to close. It also seems very odd that killing the program that hosts the server and the client doesn't cause the sockets to get cleaned up (the CLOSE_WAIT sockets were still in netstat hours later).
This all happened on OS X 10.8.3 x86_64.