1

My problem is the following: I'm programming an interface in Linux to control a GPIB controller via Ethernet. To do so I open a TCP socket and just send the commands to the controller. This is working fine so far. The problem occurred while writing a kind of unit test for my interface: to check it, I use a TCP acceptor from the Boost library in a separate thread and connect to it instead of the actual controller. This works too, but only as long as the connect() call from the interface is blocking. However, since I need a specified timeout for the connect() call, I had to make the connect() non-blocking and wait for its completion with the select() function:

    // Connect routine (body of the interface's connect function): opens a TCP
    // socket to m_Host:m_Port, sets a receive timeout and TCP_NODELAY, then
    // issues a non-blocking connect() and waits for it with select().
    // Returns x2e::OK on success, ERR_NET_SOCKET when a socket call fails and
    // ERR_TIMEOUT when select() returns 0.
    // NOTE(review): no error path below closes m_Socket; presumably a separate
    // close function is responsible for that -- confirm against the caller.

    // Open TCP Socket
    m_Socket = socket(PF_INET,SOCK_STREAM,0);
    if( m_Socket < 0 )
    {
        m_connectionStatus = STATUS_CLOSED;
        return ERR_NET_SOCKET;
    }

    // Build the destination address from m_Host / m_Port.
    // NOTE(review): addr is not zero-initialised, so any member not assigned
    // below keeps an indeterminate value, and the result of inet_aton()
    // (0 for an invalid address string) is not checked.
    struct sockaddr_in addr;
    inet_aton(m_Host.c_str(), &addr.sin_addr);
    addr.sin_port = htons(m_Port);
    // NOTE(review): sin_family is conventionally set with AF_INET; PF_INET is
    // expected to have the same value on Linux -- confirm.
    addr.sin_family = PF_INET;

    // Set timeout values for socket
    struct timeval timeouts;
    timeouts.tv_sec = SOCKET_TIMEOUT_SEC ;   // const -> 5
    timeouts.tv_usec = SOCKET_TIMEOUT_USEC ; // const -> 0
    // NOTE(review): optlen is only 8 bits wide and is cast to socklen_t at the
    // call below; declaring it as socklen_t would avoid the narrowing.
    uint8_t optlen = sizeof(timeouts);

    // Use the timeout for receive operations on the socket (SO_RCVTIMEO).
    if( setsockopt( m_Socket, SOL_SOCKET, SO_RCVTIMEO,&timeouts,(socklen_t)optlen) < 0 )
    {
        m_connectionStatus = STATUS_CLOSED;
        return ERR_NET_SOCKET;
    }

    // Set the Socket to TCP Nodelay ( Send immediately after a send / write command )
    int flag_TCP_nodelay = 1;
    if ( (setsockopt( m_Socket, IPPROTO_TCP, TCP_NODELAY,
            (char *)&flag_TCP_nodelay, sizeof(flag_TCP_nodelay))) < 0)
    {
        m_connectionStatus = STATUS_CLOSED;
        return ERR_NET_SOCKET;
    }
    // Save Socket Flags so that blocking mode can be restored after the connect
    int opts_blocking = fcntl(m_Socket, F_GETFL);
    if ( opts_blocking < 0 )
    {
        // NOTE(review): unlike the paths above, m_connectionStatus is not
        // updated on this and the following fcntl()/select() failures.
        return ERR_NET_SOCKET;
    }
    int opts_noblocking = (opts_blocking | O_NONBLOCK);
    // Set Socket to Non-Blocking
    if (fcntl(m_Socket, F_SETFL, opts_noblocking)<0)
    {
        return ERR_NET_SOCKET;
    }
    // Connect
    if ( connect(m_Socket, (struct sockaddr *)&addr, sizeof(addr)) < 0)
    {
        // EINPROGRESS is the expected result of a non-blocking connect that
        // has not completed yet; any other errno is treated as a failure.
        if ( errno != EINPROGRESS )
        {
            m_connectionStatus = STATUS_CLOSED;
            return ERR_NET_SOCKET;
        }
        // Create a set of sockets for select
        fd_set socks;
        FD_ZERO(&socks);
        FD_SET(m_Socket,&socks);
        // Wait for connection or timeout: the socket is placed in the write
        // set only, and the same `timeouts` struct used for SO_RCVTIMEO is
        // passed as the select() timeout.
        // NOTE(review): on Linux select() may modify `timeouts`; treat its
        // contents as undefined after this call.
        int fdcnt = select(m_Socket+1,NULL,&socks,NULL,&timeouts);
        if ( fdcnt < 0 )
        {
            // NOTE(review): the socket is still non-blocking on this and the
            // timeout return below.
            return ERR_NET_SOCKET;
        }
        else if ( fdcnt == 0 )
        {
            // select() reported no ready descriptor before the timeout expired.
            return ERR_TIMEOUT;
        }
        // NOTE(review): writability only shows that the connect attempt has
        // finished; its outcome is not checked here (for example with
        // getsockopt(SOL_SOCKET, SO_ERROR)), so a failed connect would still
        // reach STATUS_OPEN below.
    }
    //Set Socket to Blocking again
    if(fcntl(m_Socket,F_SETFL,opts_blocking)<0)
    {
        return ERR_NET_SOCKET;
    }

    m_connectionStatus = STATUS_OPEN;
    return x2e::OK;

If I use this function I can still connect to the real controller and communicate with it. But if I use my test server I just can't connect; select() simply returns 0. Now someone may say that my test server just doesn't work... but if I use a blocking connect() call I can send to my test server without any problems. Maybe someone has an idea what I could do?

Toby
  • 3,815
  • 14
  • 51
  • 67
  • A return value of zero indicates a timeout. This may or may not be the issue, but you don't seem to be setting your timeouts struct within the posted excerpt. To quote the manual page "On Linux, select() modifies timeout to reflect the amount of time not slept...Consider timeout to be undefined after select() returns." which would cause problems if you call it again without resetting the value. – Chris Stratton Aug 17 '11 at 07:22
  • Sorry my fault , I forgot to add the definition of the timeouts struct to my code!!! Gonna fix that...but that also means that's not the problem :/ // So now the code contains the whole function which handles the connect – Toby Aug 17 '11 at 07:23
  • Are you aware that you are not closing the socket anywhere? You are not reusing it, calling connect several times with the same socket, do you? – rodrigo Aug 17 '11 at 08:03
  • The code I just posted is part of a global "connect function" it actually shall not implement the closing of the socket. The socket will be closed later with another function call, but before I do that I want to do some communication. – Toby Aug 17 '11 at 08:09
  • But note that if any socket function fails, you cannot reliably use it again, you must close it and create a new one. – rodrigo Aug 17 '11 at 10:22
  • True but still that doesnt affect me in this particular case since I don't get any errors on the other function calls. – Toby Aug 17 '11 at 10:28
  • What is the value of the timeout you are using? If you make it "unreasonably large" does the code end up working? Is there anything that could be causing your other thread to be slow about accepting the connection? Can you tcpdump the attempt? Or test running the acceptor in a different process instead of just a different thread? Or test against netcat? – Chris Stratton Aug 18 '11 at 08:14
  • I have used non-blocking connect()+select() on Linux in the past with no problem. Are you _absolutely_ certain you are initializing the `timeouts` struct correctly? (And the other arguments to `select`...) I would try printing the actual value of `.tv_sec` immediately before the call to `select`. `select` returning zero means timeout, period. – Nemo Aug 18 '11 at 13:52
  • Have you tried also adding the file descriptor to the read set? I know linux man pages say to check for "writability", but at least Stevens does both read and write set in his non-blocking connect example. – Nikolai Fetissov Aug 19 '11 at 02:49

1 Answers1

2

With a non-blocking socket, the connect() call may return 0 while the connection is still not ready. The connect() code section may be written like this (a segment of my connect wrapper, learnt from the Python implementation):

    /* Start the connect. A failure with errno == EINPROGRESS is tolerated so
     * that the code below can wait for the attempt to finish.
     * NOTE(review): FAIL_CHECK is defined elsewhere, and the
     * `&& errno != EINPROGRESS` test sits inside its argument -- confirm the
     * macro expands as intended. */
    if (FAIL_CHECK(connect(sock, (struct sockaddr *) &channel, sizeof(channel)) &&
            errno != EINPROGRESS))
    {
        gko_log(WARNING, "connect error");
        ret = HOST_DOWN_FAIL;
        goto CONNECT_END;
    }

    /** Wait for write bit to be set **/
#if HAVE_POLL
    {
        /* poll() variant: wait for POLLOUT on the socket. */
        struct pollfd pollfd;

        pollfd.fd = sock;
        pollfd.events = POLLOUT;

        /* send_sec is in seconds, timeout in ms */
        select_ret = poll(&pollfd, 1, (int)(send_sec * 1000 + 1));
    }
#else
    {
        /* select() variant: the socket is placed in the write set only. */
        FD_ZERO(&wset);
        FD_SET(sock, &wset);
        select_ret = select(sock + 1, 0, &wset, 0, &send_timeout);
    }
#endif /* HAVE_POLL */
    /* Negative result: the wait itself failed. */
    if (select_ret < 0)
    {
        gko_log(FATAL, "select/poll error on connect");
        ret = HOST_DOWN_FAIL;
        goto CONNECT_END;
    }
    /* Zero result: nothing became ready before the timeout. */
    if (!select_ret)
    {
        gko_log(FATAL, "connect timeout on connect");
        ret = HOST_DOWN_FAIL;
        goto CONNECT_END;
    }
    /* NOTE(review): this excerpt does not query SO_ERROR after the socket
     * becomes writable; the code after this point is not shown -- confirm the
     * connect result is verified there. */

The corresponding code segment from the Python implementation:

/* Connect with an optional timeout: when s->sock_timeout is positive and
   connect() fails with EINPROGRESS, wait via internal_select() and turn the
   outcome into an errno-style value stored in res. */
res = connect(s->sock_fd, addr, addrlen);
if (s->sock_timeout > 0.0) {
    if (res < 0 && errno == EINPROGRESS && IS_SELECTABLE(s)) {
        timeout = internal_select(s, 1);
        if (timeout == 0) {
            /* Bug #1019808: in case of an EINPROGRESS,
               use getsockopt(SO_ERROR) to get the real
               error. */
            socklen_t res_size = sizeof res;
            (void)getsockopt(s->sock_fd, SOL_SOCKET,
                             SO_ERROR, &res, &res_size);
            /* An "already connected" status counts as success. */
            if (res == EISCONN)
                res = 0;
            errno = res;
        }
        else if (timeout == -1) {
            res = errno;            /* had error */
        }
        else
            res = EWOULDBLOCK;                      /* timed out */
    }
}

/* A result that is still negative is replaced by the current errno value. */
if (res < 0)
    res = errno;
auxten
  • 832
  • 7
  • 11