0

I have been working with different languages (Java, PHP, ASP, JS ...) for years, but never got around to C. I always thought about it, but there was always tomorrow. Now I have finally started and have been playing around with it for some time, and I wanted to try working with sockets. So I made a small test program that reads a file and sends it to the client. This worked fine using epoll to deal with the fd states. Then I thought I would extend this test to include some more fun stuff and decided to add fork() to the mix. Instead of reading the file normally, I would simply 'cat' the file content and read the output from the pipes that I set up.

It was meant to be simple and then later on I could add some more or other stuff as well, but this has proven to be a lot more difficult than I thought. Changing a normal file descriptor with a process pipe was not that easy. At least not if you want it to work properly.

The file I tested with contains 60000+ bytes, but only a random number of bytes, somewhere around 8000 to 14000, gets through before the connection is closed. Both pipes to the process, as well as the client socket itself, receive 'EPOLLHUP' while there is still data to be read from the process output and the connection between server and client is fully established. I have debugged the S*** out of this and rewritten the whole thing multiple times to try different approaches. But when it comes to pulling data from a process and sending it to a client via a socket, I keep failing big.

This time it seems to be the epoll event flags that keep bugging me. If I remove any management of the 'EPOLLRDHUP|EPOLLHUP|EPOLLERR' flags, I get the full 60000 bytes transferred just fine.

Server Code

/*
 * A single descriptor together with the epoll event mask it is
 * registered with.
 */
struct stream_fd_info {
    int fd;             /* descriptor passed to epoll_ctl(), read() and write() */
    uint32_t events;    /* EPOLL* bits currently requested for fd */
};

typedef struct stream_fd_info stream_fd_info_t;

/*
 * One endpoint of the relay: the descriptor pair it is read from and
 * written to, plus the buffer holding bytes waiting to be written to
 * `output`.
 */
struct stream_info {
    char buffer[1024];          /* bytes read from the other endpoint, not yet written */
    ssize_t rc;                 /* result of the last read(), reduced as bytes are written out */
    ssize_t wc;                 /* result of the last write() */
    ssize_t ofs;                /* offset in buffer of the first byte still to be written */
    stream_fd_info_t output;    /* descriptor the buffer is written to */
    stream_fd_info_t input;     /* descriptor this endpoint is read from */
};

typedef struct stream_info stream_info_t;

/* Event bits treated as "descriptor is gone": error, hang-up, or peer shutdown. */
#define EPOLLDIED (EPOLLERR | EPOLLHUP | EPOLLRDHUP)

int main(int argc, char **argv) {

    struct epoll_event event, events[6];
    int epoll = epoll_create1(0);

    int sockfd = socket(AF_UNIX, SOCK_STREAM, 0), clifd;
    struct sockaddr_un sockaddr, cliaddr;
    socklen_t clilen;

    sockaddr.sun_path[0] = '\0';
    char* sockuri = "/tmp/ctest.sock";
    sockaddr.sun_family = AF_UNIX;

    memcpy(&sockaddr.sun_path[1], sockuri, strlen(sockuri));
    socklen_t socklen = sizeof(sockaddr.sun_family) + strlen(sockuri) + 1;

    if (bind(sockfd, (struct sockaddr *) &sockaddr, socklen) < 0 || listen(sockfd, 1) < 0) {
        perror("Socket"); exit(1);
    }

    clifd = accept(sockfd, (struct sockaddr *) &cliaddr, &clilen);

    if (clifd < 0) {
        perror("Client"); exit(1);
    }

    int pipeout[2];
    int pipein[2];

    // Create the pipe FD's
    pipe(pipeout);
    pipe(pipein);

    pid_t pid = fork();

    if (pid < 0) {
        perror("Fork"); exit(1);

    } else if(!pid) {
        fprintf(stderr, "Starting client process shell\n");

        dup2(pipeout[0], 0);
        dup2(pipein[1], 1);
        dup2(1, 2);

        close(pipein[0]);
        close(pipein[1]);
        close(pipeout[0]);
        close(pipeout[1]);

        char *args[] = {"cat", {"/opt/php/cli/php.ini"}};

        execvp(args[0], args);

    } else {
        sleep(1);

        close(pipein[1]);
        close(pipeout[0]);

        stream_info_t streams[2];

        streams[0].input.fd = clifd;
        streams[0].input.events = EPOLLDIED|EPOLLIN;
        streams[0].output = streams[0].input;

        streams[1].input.fd = pipein[0];
        streams[1].output.fd = pipeout[1];
        streams[1].input.events = EPOLLDIED|EPOLLIN;
        streams[1].output.events = EPOLLDIED;

        fprintf(stderr, "Monitoring uni-directional client descriptor %d\n", streams[0].output.fd);
        fcntl(streams[0].input.fd, F_SETFL, fcntl(streams[0].input.fd, F_GETFL, 0) | O_NONBLOCK);
        event.events = streams[0].input.events;
        event.data.fd = streams[0].input.fd;
        epoll_ctl(epoll, EPOLL_CTL_ADD, streams[0].input.fd, &event);

        fprintf(stderr, "Monitoring process output descriptor %d\n", streams[1].output.fd);
        fcntl(streams[1].output.fd, F_SETFL, fcntl(streams[1].output.fd, F_GETFL, 0) | O_NONBLOCK);
        event.events = streams[1].output.events;
        event.data.fd = streams[1].output.fd;
        epoll_ctl(epoll, EPOLL_CTL_ADD, streams[1].output.fd, &event);

        fprintf(stderr, "Monitoring process input descriptor %d\n", streams[1].input.fd);
        fcntl(streams[1].input.fd, F_SETFL, fcntl(streams[1].input.fd, F_GETFL, 0) | O_NONBLOCK);
        event.events = streams[1].input.events;
        event.data.fd = streams[1].input.fd;
        epoll_ctl(epoll, EPOLL_CTL_ADD, streams[1].input.fd, &event);

        int ready, i, x;
        int totalRead = 0, totalWrite = 0;

        fprintf(stderr, "Setting up client connection\n");

        int running = 1;
        while (running) {
            ready = epoll_wait(epoll, events, 6, 30000);

            // fprintf(stderr, "There are %d descriptor(s) ready\n", ready);

            for(i = 0; i < ready; i++) {
                // fprintf(stderr, "Checking flags %d on descriptor %d\n", events[i].events, events[i].data.fd);

                if ((events[i].events & EPOLLOUT) != 0) {
                    x = events[i].data.fd == streams[0].input.fd ? 0 : 1;

                    if (streams[x].wc > 0) {
                        streams[x].wc = write(events[i].data.fd, &streams[x].buffer[ streams[x].ofs ], (size_t) streams[x].rc);

                        if (streams[x].wc < 0) {
                            perror("Write"); exit(1);

                        } else if (streams[x].wc > 0) {
                            fprintf(stderr, "Wrote %d byte(s) of %d to descriptor %d\n", streams[x].wc, streams[x].rc, events[i].data.fd);

                            streams[x].rc -= streams[x].wc;
                            streams[x].ofs += streams[x].wc;
                            totalWrite += streams[x].wc;
                        }

                        if (streams[x].rc <= 0) {
                            streams[x].output.events &= ~EPOLLOUT;
                            event.events = streams[x].output.events;
                            event.data.fd = streams[x].output.fd;

                            fprintf(stderr, "Removed EPOLLOUT from descriptor %d\n", event.data.fd);
                            epoll_ctl(epoll, EPOLL_CTL_MOD, event.data.fd, &event);
                        }
                    }
                }

                if ((events[i].events & EPOLLIN) != 0) {
                    x = events[i].data.fd == streams[0].input.fd ? 1 : 0;

                    if (streams[x].rc <= 0) {
                        streams[x].rc = read(events[i].data.fd, streams[x].buffer, sizeof streams[x].buffer);
                        streams[x].ofs = 0;

                        if (streams[x].rc < 0) {
                            perror("Read"); exit(1);

                        } else if (streams[x].rc > 0) {
                            fprintf(stderr, "Read %d byte(s) from descriptor %d into %d bytes cache\n", streams[x].rc, events[i].data.fd, sizeof streams[x].buffer);

                            totalRead += streams[x].rc;

                            streams[x].output.events |= EPOLLOUT;
                            event.events = streams[x].output.events;
                            event.data.fd = streams[x].output.fd;

                            fprintf(stderr, "Added EPOLLOUT to descriptor %d\n", event.data.fd);
                            epoll_ctl(epoll, EPOLL_CTL_MOD, event.data.fd, &event);
                        }
                    }
                }

                if ((events[i].events & EPOLLDIED) != 0) {

                }
            }
        }

        int pidres;

        if (waitpid(pid, &pidres, WNOHANG) == 0) {
            // TODO: Cleaner shutdown
            fprintf(stderr, "Terminating child process %d\n", pid);
            kill(pid, SIGKILL);
        }

        fprintf(stderr, "Connection was close with total read count of %d bytes and write count of %d bytes\n", totalRead, totalWrite);

        close(streams[1].input.fd);
        close(streams[1].output.fd);
    }

    close(clifd);
    close(sockfd);
    close(epoll);
}

Client Code

#!/usr/bin/env php
<?php

// Test client: connects to the server's abstract UNIX socket and reads the
// stream slowly (one 1024-byte chunk per second), counting the bytes received.

echo "Establishing connection to the server...\n";
$socket = socket_create(AF_UNIX, SOCK_STREAM, 0);

if ($socket === false) {
    die("Could not create the client socket\n");
}

// The leading NUL byte addresses the abstract socket namespace.
if (!socket_connect($socket, "\0/tmp/ctest.sock")) {
    die("Could not connect to the server socket\n");
}

$totalRead = 0;

for (;;) {
    echo "Reading from server...\n";
    $buffer = socket_read($socket, 1024);

    // Stop on a read error (false) or when no more data is available ("").
    if ($buffer === false || $buffer === '') {
        break;
    }

    $totalRead += strlen($buffer);
    echo "Total read count $totalRead bytes\n";

    sleep(1);
}

echo "Connection was closed with a total read of $totalRead bytes\n";

socket_close($socket);

C is like a foreign language to me and I am still learning my ABC. So if some C guru out there is able to spot my newbie mistakes and throw me a hint, that would be awesome. The code above may look simple to you, but I have spent weeks on this problem and I am getting nowhere :o

Thanks

EDIT: Adding 'EPOLLIN|EPOLLOUT' checks before closing anything fixes some of the issues.

                if ((events[i].events & EPOLLDIED) != 0
                        && (events[i].events & (EPOLLIN|EPOLLOUT)) == 0) {

This makes the process fds act as they should. The input stream closes at the beginning, which is fine since 'cat' does not use it when outputting data and the output stream is closed when the last byte has been read.

But now it does not detect socket connection loss, since 'EPOLLIN' is always on, no matter what. I can find a work-around for this, but is this really how these flags should function? False positives? If the socket is closed, it is not ready to be read from.

EDIT 2: So I re-structured a lot of this to make it more readable and fixed some errors here and there that I stumbled across during the cleanup. I added a lot of checks, and now it finally seems to work in all the scenarios that I tested. I also added handling of zero read/write, just to be safe, as suggested.

/*
 * A single descriptor, the epoll event mask requested for it, and a flag
 * recording whether it is currently part of the epoll set.
 */
struct stream_fd_info {
    uint32_t events;    /* EPOLL* bits requested for fd */
    int fd;             /* descriptor passed to epoll_ctl(), read() and write() */
    int epoll;          /* non-zero while fd is being monitored */
};

typedef struct stream_fd_info stream_fd_info_t;

/*
 * One endpoint of the relay: the descriptor pair it is read from and
 * written to, plus the buffer holding bytes waiting to be written to
 * `output`.
 */
struct stream_info {
    char buffer[4096];          /* bytes read from the other endpoint, not yet written */
    ssize_t rc;                 /* result of the last read(), reduced as bytes are written out */
    ssize_t wc;                 /* result of the last write() */
    ssize_t ofs;                /* offset in buffer of the first byte still to be written */
    stream_fd_info_t output;    /* descriptor the buffer is written to */
    stream_fd_info_t input;     /* descriptor this endpoint is read from */
};

typedef struct stream_info stream_info_t;

/* Event bits treated as "descriptor is gone": error, hang-up, or peer shutdown. */
#define EPOLLDIED (EPOLLERR | EPOLLHUP | EPOLLRDHUP)

/*
 * Validator callback: reports whether the child process is still running.
 *
 * args points to the pid_t of the child. The child is polled with
 * waitpid(..., WNOHANG), which yields 0 only while the child has not
 * changed state; any other result (child exited, or no such child) is
 * reported as "not running".
 *
 * Returns 1 while the child is running, 0 otherwise.
 */
int process_validator(void* args) {
    const pid_t *child = args;
    pid_t state = waitpid(*child, NULL, WNOHANG);

    if (state == 0) {
        return 1;
    }

    return 0;
}

/*
 * Relay bytes between two endpoints with epoll until the connection is done.
 *
 * streams   Array of exactly two endpoints. Data read from streams[0].input
 *           is buffered in streams[1] and written to streams[1].output, and
 *           vice versa. An endpoint may use the same descriptor for input
 *           and output (e.g. a socket); it is then registered only once,
 *           with the input's event mask. The .epoll flags are set here.
 * validator Called with `args` when epoll_wait() times out (20 s); a zero
 *           return ends the relay.
 * args      Opaque pointer handed to validator.
 *
 * Shutdown rules:
 *  - An input that reports EOF/hang-up/error is first drained; the relay
 *    stops once everything read from it has been written to the other side.
 *  - A descriptor that is only an output (e.g. a child's stdin pipe) and
 *    dies is merely unregistered; data meant for it is dropped.
 *  - Nothing is written to a descriptor flagged EPOLLHUP/EPOLLERR.
 *
 * NOTE(review): while a buffer is full its input stays registered, so
 * epoll_wait() returns immediately until the destination becomes writable.
 *
 * Always returns NULL.
 */
void* stream_handler(stream_info_t* streams, int (*validator)(void*), void* args) {  // stream_info_t[2]

    stream_fd_info_t       *fdinfo;
    struct epoll_event      event, events[4];
    int                     epoll = epoll_create1(0);
    int                     i, x, y;
    int                     ready, running = 1;

    if (epoll < 0) {
        fprintf(stderr, "Epoll crashed on error code %d\n", errno);
        return NULL;
    }

    /*
     * Start epoll monitoring on all FDs
     */
    for (i=0; i < 2; i++) {
        for (x=0; x < 2; x++) {
            /* Work on the entry itself; a copy would lose the .epoll flag. */
            fdinfo = x == 0 ? &streams[i].input : &streams[i].output;
            fprintf(stderr, "Monitoring %s descriptor %d\n", x == 0 ? "input" : "output", fdinfo->fd);

            if (x == 1 && fdinfo->fd == streams[i].input.fd) {
                /* Shared descriptor: already registered as the input. */
                fdinfo->epoll = streams[i].input.epoll;
                continue;
            }

            fcntl(fdinfo->fd, F_SETFL, fcntl(fdinfo->fd, F_GETFL, 0) | O_NONBLOCK);

            event.events = fdinfo->events;
            event.data.fd = fdinfo->fd;
            fdinfo->epoll = epoll_ctl(epoll, EPOLL_CTL_ADD, fdinfo->fd, &event) == 0 ? 1 : 0;
        }
    }

    while (running) {
        ready = epoll_wait(epoll, events, 4, 20000);

        /*
         * If both sets of FD's can only read or only write, then there is no
         * way for them to communicate. As such we can consider this connection closed/broken.
         */
        if (ready == 0
                && (( (!streams[0].input.epoll || !streams[1].output.epoll) && (!streams[0].output.epoll || !streams[1].input.epoll) )
                        || !validator(args))) {

            fprintf(stderr, "The connection has timed-out\n");
            running = 0;

        } else if (ready < 0) {
            if (errno == EINTR) {
                continue;
            }

            fprintf(stderr, "Epoll crashed on error code %d\n", errno);
            running = 0;
        }

        for(i = 0; i < ready && running; i++) {
            int         fd = events[i].data.fd;
            uint32_t    flags = events[i].events;

            /* Never write to a descriptor the kernel already reports as dead. */
            if ((flags & (EPOLLHUP|EPOLLERR)) != 0) {
                flags &= ~EPOLLOUT;
            }

            if ((flags & EPOLLOUT) != 0) {
                x = fd == streams[0].output.fd ? 0 : 1;

                if (streams[x].rc > 0) {
                    streams[x].wc = write(fd, &streams[x].buffer[ streams[x].ofs ], (size_t) streams[x].rc);

                    if (streams[x].wc < 0) {
                        if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) {
                            running = 0;
                        }

                    } else if (streams[x].wc > 0) {
                        fprintf(stderr, "Wrote %zd byte(s) of %zd to descriptor %d\n", streams[x].wc, streams[x].rc, fd);

                        streams[x].rc -= streams[x].wc;
                        streams[x].ofs += streams[x].wc;

                    } else {
                        // Lets deal with this at the end
                        flags |= EPOLLDIED;
                        flags &= ~EPOLLOUT;
                    }

                    if (streams[x].rc <= 0) {
                        streams[x].output.events &= ~EPOLLOUT;
                        event.events = streams[x].output.events;
                        event.data.fd = streams[x].output.fd;

                        fprintf(stderr, "Removed EPOLLOUT from descriptor %d\n", event.data.fd);
                        epoll_ctl(epoll, EPOLL_CTL_MOD, event.data.fd, &event);

                        /*
                         * The input feeding this buffer was already removed:
                         * its last bytes have now been delivered.
                         */
                        if (!streams[1 - x].input.epoll) {
                            running = 0;
                        }
                    }
                }
            }

            if ((flags & EPOLLIN) != 0) {
                x = fd == streams[0].input.fd ? 1 : 0;

                /* Only refill a buffer that has been written out completely. */
                if (streams[x].rc <= 0) {
                    streams[x].rc = read(fd, streams[x].buffer, sizeof streams[x].buffer);
                    streams[x].ofs = 0;

                    if (streams[x].rc < 0) {
                        if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) {
                            running = 0;
                        }

                        streams[x].rc = 0;

                    } else if (streams[x].rc > 0) {
                        fprintf(stderr, "Read %zd byte(s) from descriptor %d into %zu bytes cache\n", streams[x].rc, fd, sizeof streams[x].buffer);

                        if (!streams[x].output.epoll) {
                            /* Destination is gone; the data cannot be delivered. */
                            streams[x].rc = 0;

                        } else {
                            streams[x].output.events |= EPOLLOUT;
                            event.events = streams[x].output.events;
                            event.data.fd = streams[x].output.fd;

                            fprintf(stderr, "Added EPOLLOUT to descriptor %d\n", event.data.fd);
                            epoll_ctl(epoll, EPOLL_CTL_MOD, event.data.fd, &event);
                        }

                    } else {
                        // End of file. Lets deal with this at the end
                        flags |= EPOLLDIED;
                        flags &= ~EPOLLIN;
                    }
                }
            }

            if ((flags & EPOLLDIED) != 0
                    && (flags & (EPOLLIN|EPOLLOUT)) == 0) {

                int is_input, is_output;

                fprintf(stderr, "Descriptor %d has closed. Flags %u\n", fd, (unsigned) flags);

                /* y: endpoint owning the descriptor, x: buffer fed by that endpoint's input */
                y = fd == streams[0].input.fd
                        || fd == streams[0].output.fd ? 0 : 1;

                x = y == 0 ? 1 : 0;

                is_input = fd == streams[y].input.fd;
                is_output = fd == streams[y].output.fd;

                if (is_input && streams[x].rc <= 0) {
                    /* Input finished and everything read from it was delivered. */
                    running = 0;

                } else {
                    fprintf(stderr, "Removing descriptor %d from the poll\n", fd);
                    epoll_ctl(epoll, EPOLL_CTL_DEL, fd, NULL);

                    if (is_input) {
                        streams[y].input.epoll = 0;
                    }

                    if (is_output) {
                        streams[y].output.epoll = 0;
                        streams[y].rc = 0;      /* undeliverable from now on */

                        /* The input feeding this output is finished as well. */
                        if (!streams[x].input.epoll) {
                            running = 0;
                        }
                    }
                }
            }
        }
    }

    close(epoll);

    return NULL;
}

/*
 * Test server: accepts one client on the abstract UNIX socket
 * "\0/tmp/ctest.sock", runs `cat /opt/php/cli/php.ini` with its
 * stdin/stdout/stderr attached to pipes, and lets stream_handler() relay
 * bytes between the client socket (streams[0]) and the child (streams[1]).
 *
 * Returns 0 after the connection has been closed; exits with status 1 on
 * setup errors.
 */
int main(int argc, char **argv) {

    int                     sockfd = socket(AF_UNIX, SOCK_STREAM, 0), clifd;
    struct sockaddr_un      sockaddr, cliaddr;
    /* accept() reads clilen as the size of cliaddr, so it must be set beforehand. */
    socklen_t               socklen, clilen = sizeof(cliaddr);
    char*                   sockuri = "/tmp/ctest.sock";
    int                     pipeout[2], pipein[2];
    stream_info_t           streams[2];
    pid_t                   pid;
    int                     pidres;

    (void) argc;
    (void) argv;

    if (sockfd < 0) {
        perror("Socket"); exit(1);
    }

    memset(&sockaddr, 0, sizeof(sockaddr));
    sockaddr.sun_path[0] = '\0';    /* leading NUL: abstract socket namespace */
    sockaddr.sun_family = AF_UNIX;
    memcpy(&sockaddr.sun_path[1], sockuri, strlen(sockuri));
    socklen = sizeof(sockaddr.sun_family) + strlen(sockuri) + 1;

    if (bind(sockfd, (struct sockaddr *) &sockaddr, socklen) < 0 || listen(sockfd, 1) < 0) {
        perror("Socket"); exit(1);
    }

    clifd = accept(sockfd, (struct sockaddr *) &cliaddr, &clilen);

    if (clifd < 0) {
        perror("Client"); exit(1);
    }

    fprintf(stderr, "Setting up client environment\n");

    if (pipe(pipeout) < 0 || pipe(pipein) < 0) {
        perror("Pipe"); exit(1);
    }

    pid = fork();

    if (pid < 0) {
        perror("Fork"); exit(1);

    } else if(!pid) {
        fprintf(stderr, "Starting client sub-process\n");

        dup2(pipeout[0], 0);
        dup2(pipein[1], 1);
        dup2(1, 2);

        close(pipein[0]);
        close(pipein[1]);
        close(pipeout[0]);
        close(pipeout[1]);

        /* The child must not keep the client connection or the listener open. */
        close(clifd);
        close(sockfd);

        /* The argument vector handed to exec must end with a NULL pointer. */
        char *args[] = {"cat", "/opt/php/cli/php.ini", NULL};

        execvp(args[0], args);

        /* Only reached when exec failed; never fall through into the parent's code. */
        perror("Exec");
        _exit(127);
    }

    close(pipein[1]);
    close(pipeout[0]);

    /* A write to a closed peer must fail with EPIPE instead of killing the server. */
    signal(SIGPIPE, SIG_IGN);

    streams[0].input.fd = clifd;
    streams[0].input.events = EPOLLDIED|EPOLLIN;
    streams[0].input.epoll = 0;
    streams[0].output = streams[0].input;   /* the socket is read and written through one fd */
    streams[0].rc = 0;
    streams[0].wc = 0;
    streams[0].ofs = 0;

    streams[1].input.fd = pipein[0];        /* child's stdout/stderr */
    streams[1].output.fd = pipeout[1];      /* child's stdin */
    streams[1].input.events = EPOLLDIED|EPOLLIN;
    streams[1].output.events = EPOLLDIED;
    streams[1].input.epoll = 0;
    streams[1].output.epoll = 0;
    streams[1].rc = 0;
    streams[1].wc = 0;
    streams[1].ofs = 0;

    /*
     * Now let the stream handler deal with I/O
     */
    stream_handler(streams, process_validator, &pid);

    if (waitpid(pid, &pidres, WNOHANG) == 0) {
        fprintf(stderr, "Terminating child process %d\n", (int) pid);
        kill(pid, SIGKILL);
        waitpid(pid, &pidres, 0);   /* reap the child so no zombie is left behind */
    }

    close(streams[1].input.fd);
    close(streams[1].output.fd);

    close(clifd);
    close(sockfd);

    fprintf(stderr, "Connection has been closed\n");

    return 0;
}

Not sure if it's all correct or if I am missing something, but it is working as of now.

domsson
  • 4,553
  • 2
  • 22
  • 40
Crushnik
  • 1
  • 1
  • Q: It sounds like there might be a concurrency problem between the two processes (the fork/exec, close/dupe file handles)? Q: What is the OS? – FoggyDay Dec 13 '19 at 22:06
  • Mint 19.2 (Ubuntu 18.04) running kernel 5.3 – Crushnik Dec 13 '19 at 22:14
  • makes me think of https://stackoverflow.com/questions/27175281/epollrdhup-not-reliable – LoneWanderer Dec 13 '19 at 22:25
  • You are handling the imaginary case where `write()` returns zero,, but not the real case where `read()` returns zero. – user207421 Dec 13 '19 at 22:30
  • I don't handle zero in either read() nor write(). I check 'rc' count (Read Count) to see if anything is left for writing, if not I remove "EPOLLOUT" until next time a read() happen and there is actually something to write. Besides, my problem is not detecting connection loss or errors. It's that I get connection closed flags when in fact the fds are very much alive and has data to read. – Crushnik Dec 13 '19 at 22:36
  • You need to handle zero returned from read. No two ways about it. – user207421 Dec 13 '19 at 22:57
  • If for some reason you don't want to handle `read` correctly: when you get `EPOLLIN`, you read what's already in the kernel buffer, *then* if there's also a HUP you break the connection since you know nothing more will be added to the kernel buffer. – o11c Dec 13 '19 at 23:13
  • I tried adding log, and I never receive any 0 count read. And 'EPOLLHUP' is added early. I can do a lot of writes and reads until I reach the end. So it's not just one final read. – Crushnik Dec 14 '19 at 00:52
  • `if (streams[x].rc < 0) { perror("Read"); exit(1); }` Two words: `EAGAIN` , `EWOULDBLOCK` – wildplasser May 15 '20 at 12:50

0 Answers0