1

I implemented semaphore using futex. The following program often fails at the assertion in sem_post(). While the return value is supposed to be 1, it sometimes returns 0. How can this happen?

When I use POSIX semaphore the program always finishes successfully.

I'm using Linux 2.6.32-642.6.1.el6.x86_64

#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <ctime>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <errno.h>

using namespace std;

#if 0
 #include <semaphore.h>
#else
typedef volatile int sem_t;

void sem_init(sem_t* sem, int shared, int value)
{
    *sem = value;
}

void sem_post(sem_t* sem)
{
    while (1)
    {
        int value = *sem;
        if (__sync_bool_compare_and_swap(sem, value, value >= 0 ? value+1 : 1))
        {
            if (value < 0)      // had contender
            {
                int r = syscall(SYS_futex, sem, FUTEX_WAKE, 1, NULL, 0, 0);
                if (r != 1)
                    fprintf(stderr, "post r=%d err=%d sem=%d %d\n", r,errno,value,*sem);
                assert(r == 1);
            }
            return;
        }
    }
}

int sem_wait(sem_t* sem)
{
    while (1)
    {
        int value = *sem;
        if (value > 0   // positive means no contender
            && __sync_bool_compare_and_swap(sem, value, value-1))
            return 0;
        if (value <= 0
            && __sync_bool_compare_and_swap(sem, value, -1))
        {
            int r= syscall(SYS_futex, sem, FUTEX_WAIT, -1, NULL, 0, 0);
            if (!r) {
                assert(__sync_fetch_and_sub(sem, 1) > 0);
                return 0;
            }
            printf("wait r=%d errno=%d sem=%d %d\n", r,errno, value,*sem);
        }
    }
}

void sem_getvalue(sem_t* sem, int* value)
{
    *value = *sem;
}

#endif

// return current time in ns
unsigned long GetTime()
{
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return ts.tv_sec*1000000000ul + ts.tv_nsec;
}

void Send(sem_t* sem, unsigned count)
{
    while (count--)
        sem_post(sem);
}

void Receive(sem_t* sem, unsigned count)
{
    while (count--)
        sem_wait(sem);
}


int main()
{
    sem_t* sem = reinterpret_cast<sem_t*>(mmap(NULL, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0));
    assert(sem != MAP_FAILED);
    sem_init(sem, 1, 0);
    unsigned count = 10485760;

    int pid = fork();
    assert(pid != -1);
    if (!pid)   // child
    {
        Send(sem, count);
        _exit(EXIT_SUCCESS);
    }
    else    // parent
    {
        unsigned long t0 = GetTime();
        Receive(sem, count);
        printf("t=%g ms\n", (GetTime()-t0)*1e-6);
        wait(NULL);
        int v;
        sem_getvalue(sem, &v);
        assert(v == 0);
    }
}
zhao
  • 232
  • 3
  • 15

2 Answers2

1

The call to syscall(SYS_futex, sem, FUTEX_WAKE, 1, NULL, 0, 0) will return 0 when there is no thread waiting on sem. In your code this is possible because you call that futex line in sem_post when *sem is negative which can be the case without that any thread is actually sleeping:

If *sem is zero when calling sem_wait you continue to execute __sync_bool_compare_and_swap(sem, value, -1) which sets *sem to -1. At that point this thread is not yet sleeping however. So, when another thread calls sem_post at that point (before the thread that is calling sem_wait enters the futex syscall) your assert failure will happen.

Carlo Wood
  • 5,648
  • 2
  • 35
  • 47
0

it seems that __sync_bool_compare_and_swap(sem, value, -1) and __sync_fetch_and_sub(sem, 1) are problematic. We need to keep in mind that sem_wait may be called concurrently by multiple threads (although in your test case there is only one thread calling it).

If we can afford the overhead of busy polling, we can remove the futex and result in the following code. It is also faster than the futex version (t=347 ms, while the futex version is t=914 ms).

void sem_post(sem_t* sem)
{
    int value = __sync_fetch_and_add(sem, 1);
}

int sem_wait(sem_t* sem)
{
    while (1)
    {
        int value = *sem;
        if (value > 0) // positive means no contention
        {
            if (__sync_bool_compare_and_swap(sem, value, value-1)) {
                return 0; // success
            }
        }
        // yield the processor to avoid deadlock
        sched_yield();
    }
}

The code works as follows: The shared variable *sem is always non-negative. When a thread posts the semaphore from 0 to 1, all threads waiting on the semaphore may try, but exactly one thread will succeed in compare_and_swap.

Bojie Li
  • 1
  • 1
  • Your code works but it didn't answer my original question. I know sched_yield() is faster, but in situations where cpu usage is more important (not enough cores and less demanding on latency) futex is preferred. In my code, __sync_bool_compare_and_swap(sem, value, -1) and __sync_fetch_and_sub(sem,1) are in one thread, so how could the assert() fail? – zhao Apr 16 '18 at 09:00