POSIX semaphore doesn't work under high contention/load

Question

Using C++11 on Linux kernel 4.4.0-57, I'm trying to run two busy-looping processes (say p1, p2) pinned (pthread_setaffinity_np) on the same core and making sure the interleaving execution order by using POSIX semaphore (semaphore.h) and sched_yield(). But it did not work out well.

Below is the parent code (parent-task) that spawns 2 processes and each executes child-task code.

#include <stdio.h>                                                         
#include <cstdlib>                                                         
#include <errno.h>      // errno                                           
#include <iostream>     // cout cerr                                       
#include <semaphore.h>  // semaphore                                       
#include <fcntl.h>      // O_CREAT                                         
#include <unistd.h>     // fork                                            
#include <string.h>     // cpp string                                      
#include <sys/types.h>  //                                                 
#include <sys/wait.h>   // wait()                                          

int init_semaphore(){                                           
   std::string sname = "/SEM_CORE";                                        
   sem_t* sem = sem_open ( sname.c_str(), O_CREAT, 0644, 1 );              
   if ( sem == SEM_FAILED ) {                                              
      std::cerr << "sem_open failed!\n";                                   
      return -1;                                                           
   }                                                                       

   sem_init( sem, 0, 1 );                                                  
   return 0;                                                               
}                                                                          

// Fork and exec child-task.                                                  
// Return pid of child                                                     
int fork_and_exec( std::string pname, char* cpuid ){                       
   int pid = fork();                                                       
   if ( pid == 0) {                                                        
      // Child                                                             
      char* const params[] = { "./child-task", "99", strdup( pname.c_str() ), cpuid, NULL };                              
      execv( params[0], params );                                          
      exit(0);                                                             
   }                                                                       
   else {                                                                  
      // Parent                                                            
      return pid;                                                          
   }                                                                       
}                                                                          

int main( int argc, char* argv[] ) {                                       
   if ( argc <= 1 )                                                        
      printf( "Usage ./parent-task <cpuid> \n" );                        

   char* cpuid = argv[1];                                                  
   std::string pnames[2] = { "p111", "p222" };                             

   init_semaphore();                                                       

   int childid[ 2 ] = { 0 };                                               
   int i = 0;                                                              
   for( std::string pname : pnames ){                                      
      childid[ i ] = fork_and_exec( pname, cpuid ); 
   }                                                                       

   for ( i=0; i<2; i++ )                                                   
      if ( waitpid( childid[i], NULL, 0 ) < 0 )                            
         perror( "waitpid() failed.\n" );                                  

   return 0;                                                               
}

The child-task code looks like this:

#include <cstdlib>                                                              
#include <stdio.h>                                                              
#include <sched.h>                                                              
#include <pthread.h>                                                            
#include <stdint.h>                                                             
#include <errno.h>                                                         
#include <semaphore.h>                                                          
#include <iostream>                                                             
#include <sys/types.h>                                                          
#include <fcntl.h>      // O_CREAT                                              

sem_t* sm;                                                                      

int set_cpu_affinity( int cpuid ) {                                             
   pthread_t current_thread = pthread_self();                                   
   cpu_set_t cpuset;                                                            
   CPU_ZERO( &cpuset );                                                         
   CPU_SET( cpuid, &cpuset );                                                   
   return pthread_setaffinity_np( current_thread,                               
                                  sizeof( cpu_set_t ), &cpuset );               
}                                                                               

int lookup_semaphore() {                                                        
   sm = sem_open( "/SEM_CORE", O_RDWR );                                        
   if ( sm == SEM_FAILED ) {                                                    
      std::cerr << "sem_open failed!" << std::endl ;                            
      return -1;                                                                
   }                                                                            
}                                                                               

int main( int argc, char* argv[] ) {                                            
   printf( "Usage: ./child-task <PRIORITY> <PROCESS-NAME> <CPUID>\n" );            
   printf( "Setting SCHED_RR and priority to %d\n", atoi( argv[1] ) );          

   set_cpu_affinity( atoi( argv[3] ) );                                         

   lookup_semaphore();                                                          

   int res;                                                                     
   uint32_t n = 0;                                                              
   while ( 1 ) {                                                                
      n += 1;                                                                   
      if ( !( n % 1000 ) ) {                                                                                                                         
         res = sem_wait( sm );  

         if( res != 0 ) {                                                       
            printf(" sem_wait %s. errno: %d\n", argv[2], errno);                
         }                                                                   
         printf( "Inst:%s RR Prio %s running (n=%u)\n", argv[2], argv[1], n );  
         fflush( stdout );                                                      

         sem_post( sm );                                                        

         sched_yield();                                                         
      }                                                                         

      sched_yield();                                                            
   }                                                                            

   sem_close( sm );                                                             
}

In the child-task code, I have if ( !( n % 1000 ) ) to experiment reducing the contention/load in waiting and posting the semaphore. The outcome I got is that when n % 1000, one of the child process will be always in Sleep state (from top) and the other child process executes properly. However, if I set n % 10000, i.e. less load/contention, both processes will run and printout the output interleavingly which is my expected outcome.

Does anyone know if this is the limitaion of semaphore.h or there's a better way to ensure processes execution order?

Do you mean to set scheduling policy and priority, as the child's comment in `main` suggests? Where do you mean to set those? — pilcrow, Dec 11 '17 at 23:03

Erik Alapää · Answer 1 · 2017-12-12T15:13:23.847

Updated: I did a simple example with threads and semaphore, note that sched_yield may help avoiding unnecessary wakeups of the thread that is not 'in turn' to do work, but yielding is not a guarantee. I also show an example with mutex/condvar that is guaranteed to work, no yield necessary.

#include <stdexcept>
#include <semaphore.h>
#include <pthread.h>
#include <thread>
#include <iostream>

using std::thread;
using std::cout;

sem_t sem;
int count = 0;

const int NR_WORK_ITEMS = 10;

void do_work(int worker_id)
{
    cout << "Worker " << worker_id << '\n';
}

void foo(int work_on_odd)
{
    int result;
    int contention_count = 0;
    while (count < NR_WORK_ITEMS)
    {
        result = sem_wait(&sem);
        if (result) {
            throw std::runtime_error("sem_wait failed!");
        }
        if (count % 2 == work_on_odd)
        {
            do_work(work_on_odd);
            count++;
        }
        else
        {
            contention_count++;
        }
        result = sem_post(&sem);
        if (result) {
            throw std::runtime_error("sem_post failed!");
        }
        result = sched_yield();
        if (result < 0) {
            throw std::runtime_error("yield failed!");
        }
    }
    cout << "Worker " << work_on_odd << " terminating. Nr of redundant wakeups from sem_wait: " <<
        contention_count << '\n';
}

int main()
{
    int result = sem_init(&sem, 0, 1);

    if (result) {
        throw std::runtime_error("sem_init failed!");
    }

    thread t0 = thread(foo, 0);
    thread t1 = thread(foo, 1);

    t0.join();
    t1.join();

    return 0;
}

Here is one way to do it with condition variables and mutexes. Translating from C++ std threads to pthreads should be trivial. To do it between processes, you would have to use a pthread mutex type that can be shared between processes. Maybe the condvar and the mutex can both be placed in shared memory, to achieve the same thing I do below with threads.

See also the manpage pthread_condattr_setpshared (3) or http://manpages.ubuntu.com/manpages/wily/man3/pthread_condattr_setpshared.3posix.html

On the other hand, maybe it is simpler to just use a SOCK_STREAM unix domain socket between the two worker processes, and just block on the socket until the peer worker pings you (i.e. send one char) over the socket.

#include <cassert>
#include <iostream>
#include <thread>
#include <condition_variable>
#include <unistd.h>

using std::thread;
using std::condition_variable;
using std::mutex;
using std::unique_lock;
using std::cout;

condition_variable cv;
mutex mtx;
int count;

void dowork(int arg)
{
    std::thread::id this_id = std::this_thread::get_id();

    cout << "Arg: " << arg << ", thread id: " << this_id << '\n';
}

void tfunc(int work_on_odd)
{
    assert(work_on_odd < 2);

    auto check_can_work = [&count, &work_on_odd](){ return ((count % 2) ==
                                                            work_on_odd); };
    while (count < 10)
    {
        unique_lock<mutex> lk(mtx);
        cv.wait (lk, check_can_work);
        dowork(work_on_odd);
        count++;
        cv.notify_one();
        // Lock is unlocked automatically here, but with threads and condvars,                                                                                                                                                                                
        // it is actually better to unlock manually before notify_one.                                                                                                                                                                                        
    }
}

int main()
{
    count = 0;
    thread t0 = thread(tfunc, 0);
    thread t1 = thread(tfunc, 1);
    sleep(1);
    cv.notify_one();

    t0.join();
    t1.join();
}

Thanks Erik for your suggestion, but it isn't really solving my problem. — anthonyaje, Jan 03 '18 at 15:30

POSIX semaphore doesn't work under high contention/load

1 Answers1