I have a piece of code that I use to test various containers (e.g. deque and a circular buffer) when passing data from a producer (thread 1) to a consumer (thread 2). A data is represented by a struct with a pair of timestamps. First timestamp is taken before push in the producer, and the second one is taken when data is popped by the consumer. The container is protected with a pthread spinlock.
The machine runs redhat 5.5 with 2.6.18 kernel (old!), it is a 4-core system with hyperthreading disabled. gcc 4.7 with -std=c++11 flag was used in all tests.
Producer acquires the lock, timestamps the data and pushes it into the queue, unlocks and sleeps in a busy loop for 2 microseconds (the only reliable way I found to sleep for precisely 2 micros on that system).
Consumer locks, pops the data, timestamps it and generates some statistics (running mean delay and standard deviation). The stats is printed every 5 seconds (M is the mean, M2 is the std dev) and reset. I used gettimeofday() to obtain the timestamps, which means that the mean delay number can be thought of as the percentage of delays that exceed 1 microsecond.
Most of the time the output looks like this:
CNT=2500000 M=0.00935 M2=0.910238
CNT=2500000 M=0.0204112 M2=1.57601
CNT=2500000 M=0.0045016 M2=0.372065
but sometimes (probably 1 trial out of 20) like this:
CNT=2500000 M=0.523413 M2=4.83898
CNT=2500000 M=0.558525 M2=4.98872
CNT=2500000 M=0.581157 M2=5.05889
(note the mean number is much worse than in the first case, and it never recovers as the program runs).
I would appreciate thoughts on why this could happen. Thanks.
#include <iostream>
#include <string.h>
#include <stdexcept>
#include <sys/time.h>
#include <deque>
#include <thread>
#include <cstdint>
#include <cmath>
#include <unistd.h>
#include <xmmintrin.h> // _mm_pause()
int64_t timestamp() {
struct timeval tv;
gettimeofday(&tv, 0);
return 1000000L * tv.tv_sec + tv.tv_usec;
}
//running mean and a second moment
struct StatsM2 {
StatsM2() {}
double m = 0;
double m2 = 0;
long count = 0;
inline void update(long x, long c) {
count = c;
double delta = x - m;
m += delta / count;
m2 += delta * (x - m);
}
inline void reset() {
m = m2 = 0;
count = 0;
}
inline double getM2() { // running second moment
return (count > 1) ? m2 / (count - 1) : 0.;
}
inline double getDeviation() {
return std::sqrt(getM2() );
}
inline double getM() { // running mean
return m;
}
};
// pause for usec microseconds using busy loop
int64_t busyloop_microsec_sleep(unsigned long usec) {
int64_t t, tend;
tend = t = timestamp();
tend += usec;
while (t < tend) {
t = timestamp();
}
return t;
}
struct Data {
Data() : time_produced(timestamp() ) {}
int64_t time_produced;
int64_t time_consumed;
};
int64_t sleep_interval = 2;
StatsM2 statsm2;
std::deque<Data> queue;
bool producer_running = true;
bool consumer_running = true;
pthread_spinlock_t spin;
void producer() {
producer_running = true;
while(producer_running) {
pthread_spin_lock(&spin);
queue.push_back(Data() );
pthread_spin_unlock(&spin);
busyloop_microsec_sleep(sleep_interval);
}
}
void consumer() {
int64_t count = 0;
int64_t print_at = 1000000/sleep_interval * 5;
Data data;
consumer_running = true;
while (consumer_running) {
pthread_spin_lock(&spin);
if (queue.empty() ) {
pthread_spin_unlock(&spin);
// _mm_pause();
continue;
}
data = queue.front();
queue.pop_front();
pthread_spin_unlock(&spin);
++count;
data.time_consumed = timestamp();
statsm2.update(data.time_consumed - data.time_produced, count);
if (count >= print_at) {
std::cerr << "CNT=" << count << " M=" << statsm2.getM() << " M2=" << statsm2.getDeviation() << "\n";
statsm2.reset();
count = 0;
}
}
}
int main(void) {
if (pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE) < 0)
exit(2);
std::thread consumer_thread(consumer);
std::thread producer_thread(producer);
sleep(40);
consumer_running = false;
producer_running = false;
consumer_thread.join();
producer_thread.join();
return 0;
}