1

I'm trying to benchmark Xenomai, PREEMPT_RT and Linux as-is in a Raspberry Pi, and I'm having some trouble.

I'm trying to create a variable number of threads, with RT priority, and a variable sleep time. One of the threads has higher RT priority, and reads from a GPIO, and writes to another GPIO (to read the latency on an oscyloscope or something like this), and the other threads, with lower RT priority, only do arithmetic operations. The GPIOs are mapped on memory, so protocol overhead can be avoided.

I was thinking in this tests: 1 thread 1 us 50 thread 10 us 100 thread 100 us 150 thread 1 ms 200 thread 10 ms 250 thread 100 ms

The problem is that only some threads are created. And when the number of threads is high, there's a memory problem and the process halts. So, how should I proceed? Is the code wrong, is the time too short, or is the number of threads too high?

This is the code:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <pthread.h>

#define BCM2708_PERI_BASE        0x20000000
#define GPIO_BASE                (BCM2708_PERI_BASE + 0x200000) /* GPIO controller */
#define PAGE_SIZE (4*1024)
#define BLOCK_SIZE (4*1024)

void *gpio_map;
volatile unsigned *gpio;

#define INP_GPIO(g) *(gpio+((g)/10)) &= ~(7<<(((g)%10)*3))
#define OUT_GPIO(g) *(gpio+((g)/10)) |=  (1<<(((g)%10)*3))

#define GPIO_SET *(gpio+7)  // sets   bits which are 1 ignores bits which are 0
#define GPIO_CLR *(gpio+10) // clears bits which are 1 ignores bits which are 0
#define GPIO_READ(g) (*(gpio + 13)&(1<<(g)))>>(g)

void *default_thread(void *arg)
{
    struct timespec delay;
    int digits = 2000;
    int arr[digits + 1];
    int i, j, sum, carry;  
    unsigned long over;

    delay.tv_sec = 0;
    delay.tv_nsec = *(long *)arg;

    while(1)
    {
        carry = 0;
        for (i = digits; i > 0; i-= 14) {  
            sum = 0;  
            for (j = i; j > 0; --j) {  
                sum = sum * j + 10000;  
                sum %= (j * 2 - 1);
            }
            carry = sum % 10000;  
        }
        clock_nanosleep(CLOCK_REALTIME, 0, &delay, NULL);
    }
    return NULL;
}

void *productive_thread(void *arg)
{
    struct timespec delay, sleep;
    unsigned long over;

    delay.tv_sec = 0;
    delay.tv_nsec = 10000; // 10 usec

    sleep.tv_sec = 0;
    sleep.tv_nsec = *(long *)arg;

    while(1)
    {
        while(GPIO_READ(4) != 1);
        GPIO_SET = 1 << 17;
        clock_nanosleep(CLOCK_REALTIME, 0, &sleep, NULL);
        GPIO_CLR = 1 << 17;
        clock_nanosleep(CLOCK_REALTIME, 0, &sleep, NULL);
    }
    return NULL;
}

int main(int argc, char* argv[]) {
    int n,i,t,fd;
    pthread_t *threads;
    struct sched_param prio1, prio2;
    struct timespec time, interval;

    mlockall(MCL_CURRENT | MCL_FUTURE);

    if (argc != 5)
    {
        printf ("Usage: %s n t p1 p2\n", argv[0]);
        printf("n: number of threads\n");
        printf("t: time period in ns (less than 1s)\n");
        printf("p1: priority of the gpio task from %d to %d\n", sched_get_priority_min(SCHED_FIFO), sched_get_priority_max(SCHED_FIFO));
        printf("p2: priority of the sytem load tasks from %d to %d\n", sched_get_priority_min(SCHED_FIFO), sched_get_priority_max(SCHED_FIFO));
        exit(1);
    }

    // Use GPIO PB31 (A1) as input, and PB13 (A2) as output.
    fd = open("/dev/mem", O_RDWR|O_SYNC);
    if(fd < 0)
        exit(1);
    gpio_map = mmap(0, BLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, GPIO_BASE);

    if(gpio_map == MAP_FAILED)
        exit(1);

    close(fd);

    gpio = (volatile unsigned *)gpio_map;

    INP_GPIO(4);
    INP_GPIO(17);
    OUT_GPIO(17);

    n=atoi(argv[1]);
    t = atoi(argv[2]);

    // Set priorites
    prio1.sched_priority = atoi(argv[3]);
    prio2.sched_priority = atoi(argv[4]);

    // Initialize thread vector
    threads=(pthread_t *)malloc(n*sizeof(pthread_t));

    // Start threads
    pthread_create(&threads[0], NULL, productive_thread, &t);
    pthread_setschedparam(threads[0], SCHED_FIFO, &prio1);

    for (i=1; i<n; i++)
    {
        pthread_create(&threads[i], NULL, default_thread, &t);
        //pthread_setschedparam(threads[i], SCHED_FIFO, &prio2);
    }

    // Wait for ending
    for (i=0; i<n; i++)
    {
        pthread_join(threads[i],NULL);
    }

    munmap(gpio_map, BLOCK_SIZE);

    return 1;
}
Claudio
  • 10,614
  • 4
  • 31
  • 71
markmb
  • 852
  • 4
  • 12
  • 32
  • How much stack does each thread have allocated (by default `ulimit -s`)? – ninjalj Nov 13 '13 at 12:53
  • I didn't know that command. I understand everything now... Each thread allocates 8192 KBytes (I suppose the unit is KBytes), so this is 8 MB. The platform I'm using has 512 MB of RAM, which means 64 threads (without everything else). Should I reduce that value to the minimum? Or should I change the measures in the benchmark? – markmb Nov 13 '13 at 19:41
  • You can use `pthread_attr_setstacksize()` when creating a thread to use a non-default stack size. You should choose a value that you know is plenty enough for your application. – ninjalj Nov 14 '13 at 09:28

0 Answers0