I'm trying to benchmark Xenomai, PREEMPT_RT and Linux as-is in a Raspberry Pi, and I'm having some trouble.
I'm trying to create a variable number of threads, with RT priority, and a variable sleep time. One of the threads has higher RT priority, and reads from a GPIO, and writes to another GPIO (to read the latency on an oscyloscope or something like this), and the other threads, with lower RT priority, only do arithmetic operations. The GPIOs are mapped on memory, so protocol overhead can be avoided.
I was thinking in this tests: 1 thread 1 us 50 thread 10 us 100 thread 100 us 150 thread 1 ms 200 thread 10 ms 250 thread 100 ms
The problem is that only some threads are created. And when the number of threads is high, there's a memory problem and the process halts. So, how should I proceed? Is the code wrong, is the time too short, or is the number of threads too high?
This is the code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <pthread.h>
#define BCM2708_PERI_BASE 0x20000000
#define GPIO_BASE (BCM2708_PERI_BASE + 0x200000) /* GPIO controller */
#define PAGE_SIZE (4*1024)
#define BLOCK_SIZE (4*1024)
void *gpio_map;
volatile unsigned *gpio;
#define INP_GPIO(g) *(gpio+((g)/10)) &= ~(7<<(((g)%10)*3))
#define OUT_GPIO(g) *(gpio+((g)/10)) |= (1<<(((g)%10)*3))
#define GPIO_SET *(gpio+7) // sets bits which are 1 ignores bits which are 0
#define GPIO_CLR *(gpio+10) // clears bits which are 1 ignores bits which are 0
#define GPIO_READ(g) (*(gpio + 13)&(1<<(g)))>>(g)
void *default_thread(void *arg)
{
struct timespec delay;
int digits = 2000;
int arr[digits + 1];
int i, j, sum, carry;
unsigned long over;
delay.tv_sec = 0;
delay.tv_nsec = *(long *)arg;
while(1)
{
carry = 0;
for (i = digits; i > 0; i-= 14) {
sum = 0;
for (j = i; j > 0; --j) {
sum = sum * j + 10000;
sum %= (j * 2 - 1);
}
carry = sum % 10000;
}
clock_nanosleep(CLOCK_REALTIME, 0, &delay, NULL);
}
return NULL;
}
void *productive_thread(void *arg)
{
struct timespec delay, sleep;
unsigned long over;
delay.tv_sec = 0;
delay.tv_nsec = 10000; // 10 usec
sleep.tv_sec = 0;
sleep.tv_nsec = *(long *)arg;
while(1)
{
while(GPIO_READ(4) != 1);
GPIO_SET = 1 << 17;
clock_nanosleep(CLOCK_REALTIME, 0, &sleep, NULL);
GPIO_CLR = 1 << 17;
clock_nanosleep(CLOCK_REALTIME, 0, &sleep, NULL);
}
return NULL;
}
int main(int argc, char* argv[]) {
int n,i,t,fd;
pthread_t *threads;
struct sched_param prio1, prio2;
struct timespec time, interval;
mlockall(MCL_CURRENT | MCL_FUTURE);
if (argc != 5)
{
printf ("Usage: %s n t p1 p2\n", argv[0]);
printf("n: number of threads\n");
printf("t: time period in ns (less than 1s)\n");
printf("p1: priority of the gpio task from %d to %d\n", sched_get_priority_min(SCHED_FIFO), sched_get_priority_max(SCHED_FIFO));
printf("p2: priority of the sytem load tasks from %d to %d\n", sched_get_priority_min(SCHED_FIFO), sched_get_priority_max(SCHED_FIFO));
exit(1);
}
// Use GPIO PB31 (A1) as input, and PB13 (A2) as output.
fd = open("/dev/mem", O_RDWR|O_SYNC);
if(fd < 0)
exit(1);
gpio_map = mmap(0, BLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, GPIO_BASE);
if(gpio_map == MAP_FAILED)
exit(1);
close(fd);
gpio = (volatile unsigned *)gpio_map;
INP_GPIO(4);
INP_GPIO(17);
OUT_GPIO(17);
n=atoi(argv[1]);
t = atoi(argv[2]);
// Set priorites
prio1.sched_priority = atoi(argv[3]);
prio2.sched_priority = atoi(argv[4]);
// Initialize thread vector
threads=(pthread_t *)malloc(n*sizeof(pthread_t));
// Start threads
pthread_create(&threads[0], NULL, productive_thread, &t);
pthread_setschedparam(threads[0], SCHED_FIFO, &prio1);
for (i=1; i<n; i++)
{
pthread_create(&threads[i], NULL, default_thread, &t);
//pthread_setschedparam(threads[i], SCHED_FIFO, &prio2);
}
// Wait for ending
for (i=0; i<n; i++)
{
pthread_join(threads[i],NULL);
}
munmap(gpio_map, BLOCK_SIZE);
return 1;
}