I want to parallelize a for loop that includes some operations from PARI/GP library, along with some other operations. However, using OpenMP with PARI/GP does not seem very straightforward it appears. The only similar example I found is this, which in fact seems to use sections
from OpenMP. I tried to do something using parallel for
clause as you can see below, however, it throws segmentation fault on PARI/GP.
#include <omp.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include "pari/pari.h"
#define MAX_THREAD 8
#define MAX_COUNT 8
#define CLOCK_PRECISION 1E9
typedef struct vector {
void **items;
unsigned int capacity;
unsigned int size;
} vector_st;
typedef vector_st *vector_t;
vector_t vector_init(unsigned int capacity) {
if (capacity <= 0) {
fprintf(stderr, "Error: invalid vector capacity.\n");
exit(1);
}
vector_t v = (vector_t) calloc(1, sizeof(vector_st));
v->capacity = capacity;
v->size = 0;
v->items = malloc(sizeof(void *) * v->capacity);
if (v->items == NULL) {
fprintf(stderr, "Error: could not allocate memory.\n");
exit(1);
}
return v;
}
unsigned int vector_size(vector_t v) {
return v->size;
}
void *vector_get(vector_t v, unsigned int index) {
if (index >= v->size) {
fprintf(stderr, "Error: index larger than the vector size.\n");
exit(1);
}
return v->items[index];
}
void vector_resize(vector_t v, unsigned int capacity) {
#ifdef DEBUG
printf("Vector resize, from %u to %u.\n", v->capacity, capacity);
#endif
void **items = realloc(v->items, sizeof(void *) * capacity);
if (items) {
v->items = items;
v->capacity = capacity;
} else {
fprintf(stderr, "Error: could not re-allocate memory.\n");
exit(1);
}
}
void vector_add(vector_t v, void *item) {
if (v->capacity == v->size) {
vector_resize(v, v->capacity * 2);
}
v->items[v->size++] = item;
}
void vector_copy(vector_t c, vector_t a) {
unsigned int i;
for (i = 0; i < vector_size(a); i++) {
vector_add(c, vector_get(a, i));
}
}
void vector_free(vector_t v) {
if (v) {
if (v->items) {
free(v->items);
}
v->capacity = 0;
v->size = 0;
free(v);
v = NULL;
}
}
typedef struct {
GEN sk;
GEN pk;
} cl_key_pair_st;
typedef cl_key_pair_st *cl_key_pair_t;
#define cl_key_pair_null(key_pair) key_pair = NULL;
#define cl_key_pair_new(key_pair) \
do { \
key_pair = malloc(sizeof(cl_key_pair_st)); \
if (key_pair == NULL) { \
exit(1); \
} \
} while (0)
#define cl_key_pair_free(key_pair) \
do { \
free(key_pair); \
key_pair = NULL; \
} while (0)
uint64_t mtimer(void) {
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
return (uint64_t) (time.tv_sec * CLOCK_PRECISION + time.tv_nsec);
}
void test(vector_t keys, const unsigned count) {
GEN bound = strtoi("25413151665722220203610173826311975594790577398151861612310606875883990655261658217495681782816066858410439979225400605895077952191850577877370585295070770312182177789916520342292660169492395314400288273917787194656036294620169343699612953311314935485124063580486497538161801803224580096");
GEN g_q_a = strtoi("4008431686288539256019978212352910132512184203702279780629385896624473051840259706993970111658701503889384191610389161437594619493081376284617693948914940268917628321033421857293703008209538182518138447355678944124861126384966287069011522892641935034510731734298233539616955610665280660839844718152071538201031396242932605390717004106131705164194877377");
GEN g_q_b = negi(strtoi("3117991088204303366418764671444893060060110057237597977724832444027781815030207752301780903747954421114626007829980376204206959818582486516608623149988315386149565855935873517607629155593328578131723080853521348613293428202727746191856239174267496577422490575311784334114151776741040697808029563449966072264511544769861326483835581088191752567148165409"));
GEN g_q_c = strtoi("7226982982667784284607340011220616424554394853592495056851825214613723615410492468400146084481943091452495677425649405002137153382700126963171182913281089395393193450415031434185562111748472716618186256410737780813669746598943110785615647848722934493732187571819575328802273312361412673162473673367423560300753412593868713829574117975260110889575205719");
GEN g_q = qfi(g_q_a, g_q_b, g_q_c);
size_t numthread = MAX_THREAD;
if (numthread > count) {
numthread = count;
omp_set_num_threads(numthread);
}
struct pari_thread pth[numthread];
for (size_t i = 0; i < numthread; i++) {
pari_thread_alloc(&pth[i], 100000000, NULL);
}
#pragma omp parallel
{
int thnum = omp_get_thread_num();
if (thnum) {
(void) pari_thread_start(&pth[thnum]);
}
vector_t keys_private = vector_init(count / numthread);
#pragma omp for schedule(static)
for (size_t i = 0; i < count; i++) {
cl_key_pair_t key_pair_i;
cl_key_pair_null(key_pair_i);
cl_key_pair_new(key_pair_i);
key_pair_i->sk = randomi(bound);
key_pair_i->pk = nupow(g_q, key_pair_i->sk, NULL);
vector_add(keys_private, key_pair_i);
}
#pragma omp for ordered
for (size_t i = 0; i < numthread; i++) {
#pragma omp ordered
{
vector_copy(keys, keys_private);
}
}
if (thnum) {
pari_thread_close();
}
}
for (size_t i = 0; i < numthread; i++) {
if (&pth[i] != NULL) pari_thread_free(&pth[i]);
}
}
int main(void) {
pari_init(1000000000, 2);
setrand(getwalltime());
vector_t keys = vector_init(MAX_COUNT);
uint64_t start_time = mtimer();
test(keys, MAX_COUNT);
uint64_t stop_time = mtimer();
double total_time = ((stop_time - start_time) / CLOCK_PRECISION);
printf("Elapsed time: %.5lf s\n", total_time);
printf("\nvector_size(keys): %u\n", vector_size(keys));
for (size_t i = 0; i < vector_size(keys); i++) {
cl_key_pair_t key_pair_i = (cl_key_pair_t) vector_get(keys, i);
printf("%zu->sk: %s\n", i, GENtostr(key_pair_i->sk));
printf("%zu->pk: %s\n", i, GENtostr(key_pair_i->pk));
}
vector_free(keys);
pari_close();
return 0;
}
Any ideas how to use GEN
inside struct
and with OpenMP
?