I'm confused about the performance of my C
program that implements a Segmented Sieve. I originally used only int
as the datatype but to support bigger numbers I decided to switch to unsigned long long
. I expected some performance penalty due to overhead, but when I try the segmented sieve with upper limit 100 billion, the int
approach takes 23 seconds whereas the unsigned long long
doesn't even finish (or at least takes too long for me to wait)
here's the segmented sieve with just int
datatype, with N (upper bound) preset to 100B-
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>
int size = 5;
int current_slot = 0;
int* primes_arr;
void append(int data)
{
if ((current_slot + 1) < size)
{
primes_arr[current_slot++] = data;
return;
}
int* newarr = realloc(primes_arr, (size += 5) * sizeof(int));
if (newarr != NULL)
{
newarr[current_slot++] = data;
primes_arr = newarr;
}
else
{
printf("\nAn error occured while re-allocating memory\n");
exit(1);
}
}
// The following is just a standard approach to segmented sieve, nothing interesting
void simpleSieve(int limit)
{
int p;
if (primes_arr == NULL)
{
printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
exit(1);
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark != NULL)
{
memset(mark, true, sizeof(bool) * (limit + 1));
}
else
{
printf("\nAn error occured while allocating memory for mark in simpleSieve\n");
exit(1);
}
for (p = 2; p * p < limit; p++)
{
if (mark[p])
{
for (int i = p * 2; i < limit; i += p)
{
mark[i] = false;
}
}
}
for (p = 2; p < limit; p++)
{
if (mark[p])
{
append(p);
// printf("%d ", p);
}
}
}
void segmentedSieve(int n)
{
int limit = (int)floor(sqrt(n)) + 1;
simpleSieve(limit);
int low = limit;
int high = 2 * limit;
while (low < n)
{
if (high >= n)
{
high = n;
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark != NULL)
{
memset(mark, true, sizeof(bool) * (limit + 1));
}
else
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
for (int i = 0; i < current_slot; i++)
{
int lower_lim = (int)floor(low / primes_arr[i]) * primes_arr[i];
if (lower_lim < low)
{
lower_lim += primes_arr[i];
}
for (int j = lower_lim; j < high; j += primes_arr[i])
{
mark[j - low] = false;
}
}
for (int i = low; i < high; i++)
{
if (mark[i - low] == true)
{
// printf("%d ", i);
}
}
low = low + limit;
high = high + limit;
free(mark);
}
}
int main()
{
primes_arr = malloc(size * sizeof(int));
clock_t t0 = clock();
segmentedSieve(100000000000);
clock_t t1 = clock();
double time_taken = (double) (t1 - t0) / CLOCKS_PER_SEC;
printf("\nDone! Time taken: %f\n", time_taken);
return 0;
}
and here's the segmented sieve with just unsigned long long
datatype, with N (upper bound) preset to 100B-
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>
unsigned long size = 5, current_slot = 0;
unsigned long long* primes_arr;
void append(unsigned long long data)
{
if ((current_slot + 1) < size)
{
primes_arr[current_slot++] = data;
return;
}
unsigned long long* newarr = realloc(primes_arr, (size += 5l) * sizeof(unsigned long long));
if (newarr != NULL)
{
newarr[current_slot++] = data;
primes_arr = newarr;
}
else
{
printf("\nAn error occured while re-allocating memory\n");
exit(1);
}
}
void simpleSieve(unsigned long limit)
{
unsigned long long p;
if (primes_arr == NULL)
{
printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
exit(1);
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark == NULL)
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
memset(mark, true, sizeof(bool) * (limit + 1));
for (p = 2; p * p < limit; p++)
{
if (mark[p])
{
for (unsigned long long i = p * 2; i < limit; i += p)
{
mark[i] = false;
}
}
}
for (p = 2; p < limit; p++)
{
if (mark[p])
{
append(p);
//printf("%llu ", p);
}
}
}
void segmentedSieve(unsigned long long n)
{
unsigned long limit = (unsigned long)floor(sqrt(n)) + 1l;
simpleSieve(limit);
unsigned long low = limit;
unsigned long high = 2 * limit;
while (low < n)
{
if (high >= n)
{
high = n;
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark == NULL)
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
memset(mark, true, sizeof(bool) * (limit + 1));
for (unsigned long long i = 0; i < current_slot; i++)
{
unsigned long long lower_lim = (unsigned long)floor(low / primes_arr[i]) * primes_arr[i];
if (lower_lim < low)
{
lower_lim += primes_arr[i];
}
for (unsigned long long j = lower_lim; j < high; j += primes_arr[i])
{
mark[j - low] = false;
}
}
for (unsigned long long i = low; i < high; i++)
{
if (mark[i - low])
{
//printf("%llu ", i);
}
}
low = low + limit;
high = high + limit;
free(mark);
}
}
int main()
{
primes_arr = malloc(size * sizeof(unsigned long long));
clock_t t0 = clock();
segmentedSieve(100000000000);
clock_t t1 = clock();
double time_taken = (double)(t1 - t0) / CLOCKS_PER_SEC;
printf("\nDone! Time taken: %f\n", time_taken);
return 0;
}
I fail to see why this is happening, am I doing something wrong?
Edit: I also realize that int
shouldn't be capable of handling 100 billion anyway and yet the program executes with no errors and even prints the final time report. Meanwhile the unsigned long long
program doesn't even finish in double the time it takes for the int
one.
On the other hand, trying to set the upper bound to 1B on both, actually return pretty similar results. Why?