I've been trying to make an efficient prime generator that successfully utilises wheel factorisation to be faster than a basic sieve of Eratosthenes implementation but my code ends up being a lot slower due to the overhead cost. I want to know why my implementation ends up being so slow, if there is a way to improve it, and how other prime generators end up being much faster.
Here is my code, which is quite long due to having to hard-code the modulo 30 wheel
import numpy as np
def wheel_factorisation(number):
wheel = [1, 7, 11, 13, 17, 19, 23, 29]
difference = [2, 6, 4, 2, 4, 2, 4, 6]
squareroot = math.isqrt((number // 30) * 8)
isprime = np.ones(squareroot,dtype=bool)
isprime[0] = False
multiples = []
primes = [2,3,5]
factor = 7
index = 1
DIVindex = 0
while factor * factor <= number:
if isprime[DIVindex * 8 + index]:
primes.append(factor)
if index == 0: #1%30
multiples.append([8*DIVindex*(factor+1), 0, 0, DIVindex])
#offset = [0, 1, 2, 3, 4, 5, 6, 7]
offset = np.ones(8,dtype = int)
#DIVoffset = [1, 0, 0, 0, 0, 0, 0, 0]
if index == 1: # 7%30
multiples.append([8*DIVindex * (factor + 7) + 13, 1, 1, DIVindex])
#offset = [1, 5, 4, 0, 7, 3, 2, 6]
offset = np.array([3,12,7,4,7,4,7,12])
#DIVoffset = [1, 1, 1, 1, 0, 1, 1, 1]
if index == 2: #11%30
multiples.append(
[8*DIVindex * (factor + 11) + 32, 2, 2, DIVindex])
#offset = [2, 4, 0, 6, 1, 7, 3, 5]
offset = np.array([5,18,12,6,11,6,12,18])
#DIVoffset = [1, 2, 2, 0, 2, 0, 2, 2]
if index == 3: #13%30
multiples.append(
[8*DIVindex * (factor + 13) + 45, 3, 3, DIVindex])
#offset = [3, 0, 6, 5, 2, 1, 7, 4]
offset = np.array([7,21,14,7,13,7,14,21])
#DIVoffset = [1, 3, 1, 1, 2, 1, 1, 3]
if index == 4: #17%30
multiples.append(
[8*DIVindex * (factor + 17) + 77, 4, 4, DIVindex])
#offset = [4, 7, 1, 2, 5, 6, 0, 3]
offset = np.array([9,27,18,9,19,9,18,27])
#DIVoffset = [1, 3, 3, 1, 2, 1, 3, 3]
if index == 5: #19%30
multiples.append(
[8*DIVindex * (factor + 19) + 96, 5, 5, DIVindex])
#offset = [5, 3, 7, 1, 6, 0, 4, 2]
offset = np.array([11,30,20,10,21,10,20,30])
#DIVoffset = [1, 4, 2, 2, 2, 2, 2, 4]
if index == 6: #23%30
multiples.append(
[8*DIVindex * (factor + 23) + 141, 6, 6, DIVindex])
#offset = [6, 2, 3, 7, 0, 4, 5, 1]
offset=np.array([13,36,25,12,25,12,25,36])
#DIVoffset = [1, 5, 3, 1, 4, 1, 3, 5]
if index == 7:
multiples.append(
[8*DIVindex * (factor + 29) + 224, 7, 7, DIVindex])
#offset = [7, 6, 5, 4, 3, 2, 1, 0]
offset = np.array([15,47,31,15,31,15,31,47])
#DIVoffset = [1, 6, 4, 2, 4, 2, 4, 6]
while multiples[-1][0] < squareroot:
isprime[multiples[-1][0]] = False
multiples[-1][1] += 1
if multiples[-1][1] == 8:
multiples[-1][1] = 0
multiples[-1][0] += offset[multiples[-1][1]] + 8*DIVindex*difference[multiples[-1][1]]
index += 1
if index == 8:
DIVindex += 1
index = 0
factor += difference[index]
prime = [DIVindex, index]
while prime[0] * 8 + prime[1] < squareroot:
if isprime[prime[0] * 8 + prime[1]]:
primes.append(prime[0] * 30 + wheel[prime[1]])
prime[1] += 1
if prime[1] == 8:
prime[1] = 0
prime[0] += 1
low = squareroot
limit = low + low
while low <= (number * 8) // 30:
isprime = np.ones(squareroot,dtype=bool)
if limit > (number * 8) // 30:
limit = (number * 8) // 30
factor = 0
while factor < len(multiples) and primes[factor +
3] <= math.isqrt(number):
index = multiples[factor][2]
DIVindex = multiples[factor][3]
if multiples[factor][0] < limit:
if index == 0: #1%30
#offset = [0,1,2,3,4,5,6,7]
offset = np.ones(8,dtype = int)
#DIVoffset = [1, 0, 0, 0, 0, 0, 0, 0]
if index == 1: # 7%30
#offset = [1, 5, 4, 0, 7, 3, 2, 6]
offset = np.array([3,12,7,4,7,4,7,12])
#DIVoffset = [1, 1, 1, 1, 0, 1, 1, 1]
if index == 2: #11%30
#offset = [2, 4, 0, 6, 1, 7, 3, 5]
offset = np.array([5,18,12,6,11,6,12,18])
#DIVoffset = [1, 2, 2, 0, 2, 0, 2, 2]
if index == 3: #13%30
#offset = [3, 0, 6, 5, 2, 1, 7, 4]
offset = np.array([7,21,14,7,13,7,14,21])
#DIVoffset = [1, 3, 1, 1, 2, 1, 1, 3]
if index == 4: #17%30
#offset = [4, 7, 1, 2, 5, 6, 0, 3]
offset = np.array([9,27,18,9,19,9,18,27])
#DIVoffset = [1, 3, 3, 1, 2, 1, 3, 3]
if index == 5: #19%30
#offset = [5, 3, 7, 1, 6, 0, 4, 2]
offset = np.array([11,30,20,10,21,10,20,30])
#DIVoffset = [1, 4, 2, 2, 2, 2, 2, 4]
if index == 6: #23%30
#offset = [6, 2, 3, 7, 0, 4, 5, 1]
offset=np.array([13,36,25,12,25,12,25,36])
#DIVoffset = [1, 5, 3, 1, 4, 1, 3, 5]
if index == 7: #29%30
#offset = [7, 6, 5, 4, 3, 2, 1, 0]
offset = np.array([15,47,31,15,31,15,31,47])
#DIVoffset = [1, 6, 4, 2, 4, 2, 4, 6]
while multiples[factor][0] < limit:
isprime[multiples[factor][0]-low] = False
multiples[factor][1] +=1
if multiples[factor][1]==8:
multiples[factor][1]=0
multiples[factor][0]+= 8*DIVindex * difference[
multiples[factor][1]]
multiples[factor][0] += offset[multiples[factor][1]]
factor += 1
while prime[0]*8+prime[1]< limit:
if isprime[prime[0]*8+prime[1] - low]:
primes.append(prime[0] * 30 + wheel[prime[1]])
prime[1] += 1
if prime[1] == 8:
prime[1] = 0
prime[0] += 1
low += squareroot
limit += squareroot
return primes
The code is quite long, but really the part that I'm interested in is the while loop to cross off primes as this part takes up the majority of run time:
while multiples[factor][0] < limit:
isprime[multiples[factor][0]-low] = False
multiples[factor][1] +=1
if multiples[factor][1]==8:
multiples[factor][1]=0
multiples[factor][0]+= 8*DIVindex * difference[
multiples[factor][1]]
multiples[factor][0] += offset[multiples[factor][1]]
and the code that adds the primes from the array:
while prime[0]*8+prime[1]< limit:
if isprime[prime[0]*8+prime[1] - low]:
primes.append(prime[0] * 30 + wheel[prime[1]])
prime[1] += 1
if prime[1] == 8:
prime[1] = 0
prime[0] += 1
[This script ends up taking around 7.3 seconds to generate primes up to 1 million, but a simple sieve of Eratosthenes can do this in only 2 seconds, with what is technically a modulo 2 wheel (and also missing out the factor 2 but I decided to leave it out for best-case speed):]
If I get rid of the segmentation it ends up taking around 0.05 seconds, but there are other approaches below that end up being faster.
def normal_sieve(number):
primes = np.ones(number,dtype=bool)
for factor in range(3,math.isqrt(number)+1,2):
if primes[factor - 1]:
for i in range(factor*factor,number+1,2*factor):
primes[i - 1] = False
return [i + 1 for i in range(2, number, 2) if primes[I]]
And this prime sieve I found somewhere can do this in just 0.003 seconds, which is a huge difference even though the other implementations also use numpy arrays:
def primesfrom2to(n):
""" Input n>=6, Returns a array of primes, 2 <= p < n """
sieve = numpy.ones(n//3 + (n%6==2), dtype=bool)
for i in range(1,int(n**0.5)//3+1):
if sieve[i]:
k=3*i+1|1
sieve[ k*k//3 ::2*k] = False
sieve[k*(k-2*(i&1)+4)//3::2*k] = False
return numpy.r_[2,3,((3*numpy.nonzero(sieve)[0][1:]+1)|1)]
There is also a library called primesieve, which manages to count primes up to 100 billion within around a minute, and I'm really not sure where it gets its speed, besides being made in c++ instead of python. Why is my version so much slower than these, and how do I get a wheel factorisation method to work?