I have tried to implement a C QuickSelect algorithm as described in this post (3 way quicksort (C implementation)). However, all I get are performances 5 to 10 times less than the default qsort (even with an initial shuffling). I tried to dig into the original qsort source code as provide here (https://github.com/lattera/glibc/blob/master/stdlib/qsort.c), but it's too complex. Does anybody have a simpler, and better algorithm? Any idea is welcomed. Thanks, NB: my original problem is to try to get the Kth smallest values of an array to the first Kth indices. So I planned to call quickselect K times EDIT 1: Here is the Cython Code as copied and adapted from the link above
cdef void qswap(void* a, void* b, const size_t size) nogil:
cdef char temp[size]# C99, use malloc otherwise
#char serves as the type for "generic" byte arrays
memcpy(temp, b, size)
memcpy(b, a, size)
memcpy(a, temp, size)
cdef void qshuffle(void* base, size_t num, size_t size) nogil: #implementation of Fisher
cdef int i, j, tmp# create local variables to hold values for shuffle
for i in range(num - 1, 0, -1): # for loop to shuffle
j = c_rand() % (i + 1)#randomise j for shuffle with Fisher Yates
qswap(base + i*size, base + j*size, size)
cdef void partition3(void* base,
size_t *low, size_t *high, size_t size,
QComparator compar) nogil:
# Modified median-of-three and pivot selection.
cdef void *ptr = base
cdef size_t lt = low[0]
cdef size_t gt = high[0] # lt is the pivot
cdef size_t i = lt + 1# (+1 !) we don't compare pivot with itself
cdef int c = 0
while (i <= gt):
c = compar(ptr + i * size, ptr + lt * size)
if (c < 0):# base[i] < base[lt] => swap(i++,lt++)
qswap(ptr + lt * size, ptr + i * size, size)
i += 1
lt += 1
elif (c > 0):#base[i] > base[gt] => swap(i, gt--)
qswap(ptr + i * size, ptr + gt* size, size)
gt -= 1
else:#base[i] == base[gt]
i += 1
#base := [<<<<<lt=====gt>>>>>>]
low[0] = lt
high[0] = gt
cdef void qselectk3(void* base, size_t lo, size_t hi,
size_t size, size_t k,
QComparator compar) nogil:
cdef size_t low = lo
cdef size_t high = hi
partition3(base, &low, &high, size, compar)
if ((k - 1) < low): #k lies in the less-than-pivot partition
high = low - 1
low = lo
elif ((k - 1) >= low and (k - 1) <= high): #k lies in the equals-to-pivot partition
qswap(base, base + size*low, size)
return
else: # k > high => k lies in the greater-than-pivot partition
low = high + 1
high = hi
qselectk3(base, low, high, size, k, compar)
"""
A selection algorithm to find the nth smallest elements in an unordered list.
these elements ARE placed at the nth positions of the input array
"""
cdef void qselect(void* base, size_t num, size_t size,
size_t n,
QComparator compar) nogil:
cdef int k
qshuffle(base, num, size)
for k in range(n):
qselectk3(base + size*k, 0, num - k - 1, size, 1, compar)
I use python timeit to get the performance of both method pyselect(with N=50) and pysort. Like this
def testPySelect():
A = np.random.randint(16, size=(10000), dtype=np.int32)
pyselect(A, 50)
timeit.timeit(testPySelect, number=1)
def testPySort():
A = np.random.randint(16, size=(10000), dtype=np.int32)
pysort(A)
timeit.timeit(testPySort, number=1)