5

I'd like to accelerate the code written in Python and NumPy. I use Gray-Skott algorithm (http://mrob.com/pub/comp/xmorphia/index.html) for reaction-diffusion model, but with Numba and Cython it's even slower! Is it possible to speed it up? Thanks in advance!

Python+NumPy

def GrayScott(counts, Du, Dv, F, k):
    n = 300
    U = np.zeros((n+2,n+2), dtype=np.float_)
    V = np.zeros((n+2,n+2), dtype=np.float_)
    u, v = U[1:-1,1:-1], V[1:-1,1:-1]

    r = 20
    u[:] = 1.0
    U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50
    V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25
    u += 0.15*np.random.random((n,n))
    v += 0.15*np.random.random((n,n))

    for i in range(counts):
        Lu = (                 U[0:-2,1:-1] +
              U[1:-1,0:-2] - 4*U[1:-1,1:-1] + U[1:-1,2:] +
                               U[2:  ,1:-1] )
        Lv = (                 V[0:-2,1:-1] +
              V[1:-1,0:-2] - 4*V[1:-1,1:-1] + V[1:-1,2:] +
                               V[2:  ,1:-1] )
        uvv = u*v*v
        u += Du*Lu - uvv + F*(1 - u)
        v += Dv*Lv + uvv - (F + k)*v

    return V

Numba

from numba import jit, autojit

@autojit
def numbaGrayScott(counts, Du, Dv, F, k):
    n = 300
    U = np.zeros((n+2,n+2), dtype=np.float_)
    V = np.zeros((n+2,n+2), dtype=np.float_)
    u, v = U[1:-1,1:-1], V[1:-1,1:-1]

    r = 20
    u[:] = 1.0
    U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50
    V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25
    u += 0.15*np.random.random((n,n))
    v += 0.15*np.random.random((n,n))

    Lu = np.zeros_like(u)
    Lv = np.zeros_like(v)

    for i in range(counts):
        for row in range(n):
            for col in range(n):
                Lu[row,col] = U[row+1,col+2] + U[row+1,col] + U[row+2,col+1] + U[row,col+1] - 4*U[row+1,col+1]
                Lv[row,col] = V[row+1,col+2] + V[row+1,col] + V[row+2,col+1] + V[row,col+1] - 4*V[row+1,col+1]

        uvv = u*v*v
        u += Du*Lu - uvv + F*(1 - u)
        v += Dv*Lv + uvv - (F + k)*v

    return V

Cython

%%cython
cimport cython
import numpy as np
cimport numpy as np

cpdef cythonGrayScott(int counts, double Du, double Dv, double F, double k):
    cdef int n = 300
    cdef np.ndarray U = np.zeros((n+2,n+2), dtype=np.float_)
    cdef np.ndarray V = np.zeros((n+2,n+2), dtype=np.float_)
    cdef np.ndarray u = U[1:-1,1:-1]
    cdef np.ndarray v = V[1:-1,1:-1]

    cdef int r = 20
    u[:] = 1.0
    U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50
    V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25
    u += 0.15*np.random.random((n,n))
    v += 0.15*np.random.random((n,n))

    cdef np.ndarray Lu = np.zeros_like(u)
    cdef np.ndarray Lv = np.zeros_like(v)
    cdef int i, row, col
    cdef np.ndarray uvv

    for i in range(counts):
        for row in range(n):
            for col in range(n):
                Lu[row,col] = U[row+1,col+2] + U[row+1,col] + U[row+2,col+1] + U[row,col+1] - 4*U[row+1,col+1]
                Lv[row,col] = V[row+1,col+2] + V[row+1,col] + V[row+2,col+1] + V[row,col+1] - 4*V[row+1,col+1]

        uvv = u*v*v
        u += Du*Lu - uvv + F*(1 - u)
        v += Dv*Lv + uvv - (F + k)*v

    return V

Usage example:

GrayScott(4000, 0.16, 0.08, 0.04, 0.06)
snotna
  • 133
  • 7

2 Answers2

8

Here is steps to speedup the cython version:

  • cdef np.ndarray doen't make element access faster, you need use memoryview in cython: cdef double[:, ::1] bU = U.

  • Turn off boundscheck and wraparound.

  • Do all the calculations in for-loop.

Here is the modified cython code:

%%cython
#cython: boundscheck=False
#cython: wraparound=False
cimport cython
import numpy as np
cimport numpy as np

cpdef cythonGrayScott(int counts, double Du, double Dv, double F, double k):
    cdef int n = 300
    cdef np.ndarray U = np.zeros((n+2,n+2), dtype=np.float_)
    cdef np.ndarray V = np.zeros((n+2,n+2), dtype=np.float_)
    cdef np.ndarray u = U[1:-1,1:-1]
    cdef np.ndarray v = V[1:-1,1:-1]

    cdef int r = 20
    u[:] = 1.0
    U[n/2-r:n/2+r,n/2-r:n/2+r] = 0.50
    V[n/2-r:n/2+r,n/2-r:n/2+r] = 0.25
    u += 0.15*np.random.random((n,n))
    v += 0.15*np.random.random((n,n))

    cdef np.ndarray Lu = np.zeros_like(u)
    cdef np.ndarray Lv = np.zeros_like(v)
    cdef int i, c, r1, c1, r2, c2
    cdef double uvv

    cdef double[:, ::1] bU = U
    cdef double[:, ::1] bV = V
    cdef double[:, ::1] bLu = Lu
    cdef double[:, ::1] bLv = Lv

    for i in range(counts):
        for r in range(n):
            r1 = r + 1
            r2 = r + 2
            for c in range(n):
                c1 = c + 1
                c2 = c + 2
                bLu[r,c] = bU[r1,c2] + bU[r1,c] + bU[r2,c1] + bU[r,c1] - 4*bU[r1,c1]
                bLv[r,c] = bV[r1,c2] + bV[r1,c] + bV[r2,c1] + bV[r,c1] - 4*bV[r1,c1]

        for r in range(n):
            r1 = r + 1
            for c in range(n):
                c1 = c + 1
                uvv = bU[r1,c1]*bV[r1,c1]*bV[r1,c1]
                bU[r1,c1] += Du*bLu[r,c] - uvv + F*(1 - bU[r1,c1])
                bV[r1,c1] += Dv*bLv[r,c] + uvv - (F + k)*bV[r1,c1]

    return V

It's about 11x faster than the numpy version.

HYRY
  • 94,853
  • 25
  • 187
  • 187
6

Aside from the looping and the sheer volume of operations involved, what is most likely killing performance in your case is array allocation. I don't know why your Numba and Cython versions are not living up to your expectation, but you can make your numpy code 2x faster (at the cost of some readability), by doing all operations in-place, i.e. replacing your current loop with:

Lu, Lv, uvv = np.empty_like(u), np.empty_like(v), np.empty_like(u)

for i in range(counts):
    Lu[:] = u
    Lu *= -4
    Lu += U[:-2,1:-1]
    Lu += U[1:-1,:-2]
    Lu += U[1:-1,2:]
    Lu += U[2:,1:-1]
    Lu *= Du

    Lv[:] = v
    Lv *= -4
    Lv += V[:-2,1:-1]
    Lv += V[1:-1,:-2]
    Lv += V[1:-1,2:]
    Lv += V[2:,1:-1]
    Lv *= Dv

    uvv[:] = u
    uvv *= v
    uvv *= v
    Lu -= uvv
    Lv += uvv

    u *= 1 - F
    u += F
    u += Lu

    v *= 1 - F - k
    v += Lv
Jaime
  • 65,696
  • 17
  • 124
  • 159
  • Very useful trick - on my machine it gives about 40-50% speed up. But of course readability is lost... – snotna Nov 09 '14 at 15:08