I want to speed up a quite simple Python code by converting some functions into cython. However, in the loop body, I need to find the min and max values of an array and that seems to be the critical point. According to the .html file, these lines need to be translated into very much c-code.. Why is that?
That is the entire code, below I list the lines that give me headaches:
import numpy as np
cimport numpy as np
cimport cython
from cython cimport boundscheck, wraparound
@boundscheck(False)
@wraparound(False)
cdef box_overlaps_contour(unsigned int[:] boxTopLeftXY, unsigned int boxSize, unsigned int[:, :, :] contourData):
cdef bint isOverlapping = False
cdef unsigned int xmin, xmax, width, boxXmin, boxXmax, ymin, ymax, height, boxYmin, boxYmax
xmin = min(contourData[:, 0, 1])
xmax = max(contourData[:, 0, 1])
width = xmax - xmin
boxXmin = boxTopLeftXY[0]
boxXmax = boxTopLeftXY[0] + boxSize
if xmin > (boxXmin-width/2):
if xmax < (boxXmax+width/2):
ymin = min(contourData[:, 0, 1])
ymax = max(contourData[:, 0, 1])
height = ymax - ymin
boxYmin = boxTopLeftXY[1]
boxYmax = boxTopLeftXY[1] + boxSize
if ymin > (boxYmin-height/2):
if ymax < (boxYmax+width/2):
isOverlapping = True
return isOverlapping
@boundscheck(False)
@wraparound(False)
def def_get_indices_of_overlapping_particles(contours not None, unsigned int[:, :] topLefts, unsigned int boxSize):
cdef Py_ssize_t i, j
cdef unsigned int counter, numParticles, numTopLefts
numParticles = len(contours)
numTopLefts = topLefts.shape[0]
cdef unsigned int[:] overlappingIndices = np.zeros(numParticles, dtype=np.uint32)
cdef unsigned int[:, :, :] currentContour
counter = 0
for i in range(numParticles):
currentContour = contours[i]
for j in range(numTopLefts):
if box_overlaps_contour(topLefts[j, :], boxSize, currentContour):
overlappingIndices[counter] = i
counter += 1
break
return overlappingIndices[:counter]
The function takes a list of contours (np.ndarray, as retrieved from cv2) and an array, representing a certain number of xy-Coordinates, where rectangles are placed with the indicated boxsize. The function is supposed to iterate through the contours and return the indices of contours that are overlapping with one of the boxes. These lines here seem to make the entire process horribly slow (it is, in fact, slower than the purely Python version..):
+13: xmin = min(contourData[:, 0, 1])
+14: xmax = max(contourData[:, 0, 1])
and, similarly:
+21: ymin = min(contourData[:, 0, 1])
+22: ymax = max(contourData[:, 0, 1])
Other lines that are problematic (but a little less) without me understanding why:
+48: if box_overlaps_contour(topLefts[j, :], boxSize, currentContour):
Why is the function call already so complicated? The data types match, everything is unsigned integer..
And also already returning the bool value; I expanded what the compiler made out of it:
+31: return isOverlapping
__Pyx_XDECREF(__pyx_r);
__pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_isOverlapping); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__pyx_r = __pyx_t_2;
__pyx_t_2 = 0;
goto __pyx_L0;
Any help would be highly appreciated! I still don't really understand how cython works, as it seems :/ If required, I can give further information!
Many thanks!!! :)
EDIT: Here is what Cython makes out of the np.min() line...: Any ideas?
+21: ymin = np.min(contourData[:, 0, 1])
__Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 21, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_min); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 21, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_3);
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__pyx_t_4.data = __pyx_v_contourData.data;
__pyx_t_4.memview = __pyx_v_contourData.memview;
__PYX_INC_MEMVIEW(&__pyx_t_4, 0);
__pyx_t_4.shape[0] = __pyx_v_contourData.shape[0];
__pyx_t_4.strides[0] = __pyx_v_contourData.strides[0];
__pyx_t_4.suboffsets[0] = -1;
{
Py_ssize_t __pyx_tmp_idx = 0;
Py_ssize_t __pyx_tmp_stride = __pyx_v_contourData.strides[1];
if ((0)) __PYX_ERR(0, 21, __pyx_L1_error)
__pyx_t_4.data += __pyx_tmp_idx * __pyx_tmp_stride;
}
{
Py_ssize_t __pyx_tmp_idx = 1;
Py_ssize_t __pyx_tmp_stride = __pyx_v_contourData.strides[2];
if ((0)) __PYX_ERR(0, 21, __pyx_L1_error)
__pyx_t_4.data += __pyx_tmp_idx * __pyx_tmp_stride;
}
__pyx_t_2 = __pyx_memoryview_fromslice(__pyx_t_4, 1, (PyObject *(*)(char *)) __pyx_memview_get_unsigned_int, (int (*)(char *, PyObject *)) __pyx_memview_set_unsigned_int, 0);; if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 21, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__PYX_XDEC_MEMVIEW(&__pyx_t_4, 1);
__pyx_t_4.memview = NULL;
__pyx_t_4.data = NULL;
__pyx_t_5 = NULL;
if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) {
__pyx_t_5 = PyMethod_GET_SELF(__pyx_t_3);
if (likely(__pyx_t_5)) {
PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
__Pyx_INCREF(__pyx_t_5);
__Pyx_INCREF(function);
__Pyx_DECREF_SET(__pyx_t_3, function);
}
}
__pyx_t_1 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_5, __pyx_t_2) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2);
__Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 21, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
__pyx_t_6 = __Pyx_PyInt_As_unsigned_int(__pyx_t_1); if (unlikely((__pyx_t_6 == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 21, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_v_ymin = __pyx_t_6;