I was trying to compute Local Binary Patterns for a image on my GPU, utilising cuda module in python for the same. But the results produced by execution of similar algorithm on CPU and GPU is producing different results. Can you help me figure out the problem ?
Below is the snippet of code I was trying to execute :
from __future__ import division
from skimage.io import imread, imshow
from numba import cuda
import time
import math
import numpy
# CUDA Kernel
@cuda.jit
def pointKernelLBP(imgGPU, histVec, pos) :
''' Computes Point Local Binary Pattern '''
row, col = cuda.grid(2)
if row+1 < imgGPU.shape[0] and col+1 < imgGPU.shape[1] and col-1>=0 and row-1>=0 :
curPos = 0
mask = 0
for i in xrange(-1, 2) :
for j in xrange(-1, 2) :
if i==0 and j==0 :
continue
if imgGPU[row+i][col+j] > imgGPU[row][col] :
mask |= (1<<curPos)
curPos+=1
histVec[mask]+=1
#Host Code for computing LBP
def pointLBP(x, y, img) :
''' Computes Local Binary Pattern around a point (x,y),
considering 8 nearest neighbours '''
pos = [0, 1, 2, 7, 3, 6, 5, 4]
curPos = 0
mask = 0
for i in xrange(-1, 2) :
for j in xrange(-1, 2) :
if i==0 and j==0 :
continue
if img[x+i][y+j] > img[x][y] :
mask |= (1<<curPos)
curPos+=1
return mask
def LBPHistogram(img, n, m) :
''' Computes LBP Histogram for given image '''
HistVec = [0] * 256
for i in xrange(1, n-1) :
for j in xrange(1, m-1) :
HistVec[ pointLBP(i, j, img) ]+=1
return HistVec
if __name__ == '__main__' :
# Reading Image
img = imread('cat.jpg', as_grey=True)
n, m = img.shape
start = time.time()
imgHist = LBPHistogram(img, n, m)
print "Computation time incurred on CPU : %s seconds.\n" % (time.time() - start)
print "LBP Hisogram Vector Using CPU :\n"
print imgHist
print type(img)
pos = numpy.ndarray( [0, 1, 2, 7, 3, 6, 5, 4] )
img_global_mem = cuda.to_device(img)
imgHist_global_mem = cuda.to_device(numpy.full(256, 0, numpy.uint8))
pos_global_mem = cuda.to_device(pos)
threadsperblock = (32, 32)
blockspergrid_x = int(math.ceil(img.shape[0] / threadsperblock[0]))
blockspergrid_y = int(math.ceil(img.shape[1] / threadsperblock[1]))
blockspergrid = (blockspergrid_x, blockspergrid_y)
start = time.time()
pointKernelLBP[blockspergrid, threadsperblock](img_global_mem, imgHist_global_mem, pos_global_mem)
print "Computation time incurred on GPU : %s seconds.\n" % (time.time() - start)
imgHist = imgHist_global_mem.copy_to_host()
print "LBP Histogram as computed on GPU's : \n"
print imgHist, len(imgHist)