Why is the image being partially processed?

Question

It is been hours writing scripts and I think I am tired overlooking something simple. I have the following pycuda script

import cv2
import numpy as np
import time
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray


def apply_threshold(img_src,img_width, img_height, img_dest, mythreshold):
    mod = SourceModule("""
        __global__ void ThresholdKernel(
            const int src_sizeX,  //< source image size. x: width,
            const unsigned char* src,   //< source image pointer
            const int dst_sizeX,  //< destination image size. x: width, y: height
            const int dst_sizeY,
            unsigned char* dst,         //< destination image pointer
            const int mythreshold) {
                int col = blockIdx.x * blockDim.x + threadIdx.x;
                int row = blockIdx.y * blockDim.y + threadIdx.y;
                if (dst_sizeX <= col || dst_sizeY <= row) return;

                auto src_val = src[row * src_sizeX + col];
                unsigned char dst_val = src_val > mythreshold ? 255 : 0;
                dst[row * dst_sizeX + col] = dst_val;
            }
    """)

    block_dim =(32,8,1)
    grid_dim_x = (img_width + block_dim[0] -1) // block_dim[0]
    grid_dim_y = (img_width + block_dim[1] -1) // block_dim[1]

    print(grid_dim_x,grid_dim_y)
    

    thresholdkernel = mod.get_function("ThresholdKernel")

    thresholdkernel(np.int32(img_width), img_src, np.int32(img_width),np.int32(img_height), 
                    img_dest,np.int32(mythreshold),
                    block = block_dim , grid = (grid_dim_x,grid_dim_y))
    

mythreshold = 128

img_path = "../images/lena_gray.png"
img = cv2.imread(img_path)

if img is None:
    print("Image not found")
    exit()
else:
    height,width,channels = img.shape
    print("Hegiht, width and channels",height,width,channels)
    print(type(width))

img_gpu = cuda.mem_alloc(img.nbytes)
cuda.memcpy_htod(img_gpu,img)

dtype=img.dtype

# dest_img=gpuarray.empty_like(img.shape,dtype=dtype)
dest_img = cuda.mem_alloc(img.nbytes)


apply_threshold(img_gpu,width,height,dest_img  ,mythreshold )

image_result= np.empty_like(img)
cuda.memcpy_dtoh(image_result,dest_img )

cv2.imshow("Original image",img)
cv2.imshow("Thresholded",image_result)
cv2.waitKey(0)
cv2.destroyAllWindows()

When I run it I get a binarized picture but this one

What am I overlooking that makes the kernel only process part of the image? It must be something really simple

EDIT: I found the problem. The way I am reading the image.

It should be

img = cv2.imread(img_path,cv2.IMREAD_GRAYSCALE)

Now it works, although for some reason it takes 10 times the time of a similar script I have that does the same... well...

PCDSandwichMan · Answer 1 · 2023-07-23T06:02:48.317

1

I assume it's because you are using img_width for both grid_dim_x and grid_dim_y. But you probably meant to use img_height for grid_dim_y.

Give this a shot:

grid_dim_x = (img_width + block_dim[0] -1) // block_dim[0]
grid_dim_y = (img_height + block_dim[1] -1) // block_dim[1]

edited Jul 23 '23 at 06:02

answered Jul 23 '23 at 05:55

PCDSandwichMan

1,964
1
12
23

Yes, that was bad!. Unfortunately it seems that is not the only place, since the results are the same... – KansaiRobot Jul 23 '23 at 08:08

Why is the image being partially processed?

1 Answers1