How to convert binary grid images to 2D arrays?

Question

I've got some images of binary (black and white) grids that look like this:

Now, I want to convert such images to regular 2D NumPy arrays, where each cell must correspond to 0, if the source cell is white (or uncolored) and 1 if the cell is black. That is, the expected output is:

[[0,1,0,0,1],
 [0,0,0,0,1],
 [0,1,0,0,0],
 [0,0,0,0,0],
 [0,0,0,0,0],
 [0,0,0,1,0],
 [0,0,1,0,0]]

I've looked at a number of suggestions including this one, but they don't say anything about how I must reduce the raw pixels to a regular grid.

My current code:

import numpy as np
from PIL import Image

def from_img(imgfile, size, keep_ratio=True, reverse=False):
    """
    Loads an image and reduces it to a 2D integer NumPy array of zeros and ones.
    imgfile: image file, passed as-is to PIL's Image.open()
    size: target (width, height) in pixels, i.e. the number of columns and rows wanted
    keep_ratio: if True, `size` is replaced by the image's own dimensions scaled so that
        its longest side equals max(size); if False, the image is resized to `size` exactly
    reverse: if False, pixels brighter than the threshold map to 255 and the rest to 0
        before the conversion to an array; if True, the mapping is inverted
    """
    def resample(img_, size):
        # operate on the image that was passed in; the previous version ignored
        # its argument and only worked because `img` was visible in the closure
        return img_.resize(size, resample=Image.BILINEAR)

    def makebw(img, threshold=200):
        # edges = (value for pixels at or below the threshold, value for pixels above it)
        edges = (255 if reverse else 0, 0 if reverse else 255)
        return img.convert('L').point(lambda x: edges[1] if x > threshold else edges[0], mode='1')

    img = Image.open(imgfile)
    if keep_ratio:
        # scale both sides by the same factor so the longest side becomes max(size)
        ratio = max(size) / max(img.size)
        size = tuple(int(sz * ratio) for sz in img.size)
    return np.array(makebw(resample(img, size)), dtype=int)

This code might be ok for images that don't contain borders between the cells, and only when specifying the number of rows and columns manually. But I am sure there must be a way of automating this routine by edge detection / resampling techniques...

Update

While there are good solutions (see suggested below) for even, regular black and white grids like shown above, the task is more difficult for uneven, noisy images with multiple non-BW colors like this one:

I'm now looking at an opencv implementation that detects contours and tries to single out the cell size to reconstruct the grid matrix. My current code:

import matplotlib.pyplot as plt
import numpy as np
import cv2

def find_contours(fpath, gray_thresh=150, extent_param=0.85, area_param=(0.0003, 0.3), ratio_param=(0.75, 1.33)):
    """
    Finds contours (shapes) in an image (loading it from a file) and filters the contours
    according to a number of parameters.
    fpath: path to the image file
    gray_thresh: grayscale threshold
    extent_param: minimum extent of contour (ratio of contour area to bounding rect area);
        contours with exactly 4 points pass regardless; a falsy value disables this filter
    area_param: min and max ratio of contour area to image area; a falsy value disables this filter
    ratio_param: min and max aspect ratio (width / height) of the contour's bounding rect;
        a falsy value disables this filter
    Returns a tuple (image, imgray, contours, size_mode, first_pos, last_pos), where
        image: the loaded image
        imgray: its grayscale version
        contours: the contours that passed all filters
        size_mode: most frequent bounding rect size (w, h) among those contours
        first_pos / last_pos: smallest / largest bounding rect origin (x, y), compared as tuples
    Raises OSError if the image cannot be read and ValueError if no contour passes the filters.
    """
    from collections import Counter

    image = cv2.imread(fpath)
    if image is None:
        # cv2.imread() reports failure by returning None instead of raising
        raise OSError(f'Cannot read image file: {fpath!r}')
    # grayscale image
    imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(imgray, gray_thresh, 255, cv2.THRESH_BINARY)
    # get all contours; findContours returns (contours, hierarchy) in OpenCV 2.x / 4.x but
    # (image, contours, hierarchy) in 3.x, so take the second-to-last element
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # get min and max contour area in pixels (from given ratios)
    if area_param:
        area = imgray.shape[0] * imgray.shape[1]
        min_area = float(area) * area_param[0]
        max_area = float(area) * area_param[1]
    # filtered contours
    contours2 = []
    # contour sizes (w, h)
    sizes = []
    # contour coords (x, y)
    pos = []
    # iterate by found contours
    for c in contours:
        # get contour area
        c_area = cv2.contourArea(c)
        # get bounding rect as (x, y, w, h)
        rect = cv2.boundingRect(c)
        # get extent (ratio of contour area to bounding rect area)
        extent = float(c_area) / (rect[2] * rect[3])
        # get aspect ratio of bounding rect
        ratio = float(rect[2]) / rect[3]
        # perform filtering (leave rect-shaped contours or filter by extent);
        # min_area / max_area are only evaluated when area_param is set
        if (len(c) == 4 or not extent_param or extent >= extent_param) and \
           (not area_param or (min_area <= c_area <= max_area)) and \
           (not ratio_param or (ratio_param[0] <= ratio <= ratio_param[1])):
            # add filtered contour to list, as well as its size and pos
            contours2.append(c)
            sizes.append(rect[-2:])
            pos.append(rect[:2])

    if not contours2:
        # fail with a readable message instead of "max() arg is an empty sequence"
        raise ValueError('No contours left after filtering; try relaxing the filter parameters')

    # get most frequent block size (w, h); frequencies are counted once up front
    # instead of calling sizes.count() for every candidate
    counts = Counter(sizes)
    size_mode = max(set(sizes), key=counts.__getitem__)
    # first and last block
    first_pos = min(pos)
    last_pos = max(pos)

    # return original image, grayscale image, filtered contours,
    # most frequent contour size, first and last contour coords
    return image, imgray, contours2, size_mode, first_pos, last_pos

def get_mean_colors_of_contours(img, imgray, contours):
    """
    Computes a mean value per contour plus the mean of those values.
    img: image the means are taken from (only the first component of cv2.mean() is kept)
    imgray: array whose shape is used to size the per-contour masks
    contours: contours to evaluate
    Returns a tuple (mean of all per-contour means, list of per-contour means).
    """
    def first_component_mean(contour):
        # paint the filled contour in white onto an otherwise black mask
        region = np.zeros(imgray.shape, np.uint8)
        cv2.drawContours(region, [contour], 0, 255, -1)
        # average img over the masked pixels and keep the first component only
        return cv2.mean(img, mask=region)[0]

    per_contour = [first_component_mean(contour) for contour in contours]
    overall = np.mean(per_contour)
    return overall, per_contour

def get_color(x):
    """
    Translates a one-letter color code into a 3-tuple:
    'r' -> (255, 0, 0), 'g' -> (0, 255, 0), 'b' -> (0, 0, 255).
    Any other value is handed back unchanged.
    """
    shortcuts = (
        ('r', (255, 0, 0)),
        ('g', (0, 255, 0)),
        ('b', (0, 0, 255)),
    )
    # compare with == in a fixed order so that any kind of value can be passed in
    for code, triple in shortcuts:
        if x == code:
            return triple
    return x

def text_in_contours(img, contours, values, val_format=None, text_color='b', text_scale=1.0):
    """
    Prints stuff inside given contours.
    img: original image (array); the text is drawn directly onto it
    contours: identified contours
    values: stuff to print (iterable of same length as contours)
    val_format: optional callback function to format a single value before printing;
        a falsy result (e.g. None or '') skips that contour
    text_color: color of output text (default = blue); a falsy value disables drawing
    text_scale: multiplier for the font scale, which is derived from the smaller side
        of each contour's bounding rect (min(w, h) * text_scale / 100)
    Returns img (also when nothing was drawn).
    """
    text_color = get_color(text_color)
    if not text_color:
        # nothing to draw: hand the image back untouched so that
        # `img = text_in_contours(...)` keeps working for the caller
        return img

    font = cv2.FONT_HERSHEY_DUPLEX
    # passed in the thickness slot of getTextSize() / putText()
    thickness = 1
    for c, val in zip(contours, values):
        txt = val_format(val) if val_format else str(val)
        if not txt:
            continue
        x, y, w, h = cv2.boundingRect(c)
        center = (x + w // 2, y + h // 2)
        font_scale = min(w, h) * text_scale / 100
        text_size, _ = cv2.getTextSize(txt, font, font_scale, thickness)
        # putText() anchors the text at its bottom-left corner, so shift left by half
        # the text width and down by half the text height to center it
        text_origin = (center[0] - text_size[0] // 2, center[1] + text_size[1] // 2)
        cv2.putText(img, txt, text_origin, font, font_scale, text_color, thickness, cv2.LINE_AA)
    return img

def draw_contours(fpath, contour_color='r', contour_width=1, **kwargs):
    """
    Finds contours in image and draws their outlines.
    fpath: path to image file
    contour_color: color used to outline contours (r,g,b, tuple or None);
        with a falsy value the contours are still detected but not drawn
    contour_width: outline width
    kwargs: args passed to find_contours()
    Returns the same 6-tuple as find_contours():
        (img, imgray, contours, size_mode, first_pos, last_pos)
    """
    contour_color = get_color(contour_color)
    img, imgray, contours, size_mode, first_pos, last_pos = find_contours(fpath, **kwargs)
    # always return the full tuple: bailing out with a bare None would break
    # callers that unpack the six results
    if contour_color:
        cv2.drawContours(img, contours, -1, contour_color, contour_width)
    return img, imgray, contours, size_mode, first_pos, last_pos
    
def show_image(img, fig_height_inches=8):
    """
    Displays an image array with matplotlib, with the axes hidden.
    img: image array; its first two dimensions are read as (height, width)
    fig_height_inches: figure height in inches; the figure width is derived
        from it using the image's width / height ratio
    """
    rows, cols = img.shape[:2]
    fig_width_inches = fig_height_inches * (cols / rows)
    figure = plt.figure(figsize=(fig_width_inches, fig_height_inches))
    # one axes object spanning the whole figure area
    canvas = plt.Axes(figure, [0., 0., 1., 1.])
    canvas.set_axis_off()
    figure.add_axes(canvas)
    canvas.imshow(img, interpolation='nearest', aspect='equal')
    plt.show()

This already helps me identify the white cells in most cases, e.g.:

# detect the cells, outline them, and compute the mean color per cell
img, imgray, contours, size_mode, first_pos, last_pos = draw_contours('sss4.jpg')
mean_col, cols = get_mean_colors_of_contours(img, imgray, contours)
print(f'mean color = {mean_col}')


def on_contour(val):
    # label a cell only if its mean reaches at least 90% of the overall mean
    if (val / mean_col) >= 0.9:
        return str(int(val))
    return None


# write the labels into the cells and display the result
img = text_in_contours(img, contours, cols, on_contour)
show_image(img, 15)

Output

mean color = 252.54154936140293

So now I only need some way to reconstruct the grid of ones and zeros, adding ones in the missing spots (where no white cells were identified).

score 1 · Answer 1 · answered May 04 '21 at 14:18

Given that you have a very nice grid with a regular shape, we can figure out the size of each tile by randomly sampling around and checking the size of our flood-filled area.

I used the mode of the counts I received back from the sample, but if you know some of the grids have a lot of black tiles, then you should probably take the smallest size returned by stipple() since anytime we hit a black tile, it'll include the entire background of the image which could overwhelm the count of white tiles.

Once we have the size of our tile, we can use that to index a pixel from each tile and check if it's white or black.

import cv2
import numpy as np
import random
import math

# stipple search
def stipple(mask, iters):
    """
    Probes the mask at random positions and measures the flood-filled region at each one.
    mask: 2D array; its first two dimensions are read as (height, width)
    iters: number of random probes
    Returns a list with, for every probe, the number of pixels equal to 100
    after flood-filling a copy of the mask with the value 100 from the probe position.
    """
    # get resolution
    rows, cols = mask.shape[:2]

    def probe():
        # floodFill paints into the array it is given, so work on a throwaway copy
        scratch = np.copy(mask)
        # pick a random seed pixel (x first, then y)
        seed_x = random.randint(0, cols - 1)
        seed_y = random.randint(0, rows - 1)
        cv2.floodFill(scratch, None, (seed_x, seed_y), 100)
        # size of the filled region in pixels
        return np.count_nonzero(scratch == 100)

    return [probe() for _ in range(iters)]

# load image
gray = cv2.imread("tiles.jpg", cv2.IMREAD_GRAYSCALE)
if gray is None:
    # cv2.imread() returns None instead of raising when the file cannot be read
    raise OSError("Cannot read image file 'tiles.jpg'")

# mask: pixels with a gray level in [100, 255] become 255, all others 0
mask = cv2.inRange(gray, 100, 255)
height, width = mask.shape[:2]

# check: flood-fill from 10 random positions and collect the region sizes
sizes = stipple(mask, 10)
print(sizes)

# get most common size // or search for the smallest size
size = max(set(sizes), key=sizes.count)

# get side size (treats the region as a square tile)
side = math.sqrt(size)

# get grid dimensions (at least one tile per axis, so the divisions below are safe)
grid_width = max(1, int(round(width / side)))
grid_height = max(1, int(round(height / side)))
print([grid_width, grid_height])

# recalculate size to nearest rounded whole number
side = int(width / grid_width)
print(side)

# make grid: sample the center pixel of every tile.
# The centers are computed from the fractional tile size of each axis, so the
# truncation error of an integer step cannot accumulate into shifted samples
# or extra rows / columns, and the result is exactly grid_height x grid_width.
tile_w = width / grid_width
tile_h = height / grid_height
grid = []
for gy in range(grid_height):
    y = int((gy + 0.5) * tile_h)
    row = []
    for gx in range(grid_width):
        x = int((gx + 0.5) * tile_w)
        # True where the mask is 255 at the tile center
        row.append(mask[y, x] == 255)
    grid.append(row)

# print one line of 0/1 digits per grid row
out_str = "".join("".join(str(int(elem)) for elem in row) + "\n" for row in grid)
print(out_str)

# show
cv2.imshow("Mask", mask)
cv2.waitKey(0)

Thanks for your suggestion! This solution works seamlessly for even, black and white grids like shown in the initial example. But for unprepared noisy images it tends to add additional rows and columns or not identify black / white cells. E.g. for 'good' inputs: [screen](https://disk.yandex.ru/i/f4EWcnEdPygMlQ) For more complex inputs: [screen](https://disk.yandex.ru/i/DB1G2JUQQeLMAg) — s0mbre, May 05 '21 at 23:37

score 1 · Answer 2 · answered May 05 '21 at 12:27

My idea would be to convert the input image to mode '1', somehow detect the tiles' width and height, resize the input image w.r.t. these, and simply convert to some NumPy array.

Detecting the tiles' width and height might work like this:

Detect changes between neighbouring pixels using np.diff, and create a union image from this information.
Calculate the distances between these detected changes, again using np.diff, np.sum, and np.nonzero.
Finally, get the median value of these distances using np.median, and from that, determine the number of rows and columns of the grid, and resize the input image accordingly.

Here's the full code:

import numpy as np
from PIL import Image

# Open image, convert to black and white mode
image = Image.open('grid.png').convert('1')
w, h = image.size

# Temporary NumPy array of type bool to work on (shape is (h, w))
temp = np.array(image)

# Detect changes between neighbouring pixels
diff_y = np.diff(temp, axis=0)
diff_x = np.diff(temp, axis=1)

# Create union image of detected changes
temp = np.zeros_like(temp)
temp[:h-1, :] |= diff_y
temp[:, :w-1] |= diff_x

# Calculate distances between detected changes.
# Summing over axis=1 leaves one value per row, so changes in that profile are
# spaced along y (tile height); summing over axis=0 leaves one value per column,
# so those changes are spaced along x (tile width). Using the axes the other way
# round only works for square tiles.
diff_y = np.diff(np.nonzero(np.diff(np.sum(temp, axis=1))))
diff_x = np.diff(np.nonzero(np.diff(np.sum(temp, axis=0))))

# Calculate tile height and width (distances of 1 are ignored as they do not span a tile)
ht = np.median(diff_y[diff_y > 1]) + 2
wt = np.median(diff_x[diff_x > 1]) + 2

# Resize image w.r.t. tile height and width, i.e. to one pixel per tile;
# inverting afterwards turns black tiles into 1 and white tiles into 0
array = (~np.array(image.resize((int(w/wt), int(h/ht))))).astype(int)
print(array)

For the given input image, we get the desired/expected output:

[[0 1 0 0 1]
 [0 0 0 0 1]
 [0 1 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 1 0]
 [0 0 1 0 0]]

Full black columns or rows don't matter:

[[0 1 0 0 1]
 [0 0 0 0 1]
 [0 1 0 0 1]
 [0 0 0 0 1]
 [0 0 0 0 1]
 [0 0 0 1 1]
 [0 0 1 0 1]]

And, even single white tiles are enough:

[[1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]
 [1 0 1 1 1]
 [1 1 1 1 1]]

For testing, I thresholded your input image, and saved it as single-channel PNG. For arbitrary JPG input images, you might want to have some thresholding before converting to mode '1' to avoid artifacts.

----------------------------------------
System information
----------------------------------------
Platform:      Windows-10-10.0.16299-SP0
Python:        3.9.1
PyCharm:       2021.1.1
NumPy:         1.20.2
Pillow:        8.2.0
----------------------------------------

Thanks, this is an awesome solution! However, like the previous one, it only seems to work on 'good' inputs, with even, strictly regular cells. For more complex stuff (with multiple colors, uneven cells and varied borders) it tends to add additional columns and rows (mistaking borders for rows / cols). See [SCREENSHOTS](https://disk.yandex.ru/i/5smtzrSi0JJAkA) — s0mbre, May 05 '21 at 23:51

How to convert binary grid images to 2D arrays?

Update

Output

2 Answers2