Understand the dataset:
- each image is 32 x 32 pixels
- there are 46 different classes (36 characters/alphabets and 10 digits)
['character_10_yna', 'character_11_taamatar', 'character_12_thaa', 'character_13_daa', 'character_14_dhaa', 'character_15_adna', 'character_16_tabala', 'character_17_tha', 'character_18_da', 'character_19_dha', 'character_1_ka', 'character_20_na', 'character_21_pa',
'character_22_pha', 'character_23_ba', 'character_24_bha', 'character_25_ma',
'character_26_yaw', 'character_27_ra', 'character_28_la', 'character_29_waw', 'character_2_kha', 'character_30_motosaw', 'character_31_petchiryakha', 'character_32_patalosaw', 'character_33_ha', 'character_34_chhya',
'character_35_tra', 'character_36_gya', 'character_3_ga', 'character_4_gha', 'character_5_kna', 'character_6_cha', 'character_7_chha', 'character_8_ja',
'character_9_jha', 'digit_0', 'digit_1', 'digit_2', 'digit_3', 'digit_4', 'digit_5', 'digit_6', 'digit_7', 'digit_8', 'digit_9']
As your images are already categorized into folders (one folder per class),

the Keras implementation will be:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import pathlib
# Root folder of the training set. The glob below expects one sub-folder per
# class (e.g. character_1_ka, digit_0), each holding that class's PNG images.
dataDir = "/xx/xx/xx/xx/datasets/Devanagari/drive-download-20210601T224146Z-001/Train"
# The data already lives on the local disk, so point at it directly.
# keras.utils.get_file() is a download/cache helper for remote files; using it
# on a local directory adds nothing (and recent Keras versions no longer accept
# a path as its `fname` argument).
data_dir = pathlib.Path(dataDir)
image_count = len(list(data_dir.glob('*/*.png')))
print(image_count)
batch_size = 32
img_height = 180 # scale it up for better performance
img_width = 180 # scale it up for better performance
# The training and validation datasets are built from the same directory with
# the same seed and validation_split, so the 80/20 split is consistent between
# the two calls.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)
# Class names are taken from the sub-folder names; grab them now, before the
# dataset is wrapped by cache()/prefetch() further down.
class_names = train_ds.class_names
print(class_names) # 46 classes
For caching and normalization,
refer to the TensorFlow tutorial:
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Training pipeline: cache -> shuffle (buffer of 1000) -> prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: cache -> prefetch (no shuffling).
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

# Stand-alone rescaling layer that multiplies pixel values by 1/255.
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)


def _rescale(images, labels):
    """Apply the rescaling layer to the images and pass the labels through."""
    return normalization_layer(images), labels


# This rescaled copy is only used for the sanity check below; train_ds itself
# is left un-rescaled.
normalized_ds = train_ds.map(_rescale)
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Print the min and max pixel value of the first rescaled image.
print(np.min(first_image), np.max(first_image))
Model setup, compilation, and training:
# One output unit per class reported by the dataset (46 for this dataset).
# Deriving it from class_names keeps the output layer in step with the labels
# instead of relying on a hard-coded number.
num_classes = len(class_names)

model = Sequential([
    # Rescale pixels by 1/255 inside the model, so un-rescaled images can be
    # fed directly both for training and for prediction.
    layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    # Three convolution + max-pooling stages with 16, 32 and 64 filters.
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # No activation here: the model outputs raw logits, which is why the loss
    # below is configured with from_logits=True.
    layers.Dense(num_classes),
])

model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

epochs = 10
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)
This will result in the following (very promising!):
Epoch 10/10
1955/1955 [==============================] - 924s 472ms/step - loss: 0.0201 - accuracy: 0.9932 - val_loss: 0.2267 - val_accuracy: 0.9504
Save the model (training takes a long time, so it is better to save the trained model):
# Create the output folder from Python rather than with the notebook-only
# `!mkdir -p` shell escape, so this also runs as a plain script and on systems
# without a `mkdir -p` command. exist_ok=True keeps the "-p" behaviour of not
# failing when the folder already exists.
os.makedirs('saved_model', exist_ok=True)
model.save('saved_model/my_model')
Load the model:
# Location the trained model was written to by model.save().
saved_model_path = 'saved_model/my_model'
loaded_model = tf.keras.models.load_model(saved_model_path)
# Print the layer summary to check the restored architecture.
loaded_model.summary()
Now the final task: get the prediction. One way is as follows:
import cv2

test_image_path = 'datasets/Devanagari/drive-download-20210601T224146Z-001/Test/character_3_ga/3711.png'
im2 = cv2.imread(test_image_path)
# cv2.imread() returns None instead of raising when the file is missing or
# unreadable; fail here with a clear message rather than inside cv2.resize().
if im2 is None:
    raise FileNotFoundError("Could not read image: {}".format(test_image_path))
# OpenCV loads images in BGR channel order, while the training pipeline
# (image_dataset_from_directory) feeds the model RGB images; convert so the
# channels match what the model was trained on.
im2 = cv2.cvtColor(im2, cv2.COLOR_BGR2RGB)
im2 = cv2.resize(im2, (180, 180))  # resize to 180x180, the size the model was trained on
print(im2.shape)
# Add a leading batch dimension: (180, 180, 3) -> (1, 180, 180, 3).
img2 = tf.expand_dims(im2, 0)
print(img2.shape)
predictions = loaded_model.predict(img2)
# The model outputs logits; softmax turns them into one probability per class.
score = tf.nn.softmax(predictions[0])
# np.argmax gives the index of the highest probability (29 for this image,
# i.e. character_3_ga). That index is the "greatest weight" your instructor
# referred to; class_names maps it back to the label.
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[np.argmax(score)], 100 * np.max(score))
)
(180, 180, 3)
(1, 180, 180, 3)
This image most likely belongs to character_3_ga with a 100.00 percent confidence.
Another way is to predict online, which is what you are trying to achieve. The image needs to have shape (1, 180, 180, 3) for this example, or (1, 32, 32, 3) if no resizing was done, and is then fed to predict. Something like the following:
# Sketch only: `im` and `size` are not defined in this snippet.
# NOTE(review): presumably `im` is the uploaded image object and `size` is the
# size the model was trained on (e.g. (180, 180)) - confirm against the caller.
out=im.resize(size)
# Add a leading batch dimension so the input has shape (1, height, width, channels).
out = tf.expand_dims(out, 0)
predictions = loaded_model.predict(out)
score = tf.nn.softmax(predictions[0]) # convert the model's logits into per-class probabilities
# Report the class with the highest probability together with its confidence.
print(
"This image most likely belongs to {} with a {:.2f} percent confidence."
.format(class_names[np.argmax(score)], 100 * np.max(score))
)