I am trying to do semantic segmentation on satellite images using Keras with the TensorFlow backend, with a SegNet-basic model. I am using Python 3.6.
My question is about the repetitive patterns I get in the output image regardless of the input image. What am I doing wrong, how can I fix it, and what should my next steps be? The model also takes over an hour to train; is that normal?
Details about my problem and the full code follow.
import random
import tifffile
import cv2 as cv
import numpy as np
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import (InputLayer, Conv2D, BatchNormalization, Activation,
                          MaxPooling2D, UpSampling2D, ZeroPadding2D,
                          Reshape, Permute)
from keras import backend as K
I am using 1600 images of size 256x256 as the training set.
# Raw strings so backslashes in the Windows paths are not treated as escapes
image1 = tifffile.imread(r"D:\Programs\Ankit\satellite8.tif")
image2 = tifffile.imread(r"D:\Programs\Ankit\satellite8w.tif")
cv.imwrite("image1.jpg", image1)
cv.imwrite("image2.jpg", image2)
image3 = cv.imread(r"D:\Programs\Ankit\image1.jpg")
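As a side note, the JPEG round trip above bakes lossy compression into the training data. A minimal alternative sketch that slices the bands directly from the TIFF (assuming image1 stores its bands last and they are already 8-bit):
# Hypothetical alternative: skip the JPEG round trip
# (assumes image1 has shape (rows, cols, bands) with at least 3 8-bit bands)
image3 = image1[:, :, :3].astype('uint8')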
dim1 = image3.shape
dim2 = image2.shape
size = 256
ints1 = list(range(0, dim1[0], size))  # row offsets of each tile
ints2 = list(range(0, dim1[1], size))  # column offsets of each tile
print(len(ints1))
print(len(ints2))
print(dim1, dim2)
print(ints1)
print(ints2)
i = len(ints1)
j = len(ints2)
My input image and mask image (labels) are GeoTIFFs that are too large to include here. The following code iteratively reads the 256x256 tiles from the mosaic:
img = np.zeros(((i-1)*(j-1), size, size, 3))
print(img.shape)
m = 1
for k in range(1, i):
    for n in range(1, j):
        img[m-1, :, :, :] = image3[ints1[k-1]:ints1[k], ints2[n-1]:ints2[n], 0:3]
        m += 1
        #print(m)
print(img.shape)
m = 1
clt = np.zeros(((i-1)*(j-1), size, size))
for k in range(1, i):
    for n in range(1, j):
        clt[m-1, :, :] = image2[ints1[k-1]:ints1[k], ints2[n-1]:ints2[n]]
        m += 1
        #print(m)
print(clt.shape)
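To make sure the image tiles and mask tiles stay aligned, a quick spot check like the following can help (a sketch using the arrays built above; the output filenames are just placeholders):
# Spot-check one tile against its mask tile
idx = 0
cv.imwrite("tile_check.jpg", img[idx].astype('uint8'))
cv.imwrite("mask_check.png", (clt[idx] * 50).astype('uint8'))  # scale labels 0-4 into a visible range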
# Setting train and test data
train_X = img[0:1600, :, :, :]
test_X = img[1600:, :, :, :]
train_y = clt[0:1600, :, :]
test_y = clt[1600:, :, :]
train_labels = train_y.reshape(1600, size*size)
print(train_labels.shape)
train_labels = to_categorical(train_labels)
print(train_labels.shape)
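Note that to_categorical infers the number of classes from the largest label it sees, so a stray label value in a mask silently changes the output width. If the masks are known to contain exactly five classes, it is safer to pin the count explicitly (a sketch):
# Safer: fix the class count instead of letting Keras infer it from the data
train_labels = to_categorical(train_y.reshape(1600, size * size), num_classes=5)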
The variables for the SegNet-basic model are:
early_stopping_monitor = EarlyStopping(patience=3)
kernel = 3
filter_size = 64
pad = 1
pool_size = 2
input_height = size
input_width = size
nClasses = 5
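Before building the model, it is also worth checking how balanced the five classes are in the masks; a validation accuracy near 25% can simply mean the network collapsed onto one or two dominant classes. A minimal check:
# Label distribution of the masks: expect labels 0-4,
# and watch for a single class dominating the counts
labels, counts = np.unique(clt, return_counts=True)
print(labels, counts / counts.sum())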
My SegNet-basic model variant is as follows:
model = Sequential()
model.add(InputLayer(input_shape=(input_height, input_width, 3)))
# encoder
#model.add(ZeroPadding2D(padding=(pad,pad)))
model.add(Conv2D(filter_size, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
#model.add(ZeroPadding2D(padding=(pad,pad)))
model.add(Conv2D(128, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
#model.add(ZeroPadding2D(padding=(pad,pad)))
model.add(Conv2D(256, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
model.add(Conv2D(512, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
# decoder
model.add(Conv2D(512, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(UpSampling2D(size=(pool_size, pool_size)))
model.add(Conv2D(256, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(UpSampling2D(size=(pool_size, pool_size)))
#model.add(ZeroPadding2D(padding=(pad, pad)))
model.add(Conv2D(128, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(UpSampling2D(size=(pool_size, pool_size)))
#model.add(ZeroPadding2D(padding=(pad, pad)))
model.add(Conv2D(filter_size, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(UpSampling2D(size=(pool_size, pool_size)))
model.add(Conv2D(nClasses, (1, 1), padding='same'))
# note: with channels_last data the output shape here is (None, H, W, nClasses)
model.outputHeight = model.output_shape[-2]
model.outputWidth = model.output_shape[-1]
model.add(Reshape((nClasses, model.output_shape[2] * model.output_shape[1]),
                  input_shape=(nClasses, model.output_shape[-2], model.output_shape[-1])))
model.add(Permute((2, 1)))
model.add(Activation('softmax'))
#model.add(Dense(3))
#model.add(Reshape((256, 256, 3), input_shape=(nClasses, model.output_shape[-2], model.output_shape[-1])))
model.compile(loss="categorical_crossentropy", optimizer='adadelta', metrics=['accuracy'])
model.summary()
model.fit(train_X, train_labels, epochs=1, verbose=1,
          callbacks=[early_stopping_monitor], validation_split=0.2, shuffle=True)
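One thing that may be worth double-checking on the TensorFlow backend: the Reshape/Permute head above follows channels_first SegNet examples, while the data here is channels_last, so reshaping (H, W, nClasses) to (nClasses, H*W) interleaves pixels and classes rather than separating them. A channels_last head would look like this instead (an untested sketch, not a confirmed fix):
# channels_last: the final conv output is (H, W, nClasses), so reshape
# straight to (H*W, nClasses) and drop the Permute
model.add(Reshape((input_height * input_width, nClasses)))
model.add(Activation('softmax'))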
This model takes over an hour to train and gives about 25% validation accuracy. The prediction and post-processing steps are as follows:
ypreds = model.predict(test_X, verbose=1)
print(ypreds.shape)
#ypreds.reshape(2,256,256,17)
#print(ypreds.shape)
pred1 = ypreds[3, 0:size*size, 0:nClasses]
pred1 = pred1.reshape(size, size, nClasses)
pred = np.argmax(pred1, axis=2)
colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
          for _ in range(nClasses)]
seg_img = np.zeros((size, size, 3))
for c in range(nClasses):
    seg_img[:, :, 0] += ((pred[:, :] == c) * colors[c][0]).astype('uint8')
    seg_img[:, :, 1] += ((pred[:, :] == c) * colors[c][1]).astype('uint8')
    seg_img[:, :, 2] += ((pred[:, :] == c) * colors[c][2]).astype('uint8')
seg_img = cv.resize(seg_img, (size, size))
cv.imwrite("pred.jpg", seg_img.astype('uint8'))  # cast to uint8 so imwrite saves it correctly
print(seg_img.shape)
print(seg_img)
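For what it's worth, the per-channel loop above can be collapsed into a single fancy-indexing step; an equivalent sketch:
# Equivalent colorization via a lookup table: palette[pred] broadcasts
# each class id to its RGB triple
palette = np.array(colors, dtype='uint8')   # shape (nClasses, 3)
seg_img = palette[pred]                     # shape (size, size, 3)
cv.imwrite("pred.jpg", seg_img)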
Am I missing some steps after this?
My output image is this: