I want to generate the class activation map (CAM) for a binary classification problem. The data I have does not have any bounding boxes or contain any kind of annotation and it is a simple binary classification problem. The sample data input and output is generated in X and y in the code below.
I am using following referenced code implemented using Grad CAM approach (https://github.com/gorogoroyasu/mnist-Grad-CAM) on MNIST classification dataset. Grad CAM used in the code is mentioned in the paper (https://arxiv.org/pdf/1610.02391.pdf).
import numpy as np
np.random.seed(37)
import pandas as pd
tf.set_random_seed(89)
import random as rn
rn.seed(1254)
import keras
import tensorflow as tf
from keras import backend as k
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
inter_op_parallelism_threads=1)
sess = tf.Session(graph = tf.get_default_graph(), config = session_conf)
k.set_session(sess)
import matplotlib.pyplot as plt
from keras.layers import *
from keras.layers import Activation
from keras.layers.core import Dense, Flatten
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import *
import itertools
import math
from keras.models import Sequential, Model
from keras.layers import Input, Flatten, Dense, Dropout, Convolution2D, Conv2D, MaxPooling2D, Lambda, GlobalMaxPooling2D, GlobalAveragePooling2D, BatchNormalization, Activation, AveragePooling2D, Concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.utils import np_utils
from keras.callbacks import CSVLogger
#%matplotlib inline
keras.backend.set_image_data_format('channels_last')
from keras import initializers
X = np.random.random_integers(0, 1, size=(2237, 95, 95, 1))
y = np.random.random_integers(0, 1, size=(2237, 1))
print("X.shape : ", X.shape) ## (2237, 95, 95, 1)
print("y.shape : ", y.shape) ## (2237, 1)
model_21 = Sequential()
model_21.add(Conv2D(20, kernel_size = (3, 3), strides=(1, 1), activation = 'relu', padding = 'valid', input_shape = X.shape[1:],kernel_initializer=initializers.glorot_uniform(seed=90), bias_initializer='zeros', name = 'conv_lyr_1'))
model_21.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
model_21.add(Conv2D(20, kernel_size = (3, 3), strides=(1, 1), activation = 'relu', padding= 'valid', kernel_initializer=initializers.glorot_uniform(seed=90), bias_initializer='zeros', name = 'conv_lyr_2'))
model_21.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
model_21.add(GlobalAveragePooling2D())
model_21.add(Dense(1, activation = 'sigmoid'))
model_21.compile(optimizer=Adam(lr = 0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False), loss = 'binary_crossentropy', metrics = ['accuracy'],)
early_stop = EarlyStopping(monitor='loss', patience=5, verbose=1)
hist = model_21.fit(X[:2000], y[:2000], batch_size = 64, epochs = 10, verbose = 2, shuffle=True, validation_split=0.05, callbacks=[early_stop])
The training of the model did not give us very good result with global average pooling (Although the mentioned code does not use global average pooling but I have read other papers which use global average pooling (GAP) for the Class Activation Mapping e.g. https://github.com/metalbubble/CAM). This implementation did not use GAP in model building but used it in following lines of code : (shown in next section of code)
# global average pooling
weights = np.mean(conv_grad, axis = (0, 1))
cam = np.zeros(conv_output.shape[0 : 2], dtype = np.float32).
## RESULT OF ABOVE TRAINING
Train on 1900 samples, validate on 100 samples
Epoch 1/10
- 20s - loss: 0.6940 - acc: 0.4947 - val_loss: 0.6932 - val_acc: 0.5100
Epoch 2/10
- 20s - loss: 0.6939 - acc: 0.4711 - val_loss: 0.6934 - val_acc: 0.4500
Epoch 3/10
- 19s - loss: 0.6932 - acc: 0.5026 - val_loss: 0.6937 - val_acc: 0.4900
Epoch 4/10
- 19s - loss: 0.6933 - acc: 0.5011 - val_loss: 0.6935 - val_acc: 0.4900
Epoch 5/10
- 21s - loss: 0.6933 - acc: 0.5026 - val_loss: 0.6935 - val_acc: 0.4900
Epoch 6/10
- 21s - loss: 0.6933 - acc: 0.4905 - val_loss: 0.6936 - val_acc: 0.4900
Epoch 7/10
- 19s - loss: 0.6933 - acc: 0.5021 - val_loss: 0.6939 - val_acc: 0.4900
Epoch 8/10
- 19s - loss: 0.6934 - acc: 0.4911 - val_loss: 0.6933 - val_acc: 0.4800
Epoch 00008: early stopping
Note: GAP does not work very well and this post with lesser number of epochs (https://stats.stackexchange.com/questions/330119/why-global-average-pooling-is-able-to-work-correctly) and (https://datascience.stackexchange.com/questions/28120/globalaveragepooling2d-in-inception-v3-example) might explain the reason behind it.
As training was done on first 2000 data points, now I will be testing the images for rest of the 237 data points 2001 to 2237 data points and I am interested to view their CAM. However from the referenced code below I do not understand how conv_grad, input_grad and grad_RGB can be used by me.
As we can obtain weights of global average pooling layer so we can omit the preexisting code and directly use GAP layer weights.
The code following is here:
import sys, cv2
from tensorflow.keras.datasets import mnist
from mnist_model import Model as MM
from pathlib import Path
from tensorflow.keras.models import Model
img_rows, img_cols = 300, 400
num_classes = 2
#model=Model(inputs=[m.labels, m.inputs], outputs=[m.predictions, m.g, m.a, m.gb_grad])
for target_y_train_num in range(2000, 2237):
result = model_21.predict(X[target_y_train_num].reshape((-1, 95, 95, 1)))
print('answer: ', K.eval(K.argmax(y[target_y_train_num])))
print('prediction: ', K.eval(K.argmax(result[0])))
print(result) ## [[0.50195]]
conv_grad = result[1] ## What should I use here?? -->> QUESTION
conv_grad = conv_grad.reshape(conv_grad.shape[1:]) ## What should I use here?? -->> QUESTION
conv_output = result[2] ## What should I use here?? -->> QUESTION
conv_output = conv_output.reshape(conv_output.shape[1:]) ## What should I use here??
input_grad = result[3] ## What should I use here?? -->> QUESTION
input_grad = input_grad.reshape(input_grad.shape[1:]) ## What should I use here??
gradRGB = gb_viz = input_grad ## What should I use here as ours is a single channel input but I guess the heat map should always be in RGB
from skimage.transform import resize
#import cv2
# global average pooling -->> QUESTION
## How to recover the 20 weights obtained by GAP layer??
weights = np.mean(conv_grad, axis = (0, 1))
cam = np.zeros(conv_output.shape[0 : 2], dtype = np.float32)
for i, w in enumerate(weights):
cam += w * conv_output[:, :, i]
cam = np.maximum(cam, 0)
cam = cam / np.max(cam)
cam = resize(cam, (95,95), preserve_range=True)
img = x_test[target_y_train_num].astype(float)
img -= np.min(img)
img /= img.max()
cam_heatmap = cv2.applyColorMap(np.uint8(255*cam), cv2.COLORMAP_JET)
cam_heatmap = cv2.cvtColor(cam_heatmap, cv2.COLOR_BGR2RGB)
cam = np.float32(cam.reshape((95, 95, 1))) * np.float32(img)
cam = 255 * cam / np.max(cam)
cam = np.uint8(cam)
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
plt.figure()
img_int = (img * 255.).astype(int).reshape(img.shape[:2])
plt.gray()
plt.imshow(img_int)
plt.savefig('original_{}.png'.format(target_y_train_num))
plt.close()
plt.figure()
plt.imshow(cam_heatmap)
plt.savefig('heatmap_{}.png'.format(target_y_train_num))
plt.close()
plt.figure()
plt.imshow(img_int)
plt.imshow(cam_heatmap, alpha=0.5)
plt.savefig('heatmap_overlaied_{}.png'.format(target_y_train_num))
plt.close()
gb_viz -= np.min(gb_viz)
gb_viz /= gb_viz.max()
img_int = (gb_viz * 255.).astype(int).reshape(img.shape[:2])
imgplot = plt.imshow(img_int)
plt.savefig('grad-cam-backpropagation_{}.png'.format(target_y_train_num))
plt.close()
gd_gb = gb_viz * cam
img_int = (gd_gb * 255.).astype(int).reshape(img.shape[:2])
imgplot = plt.imshow(img_int)
plt.savefig('guided-grad-cam_{}.png'.format(target_y_train_num))
plt.close()
I have read the paper and its concepts again but I could not understand how the values of conv_grad, conv_output, input_grad and cam should be calculated. I have tried to put and retrieve the 20 weights of GAP layer to calculate cam but was unsuccessful and I do not understand the flow of calculation. Sorry for the long post and thanks in advance.