I am using a Conv-6 CNN in TensorFlow 2.5 and Python3. The objective is to selectively set certain weights within any trainable layer. The Conv-6 CNN model definition is as follows:
def conv6_cnn():
"""
Function to define the architecture of a neural network model
following Conv-6 architecture for CIFAR-10 dataset and using
provided parameter which are used to prune the model.
Conv-6 architecture-
64, 64, pool -- convolutional layers
128, 128, pool -- convolutional layers
256, 256, pool -- convolutional layers
256, 256, 10 -- fully connected layers
Output: Returns designed and compiled neural network model
"""
l = tf.keras.layers
model = Sequential()
model.add(
Conv2D(
filters = 64, kernel_size = (3, 3),
activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
strides = (1, 1), padding = 'same',
input_shape=(32, 32, 3)
)
)
model.add(
Conv2D(
filters = 64, kernel_size = (3, 3),
activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
strides = (1, 1), padding = 'same'
)
)
model.add(
MaxPooling2D(
pool_size = (2, 2),
strides = (2, 2)
)
)
model.add(
Conv2D(
filters = 128, kernel_size = (3, 3),
activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
strides = (1, 1), padding = 'same'
)
)
model.add(
Conv2D(
filters = 128, kernel_size = (3, 3),
activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
strides = (1, 1), padding = 'same'
)
)
model.add(
MaxPooling2D(
pool_size = (2, 2),
strides = (2, 2)
)
)
model.add(
Conv2D(
filters = 256, kernel_size = (3, 3),
activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
strides = (1, 1), padding = 'same'
)
)
model.add(
Conv2D(
filters = 256, kernel_size = (3, 3),
activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
strides = (1, 1), padding = 'same'
)
)
model.add(
MaxPooling2D(
pool_size = (2, 2),
strides = (2, 2)
)
)
model.add(Flatten())
model.add(
Dense(
units = 256, activation='relu',
kernel_initializer = tf.initializers.GlorotNormal()
)
)
model.add(
Dense(
units = 256, activation='relu',
kernel_initializer = tf.initializers.GlorotNormal()
)
)
model.add(
Dense(
units = 10, activation='softmax'
)
)
'''
# Compile CNN-
model.compile(
loss=tf.keras.losses.categorical_crossentropy,
# optimizer='adam',
optimizer=tf.keras.optimizers.Adam(learning_rate = 0.0003),
metrics=['accuracy']
)
'''
return model
# Load trained model from before-
best_model = conv6_cnn()
best_model.load_weights("best_weights.h5")
I came across this GitHub answer of freezing certain weights during training. On it's basis, I coded the following to freeze weights in the first and sixth conv layers:
conv1 = pruned_model.trainable_weights[0]
# Find all weights less than a threshold (0.1) and set them to zero-
conv1 = tf.where(conv1 < 0.1, 0, conv1)
# For all weights set to zero, stop training them-
conv1 = tf.where(conv1 == 0, tf.stop_gradient(conv1), conv1)
# Sanity check: number of parameters set at 0-
tf.math.count_nonzero(conv1, axis = None).numpy()
# 133
# Original number of paramaters-
tf.math.count_nonzero(best_model.trainable_weights[0], axis = None).numpy()
# 1728
# Assign conv layer1 back to pruned model-
pruned_model.trainable_weights[0].assign(conv1)
# Sanity check-
tf.math.count_nonzero(pruned_model.trainable_weights[0], axis = None).numpy()
# 133
# conv layer 6-
conv6 = pruned_model.trainable_weights[10]
# Find all weights less than a threshold (0.1) and set them to zero-
conv6 = tf.where(conv6 < 0.1, 0, conv6)
# For all weights set to zero, stop training them-
conv6 = tf.where(conv6 == 0, tf.stop_gradient(conv6), conv6)
# Sanity check: number of parameters set at 0-
tf.math.count_nonzero(conv6, axis = None).numpy()
# 5369
# Original number of paramaters-
tf.math.count_nonzero(best_model.trainable_weights[10], axis = None).numpy()
# 589824
# Assign conv layer6 back to pruned model-
pruned_model.trainable_weights[10].assign(conv6)
# Sanity check-
tf.math.count_nonzero(pruned_model.trainable_weights[10], axis = None).numpy()
# 5369
# Train model for 10 epochs for testing:
# Compile CNN-
pruned_model.compile(
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False),
optimizer=tf.keras.optimizers.Adam(learning_rate = 0.01),
metrics=['accuracy']
)
history = pruned_model.fit(
x = X_train, y = y_train,
epochs = 10, validation_data = (X_test, y_test)
)
However, after training when I check the number of non-zero weights:
# first conv layer-
tf.math.count_nonzero(pruned_model.trainable_weights[0], axis = None).numpy()
# sixth conv layer-
tf.math.count_nonzero(pruned_model.trainable_weights[10], axis = None).numpy()
The weights have increased in numbers again. They should have been 133 and 5369, but they are not.
Help?