2

I am using a Conv-6 CNN in TensorFlow 2.5 and Python3. The objective is to selectively set certain weights within any trainable layer. The Conv-6 CNN model definition is as follows:

def conv6_cnn():
    """
    Function to define the architecture of a neural network model
    following Conv-6 architecture for CIFAR-10 dataset and using
    provided parameter which are used to prune the model.
    
    Conv-6 architecture-
    64, 64, pool  -- convolutional layers
    128, 128, pool -- convolutional layers
    256, 256, pool -- convolutional layers
    256, 256, 10  -- fully connected layers
    
    Output: Returns designed and compiled neural network model
    """
    
    l = tf.keras.layers
    
    model = Sequential()
    
    model.add(
        Conv2D(
            filters = 64, kernel_size = (3, 3),
            activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
            strides = (1, 1), padding = 'same',
            input_shape=(32, 32, 3)
        )    
    )
        
    model.add(
        Conv2D(
            filters = 64, kernel_size = (3, 3),
            activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
            strides = (1, 1), padding = 'same'
        )
    )
    
    model.add(
        MaxPooling2D(
            pool_size = (2, 2),
            strides = (2, 2)
        )
    )
    
    model.add(
        Conv2D(
            filters = 128, kernel_size = (3, 3),
            activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
            strides = (1, 1), padding = 'same'
        )
    )

    model.add(
        Conv2D(
            filters = 128, kernel_size = (3, 3),
            activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
            strides = (1, 1), padding = 'same'
        )
    )

    model.add(
        MaxPooling2D(
            pool_size = (2, 2),
            strides = (2, 2)
        )
    )

    model.add(
        Conv2D(
            filters = 256, kernel_size = (3, 3),
            activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
            strides = (1, 1), padding = 'same'
        )
    )

    model.add(
        Conv2D(
            filters = 256, kernel_size = (3, 3),
            activation='relu', kernel_initializer = tf.initializers.GlorotNormal(),
            strides = (1, 1), padding = 'same'
        )
    )

    model.add(
        MaxPooling2D(
            pool_size = (2, 2),
            strides = (2, 2)
        )
    )
    
    model.add(Flatten())
    
    model.add(
        Dense(
            units = 256, activation='relu',
            kernel_initializer = tf.initializers.GlorotNormal()
        )
    )
    
    model.add(
        Dense(
            units = 256, activation='relu',
            kernel_initializer = tf.initializers.GlorotNormal()
        )
    )
    
    model.add(
        Dense(
            units = 10, activation='softmax'
        )
    )
    

    '''
    # Compile CNN-
    model.compile(
        loss=tf.keras.losses.categorical_crossentropy,
        # optimizer='adam',
        optimizer=tf.keras.optimizers.Adam(learning_rate = 0.0003),
        metrics=['accuracy']
    )
    '''
    
    
    return model


# Load trained model from before-
best_model = conv6_cnn()
best_model.load_weights("best_weights.h5")

I came across this GitHub answer of freezing certain weights during training. On it's basis, I coded the following to freeze weights in the first and sixth conv layers:

conv1 = pruned_model.trainable_weights[0]

# Find all weights less than a threshold (0.1) and set them to zero-
conv1 = tf.where(conv1 < 0.1, 0, conv1)

# For all weights set to zero, stop training them-
conv1 = tf.where(conv1 == 0, tf.stop_gradient(conv1), conv1)


# Sanity check: number of parameters set at 0-
tf.math.count_nonzero(conv1, axis = None).numpy()
# 133

# Original number of paramaters-
tf.math.count_nonzero(best_model.trainable_weights[0], axis = None).numpy()
# 1728

# Assign conv layer1 back to pruned model-
pruned_model.trainable_weights[0].assign(conv1)

# Sanity check-
tf.math.count_nonzero(pruned_model.trainable_weights[0], axis = None).numpy()
# 133

# conv layer 6-
conv6 = pruned_model.trainable_weights[10]

# Find all weights less than a threshold (0.1) and set them to zero-
conv6 = tf.where(conv6 < 0.1, 0, conv6)

# For all weights set to zero, stop training them-
conv6 = tf.where(conv6 == 0, tf.stop_gradient(conv6), conv6)

# Sanity check: number of parameters set at 0-
tf.math.count_nonzero(conv6, axis = None).numpy()
# 5369

# Original number of paramaters-
tf.math.count_nonzero(best_model.trainable_weights[10], axis = None).numpy()
# 589824

# Assign conv layer6 back to pruned model-
pruned_model.trainable_weights[10].assign(conv6)

# Sanity check-
tf.math.count_nonzero(pruned_model.trainable_weights[10], axis = None).numpy()
# 5369


# Train model for 10 epochs for testing:

# Compile CNN-
pruned_model.compile(
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(learning_rate = 0.01),
    metrics=['accuracy']
)

history = pruned_model.fit(
    x = X_train, y = y_train,
    epochs = 10, validation_data = (X_test, y_test)
)

However, after training when I check the number of non-zero weights:

# first conv layer-
tf.math.count_nonzero(pruned_model.trainable_weights[0], axis = None).numpy()

# sixth conv layer-
tf.math.count_nonzero(pruned_model.trainable_weights[10], axis = None).numpy()

The weights have increased in numbers again. They should have been 133 and 5369, but they are not.

Help?

Arun
  • 2,222
  • 7
  • 43
  • 78

0 Answers0