Here's my code:

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.datasets import cifar10
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

def main():
    #loading data and image augmentation
    (X_train, Y_train), (X_test, Y_test) = keras.datasets.cifar10.load_data()
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.20, random_state=42)
    X_test, X_train, X_val = X_test.astype("float32"), X_train.astype("float32"), X_val.astype("float32")
    Y_train, Y_test, Y_val = keras.utils.to_categorical(Y_train, 10), keras.utils.to_categorical(Y_test, 10), keras.utils.to_categorical(Y_val, 10)
    datagen = keras.preprocessing.image.ImageDataGenerator(rotation_range=15, width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)
    datagen.fit(X_train)
    X_train = X_train.reshape(X_train.shape[0], 32, 32, 3)
    X_test = X_test.reshape(X_test.shape[0], 32, 32, 3)
    X_val = X_val.reshape(X_val.shape[0], 32, 32, 3)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_test = (X_test - mean) / std
    X_val = (X_val - mean) / std
    X_train = (X_train - mean) / std

    #constructing ResNet function
    def residual_module(layer_in, n_filters, kernel_size, padding, initializer, activation, regularizer, triple=False):
        activation2 = 'linear'
        filters2 = layer_in.shape[-1]
        size2 = 1
        conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
        conv1 = layers.Activation(activation)(conv1)
        batch1 = layers.BatchNormalization()(conv1)
        conv2 = layers.Conv2D(filters2, size2, padding='same', kernel_regularizer=regularizer)(batch1)
        conv2 = layers.Activation(activation2)(conv2)
        batch2 = layers.BatchNormalization()(conv2)
        if triple == True:
            activation2 = activation
            filters2 = n_filters
            size2 = kernel_size
            conv3 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch2)
            batch3 = layers.BatchNormalization()(conv3)
            layer_out = layers.add([batch3, layer_in])
            layer_out = layers.Activation(activation)(layer_out)
        else:
            layer_out = layers.add([batch2, layer_in])
            layer_out = layers.Activation(activation)(layer_out)
        return layer_out

    #VGG16 model with SIREN
    weight_decay = 0.0005
    model = keras.Sequential()
    input_layer = layers.Input(shape=(32,32,3))
    model.add(residual_module(input_layer, n_filters=64, kernel_size=(3,3), padding='same', initializer="he_uniform", activation=tf.math.sin, regularizer=keras.regularizers.l2(weight_decay)))
    first = model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(residual_module(first, 128, (3,3), 'same', None, tf.math.sin, keras.regularizers.l2(weight_decay)))
    second = model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(residual_module(second, 256, (3,3), 'same', None, tf.math.sin, keras.regularizers.l2(weight_decay), triple=True))
    third = model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(residual_module(third, 512, (3,3), 'same', None, tf.math.sin, keras.regularizers.l2(weight_decay), triple=True))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10))


    '''
    model.add(layers.Conv2D(64, (3, 3), padding='same', kernel_initializer="he_uniform", activation=tf.math.sin, input_shape=(32,32,3), kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10))
    '''

    #training model
    lr = 0.001
    loss = keras.losses.CategoricalCrossentropy(from_logits=True)
    decayed_lr = tf.keras.optimizers.schedules.ExponentialDecay(lr, 10000, 0.85, True)
    optim = keras.optimizers.SGD(decayed_lr, momentum=0.9, nesterov=True)
    batch_size = 128
    #optim = keras.optimizers.Adam(decayed_lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss=loss, optimizer=optim, metrics=["accuracy"])
    checkpoint_filepath = '/Users/JamesRONewton/Documents/Programming/MachineLearning/SIREN_projects/BrainTumor/checkpoint.hdf5'
    checkpoint = keras.callbacks.ModelCheckpoint(filepath = checkpoint_filepath, monitor='accuracy', verbose=2, save_best_only=True, save_weights_only=True, mode='auto', save_freq ="epoch")
    try:
        model.load_weights(checkpoint_filepath, custom_objects = {"sin": tf.math.sin})
    except Exception as e:
        pass
    model.fit(datagen.flow(X_train, Y_train, batch_size=batch_size), steps_per_epoch = len(X_train) / batch_size, epochs=25, callbacks = [checkpoint], validation_data=(X_val, Y_val))
    model.evaluate(X_test, Y_test, verbose=1)

    #saving model
    model.save("VGG16.h5")

if __name__ == '__main__':
    main()

And here's the error I keep getting:

TypeError: The added layer must be an instance of class Layer. Received: layer=KerasTensor(type_spec=TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), name='activation_2/Sin:0', description="created by layer 'activation_2'") of type <class 'keras.engine.keras_tensor.KerasTensor'>.

The errors so far have mostly been about passing a custom activation function into the ResNet function I created. For example,

TypeError: The added layer must be an instance of class Layer. Received: layer=KerasTensor(type_spec=TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), name='activation/Sin:0', description="created by layer 'activation'") of type <class 'keras.engine.keras_tensor.KerasTensor'>.

So I thought maybe using

layers.Activation(activation)

instead of passing the activation directly to the Conv2D layer would fix it, but as you can see, it did not. I also tried defining the custom activation function as a class that inherits from layers.Layer, using this code:

class Sin(layers.Layer):
    def __init__(self, **kwargs):
        super(Sin, self).__init__(**kwargs)

    def call(self, inputs):
        return tf.math.sin(inputs)

But alas, it did not work.
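
To narrow it down, here is a stripped-down snippet (the layer sizes are placeholders) that reproduces the same TypeError. It suggests the problem is not the activation itself: residual_module is written in the functional style, so it returns a KerasTensor, while Sequential.add() only accepts Layer instances.

from tensorflow import keras
from keras import layers

model = keras.Sequential()
inp = layers.Input(shape=(32, 32, 3))
out = layers.Conv2D(8, 3, padding='same')(inp)  # "out" is a KerasTensor, not a Layer
model.add(out)  # TypeError: The added layer must be an instance of class Layer ...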

Update:

I tried using the Keras backend, but that failed. I also tried using a Lambda layer in my ResNet function. Here's my most recent attempt, which combines both:

    #custom sinusoidal activation function (K is the Keras backend: from keras import backend as K)
    def sin(x):
        return K.sin(x)

    #constructing ResNet function
    def residual_module(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
        conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
        conv1 = layers.Lambda(lambda x: sin(x))(conv1)
        batch1 = layers.BatchNormalization()(conv1)
        conv2 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch1)
        batch2 = layers.BatchNormalization()(conv2)
        layer_out = layers.add([batch2, layer_in])
        layer_out = layers.Lambda(lambda x: sin(x))(layer_out)
        return layer_out

    def residual_module_triple(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
        conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
        conv1 = layers.Lambda(lambda x: sin(x))(conv1)
        batch1 = layers.BatchNormalization()(conv1)
        conv2 = layers.Conv2D(n_filters, kernel_size, padding='same', kernel_regularizer=regularizer)(batch1)
        conv2 = layers.Lambda(lambda x: sin(x))(conv2)
        batch2 = layers.BatchNormalization()(conv2)
        conv3 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch2)
        batch3 = layers.BatchNormalization()(conv3)
        layer_out = layers.add([batch3, layer_in])
        layer_out = layers.Lambda(lambda x: sin(x))(layer_out)
        return layer_out

1 Answer


I got it up and working so I can resume training!! Here's my fixed code:

def main():
    import os
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
    import tensorflow as tf
    from tensorflow import keras
    from keras import layers
    from keras.datasets import cifar10
    from sklearn.model_selection import train_test_split
    import numpy as np
    import matplotlib.pyplot as plt
    from keras import backend as K

    #loading data and image augmentation
    (X_train, Y_train), (X_test, Y_test) = keras.datasets.cifar10.load_data()
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.20, random_state=42)
    X_test, X_train, X_val = X_test.astype("float32"), X_train.astype("float32"), X_val.astype("float32")
    Y_train, Y_test, Y_val = keras.utils.to_categorical(Y_train, 10), keras.utils.to_categorical(Y_test, 10), keras.utils.to_categorical(Y_val, 10)
    datagen = keras.preprocessing.image.ImageDataGenerator(rotation_range=15, width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)
    datagen.fit(X_train)
    X_train = X_train.reshape(X_train.shape[0], 32, 32, 3)
    X_test = X_test.reshape(X_test.shape[0], 32, 32, 3)
    X_val = X_val.reshape(X_val.shape[0], 32, 32, 3)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_test = (X_test - mean) / std
    X_val = (X_val - mean) / std
    X_train = (X_train - mean) / std

    #custom sinusoidal activation function
    class sin(layers.Layer):
        def __init__(self, **kwargs):
            super(sin, self).__init__(**kwargs)
        def call(self, inputs, **kwargs):
            return K.sin(inputs)

    #constructing ResNet function
    def residual_module(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
        inputs = layer_in
        conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
        conv1 = layers.Lambda(lambda x: sin()(x))(conv1)
        batch1 = layers.BatchNormalization()(conv1)
        conv2 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch1)
        batch2 = layers.BatchNormalization()(conv2)
        layer_out = layers.add([batch2, inputs])
        layer_out = layers.Lambda(lambda x: sin()(x))(layer_out)
        return layer_out

    def residual_module_triple(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
        inputs = layer_in
        conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
        conv1 = layers.Lambda(lambda x: sin()(x))(conv1)
        batch1 = layers.BatchNormalization()(conv1, training=True)
        conv2 = layers.Conv2D(n_filters, kernel_size, padding='same', kernel_regularizer=regularizer)(batch1)
        conv2 = layers.Lambda(lambda x: sin()(x))(conv2)
        batch2 = layers.BatchNormalization()(conv2, training=True)
        conv3 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch2)
        batch3 = layers.BatchNormalization()(conv3, training=True)
        layer_out = layers.add([batch3, inputs])
        layer_out = layers.Lambda(lambda x: sin()(x))(layer_out)
        return layer_out

    #VGG16 model with SIREN
    weight_decay = 0.0005
    inputs = layers.Input(shape=(32,32,3))
    x = residual_module(inputs, n_filters=64, kernel_size=(3,3), padding='same', initializer="he_uniform", regularizer=keras.regularizers.l2(weight_decay))
    x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = residual_module(x, 128, (3,3), 'same', None, keras.regularizers.l2(weight_decay))
    x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = residual_module_triple(x, 256, (3,3), 'same', None, keras.regularizers.l2(weight_decay))
    x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = residual_module_triple(x, 512, (3,3), 'same', None, keras.regularizers.l2(weight_decay))
    x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(4096, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(4096, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(10)(x)
    model = keras.Model(inputs, outputs)

    #training model
    lr = 0.001
    loss = keras.losses.CategoricalCrossentropy(from_logits=True)
    decayed_lr = tf.keras.optimizers.schedules.ExponentialDecay(lr, 10000, 0.85, True)
    optim = keras.optimizers.SGD(decayed_lr, momentum=0.9, nesterov=True)
    batch_size = 128
    model.compile(loss=loss, optimizer=optim, metrics=["accuracy"])
    checkpoint_filepath = '/Users/JamesRONewton/Documents/Programming/MachineLearning/SIREN_projects/BrainTumor/checkpoint.hdf5'
    checkpoint = keras.callbacks.ModelCheckpoint(filepath = checkpoint_filepath, monitor='accuracy', verbose=2, save_best_only=True, save_weights_only=True, mode='auto', save_freq ="epoch")
    try:
        # note: Model.load_weights() does not accept custom_objects; passing it raised a
        # TypeError that the bare except silently swallowed, so the checkpoint never loaded
        model.load_weights(checkpoint_filepath)
    except Exception as e:
        pass
    model.fit(datagen.flow(X_train, Y_train, batch_size=batch_size), steps_per_epoch = len(X_train) / batch_size, epochs=25, callbacks = [checkpoint], validation_data=(X_val, Y_val))
    model.evaluate(X_test, Y_test, verbose=1)

    #saving model
    model.save("VGG16.h5")

if __name__ == '__main__':
    main()

Two issues with my code were causing the errors:

  1. I found out that model.add() doesn't return anything (e.g. "first" was just a NoneType), so I switched to the functional API: each block's output is assigned to "x" and fed into the next call, and the model is built at the end with the keras.Model class (see the sketch below).
  2. I modified the "sin" class I defined so that its call() accepts extra keyword arguments. I also wrapped it in a Lambda layer rather than passing the function itself, because passing the plain function kept handing Keras the Tensor returned by the function instead of a callable layer.
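
To illustrate the first point, here is a minimal sketch (with placeholder layer sizes, not the network above) of the difference between the two APIs: Sequential.add() returns None and only accepts Layer instances, while the functional API threads KerasTensors through layer calls and builds the model with keras.Model at the end.

from tensorflow import keras
from keras import layers

# Sequential API: add() takes Layer instances and returns None,
# so "first = model.add(...)" can never be reused as an input tensor.
seq = keras.Sequential()
seq.add(layers.Conv2D(8, 3, padding='same', input_shape=(32, 32, 3)))
print(seq.add(layers.MaxPooling2D()))  # prints None

# Functional API: every layer call returns a KerasTensor that feeds the next call.
inputs = layers.Input(shape=(32, 32, 3))
x = layers.Conv2D(8, 3, padding='same')(inputs)
x = layers.MaxPooling2D()(x)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10)(x)
model = keras.Model(inputs, outputs)

On the second point, the Lambda wrapper isn't strictly required: a Layer subclass like sin can be called directly on a tensor, e.g. x = sin()(x), which keeps the graph a bit simpler.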