Here's my code:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
def main():
    # loading data and image augmentation
    (X_train, Y_train), (X_test, Y_test) = keras.datasets.cifar10.load_data()
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.20, random_state=42)
    X_test, X_train, X_val = X_test.astype("float32"), X_train.astype("float32"), X_val.astype("float32")
    Y_train, Y_test, Y_val = keras.utils.to_categorical(Y_train, 10), keras.utils.to_categorical(Y_test, 10), keras.utils.to_categorical(Y_val, 10)
    datagen = keras.preprocessing.image.ImageDataGenerator(rotation_range=15, width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)
    datagen.fit(X_train)
    X_train = X_train.reshape(X_train.shape[0], 32, 32, 3)
    X_test = X_test.reshape(X_test.shape[0], 32, 32, 3)
    X_val = X_val.reshape(X_val.shape[0], 32, 32, 3)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_test = (X_test - mean) / std
    X_val = (X_val - mean) / std
    X_train = (X_train - mean) / std
    # constructing ResNet function
    def residual_module(layer_in, n_filters, kernel_size, padding, initializer, activation, regularizer, triple=False):
        # defaults: the second conv is a linear 1x1 projection back to the input depth
        activation2 = 'linear'
        filters2 = layer_in.shape[-1]
        size2 = 1
        if triple:
            # with three convs, the second conv mirrors the first and the 1x1 projection moves to conv3
            # (these have to be set before conv2 is built, or they never take effect)
            activation2 = activation
            filters2 = n_filters
            size2 = kernel_size
        conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
        conv1 = layers.Activation(activation)(conv1)
        batch1 = layers.BatchNormalization()(conv1)
        conv2 = layers.Conv2D(filters2, size2, padding='same', kernel_regularizer=regularizer)(batch1)
        conv2 = layers.Activation(activation2)(conv2)
        batch2 = layers.BatchNormalization()(conv2)
        if triple:
            conv3 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch2)
            batch3 = layers.BatchNormalization()(conv3)
            layer_out = layers.add([batch3, layer_in])
        else:
            layer_out = layers.add([batch2, layer_in])
        layer_out = layers.Activation(activation)(layer_out)
        return layer_out
    # VGG16 model with SIREN
    weight_decay = 0.0005
    model = keras.Sequential()
    input_layer = layers.Input(shape=(32,32,3))
    model.add(residual_module(input_layer, n_filters=64, kernel_size=(3,3), padding='same', initializer="he_uniform", activation=tf.math.sin, regularizer=keras.regularizers.l2(weight_decay)))
    first = model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(residual_module(first, 128, (3,3), 'same', None, tf.math.sin, keras.regularizers.l2(weight_decay)))
    second = model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(residual_module(second, 256, (3,3), 'same', None, tf.math.sin, keras.regularizers.l2(weight_decay), triple=True))
    third = model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(residual_module(third, 512, (3,3), 'same', None, tf.math.sin, keras.regularizers.l2(weight_decay), triple=True))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10))
    '''
    model.add(layers.Conv2D(64, (3, 3), padding='same', kernel_initializer="he_uniform", activation=tf.math.sin, input_shape=(32,32,3), kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation=tf.math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation=tf.math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation=tf.math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=tf.math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=tf.math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=tf.math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=tf.math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=tf.math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=tf.math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10))
    '''
    # training model
    lr = 0.001
    loss = keras.losses.CategoricalCrossentropy(from_logits=True)
    decayed_lr = tf.keras.optimizers.schedules.ExponentialDecay(lr, 10000, 0.85, staircase=True)
    optim = keras.optimizers.SGD(decayed_lr, momentum=0.9, nesterov=True)
    batch_size = 128
    #optim = keras.optimizers.Adam(decayed_lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss=loss, optimizer=optim, metrics=["accuracy"])
    checkpoint_filepath = '/Users/JamesRONewton/Documents/Programming/MachineLearning/SIREN_projects/BrainTumor/checkpoint.hdf5'
    checkpoint = keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath, monitor='accuracy', verbose=2, save_best_only=True, save_weights_only=True, mode='auto', save_freq="epoch")
    try:
        # load_weights doesn't take custom_objects; a missing checkpoint is simply skipped
        model.load_weights(checkpoint_filepath)
    except Exception:
        pass
    model.fit(datagen.flow(X_train, Y_train, batch_size=batch_size), steps_per_epoch=len(X_train) // batch_size, epochs=25, callbacks=[checkpoint], validation_data=(X_val, Y_val))
    model.evaluate(X_test, Y_test, verbose=1)
    # saving model
    model.save("VGG16.h5")

if __name__ == '__main__':
    main()
And here's the error I keep getting:
TypeError: The added layer must be an instance of class Layer. Received: layer=KerasTensor(type_spec=TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), name='activation_2/Sin:0', description="created by layer 'activation_2'") of type <class 'keras.engine.keras_tensor.KerasTensor'>.
The errors so far have mostly been about passing a custom activation function into the ResNet function I created. For example:
TypeError: The added layer must be an instance of class Layer. Received: layer=KerasTensor(type_spec=TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), name='activation/Sin:0', description="created by layer 'activation'") of type <class 'keras.engine.keras_tensor.KerasTensor'>.
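Looking at both tracebacks, the root cause seems to be that my residual_module returns a KerasTensor (the output of calling layers functionally), while Sequential.add only accepts Layer instances. A minimal sketch that reproduces the same mismatch, as far as I understand it:

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

inp = layers.Input(shape=(32, 32, 3))
out = layers.Activation(tf.math.sin)(inp)  # 'out' is a KerasTensor, not a Layer

model = keras.Sequential()
model.add(out)  # TypeError: the added layer must be an instance of class Layer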
So I thought maybe using
layers.Activation(activation)
instead of putting the activation directly in the Conv2D layer would fix it, but as you can see, it did not. I've also tried defining the custom activation function as a class inheriting from layers.Layer, but that did not work either. Here's the code I used for that attempt:
class Sin(layers.Layer):
    def __init__(self, **kwargs):
        super(Sin, self).__init__(**kwargs)

    def call(self, inputs):
        return tf.math.sin(inputs)
But alas, it did not work.
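For reference, this is roughly how I dropped it into the residual module (the surrounding names are approximate, from memory):

conv1 = layers.Conv2D(n_filters, kernel_size, padding='same')(layer_in)
conv1 = Sin()(conv1)  # Sin is a Layer, but calling it on a tensor still returns a KerasTensor

So even with Sin as a proper Layer subclass, the module as a whole still returns a KerasTensor, and model.add(residual_module(...)) fails the same way.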
Update:
I tried using the Keras backend, but that failed too. I also tried using a Lambda layer in my ResNet function. Here's my most recent attempt, which combines both:
#custom sinusoidal activation function
from tensorflow.keras import backend as K

def sin(x):
    return K.sin(x)
#constructing ResNet function
def residual_module(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
    conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
    conv1 = layers.Lambda(lambda x: sin(x))(conv1)
    batch1 = layers.BatchNormalization()(conv1)
    conv2 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch1)
    batch2 = layers.BatchNormalization()(conv2)
    layer_out = layers.add([batch2, layer_in])
    layer_out = layers.Lambda(lambda x: sin(x))(layer_out)
    return layer_out

def residual_module_triple(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
    conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
    conv1 = layers.Lambda(lambda x: sin(x))(conv1)
    batch1 = layers.BatchNormalization()(conv1)
    conv2 = layers.Conv2D(n_filters, kernel_size, padding='same', kernel_regularizer=regularizer)(batch1)
    conv2 = layers.Lambda(lambda x: sin(x))(conv2)
    batch2 = layers.BatchNormalization()(conv2)
    conv3 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch2)
    batch3 = layers.BatchNormalization()(conv3)
    layer_out = layers.add([batch3, layer_in])
    layer_out = layers.Lambda(lambda x: sin(x))(layer_out)
    return layer_out
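My current suspicion is that the real problem is mixing keras.Sequential with these functional-style blocks, so my next step is to wire everything up with keras.Model instead of model.add. An untested sketch of what I mean, mirroring the filter/pooling layout of my Sequential version:

weight_decay = 0.0005
reg = keras.regularizers.l2(weight_decay)

inputs = layers.Input(shape=(32, 32, 3))
x = residual_module(inputs, 64, (3, 3), 'same', 'he_uniform', reg)
x = layers.MaxPooling2D((2, 2), strides=2)(x)
x = residual_module(x, 128, (3, 3), 'same', None, reg)
x = layers.MaxPooling2D((2, 2), strides=2)(x)
x = residual_module_triple(x, 256, (3, 3), 'same', None, reg)
x = layers.MaxPooling2D((2, 2), strides=2)(x)
x = residual_module_triple(x, 512, (3, 3), 'same', None, reg)
x = layers.MaxPooling2D((2, 2), strides=2)(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(4096, activation='relu')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(4096, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(10)(x)
model = keras.Model(inputs, outputs)

Would that be the right direction?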