Using the pima indians diabetes dataset I'm trying to build an accurate model using Keras. I've written the following code:
# Visualize training history
from keras import callbacks
from keras.layers import Dropout
tb = callbacks.TensorBoard(log_dir='/.logs', histogram_freq=10, batch_size=32,
write_graph=True, write_grads=True, write_images=False,
embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
# Visualize training history
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import numpy
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load pima indians dataset
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]
# create model
model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu', name='first_input'))
model.add(Dense(500, activation='tanh', name='first_hidden'))
model.add(Dropout(0.5, name='dropout_1'))
model.add(Dense(8, activation='relu', name='second_hidden'))
model.add(Dense(1, activation='sigmoid', name='output_layer'))
# Compile model
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
# Fit the model
history = model.fit(X, Y, validation_split=0.33, epochs=1000, batch_size=10, verbose=0, callbacks=[tb])
# list all data in history
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
After several tries, I've added dropout layers in order to avoid overfitting, but with no luck. The following graph shows that the validation loss and training loss gets separate at one point.
What else could I do to optimize this network?
UPDATE: based on the comments I got I've tweaked the code like so:
model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer='uniform', kernel_regularizer=regularizers.l2(0.01),
activity_regularizer=regularizers.l1(0.01), activation='relu',
name='first_input')) # added regularizers
model.add(Dense(8, activation='relu', name='first_hidden')) # reduced to 8 neurons
model.add(Dropout(0.5, name='dropout_1'))
model.add(Dense(5, activation='relu', name='second_hidden'))
model.add(Dense(1, activation='sigmoid', name='output_layer'))
Here are the graphs for 500 epochs