I have a simple neural network with two outputs and for each of them I need to use different activation function. I do basically what is written in this article - here, but it looks like my layer with different activation functions is not working:
See my code below:
X = filled_df.loc[:, "SOUTEZ_MEAN_HOME":"TOTAL_POINTS_AWAY"].values
y = filled_df.loc[:, "HOME_YELLOW_CARDS"].values
X= X.astype("float32")
y= y.astype("float32")
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.3)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
def negative_binomial_layer(x):
# Get the number of dimensions of the input
num_dims = len(x.get_shape())
# Separate the parameters
n, p = tf.unstack(x, num=2, axis=-1)
# Add one dimension to make the right shape
n = tf.expand_dims(n, -1)
p = tf.expand_dims(p, -1)
# Apply a softplus to make positive
n = tf.cast(n, tf.float32)
p = tf.cast(p, tf.float32)
n = tf.keras.activations.softplus(n)
# Apply a sigmoid activation to bound between 0 and 1
p = tf.keras.activations.sigmoid(p)
# Join back together again
out_tensor = tf.concat((n, p), axis=num_dims-1)
return out_tensor
input_shape = (212, )
# Define inputs with predefined shape
inputs = Input(shape=input_shape)
# Build network with some predefined architecture
Layer1 = Dense(16)
Layer2 = Dense(8)
output1 = Layer1(inputs)
output2 = Layer2(output1)
# Predict the parameters of a negative binomial distribution
outputs = Dense(2)(output2)
#outputs = tf.cast(outputs, tf.float32)
distribution_outputs = Lambda(negative_binomial_layer)(outputs)
# Construct model
model = Model(inputs=inputs, outputs=outputs)
num_epochs = 10
opt = Adam()
model.compile(loss = negative_binomial_loss, optimizer = opt)
history = model.fit(X_train, y_train, epochs = num_epochs,
validation_data = (X_test, y_test))
These are my predicted values if I print y_pred in custom loss function:
Epoch 1/10
y_pred = [[2.19472528 3.14479065]
[-1.16056371 1.69369149]
[-1.12327099 2.06830978]
...
[-1.23587477 4.82307]
[0.235431105 3.86740351]
[-2.75554061 1.10352468]] [[[2.19472528 3.14479065]
[-1.16056371 1.69369149]
[-1.12327099 2.06830978]
...
[-1.23587477 4.82307]
[0.235431105 3.86740351]
[-2.75554061 1.10352468]]]
Second predicted value p should be between 0 and 1 and since it is out of this range I am getting nan during counting loss.
Any suggestions? Thanks