IIUC, and assuming you want to leave your tfp.layers.DistributionLambda as it is, you have a few options which you can experiment with:
Option 1: Use two Dense layers with the Keras functional API
# Your code
#[.....]
tfd = tfp.distributions

sample_layer = tfp.layers.DistributionLambda(
    lambda t: tfd.Normal(loc=t[..., :1],
                         scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:])))

def get_df_model():
    inputs = tf.keras.layers.Input(shape=[len(df.columns),])
    x = tf.keras.layers.Dense(10, activation='relu')(inputs)
    x = tf.keras.layers.Dense(10, activation='relu')(x)
    # there are 2 outputs, so we want a mean + standard deviation for EACH of the outputs
    outputs1 = tf.keras.layers.Dense(len(target.columns))(x)
    outputs2 = tf.keras.layers.Dense(len(target.columns))(x)
    outputs1 = sample_layer(outputs1)
    outputs2 = sample_layer(outputs2)
    model = tf.keras.Model(inputs, [outputs1, outputs2])

    negloglik = lambda y, rv_y: -rv_y.log_prob(y)
    model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)
    return model

model = get_df_model()
model.summary()
model.fit(df, target, epochs=10)
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 1)] 0 []
dense_24 (Dense) (None, 10) 20 ['input_1[0][0]']
dense_25 (Dense) (None, 10) 110 ['dense_24[0][0]']
dense_26 (Dense) (None, 2) 22 ['dense_25[0][0]']
dense_27 (Dense) (None, 2) 22 ['dense_25[0][0]']
distribution_lambda_10 (Distri ((None, 1), 0 ['dense_26[0][0]',
butionLambda) (None, 1)) 'dense_27[0][0]']
==================================================================================================
Total params: 174
Trainable params: 174
Non-trainable params: 0
__________________________________________________________________________________________________
Epoch 1/10
157/157 [==============================] - 1s 2ms/step - loss: 522.2677 - distribution_lambda_10_loss: 247.8716 - distribution_lambda_10_1_loss: 274.3961
Epoch 2/10
157/157 [==============================] - 1s 3ms/step - loss: 20.3496 - distribution_lambda_10_loss: 9.5429 - distribution_lambda_10_1_loss: 10.8067
Epoch 3/10
157/157 [==============================] - 1s 6ms/step - loss: 13.7444 - distribution_lambda_10_loss: 6.6085 - distribution_lambda_10_1_loss: 7.1359
Epoch 4/10
157/157 [==============================] - 1s 7ms/step - loss: 11.3713 - distribution_lambda_10_loss: 5.5506 - distribution_lambda_10_1_loss: 5.8206
Epoch 5/10
157/157 [==============================] - 1s 4ms/step - loss: 10.2081 - distribution_lambda_10_loss: 5.0250 - distribution_lambda_10_1_loss: 5.1830
Epoch 6/10
157/157 [==============================] - 0s 3ms/step - loss: 9.5528 - distribution_lambda_10_loss: 4.7256 - distribution_lambda_10_1_loss: 4.8272
Epoch 7/10
157/157 [==============================] - 0s 2ms/step - loss: 9.1495 - distribution_lambda_10_loss: 4.5393 - distribution_lambda_10_1_loss: 4.6102
Epoch 8/10
157/157 [==============================] - 1s 6ms/step - loss: 8.8837 - distribution_lambda_10_loss: 4.4159 - distribution_lambda_10_1_loss: 4.4678
Epoch 9/10
157/157 [==============================] - 0s 3ms/step - loss: 8.7027 - distribution_lambda_10_loss: 4.3319 - distribution_lambda_10_1_loss: 4.3708
Epoch 10/10
157/157 [==============================] - 0s 3ms/step - loss: 8.5743 - distribution_lambda_10_loss: 4.2724 - distribution_lambda_10_1_loss: 4.3019
<keras.callbacks.History at 0x7f51001c2f50>
Note what the docs state regarding the distributions when using DistributionLambda:
By default, a distribution is represented as a tensor via a random draw, e.g., tfp.distributions.Distribution.sample
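So calling the Option 1 model directly gives you back two distribution objects (not plain tensors), and you can query their statistics instead of relying on that random draw. A minimal sketch, not part of the original code, assuming df holds the same features used for training:

dist1, dist2 = model(df.values.astype('float32'))  # two tfd.Normal objects, one per head
print(dist1.mean().numpy()[:3])    # predicted means for the first target
print(dist1.stddev().numpy()[:3])  # predicted standard deviations for the first target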
Option 2: Use one Dense layer and split the output into two
def get_df_model():
    sample_layer = tfp.layers.DistributionLambda(
        lambda t: tfd.Normal(loc=t[..., :1],
                             scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:])))

    inputs = tf.keras.layers.Input(shape=[len(df.columns),])
    x = tf.keras.layers.Dense(10, activation='relu')(inputs)
    x = tf.keras.layers.Dense(10, activation='relu')(x)
    x = tf.keras.layers.Dense(2 * len(target.columns))(x)
    x1, x2 = tf.split(x, num_or_size_splits=2, axis=-1)
    outputs1 = sample_layer(x1)
    outputs2 = sample_layer(x2)
    model = tf.keras.Model(inputs, [outputs1, outputs2])

    negloglik = lambda y, rv_y: -rv_y.log_prob(y)
    model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)
    return model
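If the tf.split step looks opaque: it simply cuts the last axis of the Dense output in half. A tiny standalone sketch, assuming len(target.columns) == 2 so the Dense layer has 4 units:

x = tf.constant([[1., 2., 3., 4.]])                 # one row of the Dense(4) output
x1, x2 = tf.split(x, num_or_size_splits=2, axis=-1)
print(x1.numpy(), x2.numpy())                       # [[1. 2.]] [[3. 4.]]

Each half then carries the (loc, pre-softplus scale) pair that sample_layer turns into a Normal.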
Option 3: Use one Dense layer and change the slices to :2 and 2:
# Your code
#[.....]
tfd = tfp.distributions

# The first two units parameterize the locs, the last two the scales.
sample_layer = tfp.layers.DistributionLambda(
    lambda t: tfd.Normal(loc=t[..., :2],
                         scale=1e-3 + tf.math.softplus(0.05 * t[..., 2:])))

def get_df_model():
    inputs = tf.keras.layers.Input(shape=[len(df.columns),])
    x = tf.keras.layers.Dense(10, activation='relu')(inputs)
    x = tf.keras.layers.Dense(10, activation='relu')(x)
    outputs = tf.keras.layers.Dense(2 * len(target.columns))(x)
    outputs = sample_layer(outputs)
    model = tf.keras.Model(inputs, [outputs])

    negloglik = lambda y, rv_y: -rv_y.log_prob(y)
    model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)
    return model
model = get_df_model()
model.summary()
model.fit(df, target, epochs=10)
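Here the single DistributionLambda wraps a Normal whose last axis covers both target columns, so y must have shape (batch, 2) and negloglik is evaluated element-wise per column before Keras reduces it to a scalar. A hedged prediction sketch, again assuming df holds your features:

dist = model(df.values.astype('float32'))
print(dist.mean().shape)    # (N, 2): one mean per target column
print(dist.stddev().shape)  # (N, 2): one standard deviation per target column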
Additionally: if you want to explicitly use independent distributions based on the parameters x1 and x2, try:
def get_df_model():
    inputs = tf.keras.layers.Input(shape=[len(df.columns),])
    x = tf.keras.layers.Dense(10, activation='relu')(inputs)
    x = tf.keras.layers.Dense(10, activation='relu')(x)
    x = tf.keras.layers.Dense(2 * len(target.columns))(x)
    x1, x2 = tf.split(x, num_or_size_splits=2, axis=-1)
    outputs1 = tfp.layers.DistributionLambda(
        lambda t: tfd.Normal(loc=t[..., :1],
                             scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:])))(x1)
    outputs2 = tfp.layers.DistributionLambda(
        lambda t: tfd.Normal(loc=t[..., :1],
                             scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:])))(x2)
    model = tf.keras.Model(inputs, [outputs1, outputs2])

    negloglik = lambda y, rv_y: -rv_y.log_prob(y)
    model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)
    return model
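Since this variant has two distribution outputs, you can also hand Keras one target column per head when fitting; a usage sketch (slicing the target DataFrame this way is just one option):

model = get_df_model()
model.fit(df, [target.iloc[:, :1], target.iloc[:, 1:]], epochs=10)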