I would suggest doing it with a custom layer instead of a Lambda layer. Why? A custom layer gives you more freedom to do what you need, and it is also more transparent when you want to inspect the weights. More precisely, if you do it through a Lambda layer, the constant weight will not be saved as part of the model, but it will be if you use a custom layer.
Here is an example:
from keras import backend as K
from keras.layers import *
from keras.models import *
import numpy as np
class MyLayer(Layer):
    """Scale the last-axis features by a weight vector, then multiply them together.

    See https://keras.io/layers/writing-your-own-keras-layers/

    Given ``x = [H21, H22, H23]`` and ``w_vec = [w1, w2, w3]`` the layer
    outputs ``(H21*w1) * (H22*w2) * (H23*w3)`` with the reduced axis kept
    as size 1.

    Args:
        w_vec: optional initial weights, array-like of shape
            ``(1, num_feats)``. Required when ``allow_training`` is False.
        allow_training: whether ``w_vec`` is a trainable weight.
        **kwargs: forwarded to the base ``Layer`` constructor.

    Raises:
        ValueError: if ``w_vec`` is None while ``allow_training`` is False,
            or (at build time) if ``w_vec`` does not have shape
            ``(1, num_feats)``.
    """

    def __init__(self, w_vec=None, allow_training=False, **kwargs):
        # Explicit raise instead of assert: asserts are stripped under -O.
        if w_vec is None and not allow_training:
            raise ValueError("ERROR: non-trainable w_vec must be initialized")
        super().__init__(**kwargs)
        # Coerce so that list input (e.g. coming back from get_config) works.
        self._w_vec = None if w_vec is None else np.asarray(w_vec)
        self.allow_training = allow_training

    def build(self, input_shape):
        """Create the ``w_vec`` weight and load the preset values, if any."""
        # Only the last axis matters; leading axes are broadcast over.
        num_feats = input_shape[-1]
        self.w_vec = self.add_weight(
            shape=(1, num_feats),
            name='w_vec',
            initializer='uniform',  # <- use your own preferred initializer
            trainable=self.allow_training,
        )
        if self._w_vec is not None:
            # predefined w_vec: must match the weight created above
            if tuple(self._w_vec.shape) != (1, num_feats):
                raise ValueError(
                    "ERROR: initial w_vec shape mismatches the input shape")
            # set it to the weight
            self.set_weights([self._w_vec])  # <- set weights to the supplied one
        super().build(input_shape)

    def call(self, x):
        """Return the product over the last axis of ``x * w_vec``."""
        # Step 1: elem_prod = [H21*w1, H22*w2, H23*w3]
        elem_prod = x * self.w_vec
        # Step 2: ret = (H21*w1) * (H22*w2) * (H23*w3)
        return K.prod(elem_prod, axis=-1, keepdims=True)

    def compute_output_shape(self, input_shape):
        """Same shape as the input, with the last axis reduced to size 1."""
        return tuple(input_shape[:-1]) + (1,)

    def get_config(self):
        """Return the constructor arguments merged into the base layer config."""
        config = super().get_config()
        # tolist() turns the array into plain nested lists for the config dict.
        config['w_vec'] = None if self._w_vec is None else self._w_vec.tolist()
        config['allow_training'] = self.allow_training
        return config
def make_test_cases(w_vec=None, allow_training=False):
    """Build a small test network around ``MyLayer`` and print a report.

    The network is Input(75) -> Dense(75, tanh) -> Dense(3, tanh) ->
    MyLayer -> Dense(1). The model is named ``<init>-<train>`` where
    ``<init>`` is ``randomInit`` when ``w_vec`` is None and ``assignInit``
    otherwise, and ``<train>`` is ``trainable`` or ``nontrainable``.

    Prints the model name, the initial ``w_vec`` of the ``core`` layer and
    the model summary.

    Args:
        w_vec: optional initial weights passed to ``MyLayer``.
        allow_training: whether the ``MyLayer`` weights are trainable.

    Returns:
        The constructed ``Model``.
    """
    x = Input(shape=(75,))
    y = Dense(75, activation='tanh', name='fc1')(x)
    y = Dense(3, activation='tanh', name='fc2')(y)
    y = MyLayer(w_vec, allow_training, name='core')(y)
    y = Dense(1, name='fc3')(y)

    init_tag = 'randomInit' if w_vec is None else 'assignInit'
    train_tag = 'trainable' if allow_training else 'nontrainable'
    net = Model(inputs=x, outputs=y, name='{}-{}'.format(init_tag, train_tag))

    print(net.name)
    # Look the layer up by name rather than by position in net.layers.
    print(net.get_layer('core').get_weights()[0])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line.
    net.summary()
    return net
You can run the following test cases to see the differences (pay attention to the first and last lines of the printout, which give you the initial values and the number of constant, i.e. non-trainable, parameters, respectively):
a. Constant weights, non-trainable
m1 = make_test_cases(w_vec=np.arange(3).reshape([1,3]), allow_training=False)
will give you
assignInit-nontrainable [[0. 1. 2.]]
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) (None, 75) 0
_________________________________________________________________
fc1 (Dense) (None, 75) 5700
_________________________________________________________________
fc2 (Dense) (None, 3) 228
_________________________________________________________________
core (MyLayer) (None, 1) 3
_________________________________________________________________
fc3 (Dense) (None, 1) 2
=================================================================
Total params: 5,933
Trainable params: 5,930
Non-trainable params: 3
_________________________________________________________________
b. Preset initial weights, trainable
m2 = make_test_cases(w_vec=np.arange(3).reshape([1,3]), allow_training=True)
will give you
assignInit-trainable [[0. 1. 2.]]
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_5 (InputLayer) (None, 75) 0
_________________________________________________________________
fc1 (Dense) (None, 75) 5700
_________________________________________________________________
fc2 (Dense) (None, 3) 228
_________________________________________________________________
core (MyLayer) (None, 1) 3
_________________________________________________________________
fc3 (Dense) (None, 1) 2
=================================================================
Total params: 5,933
Trainable params: 5,933
Non-trainable params: 0
_________________________________________________________________
c. Random weights, trainable
m3 = make_test_cases(w_vec=None, allow_training=True)
will give you
randomInit-trainable [[ 0.02650297 -0.02010062 -0.03771694]]
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_6 (InputLayer) (None, 75) 0
_________________________________________________________________
fc1 (Dense) (None, 75) 5700
_________________________________________________________________
fc2 (Dense) (None, 3) 228
_________________________________________________________________
core (MyLayer) (None, 1) 3
_________________________________________________________________
fc3 (Dense) (None, 1) 2
=================================================================
Total params: 5,933
Trainable params: 5,933
Non-trainable params: 0
_________________________________________________________________
Final remark
I would say it is not clear in advance which case will work best for your problem, but trying all three sounds like a good plan.