I don't understand how the Lasagne functions work in the code below.
class WScaleLayer(lasagne.layers.Layer):
    """Rescale the output of ``incoming`` by the RMS of its initial weights.

    At construction time the layer reads ``incoming.W``, divides it in place
    by ``sqrt(mean(W ** 2))`` and keeps that factor as ``self.scale``, which
    multiplies the input in ``get_output_for``.

    ``scale`` is computed once in ``__init__`` and registered with
    ``trainable=False``; nothing in this class writes to it afterwards.
    NOTE(review): whether an optimizer leaves it alone depends on the caller
    collecting parameters with a ``trainable=True`` filter -- confirm against
    the training code.

    If ``incoming`` carries a bias ``b`` and/or a ``nonlinearity``, they are
    moved onto this layer (and removed from / reset to linear on
    ``incoming``) so that they are applied after the rescaling.
    """

    def __init__(self, incoming, **kwargs):
        """Normalise ``incoming.W`` and take over its bias and nonlinearity.

        ``incoming`` must expose a shared variable ``W``; ``b`` and
        ``nonlinearity`` are optional. Extra keyword arguments are forwarded
        to the base ``Layer`` constructor.
        """
        super(WScaleLayer, self).__init__(incoming, **kwargs)

        # Divide the wrapped layer's weights by their root-mean-square and
        # remember the divisor so the output can be multiplied back by it.
        weights = incoming.W.get_value()
        rms = np.sqrt(np.mean(weights ** 2))
        incoming.W.set_value(weights / rms)
        self.scale = self.add_param(rms, (), name='scale', trainable=False)

        # Move the bias (if any) from the wrapped layer onto this one, so it
        # is added after the scaling rather than before it.
        self.b = None
        inherited_bias = getattr(incoming, 'b', None)
        if inherited_bias is not None:
            bias_value = inherited_bias.get_value()
            self.b = self.add_param(
                bias_value, bias_value.shape, name='b', regularizable=False
            )
            del incoming.params[inherited_bias]
            incoming.b = None

        # Likewise move the nonlinearity; the wrapped layer becomes linear.
        inherited_act = getattr(incoming, 'nonlinearity', None)
        if inherited_act is None:
            self.nonlinearity = lasagne.nonlinearities.linear
        else:
            self.nonlinearity = inherited_act
            incoming.nonlinearity = lasagne.nonlinearities.linear

    def get_output_for(self, v, **kwargs):
        """Return ``nonlinearity(v * scale [+ b])``.

        When a bias is present it is placed on axis 1 of ``v`` and
        broadcast over every other axis.
        """
        scaled = v * self.scale
        if self.b is None:
            return self.nonlinearity(scaled)

        # ['x', 0, 'x', ...]: bias axis 0 goes to output axis 1; all other
        # axes are broadcastable.
        broadcast_axes = ['x', 0] + ['x'] * (scaled.ndim - 2)
        return self.nonlinearity(scaled + self.b.dimshuffle(*broadcast_axes))
Can you tell me whether `self.scale` stays constant during training after it is initialized?