I converted the following code from Keras to PyTorch. The main challenge for me is building a multi-input, multi-output model analogous to keras.models.Model, i.e. how to implement the code below in PyTorch so that it accepts multiple inputs and produces multiple outputs.
from tensorflow import keras as k
import tensorflow as tf
class NetworkKeys:
    NUM_UNITS = "num_units"
    ACTIVATION = "activation"
    L2_REG_FACT = "l2_reg_fact"
    DROP_PROB = "drop_prob"
    BATCH_NORM = "batch_norm"
def build_dense_network(input_dim, output_dim,
                        output_activation, params, with_output_layer=True):
    model = k.models.Sequential()
    activation = params.get(NetworkKeys.ACTIVATION, "relu")
    l2_reg_fact = params.get(NetworkKeys.L2_REG_FACT, 0.0)
    regularizer = k.regularizers.l2(l2_reg_fact) if l2_reg_fact > 0 else None
    drop_prob = params.get(NetworkKeys.DROP_PROB, 0.0)
    batch_norm = params.get(NetworkKeys.BATCH_NORM, False)
    last_dim = input_dim
    for i in range(len(params[NetworkKeys.NUM_UNITS])):
        model.add(k.layers.Dense(units=params[NetworkKeys.NUM_UNITS][i],
                                 kernel_regularizer=regularizer,
                                 input_dim=last_dim))
        if batch_norm:
            model.add(k.layers.BatchNormalization())
        model.add(k.layers.Activation(activation))
        last_dim = params[NetworkKeys.NUM_UNITS][i]
        if drop_prob > 0.0:
            model.add(k.layers.Dropout(rate=drop_prob))
    if with_output_layer:
        model.add(k.layers.Dense(units=output_dim, activation=output_activation))
    return model
ldre_net = build_dense_network(input_dim=input_dim, output_dim=1,
                               output_activation=k.activations.linear,
                               params=hidden_params)
p_samples = k.layers.Input(shape=(input_dim,))
q_samples = k.layers.Input(shape=(input_dim,))
train_model = k.models.Model(inputs=[p_samples, q_samples],
                             outputs=[ldre_net(p_samples), ldre_net(q_samples)])
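Note that the same ldre_net is applied to both inputs, so train_model produces two outputs from one shared set of weights. As far as I understand, PyTorch has no direct counterpart to keras.models.Model(inputs=[...], outputs=[...]); a module's forward can simply take and return several tensors. A minimal sketch of the pattern I am aiming for (the class name is illustrative):

import torch
import torch.nn as nn

class TwoInputTwoOutput(nn.Module):
    """Sketch: one shared sub-network applied to two inputs, two outputs."""
    def __init__(self, shared: nn.Module):
        super().__init__()
        self.shared = shared

    def forward(self, p_samples, q_samples):
        # forward() can accept and return any number of tensors
        return self.shared(p_samples), self.shared(q_samples)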
Here is my attempt to convert the above code to PyTorch:
import torch
import torch.nn as nn

def l2_penalty(model, l2_lambda=0.001):
    """Returns the L2 penalty of the params."""
    l2_norm = sum(p.pow(2).sum() for p in model.parameters())
    return l2_lambda * l2_norm
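One thing I noticed while converting: in Keras, kernel_regularizer folds the penalty into the loss automatically on every step, while in PyTorch I have to add it to the loss myself inside the training loop. A sketch of what I mean (the model, optimizer, and data here are placeholders):

model = nn.Linear(4, 1)  # stand-in model, just for illustration
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.MSELoss()
inputs, targets = torch.randn(8, 4), torch.randn(8, 1)

for _ in range(10):
    loss = loss_fn(model(inputs), targets)
    loss = loss + l2_penalty(model, l2_lambda=0.001)  # penalty recomputed every step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()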
def build_dense_network(input_dim, output_dim,
                        output_activation, params, with_output_layer=True):
    activation = params.get(NetworkKeys.ACTIVATION, "relu")
    l2_reg_fact = params.get(NetworkKeys.L2_REG_FACT, 0.0)
    drop_prob = params.get(NetworkKeys.DROP_PROB, 0.0)
    batch_norm = params.get(NetworkKeys.BATCH_NORM, False)
    layers = []
    last_dim = input_dim
    for i in range(len(params[NetworkKeys.NUM_UNITS])):
        layers.append(nn.Linear(last_dim, params[NetworkKeys.NUM_UNITS][i]))
        if batch_norm:
            layers.append(nn.BatchNorm1d(params[NetworkKeys.NUM_UNITS][i]))
        if activation == "relu":
            layers.append(nn.ReLU())
        elif activation == "LeakyRelu":
            layers.append(nn.LeakyReLU(0.1, inplace=True))
        last_dim = params[NetworkKeys.NUM_UNITS][i]
        if drop_prob > 0.0:
            layers.append(nn.Dropout(p=drop_prob))
    if with_output_layer:
        # output_activation is "linear" here, so no extra activation is appended
        layers.append(nn.Linear(params[NetworkKeys.NUM_UNITS][-1], output_dim))
    model = nn.Sequential(*layers)
    # Note: this evaluates the penalty once at build time with the initial
    # weights; it has to be recomputed from model.parameters() at every
    # training step to match the Keras kernel_regularizer behaviour.
    regularizer = l2_penalty(model, l2_lambda=l2_reg_fact) if l2_reg_fact > 0 else None
    return model, regularizer
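For example, I construct the network like this (the hidden_params values are just an example):

hidden_params = {NetworkKeys.NUM_UNITS: [64, 64],
                 NetworkKeys.ACTIVATION: "relu",
                 NetworkKeys.L2_REG_FACT: 0.0,
                 NetworkKeys.DROP_PROB: 0.0,
                 NetworkKeys.BATCH_NORM: False}
ldre_net, ldre_regularizer = build_dense_network(input_dim=10, output_dim=1,
                                                 output_activation="linear",
                                                 params=hidden_params)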
class Split(torch.nn.Module):
    """Applies a module to the input and splits its output into n_parts chunks."""
    def __init__(self, module, n_parts: int, dim=1):
        super().__init__()
        self._n_parts = n_parts
        self._dim = dim
        self._module = module

    def forward(self, inputs):
        output = self._module(inputs)
        chunk_size = output.shape[self._dim] // self._n_parts
        return torch.split(output, chunk_size, dim=self._dim)
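To check my understanding of Split: with dim=0 it cuts a stacked batch back into equal parts along the batch dimension, e.g.:

net = nn.Linear(10, 1)
split = Split(net, n_parts=2, dim=0)
combined = torch.randn(6, 10)   # two batches of 3 stacked along dim 0
p_out, q_out = split(combined)  # each chunk has shape (3, 1)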
class Net(nn.Module):
    def __init__(self, hidden_params, input_dim):
        super().__init__()  # required before registering submodules
        self._ldre_net, self._ldre_regularizer = build_dense_network(
            input_dim=input_dim, output_dim=1,
            output_activation="linear", params=hidden_params)
        self._p_samples = nn.Linear(input_dim, input_dim)
        self._q_samples = nn.Linear(input_dim, input_dim)
        self._split_layers = Split(self._ldre_net, n_parts=2, dim=0)
    def forward(self, x, inTrain=True):
        if inTrain:
            # x is expected to have shape (batch, 2, input_dim):
            # x[:, 0, :] are the p samples, x[:, 1, :] the q samples
            p = self._p_samples(x[:, 0, :])
            q = self._q_samples(x[:, 1, :])
            # stack both batches, run the shared network once, then split
            combined = torch.cat((p.view(p.size(0), -1),
                                  q.view(q.size(0), -1)), dim=0)
            p_output, q_output = self._split_layers(combined)
            return p_output, q_output
        else:
            return self._ldre_net(x)
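A quick smoke test of the shapes I expect (batch size and input_dim are arbitrary, hidden_params as above):

net = Net(hidden_params, input_dim=10)
x = torch.randn(5, 2, 10)   # p samples in x[:, 0, :], q samples in x[:, 1, :]
p_out, q_out = net(x)       # each should have shape (5, 1)
single = net(torch.randn(5, 10), inTrain=False)  # inference path, shape (5, 1)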
I am wondering whether my implementation in the Net class is correct, in particular whether forward handles the two inputs and two outputs the same way the Keras train_model does?