Running the following code snippet
torch.nn.CrossEntropyLoss()(torch.Tensor([0]), torch.Tensor([1]))
returns
tensor(-0.)
How can this be? Am I missing something fundamental about this problem?
I have a super simple Feed-Forward NN model:
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class FeedForwardNeuralNet(nn.Module):
def __init__(self, S1, S2, S3):
super(FeedForwardNeuralNet, self).__init__()
self.linear1 = nn.Linear(S1, S2)
self.linear2 = nn.Linear(S2, S3)
def forward(self, x):
x = F.relu(self.linear1(x))
x = F.relu(self.linear2(x))
return x
def train(data, S1, S2, S3, weight_decay, loss_fn, learning_rate=0.01):
model = FeedForwardNeuralNet(S1, S2, S3)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)
#py torch data loader
for input, target in data:
optimizer.zero_grad() # clear gradients
output = model(input) # forward pass
loss = loss_fn(output, target) # calculate loss
print(loss.item())
loss.backward() # calculate gradients
optimizer.step() # update weights
return model
And the loss remains at zero.
My data is in the shape:
X: [float, float] Y: {0 or 1}
from sklearn.model_selection import train_test_split
X_train, X_rem, y_train, y_rem = train_test_split(X, y, train_size=0.8)
X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=0.5)
X_train = torch.from_numpy(X_train).type(torch.FloatTensor)
X_val = torch.from_numpy(X_val).type(torch.FloatTensor)
X_test = torch.from_numpy(X_test).type(torch.FloatTensor)
y_train = torch.from_numpy(np.array(list(map(lambda a: \[a\], y_train)))).type(torch.FloatTensor)
y_val = torch.from_numpy(np.array(list(map(lambda a: \[a\], y_val)))).type(torch.FloatTensor)
y_test = torch.from_numpy(np.array(list(map(lambda a: \[a\], y_test)))).type(torch.FloatTensor)
train_data = torch.utils.data.TensorDataset(X_train, y_train)
validation_data = torch.utils.data.TensorDataset(X_val, y_val)
test_data = torch.utils.data.TensorDataset(X_test, y_test)
batch_size=1000
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=False)
val_loader = torch.utils.data.DataLoader(validation_data, batch_size=len(validation_data), shuffle=False) # use the entire val dataset in one batch
test_loader = torch.utils.data.DataLoader(test_data, batch_size=len(test_data), shuffle=False) # use the entire test dataset in one batch
I run train with the following parameters
train(data=train_loader, S1=2, S2=S2, S3=1, weight_decay=0.1, loss_fn=nn.CrossEntropyLoss())