Here is a regression model in which I attempt to predict the y values (outputs) from the x values (inputs). Each class is given a different mean, and the inputs are then L2-normalized:
x_values = sklearn.preprocessing.normalize(x_values, norm="l2")
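For reference, normalize with norm="l2" rescales each row to unit Euclidean length; a minimal sketch with a made-up 2x2 array:

import numpy as np
from sklearn.preprocessing import normalize

a = np.array([[3.0, 4.0],
              [1.0, 0.0]])
print(normalize(a, norm="l2"))
# [[0.6 0.8]
#  [1.  0. ]]
# each row now has unit L2 norm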
This may look like a classification problem being solved with regression. I'm trying to understand multiclass regression in PyTorch, since the PyTorch docs give the following example, which suggests that multiclass regression is possible:
>>> loss = nn.MSELoss()
>>> input = torch.randn(3, 5, requires_grad=True)
>>> target = torch.randn(3, 5)
>>> output = loss(input, target)
>>> output.backward()
src: https://pytorch.org/docs/master/generated/torch.nn.MSELoss.html
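As far as I can tell, MSELoss with the default reduction just averages the squared differences over every element, so the (3, 5) tensors above amount to 15 independent regression targets; a minimal sketch checking that (variable names are mine):

import torch
import torch.nn as nn

inp = torch.randn(3, 5, requires_grad=True)
tgt = torch.randn(3, 5)

loss = nn.MSELoss()(inp, tgt)        # default reduction='mean'
manual = ((inp - tgt) ** 2).mean()   # elementwise squared error, averaged over all 15 entries
print(torch.allclose(loss, manual))  # True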
Entire code:
%reset -f
from datetime import datetime
from sklearn import metrics
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import torch.utils.data as data_utils
import torch.nn.functional as F
import random
from torch.autograd import Variable
import pandas as pd
import unittest
import time
from collections import Counter
import sklearn
x_values = []
y_values = []
input_size = 17
lr = .1
# Three samples per class; each class has its own mean and standard deviation.
for mu, sigma in [(0, 0.1), (5, 0.5), (10, 1.0), (15, 1.5), (20, 2.0)]:
    for _ in range(3):
        x_values.append(np.random.normal(mu, sigma, input_size))
x_values = sklearn.preprocessing.normalize(x_values, norm="l2")
# Labels: three samples per class, classes 0..4.
for label in range(5):
    y_values.extend([label] * 3)
num_classes = len(y_values)
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.criterion = torch.nn.MSELoss()
        self.model = torch.nn.Sequential(
            torch.nn.Linear(input_size, 100),
            torch.nn.ReLU(),
            torch.nn.Linear(100, 50),
            torch.nn.ReLU(),
            torch.nn.Linear(50, num_classes)
            # torch.nn.ReLU()
        )
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr)

    def update(self, state, action):
        y_pred = self.model(torch.Tensor(state))
        loss = self.criterion(y_pred, Variable(torch.Tensor(action)))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss

    def predict(self, s):
        with torch.no_grad():
            return self.model(torch.Tensor(s))

def weights_init(m):
    if type(m) == nn.Linear:
        m.weight.data.normal_(0.0, 1)
model = NeuralNet()
model.apply(weights_init)
print('len(states)', len(x_values))
# Cycle through the 15 samples, one per update step, for 7000 steps.
i = 0
for s in range(7000):
    if i == 15:
        i = 0
    x = x_values[i]
    loss_value = model.update(x, y_values)  # target is the full y_values list
    if s % 1000 == 0:
        print('loss_value', loss_value)
    i = i + 1
Predicting on the x_values:
[torch.argmax(model.predict(s)) for s in x_values]
returns:
[tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14),
tensor(14)]
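For completeness, each raw prediction (before the argmax) is a vector with num_classes entries; a quick sketch to inspect one (the indexing is mine):

raw = model.predict(x_values[0])
print(raw.shape)  # torch.Size([15]), since num_classes = len(y_values) = 15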
As I have defined classes with different means, and the final loss value is low (4.7370e-15), I expect the predicted values to be closer to:
[tensor(0),
tensor(0),
tensor(0),
tensor(1),
tensor(1),
tensor(1),
tensor(2),
tensor(2),
tensor(2),
tensor(3),
tensor(3),
tensor(3),
tensor(4),
tensor(4),
tensor(4)]
Why are the predicted outputs not closer to my expectation?
Have I set up the model incorrectly?