I need some help, or at least an idea, about what is going wrong in my code. I am trying to implement an SGD regressor with l2 regularization, but the bias (intercept) in my model reaches very high values when alpha is above 10. I suspect something is wrong with the gradient calculation.
This is my SGD class:
class SGDRegression1():
    def __init__(self, n_iter, eta0, seed=None, alpha=None) -> None:
        self.n_iter = n_iter
        self.eta0 = eta0
        self.alpha = alpha
        if seed is not None:
            self.rng = np.random.default_rng(seed=seed)

    def __add_bias(self, X):
        # prepend a column of ones so that theta[0] acts as the intercept
        return np.c_[np.ones((X.shape[0], 1)), X]

    def learning_schedule_optimal(self, t):
        return 1 / (t + self.eta0)

    def fit(self, X: np.ndarray, y):
        X_b = self.__add_bias(X)
        self.theta = self.rng.standard_normal((X_b.shape[1], 1))
        m = len(X_b)
        for epoch in range(self.n_iter):
            for iteration in range(m):
                # pick one random sample per update
                sample_index = self.rng.integers(m)
                Xi = X_b[sample_index: sample_index + 1]
                yi = y[sample_index: sample_index + 1]
                # l2 penalty: regularize the weights but not the bias
                l2 = self.alpha * self.theta
                l2[0] = 0
                gradient = Xi.T @ (Xi @ self.theta - yi) + l2
                eta = self.learning_schedule_optimal(epoch * m + iteration + 1)
                self.theta -= eta * gradient
        self.intercept_ = self.theta[0]
        self.coef_ = self.theta[1:]

    def predict(self, X):
        return self.__add_bias(X) @ self.theta
The imports and the calling code are below:
import numpy as np
import matplotlib.pyplot as plt

m = 100  # assuming 100 samples here
X = 6 * np.random.rand(m, 1) - 3
y = 0.25 * X + np.random.randn(m, 1) + 2
X_test = np.linspace(-3, 3, m).reshape(m, 1)

plt.plot(X, y, "g.", label="Training data")
plt.legend()

sgd = SGDRegression1(n_iter=1, eta0=0.01, seed=42, alpha=10)
sgd.fit(X, y)
print(sgd.coef_, sgd.intercept_)

y_pred = sgd.predict(X_test)
plt.plot(X_test, y_pred, "b-")
plt.grid(True)
plt.show()
I tried changing the learning schedule and the gradient descent variant, but it made no difference.
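For example, one of the alternative schedules I tried was along these lines, modeled on what I understand sklearn's "optimal" learning rate to be, eta = 1 / (alpha * (t + t0)); the t0 value below is just a placeholder, not sklearn's actual heuristic:

def learning_schedule_sklearn_like(self, t):
    # Rough imitation of sklearn's learning_rate="optimal":
    # eta = 1.0 / (alpha * (t + t0)); t0 here is a guessed constant.
    t0 = 1.0
    return 1.0 / (self.alpha * (t + t0))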
How does scikit-learn compute the gradient descent update when there is an intercept (offset) and l2 regularization?
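For comparison, this is the kind of scikit-learn call I am trying to match (I am assuming these parameters are a rough equivalent of my setup, not an exact mapping):

from sklearn.linear_model import SGDRegressor

sk_sgd = SGDRegressor(penalty="l2", alpha=10, learning_rate="optimal",
                      max_iter=1000, tol=None, random_state=42)
sk_sgd.fit(X, y.ravel())  # sklearn expects y as a 1-D array
print(sk_sgd.coef_, sk_sgd.intercept_)

Its intercept_ stays reasonable for large alpha, which is what makes me think my own update is wrong.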