I implemented a gradient descent algorithm, but when I test it on some sklearn examples the results are incorrect and I do not know how to fix it. This is the full code:
First of all, I have an exception class and a function called rendimiento() that computes accuracy:
class ClasificadorNoEntrenado(Exception): pass

def rendimiento(clasificador, X, y):
    # Fraction of examples classified correctly (aciertos = hits)
    aciertos = 0
    total_ejemplos = len(X)
    for i in range(total_ejemplos):
        ejemplo = X[i]
        clasificacion_esperada = y[i]
        clasificacion_obtenida = clasificador.clasifica(ejemplo)
        if clasificacion_obtenida == clasificacion_esperada:
            aciertos += 1
    accuracy = aciertos / total_ejemplos
    return accuracy
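As far as I can tell this helper is fine; on a toy classifier it agrees with sklearn's accuracy_score (the SiempreUno class below is just something I made up for the test):

import numpy as np
from sklearn.metrics import accuracy_score

class SiempreUno:
    # Dummy classifier that always predicts class 1, only to test rendimiento()
    def clasifica(self, ejemplo):
        return 1

X_toy = np.zeros((4, 2))
y_toy = np.array([1, 0, 1, 1])
print(rendimiento(SiempreUno(), X_toy, y_toy))  # 0.75
print(accuracy_score(y_toy, [1, 1, 1, 1]))      # 0.75, matches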
Second, I have a function that computes the sigmoid:
from scipy.special import expit

def sigmoide(x):
    return expit(x)
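I am fairly confident this part is correct: expit(x) is 1 / (1 + exp(-x)), computed in a numerically stable way, for example:

import numpy as np
from scipy.special import expit

print(expit(0.0))                                            # 0.5
print(np.allclose(expit(2.0), 1.0 / (1.0 + np.exp(-2.0))))  # True
print(expit(-800.0))                                         # 0.0, no overflow warning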
Third, I have the main algorithm:
import numpy as np

class RegresionLogisticaMiniBatch():

    def __init__(self, clases=[0, 1], normalizacion=False,
                 rate=0.1, rate_decay=False, batch_tam=64):
        self.clases = clases
        self.rate = rate
        self.normalizacion = normalizacion
        self.rate_decay = rate_decay
        self.batch_tam = batch_tam
        self.pesos = None
        self.media = None
        self.desviacion = None

    def entrena(self, X, y, n_epochs, reiniciar_pesos=False, pesos_iniciales=None):
        self.X = X
        self.y = y
        self.n_epochs = n_epochs
        if reiniciar_pesos or self.pesos is None:
            self.pesos = pesos_iniciales if pesos_iniciales is not None else np.random.uniform(-1, 1, size=X.shape[1])
        if self.normalizacion:
            self.media = np.mean(X, axis=0)
            self.desviacion = np.std(X, axis=0)
        indices = np.random.permutation(len(X))
        X_shuffled = X[indices]
        y_shuffled = y[indices]
        for i in range(0, len(X), self.batch_tam):
            batch_X = X_shuffled[i:i + self.batch_tam]
            batch_y = y_shuffled[i:i + self.batch_tam]
            # Compute logistic function (sigmoid)
            z = np.dot(batch_X, self.pesos)
            y_pred = sigmoide(z)
            # Compute gradient
            error = batch_y - y_pred
            gradiente = np.dot(batch_X.T, error) / len(batch_X)
            # Update weights
            self.pesos += self.rate * gradiente

    def clasifica_prob(self, ejemplo):
        if self.pesos is None:
            raise ClasificadorNoEntrenado("The classifier has not been trained")
        if self.normalizacion:
            ejemplo = (ejemplo - self.media) / self.desviacion
        probabilidad = sigmoide(np.dot(ejemplo, self.pesos))
        if probabilidad >= 0.5:
            return 1
        else:
            return 0
        # return {'no': 1 - probabilidad, 'si': probabilidad}

    def clasifica(self, ejemplo):
        probabilidad = self.clasifica_prob(ejemplo)
        return probabilidad
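What I already suspect, in case it helps: in entrena I compute self.media and self.desviacion but never actually normalize X before training (normalization only happens later, in clasifica_prob), the n_epochs argument is stored but there is no epoch loop, and rate_decay is never used. This is my guess at what entrena should look like; the rate / (1 + epoch) decay schedule is just an assumption on my part:

    def entrena(self, X, y, n_epochs, reiniciar_pesos=False, pesos_iniciales=None):
        if reiniciar_pesos or self.pesos is None:
            self.pesos = (pesos_iniciales if pesos_iniciales is not None
                          else np.random.uniform(-1, 1, size=X.shape[1]))
        if self.normalizacion:
            self.media = np.mean(X, axis=0)
            self.desviacion = np.std(X, axis=0)
            X = (X - self.media) / self.desviacion  # normalize the training data too
        for epoch in range(n_epochs):
            # Decay schedule is my guess; I do not know the intended formula
            rate = self.rate / (1 + epoch) if self.rate_decay else self.rate
            indices = np.random.permutation(len(X))
            X_shuffled, y_shuffled = X[indices], y[indices]
            for i in range(0, len(X), self.batch_tam):
                batch_X = X_shuffled[i:i + self.batch_tam]
                batch_y = y_shuffled[i:i + self.batch_tam]
                y_pred = sigmoide(np.dot(batch_X, self.pesos))
                # Gradient of the log-likelihood, averaged over the batch
                gradiente = np.dot(batch_X.T, batch_y - y_pred) / len(batch_X)
                self.pesos += rate * gradiente

I also notice that clasifica_prob returns a hard 0/1 label instead of a probability (the dict return is commented out), and that the model has no bias/intercept term, but I am not sure whether either of those explains the low scores.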
And finally, I try to check whether it is correct with an sklearn dataset:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

cancer = load_breast_cancer()
X_cancer, y_cancer = cancer.data, cancer.target

lr_cancer = RegresionLogisticaMiniBatch(rate=0.1, rate_decay=True, normalizacion=True)
Xe_cancer, Xt_cancer, ye_cancer, yt_cancer = train_test_split(X_cancer, y_cancer)

lr_cancer.entrena(Xe_cancer, ye_cancer, 10000)
print(rendimiento(lr_cancer, Xe_cancer, ye_cancer))
print(rendimiento(lr_cancer, Xt_cancer, yt_cancer))
But both accuracy scores come out low, and they change a lot from run to run.
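For comparison, I would expect sklearn's own LogisticRegression to do well on this dataset; if the baseline below also scored low, the problem would be in my data handling rather than my training loop (the StandardScaler in the pipeline is my stand-in for normalizacion=True):

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

baseline = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
baseline.fit(Xe_cancer, ye_cancer)
print(baseline.score(Xe_cancer, ye_cancer))  # train accuracy
print(baseline.score(Xt_cancer, yt_cancer))  # test accuracy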
I tried to implement logistic regression with mini-batch gradient descent, but it does not predict correctly. I hope someone can help me fix this.