I'm trying to implement a very naive gradient descent in python. However, it looks like it goes into an infinite loop. Could you please help me debug it?
y = lambda x : x**2
dy_dx = lambda x : 2*x
def gradient_descent(function,derivative,initial_guess):
optimum = initial_guess
while derivative(optimum) != 0:
optimum = optimum - derivative(optimum)
else:
return optimum
gradient_descent(y,dy_dx,5)
Edit:
Now I have this code, I really can't comprehend the output. P.s. It might freeze your CPU.
y = lambda x : x**2
dy_dx = lambda x : 2*x
def gradient_descent(function,derivative,initial_guess):
optimum = initial_guess
while abs(derivative(optimum)) > 0.01:
optimum = optimum - 2*derivative(optimum)
print((optimum,derivative(optimum)))
else:
return optimum
gradient_descent(y,dy_dx,5)
Now I'm trying to apply it to a regression problem, however the output doesn't appear to be correct as shown in the output below:
Output of gradient descent code below
import matplotlib.pyplot as plt
def stepGradient(x,y, step):
b_current = 0
m_current = 0
b_gradient = 0
m_gradient = 0
N = int(len(x))
for i in range(0, N):
b_gradient += -(1/N) * (y[i] - ((m_current*x[i]) + b_current))
m_gradient += -(1/N) * x[i] * (y[i] - ((m_current * x[i]) + b_current))
while abs(b_gradient) > 0.01 and abs(m_gradient) > 0.01:
b_current = b_current - (step * b_gradient)
m_current = m_current - (step * m_gradient)
for i in range(0, N):
b_gradient += -(1/N) * (y[i] - ((m_current*x[i]) + b_current))
m_gradient += -(1/N) * x[i] * (y[i] - ((m_current * x[i]) + b_current))
return [b_current, m_current]
x = [1,2, 2,3,4,5,7,8]
y = [1.5,3,1,3,2,5,6,7]
step = 0.00001
(b,m) = stepGradient(x,y,step)
plt.scatter(x,y)
abline_values = [m * i + b for i in x]
plt.plot(x, abline_values, 'b')
plt.show()
Fixed :D
import matplotlib.pyplot as plt
def stepGradient(x,y):
step = 0.001
b_current = 0
m_current = 0
b_gradient = 0
m_gradient = 0
N = int(len(x))
for i in range(0, N):
b_gradient += -(1/N) * (y[i] - ((m_current*x[i]) + b_current))
m_gradient += -(1/N) * x[i] * (y[i] - ((m_current * x[i]) + b_current))
while abs(b_gradient) > 0.01 or abs(m_gradient) > 0.01:
b_current = b_current - (step * b_gradient)
m_current = m_current - (step * m_gradient)
b_gradient= 0
m_gradient = 0
for i in range(0, N):
b_gradient += -(1/N) * (y[i] - ((m_current*x[i]) + b_current))
m_gradient += -(1/N) * x[i] * (y[i] - ((m_current * x[i]) + b_current))
return [b_current, m_current]
x = [1,2, 2,3,4,5,7,8,10]
y = [1.5,3,1,3,2,5,6,7,20]
(b,m) = stepGradient(x,y)
plt.scatter(x,y)
abline_values = [m * i + b for i in x]
plt.plot(x, abline_values, 'b')
plt.show()