This is a simple code to show the problem
import torch
X = torch.arange(-3, 3, step=0.1)
Y = X * 3
Y += 0.1 * torch.randn(Y.shape)
def my_train_model(iter):
w = torch.tensor(-15.0, requires_grad=True)
lr = 0.1
for epoch in range(iter):
print(w.grad)
yhat = w * X
loss = torch.mean((yhat - Y) ** 2)
loss.backward()
with torch.no_grad():
print(w.grad)
w = w - lr * w.grad # gradient exists if w-= lr*w.grad
print(w.grad)
w.grad.zero_()
print(loss)
my_train_model(4)
This sets the w.grad to none
after performing the w = w - lr * w.grad
, but the problem will be solved if I use w -= lr * w.grad
instead of the above expression!
What is the problem with the first expression which sets w.grad
to none
?