
I am using a U-Net model for semantic segmentation. I have a custom dataset of images and their masks, both in .png format. I have looked through online forums and tried several suggestions, but nothing has worked so far. Any suggestions on how to resolve the error or improve the code would be helpful.

import copy
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# model, train_dl, val_dl and device are defined earlier in the notebook

model.eval()
with torch.no_grad():
    for xb, yb in val_dl:
        yb_pred = model(xb.to(device))
        # yb_pred = yb_pred["out"].cpu()
        print(yb_pred.shape)
        yb_pred = torch.argmax(yb_pred,axis = 1)     
        break
    
    print(yb_pred.shape)
    

criteron = nn.CrossEntropyLoss(reduction = 'sum')
opt = optim.Adam(model.parameters(), lr = 3e-4)

def loss_batch(loss_func, output, target, opt = None):
    loss = loss_func(output, target)
    
    if opt is not None:
        opt.zero_grad()
        loss.backward()
        opt.step()        
    return loss.item(), None

lr_scheduler = ReduceLROnPlateau(opt, mode = 'min', factor = 0.5, patience= 20, verbose = 1)

def get_lr(opt):
    for param_group in opt.param_groups:
        return param_group['lr']
    
current_lr = get_lr(opt)
print('current_lr = {}'.format(current_lr))


def loss_epoch(model, loss_func, dataset_dl, sanity_check = False, opt = None):
    running_loss = 0.0
    len_data = len(dataset_dl.dataset)
    
    for xb, yb in dataset_dl:
        xb = xb.to(device)
        yb = yb.to(device)
        
        # xb = torch.tensor(xbh, requires_grad=True)
                
        output = model(xb)
        
        loss_b, metric_b = loss_batch(loss_func, output, yb, opt)
        running_loss += loss_b
        if sanity_check is True:
            break
    loss = running_loss/float(len_data) 
    return loss, None

def train_val(model, params):
    num_epochs = params["num_epochs"]
    loss_func = params["loss_func"]
    opt = params["optimizer"]
    train_dl = params["train_dl"]
    val_dl = params["val_dl"]
    sanity_check = params["sanity_check"]
    lr_scheduler = params["lr_scheduler"]
    path2weights = params["path2weights"]
    
    loss_history = {"train": [],
                    "val": []}
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    
    for epoch in range(num_epochs):
        current_lr = get_lr(opt)
        print('Epoch {}/{}, current_lr = {}'.format(epoch, num_epochs - 1, current_lr))
        
        with torch.enable_grad():
            model.train()
            train_loss, _ = loss_epoch(model, loss_func, train_dl, sanity_check, opt)
        loss_history["train"].append(train_loss)
        model.eval()
        
        with torch.no_grad():
            val_loss, _ = loss_epoch(model, loss_func, val_dl, sanity_check, opt)
        loss_history["val"].append(val_loss)
        
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), path2weights)
            print("copied best model weights!!")
    
        lr_scheduler.step(val_loss)
        if current_lr != get_lr(opt):
            print("Loading best model weights!!")
            model.load_state_dict(best_model_wts)
            print("train Loss: %.6f" %(train_loss))
            print("val_loss: %.6f" %(val_loss))
            print("-"*20)
            
        model.load_state_dict(best_model_wts)
        return model, loss_history, metric_history
    

path2models = "./models/"
if not os.path.exists(path2models):
    os.mkdir(path2models)
    
param_train = {
    "num_epochs": 10,
    "loss_func": criteron,
    "optimizer": opt,
    "train_dl": train_dl,
    "val_dl": val_dl,
    "sanity_check": False,
    "lr_scheduler": lr_scheduler,
    "path2weights": path2models + "weights.pt"
model, loss_hist, _ = train_val(model, param_train)

The error message looks like:

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

And here is the traceback:

File "<ipython-input-108-1ef24c0b1593>", line 10, in <module>
    model, loss_hist, _ = train_val(model, param_train)

  File "<ipython-input-106-53830bafab8b>", line 27, in train_val
    val_loss, _ = loss_epoch(model, loss_func, val_dl, sanity_check, opt)

  File "<ipython-input-104-5fc229145602>", line 13, in loss_epoch
    loss_b, metric_b = loss_batch(loss_func, output, yb, opt)

  File "<ipython-input-100-68322a002c04>", line 6, in loss_batch
    loss.backward()

  File "C:\Users\W540\anaconda3\lib\site-packages\torch\tensor.py", line 198, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)

  File "C:\Users\W540\anaconda3\lib\site-packages\torch\autograd\__init__.py", line 100, in backward
    allow_unreachable=True)  # allow_unreachable flag

I am not sure which variable I should set requires_grad = True on, or where I should enable grad...

Namwa

3 Answers


You can try this before loss.backward():

loss = Variable(loss, requires_grad = True)

Or, since Variable has been deprecated in PyTorch (it still exists but is no longer needed), you can do the same thing simply with the following code:

loss.requires_grad = True
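
A minimal sketch of where that assignment would sit inside a loss_batch helper like the one in the question (the requires_grad check is my addition, not part of the original answer):

def loss_batch(loss_func, output, target, opt=None):
    loss = loss_func(output, target)

    if opt is not None:
        # If the loss is detached from the graph (no grad_fn), re-enable
        # gradient tracking on it before calling backward, as suggested above.
        if not loss.requires_grad:
            loss.requires_grad = True
        opt.zero_grad()
        loss.backward()
        opt.step()
    return loss.item(), None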
parvaneh shayegh

I got this error from passing the input instead of the output to the loss function.

output = model(input)
loss = loss_fn(input, target)

The correct code is

loss = loss_fn(output, target)
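
A minimal, self-contained sketch of the difference, using a toy linear model and made-up shapes purely for illustration:

import torch
import torch.nn as nn

model = nn.Linear(4, 3)                # toy stand-in for the real network
loss_fn = nn.CrossEntropyLoss()

input = torch.randn(2, 4)              # plain data tensor, requires_grad=False
target = torch.tensor([0, 2])

output = model(input)                  # connected to the model's parameters

# loss_fn(input, target).backward()    # raises: element 0 of tensors does not require grad
loss = loss_fn(output, target)         # correct: the loss depends on trainable parameters
loss.backward()                        # gradients now flow into model.parameters()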
Tom Huntington

For me, calling .retain_grad() before .backward() solved the issue, as stated here.
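
A minimal sketch of that pattern, just to show where the .retain_grad() call goes relative to .backward() (the tensors here are made up for illustration):

import torch

x = torch.randn(3, requires_grad=True)
y = (x * 2).sum()      # non-leaf tensor: its grad is normally discarded
y.retain_grad()        # ask autograd to keep y's gradient after backward
y.backward()
print(y.grad)          # tensor(1.) -- dy/dy
print(x.grad)          # tensor([2., 2., 2.])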

OuttaSpaceTime