
I'm new to PyTorch. For a regression task, I'm using an architecture in which a pre-trained EfficientNetV2 model is connected to a single fully connected layer with one neuron and a ReLU activation. However, both the training and validation losses suddenly increase after the first epoch, stay at roughly the same value for about 50 epochs, and then suddenly drop back to about the same value as in the first epoch. Can anyone help me figure out what's happening?

Some code for the model and the training process:

import torch
import torch.nn as nn
from torch.optim import Adam
from torchvision import models
from tqdm import tqdm

# hyper-parameters
image_size = 256
learning_rate = 1e-3
batch_size = 32
epochs = 60

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # load an ImageNet-pretrained EfficientNetV2-M backbone
        self.net = models.efficientnet_v2_m(weights='DEFAULT')
        # replace the classifier head with a single-output linear layer followed by ReLU
        self.net.classifier[1] = nn.Linear(in_features=1280, out_features=1, bias=True)
        self.net.classifier = nn.Sequential(self.net.classifier, nn.ReLU())
    
    def forward(self, input):
        output = self.net(input)
        return output

model = Model()
# Define the loss function (L1 / mean absolute error) and the Adam optimizer
loss_fn = nn.L1Loss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

# Function to evaluate the model on the validation set and return the mean absolute error (MAE)
def testAccuracy():
    
    model.eval()
    loss = 0.0
    total = 0.0
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        for images, labels in validation_loader:
            # move the batch to the same device as the model
            images = images.to(device)
            labels = labels.to(device)

            # run the model on the validation batch
            outputs = model(images)
            # accumulate the L1 loss (MAE) for this batch
            loss += loss_fn(outputs, labels.unsqueeze(1)).item()
            total += 1

    # average the loss over all validation batches
    mae = loss / total
    return mae
    
   
# Training function: loop over the training data loader, feed the inputs to the network, and optimize.
def train(num_epochs):
    
    best_mae = float('inf')  # lower validation MAE is better

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model.to(device)
    model.train()

    train_loss_all = []
    val_loss_all = []

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        total = 0

        for images, labels in tqdm(train_loader):
            # move the batch to the training device
            images = images.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()
            # forward pass: predict a value for each training image
            outputs = model(images)
            # compute the loss based on model output and real labels
            loss = loss_fn(outputs, labels.unsqueeze(1))

            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

            # accumulate statistics for each batch
            running_loss += loss.item()     # extract the loss value
            total += 1

        train_loss = running_loss/total
        train_loss_all.append(train_loss)

        val_mae = testAccuracy()
        val_loss_all.append(val_mae)

        # keep the checkpoint with the lowest validation MAE
        if val_mae < best_mae:
            saveModel()
            best_mae = val_mae

    history = {'train_loss':train_loss_all,'val_loss':val_loss_all}
    return history
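
I plot the returned history roughly like this (a minimal matplotlib sketch; only the two lists in history matter, the styling is just for illustration):

import matplotlib.pyplot as plt

history = train(epochs)

plt.plot(history['train_loss'], label='train loss (L1)')
plt.plot(history['val_loss'], label='validation loss (L1)')
plt.xlabel('epoch')
plt.ylabel('MAE')
plt.legend()
plt.show()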

Loss curve: [image showing the training and validation loss curves]
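
In case it helps with diagnosis, this is roughly how I would sanity-check a few raw predictions against the labels (using the model and validation_loader defined above):

# print a handful of raw predictions next to the ground-truth labels
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
with torch.no_grad():
    images, labels = next(iter(validation_loader))
    outputs = model(images.to(device))
    print(outputs[:5].squeeze(1).cpu())  # predicted values
    print(labels[:5])                    # target values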

Cindy