I used the pretrained EfficientNet-v2-b3 model from 'https://github.com/rwightman/pytorch-image-models' and the PyTorch framework to train a cigarette-box classifier. The training procedure is as follows:
There are 1100 classes, each of which is one cigarette specification. All the images are stored in a directory named original_dataset_20210805, where each sub-directory holds the images of one class.
Classes with fewer than 50 images are removed, leaving 959 classes.
For each class, 10 images are randomly selected into the validation dataset 'valData', about 1/10 of the images are randomly selected into the test dataset 'testData', and the remaining images go into the training dataset 'trainData'.
Each image is resized to w×h = 200×300.
To augment the data, each image is rotated by 90°, and the 90°-rotated images of each class form a new class. For example, for a cigarette specification A, rotating all of its images by 90° produces a new class A-rot1; rotating by 180° gives A-rot2 and by 270° gives A-rot3. Applying these rotations to all classes yields 959×4 = 3836 classes (a short sketch of this rotation step follows this description).
'trainData' has 502172 images, 'valData' has 38360 images, and 'testData' has 21463 images.
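For reference, the rotation augmentation described above could be reproduced with a short PIL script like the one below (a minimal sketch: the source directory name and the '-rotK' class naming follow the description above, while dst_root and the exact layout are assumptions).

import os
from PIL import Image

src_root = 'original_dataset_20210805'   # assumed layout: one sub-directory per class
dst_root = 'rotated_dataset'             # hypothetical output directory for the rotated classes

for cls in sorted(os.listdir(src_root)):
    cls_dir = os.path.join(src_root, cls)
    if not os.path.isdir(cls_dir):
        continue
    for k, angle in enumerate((90, 180, 270), start=1):
        out_dir = os.path.join(dst_root, '{}-rot{}'.format(cls, k))   # A -> A-rot1, A-rot2, A-rot3
        os.makedirs(out_dir, exist_ok=True)
        for name in os.listdir(cls_dir):
            src_path = os.path.join(cls_dir, name)
            if not os.path.isfile(src_path):
                continue
            with Image.open(src_path) as im:
                # 90/180/270-degree rotations are lossless; expand=True keeps the full rotated image
                im.rotate(angle, expand=True).save(os.path.join(out_dir, name))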
Training starts from the pretrained weights. The best model is saved as follows:
if train_acc > last_train_acc and val_acc > last_val_acc:
    save_best_model()
Training exits once train_acc >= 0.99 and val_acc >= 0.99.
At epoch 121, training exits with train_acc 0.9911 and val_acc 0.9902.
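For clarity, the save/early-stop rule above amounts to the following (a minimal sketch with illustrative names; the actual implementation is in save_best_model and have_meet_acc_requirement_or_not in train.py, Appendix 2):

def is_new_best(train_acc, val_acc, best_train_acc, best_val_acc):
    # save only when both the training and the validation accuracy improve
    return train_acc > best_train_acc and val_acc > best_val_acc

def should_stop(train_acc, val_acc, threshold=0.99):
    # stop once both accuracies reach the threshold
    return train_acc >= threshold and val_acc >= threshold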
Using the best model to infer on testData, the accuracy is 0.981. Using it on trainData, I expected an accuracy above 0.99, but it is actually 0.84; on valData it is 0.82. This is very strange. I then used the best model on another dataset, original_dataset_20210709, which is somewhat different from original_dataset_20210805 and whose images have not been resized to w×h = 200×300. The accuracy there is 0.969.
The inference code is as follows:
def infer(cfg:Config):
    transform_test = build_transforms(cfg.img_height, cfg.img_width, 'test')
    model = get_model(cfg, 'test')
    model = model.to(cfg.get_device())
    model.eval()
    records = []
    sub_classes = os.listdir(cfg.test_data_dirname)
    if sub_classes is None or len(sub_classes) < 1:
        return
    sub_classes = sorted(sub_classes)
    classid_dict = {}
    with open(cfg.classid_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            tokens = line.split(',')
            classid_dict[int(tokens[0])] = tokens[1]
    records.append(cfg.test_data_dirname + ',' + str(len(sub_classes)) + ' classes\n')
    records.append('image, prediction result\n')
    start_time = datetime.now()
    elapsed = 0.0
    count = 0
    with torch.no_grad():
        for sub_cls in sub_classes:
            print('process sub-directory ' + sub_cls)
            files = os.listdir(os.path.join(cfg.test_data_dirname, sub_cls))
            count += len(files)
            if files is None or len(files) < 1:
                print('The sub-directory ' + sub_cls + ' has no files')
                continue
            for file in files:
                try:
                    img_path = os.path.join(cfg.test_data_dirname, sub_cls, file)
                    if os.path.isfile(img_path):
                        img_test = Image.open(img_path)
                        img = img_test
                        img = transform_test(img).to(cfg.get_device())
                        img = torch.unsqueeze(img, 0)
                        output = model(img)
                        _, preds = torch.max(output.data, 1)
                        id = preds[0].item()
                        if classid_dict.get(id) is not None:
                            #print(img_path + ' is predicted as:' + classid_dict[id])
                            records.append(sub_cls + '/' + file + ',' + classid_dict[id] + '\n')
                            log_func(sub_cls + '/' + file + ' is predicted as:' + classid_dict[id])
                        else:
                            records.append(sub_cls + '/' + file + ', unknown class\n')
                except Exception as e:
                    print(str(e))
    elapsed = (datetime.now() - start_time).total_seconds()
    records.append('elapsed {:.4f} sec, average elapsed {:.4f} sec\n'.format(elapsed, elapsed/count))
    result_path = os.path.join(cfg.results_dir, 'infer_' + cfg.backbone + '_' + str(cfg.num_classes) + '_' + format_datetime(datetime.now()) + '.csv')
    with open(result_path, 'w', encoding='utf-8') as f:
        f.writelines(records)
- I checked the Python code and suspect the problem comes from the transform applied to the image before it is fed into the model. The transform code is as follows:
def build_transforms(img_height, img_width, run_mode="train", mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    if run_mode == 'train':
        transform = T.Compose([
            # The image is expected to be opened with OpenCV
            T.Lambda(lambda img: random_rotate_bound(img, 30)),
            T.Lambda(lambda img: random_translate(img, 20)),
            T.Lambda(lambda img: random_zoom(img)),
            T.Lambda(lambda img: sameScaleZoom(img, img_height, img_width)),
            T.RandomChoice([T.Lambda(lambda img: random_AffineTransform(img)),
                            T.Lambda(lambda img: random_warpPerspective(img))]),
            T.RandomChoice([T.Lambda(lambda img: random_degarde_img(img)),
                            T.Lambda(lambda img: random_mosaic(img)),
                            T.Lambda(lambda img: random_motion_blur(img)),
                            T.Lambda(lambda img: random_focus_blur(img))]),
            # Convert the OpenCV-format image into PIL before continuing
            T.ToPILImage('RGB'),
            T.RandomOrder([T.ColorJitter(brightness=0.5),
                           T.ColorJitter(contrast=(0.2, 1.8)),
                           T.ColorJitter(saturation=(0.2, 1.8)),
                           T.ColorJitter(hue=0.08)]),
            T.ToTensor(),
            T.Normalize(mean, std)
        ])
    else:
        transform = T.Compose([
            #T.Lambda(lambda img: sameScaleZoom(img, img_height, img_width)),
            # In this case the image is opened with PIL rather than OpenCV
            T.Resize(size=(img_height, img_width)),
            T.ToTensor(),
            T.Normalize(mean, std)
        ])
    return transform
To verify my guess, I ran inference on 'valData' (not on 'trainData', which takes too much time) and changed the transform from transform_test = build_transforms(cfg.img_height, cfg.img_width, 'test') to transform_test = build_transforms(cfg.img_height, cfg.img_width, 'train'). As expected, the accuracy then becomes 0.9918.
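For completeness, one quick way to compare the two preprocessing paths on a single image is a check like the one below (a rough sketch that reuses the build_transforms shown above; img_path is a placeholder, img_height=300 and img_width=200 are assumed from the 200×300 resize, and the train transform is random, so the comparison is only indicative):

import cv2
from PIL import Image

img_path = 'some_sample.jpg'                       # placeholder: any one training image
tf_train = build_transforms(300, 200, 'train')     # assumed cfg.img_height=300, cfg.img_width=200
tf_test = build_transforms(300, 200, 'test')

img_cv = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)   # what TheDataset feeds the train transform
img_pil = Image.open(img_path).convert('RGB')                    # what infer() feeds the test transform

t_train = tf_train(img_cv)
t_test = tf_test(img_pil)
print(t_train.shape, t_test.shape)                 # expected torch.Size([3, 300, 200]) for both
if t_train.shape == t_test.shape:
    # a large mean difference hints at a mismatch between the train and test preprocessing
    print((t_train - t_test).abs().mean().item())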
My questions are:
- In inference, the trained model has an accuracy of 0.989 on testData, but only about 0.84 on trainData and about 0.82 on valData.
- What am I doing wrong in the transform?
- Or is there another reason for such a strange phenomenon?
Thanks to everyone who is willing to answer.
Appendix 1: The validation code is as follows:
def val(cfg:Config, model, criterion, transform=None):
    start_time = datetime.now()
    val_loss = 0
    total = 0
    val_correct = 0
    model.eval()
    if transform is None:
        transform = build_transforms(cfg.img_height, cfg.img_width)
    dset_loader, dset_size = load_data(cfg, transform, run_mode='val', shuffle=False)
    for data in dset_loader:
        inputs, labels = data
        if cfg.is_use_cuda:
            #inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            inputs = inputs.cuda()
            labels = torch.stack([anno.cuda() for anno in labels])
        else:
            #inputs, labels = Variable(inputs), Variable(labels)
            pass
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs.data, 1)
            val_loss += loss.data.item()*inputs.size(0)
            val_correct += torch.sum(preds == labels.data)
    val_loss /= dset_size
    val_acc = val_correct.item()*1.0/dset_size
    elapsed = (datetime.now() - start_time).total_seconds()
    log_func('exit val, {} samples, elapsed {:.4f} sec, average elapsed {:.4f} sec'.format(dset_size, elapsed, elapsed/dset_size))
    return val_loss, val_acc
- The load_data code is:
def load_data(cfg:Config, transform, run_mode='train', shuffle=True):
    if run_mode == 'train':
        dataset = TheDataset(cfg, transform, run_mode)
        data_loader = DataLoader(dataset, batch_size=cfg.train_batch_size, shuffle=shuffle, num_workers=cfg.num_workers)
        return data_loader, len(dataset)
    else:
        dataset = TheDataset(cfg, transform, run_mode)
        data_loader = DataLoader(dataset, batch_size=cfg.val_batch_size, shuffle=shuffle, num_workers=cfg.num_workers)
        return data_loader, len(dataset)
- The class 'TheDataset' is defined as follows:
class TheDataset(Dataset):
    def __init__(self, cfg:Config, transforms, run_mode='train') -> None:
        super().__init__()
        self.img_mode = cfg.img_mode
        self.transforms = transforms
        self.config = cfg
        self.run_mode = run_mode
        assert cfg is not None, "The config object cannot be none"
        assert cfg.train_data_dirname is not None, "The train data cannot be none"
        assert transforms is not None, 'The transforms cannot be none'
        self.label_list = list()
        self.path_list = list()
        self.label_2_path_index_list = {}  # key: the label; value: a list of indices into path_list of the image paths belonging to that label
        if run_mode == 'train':
            self.dirname = cfg.train_data_dirname
            self.file_path = cfg.train_data_file_list
        elif run_mode == 'val':
            self.dirname = cfg.val_data_dirname
            self.file_path = cfg.val_data_file_list
        elif run_mode == 'test':
            self.dirname = cfg.test_data_dirname
            self.file_path = cfg.test_data_file_list
        else:
            self.dirname = cfg.train_data_dirname
            self.file_path = cfg.train_data_file_list
        index = 0
        with open(self.file_path, 'r') as f:
            for line in f:
                if line is not None and len(line) > 5:
                    a_path, a_label = line.strip().split(',')
                    if a_path is not None and a_label is not None:
                        a_label = int(a_label)
                        self.path_list.append(os.path.join(self.dirname, a_path.strip()))
                        self.label_list.append(a_label)
                        if self.label_2_path_index_list.get(a_label) is None:
                            self.label_2_path_index_list[a_label] = []
                        self.label_2_path_index_list[a_label].append(index)
                        index += 1

    def __getitem__(self, index):
        img_path = self.path_list[index]
        img_label = self.label_list[index]
        img = cv2.imread(img_path)
        if self.img_mode == 'RGB':
            try:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            except:
                msg = 'cannot convert to RGB:' + img_path
                log_func(msg)
        img = self.transforms(img)
        return img, img_label

    def __len__(self):
        return len(self.label_list)

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        return "TheDataset info: datasize={}, num_labels={}".format(len(self.path_list), len(self.label_2_path_index_list))
Appendix 2: The whole train.py is as follows:
from pathlib import WindowsPath
import sys
import json
import os
import cv2
import torch
import torch.nn as nn
from PIL import Image
import torch.optim as optim
from torch.autograd import Variable
from datetime import datetime
import pandas as pd
from torch.cuda.amp.grad_scaler import GradScaler
from torch.cuda.amp.autocast_mode import autocast
from torchvision import transforms, datasets
from efficientnet_pytorch import EfficientNet
import torch.nn.functional as F
from part01_data import load_data
from part03_transform import build_transforms
from part02_model import get_model, exp_lr_scheduler
from utils import print, set_logpath, format_datetime, write_one_log_record
from config import Config, ConfigEncoder
log_path = ''
def val(cfg:Config, model, criterion, transform=None):
    start_time = datetime.now()
    val_loss = 0
    total = 0
    val_correct = 0
    model.eval()
    if transform is None:
        transform = build_transforms(cfg.img_height, cfg.img_width)
    dset_loader, dset_size = load_data(cfg, transform, run_mode='val', shuffle=False)
    for data in dset_loader:
        inputs, labels = data
        if cfg.is_use_cuda:
            #inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            inputs = inputs.cuda()
            labels = torch.stack([anno.cuda() for anno in labels])
        else:
            #inputs, labels = Variable(inputs), Variable(labels)
            pass
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs.data, 1)
            val_loss += loss.data.item()*inputs.size(0)
            val_correct += torch.sum(preds == labels.data)
    val_loss /= dset_size
    val_acc = val_correct.item()*1.0/dset_size
    elapsed = (datetime.now() - start_time).total_seconds()
    print('val exit, {} samples, elapsed {:.4f} sec, average elapsed {:.4f} sec'.format(dset_size, elapsed, elapsed/dset_size))
    return val_loss, val_acc
def train(cfg:Config, shuffle=True):
    train_log_path = os.path.join(cfg.results_dir, cfg.backbone + '_' + str(cfg.num_classes) + 'classes_' + format_datetime(datetime.now()) + '.csv')
    print('Begin to train, the data directory: ' + cfg.train_data_dirname)
    if cfg.is_use_apex:
        scaler = GradScaler()
    # step 1: preparation
    best_acc = 0.0
    best_val_acc = 0.0
    start_epoch = -1
    criterion = nn.CrossEntropyLoss()
    model_ft, optimizer_args, start_epoch, best_acc, best_val_acc = get_model(cfg, 'train')
    if cfg.is_use_cuda:
        model_ft = model_ft.cuda()
        criterion = criterion.cuda()
    optimizer = optim.SGD(model_ft.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.0004)
    if optimizer_args is not None:
        optimizer.load_state_dict(optimizer_args)
    since = datetime.now()
    best_model_wts = model_ft.state_dict()
    transform = build_transforms(cfg.img_height, cfg.img_width)
    print('the transforms are as follows:')
    print(str(transform))
    print('preparation is finished')
    write_one_log_record('epoch, train loss, train accuracy, validation loss, validation accuracy, elapsed/minute\n', train_log_path, 'w')
    start_epoch_dt = datetime.now()
    for epoch in range(start_epoch+1, cfg.num_epochs):
        # step 2: load data and adjust the optimizer
        model_ft.train(True)
        dset_loader, dset_size = load_data(cfg, transform, run_mode='train', shuffle=shuffle)
        print('Epoch: {}/{}, totally {} images'.format(epoch+1, cfg.num_epochs, dset_size))
        optimizer = exp_lr_scheduler(optimizer, epoch)
        running_loss = 0.0
        running_corrects = 0
        count = 0
        batch_count = len(dset_loader)
        start_batches_dt = datetime.now()
        # step 3: batch training
        for data in dset_loader:
            # step 3.1: unpack the sample and label and move them to the device
            inputs, labels = data
            if cfg.is_use_cuda:
                #inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
                inputs = inputs.cuda()
                labels = torch.stack([anno.cuda() for anno in labels])
            else:
                #inputs, labels = Variable(inputs), Variable(labels)
                pass
            # step 3.2: forward and backward
            optimizer.zero_grad()
            if cfg.is_use_apex:
                with autocast():
                    outputs = model_ft(inputs)
                    loss = criterion(outputs, labels)
                scaler.scale(loss).backward()
                scaler.unscale_(optimizer)
                scaler.step(optimizer)
                scaler.update()
            else:
                outputs = model_ft(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
            # step 3.3: compute the batch loss and correct count
            _, preds = torch.max(outputs.data, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            # step 3.4: print batch info
            count += 1
            start_batches_dt = output_batch_info(cfg, epoch, count, batch_count, loss.item(), outputs.size()[0], start_batches_dt)
        # step 4: the epoch is finished; compute the loss
        train_loss = running_loss / dset_size
        train_acc = running_corrects.double() / dset_size
        val_loss, val_acc = val(cfg, model_ft, criterion, transform)
        # step 5: check for a new best model and save it
        best_model_wts, best_acc, best_val_acc = save_best_model(cfg, model_ft, best_model_wts, train_acc, best_acc, val_acc, best_val_acc)
        # step 6: save the latest checkpoint
        save_newest_checkpoint(cfg, model_ft, optimizer, epoch, best_acc, best_val_acc)
        # step 7: save the periodic checkpoint
        save_checkpoint_per_epochs(cfg, model_ft, optimizer, epoch, best_acc, best_val_acc)
        # step 8: log the loss, accuracy and elapsed time of this epoch
        start_epoch_dt = summarize_epoch_info(start_epoch_dt, epoch, train_loss, train_acc, val_loss, val_acc, train_log_path)
        # step 9: decide whether it is proper to exit the training process
        if have_meet_acc_requirement_or_not(cfg, epoch, train_loss, train_acc, val_loss, val_acc):
            break
    time_elapsed = (datetime.now() - since).total_seconds()
    print('train complete, elapsed {} hours {:.4f} minutes'.format(time_elapsed//3600, (time_elapsed - (time_elapsed//3600)*3600)/60))
    return best_model_wts
def output_batch_info(cfg:Config, epoch, count, batch_count, loss_per_sample, size_of_this_batch, start_batches_dt):
    flag = ''
    elapsed = (datetime.now() - start_batches_dt).total_seconds()
    if count % cfg.print_per_batch == 0:
        flag = str(cfg.print_per_batch)
        more_time = (batch_count - count) * elapsed/cfg.print_per_batch
    if size_of_this_batch < cfg.train_batch_size:  # the last batch
        flag = 'last'
        more_time = (batch_count - count) * elapsed
    if len(flag) > 0:
        print('  Epoch: {}, batch: {}/{}, average train loss of each sample: {:.4f}, batch {} elapsed: {:.4f} sec, this epoch needs {:.4f} more sec'.format(epoch+1, count, batch_count, loss_per_sample, flag, elapsed, more_time))
        return datetime.now()
    return start_batches_dt
def have_meet_acc_requirement_or_not(cfg: Config, epoch, train_loss, train_acc, val_loss, val_acc):
    if train_acc < cfg.acc_valve or (cfg.is_check_best_with_val_loss and val_acc < cfg.acc_valve):
        return False
    return True

def summarize_epoch_info(start_epoch_dt, epoch, train_loss, train_acc, val_loss, val_acc, output_path):
    elapsed = (datetime.now() - start_epoch_dt).total_seconds()/60
    remained_minutes = (cfg.num_epochs - epoch - 1)*elapsed
    remained_hours = remained_minutes//60
    remained_minutes = remained_minutes - remained_hours*60
    record = '{},{:.4f},{:.4f},{:.4f},{:.4f},{:.4f}\n'.format(epoch+1, train_loss, train_acc, val_loss, val_acc, elapsed)
    write_one_log_record(record, output_path, 'a')
    return datetime.now()
def save_one_checkpoint(model, optimizer, epoch, best_acc, best_val_acc, output_path):
    checkpoint = {
        'net': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
        'best_acc': best_acc,
        'best_val_acc': best_val_acc
    }
    torch.save(checkpoint, output_path)

def save_checkpoint_per_epochs(cfg:Config, model, optimizer, epoch, best_acc, best_val_acc):
    if cfg.save_per_epoch > 0 and (epoch+1) % cfg.save_per_epoch == 0:
        checkpoint_path = cfg.resume_ckpt_dir + "/" + cfg.backbone + f'_checkpoint_{epoch+1}_' + str(cfg.num_classes) + 'classes.pth'
        save_one_checkpoint(model, optimizer, epoch, best_acc, best_val_acc, checkpoint_path)

def save_newest_checkpoint(cfg:Config, model, optimizer, epoch, best_acc, best_val_acc):
    checkpoint_path = cfg.resume_ckpt_dir + "/" + cfg.backbone + '_checkpoint_last_' + str(cfg.num_classes) + 'classes.pth'
    save_one_checkpoint(model, optimizer, epoch, best_acc, best_val_acc, checkpoint_path)

def save_best_model(cfg:Config, model, best_model_weights, train_acc, best_acc, val_acc, best_val_acc):
    if train_acc <= best_acc or (cfg.is_check_best_with_val_loss and val_acc <= best_val_acc):
        return best_model_weights, best_acc, best_val_acc
    best_model_weights = model.state_dict()
    model_out_path = cfg.models_dir + "/" + cfg.backbone + '_best_' + str(cfg.num_classes) + 'classes.pth'
    torch.save(best_model_weights, model_out_path)
    best_acc = train_acc
    best_val_acc = val_acc if val_acc > best_val_acc else best_val_acc
    return best_model_weights, train_acc, best_val_acc
def infer(cfg:Config):
    transform_test = build_transforms(cfg.img_height, cfg.img_width, 'test')
    #transform_test = build_transforms(cfg.img_height, cfg.img_width, 'train')
    model = get_model(cfg, 'test')
    model = model.to(cfg.get_device())
    model.eval()
    records = []
    sub_classes = os.listdir(cfg.test_data_dirname)
    if sub_classes is None or len(sub_classes) < 1:
        return
    sub_classes = sorted(sub_classes)
    classid_dict = {}
    with open(cfg.classid_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            tokens = line.split(',')
            classid_dict[int(tokens[0])] = tokens[1]
    records.append(cfg.test_data_dirname + ',' + str(len(sub_classes)) + ' classes\n')
    records.append('image, predict\n')
    start_time = datetime.now()
    elapsed = 0.0
    count = 0
    with torch.no_grad():
        for sub_cls in sub_classes:
            files = os.listdir(os.path.join(cfg.test_data_dirname, sub_cls))
            count += len(files)
            if files is None or len(files) < 1:
                continue
            for file in files:
                try:
                    img_path = os.path.join(cfg.test_data_dirname, sub_cls, file)
                    if os.path.isfile(img_path):
                        # When the transform is built in test mode, i.e. transform = build_transforms(cfg.img_height, cfg.img_width, 'test'),
                        # open the image with img = Image.open(img_path)
                        img_test = Image.open(img_path)
                        img = img_test
                        img = transform_test(img).to(cfg.get_device())
                        img = torch.unsqueeze(img, 0)
                        output = model(img)
                        _, preds = torch.max(output.data, 1)
                        id = preds[0].item()
                        if classid_dict.get(id) is not None:
                            records.append(sub_cls + '/' + file + ',' + classid_dict[id] + '\n')
                            print(sub_cls + '/' + file + ' is predicted as:' + classid_dict[id])
                        else:
                            records.append(sub_cls + '/' + file + ', unknown\n')
                except Exception as e:
                    print(str(e))
    elapsed = (datetime.now() - start_time).total_seconds()
    records.append('elapsed {:.4f} sec, average elapsed {:.4f} sec\n'.format(elapsed, elapsed/count))
    result_path = os.path.join(cfg.results_dir, 'infer_' + cfg.backbone + '_' + str(cfg.num_classes) + '_' + format_datetime(datetime.now()) + '.csv')
    with open(result_path, 'w', encoding='utf-8') as f:
        f.writelines(records)
def use_one_model(cfg:Config, model_name):
    cfg.backbone = model_name
    log_path = os.path.join(cfg.log_dir, cfg.backbone + '_' + str(cfg.num_classes) + 'classes_' + format_datetime(datetime.now()) + '.log')
    set_logpath(log_path)
    start_time = datetime.now()
    torch.cuda.empty_cache()
    print('start, the args are:=====')
    args = json.dumps(cfg, ensure_ascii=False, cls=ConfigEncoder, indent=2)
    print(args)
    try:
        #train(cfg)
        infer(cfg)
    except Exception as e:
        print(str(e))
    elapsed = (datetime.now() - start_time).total_seconds()
    hours = elapsed//3600
    minutes = (elapsed - hours*3600)/60

def use_many_models(cfg:Config):
    #backbones = ['efficientnet-b0', 'efficientnet-b1', 'efficientnet-b2', 'adv-efficientnet-b0', 'adv-efficientnet-b1', 'adv-efficientnet-b2', 'tf_efficientnet_b0_ns', 'tf_efficientnet_b1_ns','tf_efficientnet_b2_ns', 'efficientnet-b3', 'adv-efficientnet-b3', 'tf_efficientnet_b3_ns']
    backbones = ['tf_efficientnetv2_b0', 'tf_efficientnetv2_b1', 'tf_efficientnetv2_b2', 'tf_efficientnetv2_b3', 'tf_efficientnetv2_s']
    for backbone in backbones:
        use_one_model(cfg, backbone)

if __name__ == '__main__':
    cfg = Config()
    use_one_model(cfg, cfg.backbone)