My training script for the model:
seed = 42
import random
import os
import numpy as np
def seed_everything(seed):
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True
seed_everything(seed)
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data.catalog import Metadata
import os
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("experiment",)
cfg.DATASETS.TEST = ("test",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
cfg.MODEL.DEVICE = "cuda"
cfg.SOLVER.IMS_PER_BATCH = 2
num_gpu = 1
bs = (num_gpu * 2)
cfg.SOLVER.BASE_LR = 0.02 * bs / 16
cfg.SOLVER.MAX_ITER = 7500
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
My inference script on server-1 is:
import cv2
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
cfg.SEED = 42
predictor = DefaultPredictor(cfg)
img = cv2.imread('filename.jpg')
outputs = predictor(img)
print(outputs["instances"])
pred_classes = outputs['instances'].pred_classes.tolist()
classes = ["Handwritten", "Logo", "Markings", "Signature"]
for pred_class in pred_classes:
print('*'*10)
print(classes[pred_class])
print('*'*10)
if any(classes[pred_class] == "Handwritten" for pred_class in pred_classes):
print(True)
else:
print(False)
My inference script on server-2 is:
class Handwritten:
"""
Detects a list of handwritten pages in a PDF chart.
Attributes
----------
path_of_model : str
Path where the trained model is stored.
path_of_weights : str
Path where the weights file is stored.
"""
def __init__(self, path_of_weights: str) -> None:
"""Initialize Handwritten class.
Parameters
----------
path_of_model : str
path_of_weights : str
"""
self.cfg = get_cfg()
self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
self.cfg.MODEL.WEIGHTS = path_of_weights
self.cfg.MODEL.DEVICE = "cpu"
self.cfg.SEED = 42
self.predictor = DefaultPredictor(self.cfg)
self.metadata = Metadata()
self.metadata.set(
thing_classes=["Handwritten", "Logo", "Markings", "Signature"],
thing_dataset_id_to_contiguous_id={0: 0, 1: 1, 2: 2, 3: 3},
)
def __call__(self, img: Any) -> Any:
"""Return the predicted output classes for the image."""
self.outputs = self.predictor(img)
return self.outputs["instances"]
def detect_hw(self, image: Any) -> bool:
"""Detect handwritten dx entity in image and if present then classifies it as hw page.
Parameters
----------
image : Matrix
.Image matrix of a page.
Return
-------
True/False : bool
Boolean value that states if the page is handwritten or not.
"""
outputs = self.__call__(image)
pred_classes = outputs.pred_classes.tolist()
classes = ["Handwritten", "Logo", "Markings", "Signature"]
if any(classes[pred_class] == "Handwritten" for pred_class in pred_classes):
return True
else:
return False
app = FastAPI()
path_of_weights = "model/model_final.pth"
model = Handwritten(path_of_weights)
@app.post("/cv/predict", status_code=200)
def predict(
page_no: int = Form(...), dimensions: list = Form(...), image: UploadFile = File(...)
) -> Dict[str, int]:
"""Predicts if image is handwritten page or not.
Parameters
----------
page_no : Page number of the given input page
dimensions : Height and width of the page
image : Image of the page as bytestream
"""
image_bytes = image.file.read()
decoded_image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), -1)
height, width = int(dimensions[0]), int(dimensions[1])
prediction_time = time.time()
pg_image = cv2.resize(decoded_image, (height, width))
try:
# Check if page is handwritten
hw_result = model.detect_hw(pg_image)
# If handwritten, consider for output
if hw_result:
hw_pages = page_no
else:
hw_pages = -99
prediction_info = {
"hw_pages": hw_pages,
"prediction_time": prediction_time,
}
#_logger.info(f"prediction info: {prediction_info}")
except HTTPError as e:
do something
return {"hw_pages": hw_pages}
While the model keeps giving good results on server-1, it is somehow being very erratic in server-2. The weights and the seed is the same. Somehow, I am unable to understand this change in behavior in both of these scenarios.
The model is trained on server-1
Server-1 is g4dn.2xlarge
. Server-2 is g4dn.xlarge
Is there something wrong which I am doing?