Chassis (https://www.chassis.ml) is an open-source project that does exactly this.
You provide Chassis your PyTorch model. It wraps it in an MLflow model, puts a gRPC server in front of it, builds a container image, and pushes everything to Docker Hub for you. You can then pull the image from Docker Hub and push it to ECR yourself.
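For that last pull-and-push step, here's a minimal sketch using the docker SDK and boto3; the image name, ECR repository, and region are placeholders you'd swap for your own:

import base64
import boto3
import docker

# placeholders -- swap in your published image and your ECR repository
region = "us-east-1"
source_image = "<dockerhub_user>/<published_image>"
tag = "0.0.2"
ecr_repo = "<aws_account_id>.dkr.ecr.us-east-1.amazonaws.com/<repo_name>"

docker_client = docker.from_env()
docker_client.images.pull(source_image, tag=tag)

# ECR returns a base64-encoded "AWS:<password>" token
token = boto3.client("ecr", region_name=region).get_authorization_token()
username, password = base64.b64decode(
    token["authorizationData"][0]["authorizationToken"]
).decode().split(":")

# retag the local image for ECR and push it
image = docker_client.images.get(f"{source_image}:{tag}")
image.tag(ecr_repo, tag=tag)
docker_client.images.push(ecr_repo, tag=tag,
                          auth_config={"username": username, "password": password})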
PyTorch examples are here: https://github.com/modzy/chassis/tree/main/chassisml_sdk/examples/pytorch. YOLO isn't among them, but a Faster R-CNN notebook is included that you can modify to fit your needs.
You will need access to a Chassis server. You can either follow the instructions on the Chassis website to set one up locally (https://chassis.ml/getting-started/deploy-manual/), or use the publicly hosted one by signing up at https://chassis.modzy.com.
Basic code is below:
# import modules
import chassisml
import cv2
import torch
import getpass
import numpy as np
import torchvision.models as models
from torchvision import transforms
# provide Docker Hub credentials
dockerhub_user = getpass.getpass('docker hub username')
dockerhub_pass = getpass.getpass('docker hub password')
# load the pretrained model and define pre-/post-processing
model = models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
model.eval()
COCO_INSTANCE_CATEGORY_NAMES = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
transform = transforms.Compose([
transforms.ToPILImage(),
transforms.ToTensor(),
])
device = 'cpu'
def preprocess(input_bytes):
    # decode raw image bytes (OpenCV decodes to BGR) and convert to RGB
    decoded = cv2.imdecode(np.frombuffer(input_bytes, np.uint8), cv2.IMREAD_COLOR)
    decoded = cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB)
    # convert to a tensor and add a batch dimension
    img_t = transform(decoded)
    batch_t = torch.unsqueeze(img_t, 0).to(device)
    return batch_t
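# optional sanity check of preprocess on the sample image used below:
# sample_bytes = open("./data/airplane.jpg", "rb").read()
# print(preprocess(sample_bytes).shape)  # e.g. torch.Size([1, 3, H, W])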
def postprocess(num_detections, predictions):
    # pull the boxes tensor once and convert everything to plain Python types for JSON output
    boxes = predictions["boxes"].detach().cpu().numpy().tolist()
    inference_result = {
        "detections": [
            {
                "xMin": boxes[i][0],
                "yMin": boxes[i][1],
                "xMax": boxes[i][2],
                "yMax": boxes[i][3],
                "class": COCO_INSTANCE_CATEGORY_NAMES[predictions["labels"][i].detach().cpu().item()],
                "classProbability": predictions["scores"][i].detach().cpu().item(),
            } for i in range(num_detections)
        ]
    }
    structured_output = {
        "data": {
            "result": inference_result,
            "explanation": None,
            "drift": None,
        }
    }
    return structured_output
def process(input_bytes):
    # preprocess
    batch_t = preprocess(input_bytes)
    # run inference without tracking gradients
    with torch.no_grad():
        predictions = model(batch_t)[0]
    num_detections = len(predictions["boxes"])
    # postprocess
    structured_output = postprocess(num_detections, predictions)
    return structured_output
# create Chassis client
chassis_client = chassisml.ChassisClient("<chassis_server_url>:<chassis_service_port>")
# create Chassis model from the process function (Chassis handles the MLflow conversion)
chassis_model = chassis_client.create_model(process_fn=process)
# test Chassis model (can pass a filepath, buffered reader, bytes, or text here):
sample_filepath = './data/airplane.jpg'
results = chassis_model.test(sample_filepath)
print(results)
# have Chassis containerize and publish the model
response = chassis_model.publish(
model_name="PyTorch Faster R-CNN Object Detection",
model_version="0.0.2",
registry_user=dockerhub_user,
registry_pass=dockerhub_pass
)
# wait for packaging to complete.
job_id = response.get('job_id')
final_status = chassis_client.block_until_complete(job_id)
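Once the job finishes, the image sits in Docker Hub under your account. If you want to smoke-test the container locally before pushing it to ECR, here's a quick sketch with the docker SDK; the image name is an assumption (check what publish actually created in your Docker Hub account), and Chassis-built images serve the model over gRPC, on port 45000 by default if I'm reading the docs right:

import docker

docker_client = docker.from_env()
container = docker_client.containers.run(
    "<dockerhub_user>/<published_image>:0.0.2",  # assumed name; confirm on Docker Hub
    ports={"45000/tcp": 45000},  # Chassis containers expose gRPC here by default
    detach=True,
)
print(container.status)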