February 13, 2025

Training object detection models for imgproxy

  • tutorial
  • AI
A robot reading a book

Prerequisites

Step 0: Setting a goal

Step 1: Gathering a dataset

Step 1.1: Getting Open Images label names

# Open Images class identifiers (MIDs) of the pets to detect.
# The position of an entry in this list is the class index written to the
# YOLO label files, so the order must stay in sync with `names` in data.yaml.
labels = [
    mid
    for mid, _pet in (
        ("/m/01yrx", "Cat"),
        ("/m/0bt9lr", "Dog"),
        ("/m/06mf6", "Rabbit"),
        ("/m/03qrc", "Hamster"),
        ("/m/0gv1x", "Parrot"),
    )
]

Step 1.2: Downloading images and annotations

# Create the dataset directory and fetch the Open Images bounding-box
# annotation CSVs for the train and validation splits.
import os
# NOTE: the trailing "@param" comment looks like a Colab form annotation;
# keep it on the same line as the assignment.
dataset_root = "./dataset" # @param {type:"string"}
# Work with an absolute path from here on; it is interpolated into the shell
# commands below and into later cells.
dataset_root = os.path.abspath(dataset_root)
os.makedirs(dataset_root, exist_ok=True)
# The annotations are saved as boxes-train.csv and boxes-val.csv inside
# dataset_root.
!wget "https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv" -O "{dataset_root}/boxes-train.csv"
!wget "https://storage.googleapis.com/openimages/v5/validation-annotations-bbox.csv" -O "{dataset_root}/boxes-val.csv"
<class_id> <center_x> <center_y> <width> <height>
!pip install boto3 botocore tqdm
import os
import boto3
import botocore
from tqdm.notebook import tqdm
from concurrent import futures
class dataset:
    """Downloader for one Open Images split, stored in YOLO layout.

    Images selected from the bounding-box CSV are fetched from the
    ``open-images-dataset`` S3 bucket into ``<root>/<name>/images`` and
    their boxes are written to ``<root>/<name>/labels`` as
    ``<class_id> <center_x> <center_y> <width> <height>`` lines.
    """

    def __init__(self, root, name, s3_dir, labels, csv_path):
        """Create the output directories and the S3 bucket handle.

        Args:
            root: Directory that holds all dataset splits.
            name: Split name; used as the sub-directory of ``root`` and in
                the progress and summary messages.
            s3_dir: Key prefix of the split inside the S3 bucket.
            labels: Label names to keep. The index of a label in this list
                is the class id written to the label files.
            csv_path: Path to the bounding-box annotations CSV of the split.
        """
        self.root = root
        self.name = name

        self.images_dir = os.path.join(root, name, "images")
        self.labels_dir = os.path.join(root, name, "labels")
        os.makedirs(self.images_dir, exist_ok=True)
        os.makedirs(self.labels_dir, exist_ok=True)

        self.s3_dir = s3_dir
        # Requests are sent unsigned; the connection pool is sized slightly
        # above the 30 download workers used by download_dataset().
        self.s3_bucket = boto3.resource(
            's3',
            config=botocore.config.Config(
                signature_version=botocore.UNSIGNED,
                max_pool_connections=35,
            ),
        ).Bucket("open-images-dataset")

        self.labels = labels
        self.csv_path = csv_path

    def download_image(self, image_id, boxes):
        """Download one image and write its YOLO label file.

        A failed download is reported on stdout and the image is skipped;
        in that case no label file is written.

        Args:
            image_id: Image ID; ``<s3_dir>/<image_id>.jpg`` is downloaded.
            boxes: Iterable of ``(class_id, x_min, y_min, x_max, y_max)``
                tuples for this image.
        """
        path = os.path.join(self.images_dir, f"{image_id}.jpg")

        try:
            self.s3_bucket.download_file(f"{self.s3_dir}/{image_id}.jpg", path)
        except Exception as inst:
            # Deliberately best effort: one failed image must not abort the
            # whole dataset download.
            print(f"Downloading error: {inst}")
            return

        labels_path = os.path.join(self.labels_dir, f"{image_id}.txt")

        # The context manager closes the file; no explicit close() is needed.
        with open(labels_path, "w") as labels_f:
            for class_id, x1, y1, x2, y2 in boxes:
                # Corner coordinates -> center point plus width/height.
                cx = (x1 + x2) / 2
                cy = (y1 + y2) / 2
                w = x2 - x1
                h = y2 - y1
                labels_f.write(f"{class_id} {cx} {cy} {w} {h}\n")

    def download_dataset(self, max_images):
        """Select images from the annotations CSV and download them.

        The CSV is read sequentially and consecutive rows with the same
        image ID are treated as one image. An image is selected when it has
        at least one usable box of a class that has not yet reached
        ``max_images`` and none of its boxes for the wanted labels is
        flagged as group-of or inside. Selection stops once every class has
        at least ``max_images`` images or the file ends. Downloads run on a
        thread pool while selection continues; the method returns after all
        of them have finished.

        Args:
            max_images: Target number of images per class.
        """
        num_images = 0
        num_cls_images = {}

        image_id = None
        boxes = []
        has_good_boxes = False
        has_bad_boxes = False

        # The executor is shut down and the CSV is closed even if an error
        # interrupts the selection or one of the downloads.
        with futures.ThreadPoolExecutor(max_workers=30) as executor:
            all_futures = []

            with open(self.csv_path, "r") as f:
                # Skip header
                f.readline()

                # NOTE: the bar is advanced by the number of boxes of each
                # selected image, so it only approximates the total.
                pbar = tqdm(total=max_images * len(self.labels), desc=f"Selecting {self.name} images", leave=True)

                while True:
                    # At end of file new_image_id stays None, which flushes
                    # the last pending image below and then ends the loop.
                    new_image_id = None

                    line = f.readline()
                    if line != "":
                        split = line.strip().split(",")
                        new_image_id = split[0]
                        label_name = split[2]
                        x_min = float(split[4])
                        x_max = float(split[5])
                        y_min = float(split[6])
                        y_max = float(split[7])
                        group_of = split[10]
                        inside = split[12]

                    # The image ID changed, it's time to download the image
                    # and reset the state
                    if new_image_id != image_id:
                        if has_good_boxes and not has_bad_boxes:
                            all_futures.append(executor.submit(self.download_image, image_id, boxes))
                            pbar.update(len(boxes))
                            num_images += 1

                            # Count the image once per class it contains.
                            for class_id in {b[0] for b in boxes}:
                                num_cls_images[class_id] = num_cls_images.get(class_id, 0) + 1

                            # If we've found enough images for each class, stop
                            if all(
                                num_cls_images.get(class_id, 0) >= max_images
                                for class_id in range(len(self.labels))
                            ):
                                break

                        image_id = new_image_id
                        boxes = []
                        has_good_boxes = False
                        has_bad_boxes = False

                    if image_id is None:
                        break

                    if label_name in self.labels:
                        if inside == "0" and group_of == "0":
                            class_id = self.labels.index(label_name)
                            # The image is worth downloading only if it adds
                            # to a class that is still below the target.
                            has_good_boxes = has_good_boxes or num_cls_images.get(class_id, 0) < max_images
                            boxes.append((class_id, x_min, y_min, x_max, y_max))
                        else:
                            # A group-of/inside box for a wanted label
                            # disqualifies the whole image.
                            has_bad_boxes = True

            pbar.close()

            print(f"Selected {self.name} images:")
            for class_id, label in enumerate(self.labels):
                print(f"{label}: {num_cls_images.get(class_id, 0)}")

            # Wait for the downloads submitted during selection.
            pbar = tqdm(total=num_images, desc=f"Downloading {self.name} images", leave=True)
            for future in futures.as_completed(all_futures):
                future.result()
                pbar.update(1)
            pbar.close()
<dataset_root>
train/
images/
<image_id>.jpg
labels/
<image_id>.txt
val/
images/
<image_id>.jpg
labels/
<image_id>.txt
# Training split: objects live under the "train" prefix of the bucket;
# aim for 5000 images per class.
train_ds = dataset(
    root=dataset_root,
    name="train",
    s3_dir="train",
    labels=labels,
    csv_path=os.path.join(dataset_root, "boxes-train.csv"),
)
train_ds.download_dataset(max_images=5000)

# Validation split: stored locally as "val" but read from the "validation"
# prefix of the bucket; aim for 1000 images per class.
val_ds = dataset(
    root=dataset_root,
    name="val",
    s3_dir="validation",
    labels=labels,
    csv_path=os.path.join(dataset_root, "boxes-val.csv"),
)
val_ds.download_dataset(max_images=1000)
Selected train images:
/m/01yrx: 5012
/m/0bt9lr: 5036
/m/06mf6: 1133
/m/03qrc: 448
/m/0gv1x: 1396
Selected val images:
/m/01yrx: 344
/m/0bt9lr: 1002
/m/06mf6: 64
/m/03qrc: 28
/m/0gv1x: 74
%%writefile {dataset_root}/data.yaml
# Dataset description that is passed to `yolo train` as data=.../data.yaml.
train: ../train/images
val: ../val/images
# No separate test split is defined.
test:
# Class names in class-id order; each entry notes the Open Images label it
# corresponds to.
names:
- Cat # /m/01yrx
- Dog # /m/0bt9lr
- Rabbit # /m/06mf6
- Hamster # /m/03qrc
- Parrot # /m/0gv1x

Step 2: Training the model

# Install Ultralytics and run its built-in checks.
!pip install ultralytics
import ultralytics
ultralytics.checks()
# Open TensorBoard on ./OID-pets, the directory used as project= by the
# training run below.
%load_ext tensorboard
%tensorboard --logdir ./OID-pets
# Train starting from the yolo11s.pt weights on the dataset described by
# data.yaml. project/name place the run under OID-pets/train, where the later
# cells look for weights/best.pt. degrees/flipud/fliplr are augmentation
# settings (presumably rotation range and flip probabilities; see the
# Ultralytics training settings reference).
!yolo train \
model=yolo11s.pt \
data={dataset_root}/data.yaml \
epochs=300 \
patience=50 \
imgsz=640 \
batch=0.7 \
cache="disk" \
project="OID-pets" \
name="train" \
exist_ok=True \
degrees=45 \
flipud=0.5 \
fliplr=0.5
Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size
1/300 34.6G 1.627 2.64 1.979 55 640: 100% 95/95 [00:59<00:00, 1.60it/s]
Class Images Instances Box(P R mAP50 mAP50-95): 100% 6/6 [00:08<00:00, 1.46s/it]
all 1508 1782 0.623 0.705 0.693 0.366
TensorBoard showing the training process

Step 3: Testing the model

Download button in Google Colab
Upload button in Google Colab
# Run the trained model on a single image and display the annotated result.
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
test_image_path = "./test.jpg" # @param {type:"string"}
# The annotated image is read back from OID-pets/predict/ (the project/name
# given to `yolo predict` below) under the input's resolved file name.
test_image_name = os.path.basename(os.path.realpath(test_image_path))
result_path = "OID-pets/predict/" + test_image_name
# Predict with the best checkpoint of the training run.
!yolo predict \
model="OID-pets/train/weights/best.pt" \
source=$test_image_path \
imgsz=640 \
project="OID-pets" \
name="predict" \
exist_ok=True
# Show the result image without axes.
img = mpimg.imread(result_path)
plt.figure(figsize=(8, 8))
plt.imshow(img)
plt.tight_layout()
plt.axis('off')
plt.show()
A hamster detected on the image

Step 4: Configuring imgproxy

Step 4.1: Exporting the model to the ONNX format

# Export the best checkpoint of the training run to ONNX with the same 640
# image size that was used for training.
!yolo export \
model="OID-pets/train/weights/best.pt" \
format=onnx \
imgsz=640 \
simplify=True \
half=True \
device=0
# Copy the weights and the exported model (best.onnx, next to best.pt) into
# the working directory under the oid-pets.* names.
!cp OID-pets/train/weights/best.pt ./oid-pets.pt
!cp OID-pets/train/weights/best.onnx ./oid-pets.onnx

Step 4.2: Creating a class names file

cat
dog
rabbit
hamster
parrot

Step 4.3: Building a Docker image

# The base image defaults to the imgproxy ML image and can be overridden
# through the BASE_IMAGE build argument.
ARG BASE_IMAGE="docker.imgproxy.pro/imgproxy:latest-ml"
FROM ${BASE_IMAGE}
# Ship the exported ONNX model and its class names file inside the image.
COPY oid-pets.onnx /opt/imgproxy/share/models/oid-pets.onnx
COPY oid-pets.names /opt/imgproxy/share/models/oid-pets.names
# Point imgproxy's object detection settings at the copied files.
ENV IMGPROXY_OBJECT_DETECTION_NET=/opt/imgproxy/share/models/oid-pets.onnx
ENV IMGPROXY_OBJECT_DETECTION_NET_TYPE=yolov11
ENV IMGPROXY_OBJECT_DETECTION_CLASSES=/opt/imgproxy/share/models/oid-pets.names
# 640 is the image size the model was trained and exported with.
ENV IMGPROXY_OBJECT_DETECTION_NET_SIZE=640
ENV IMGPROXY_OBJECT_DETECTION_CONFIDENCE_THRESHOLD=0.5
docker build -t imgproxy:latest-ml-pets .

Step 5: Testing imgproxy with the model

docker run -p 8080:8080 imgproxy:latest-ml-pets
http://localhost:8080/unsafe/rs:fill:500:500/g:obj/dd:1/plain/https://assets.imgproxy.net/sample-corgi.jpg
A corgi with a bounding box around it

Resizeable image

Start your free trial today:

Get imgproxy Pro on Cloud Marketplace: