Training object detection models for imgproxy

  • tutorial
  • AI

Prerequisites

This tutorial assumes you're working in a Google Colab notebook (or another Jupyter environment with a GPU) and that you have access to imgproxy Pro's ML-enabled Docker image, which we'll use in Step 4.

Step 0: Setting a goal

Our goal is a YOLO model that detects common pets (cats, dogs, rabbits, hamsters, and parrots) so that imgproxy can use it for detection-based features such as object-oriented cropping.

Step 1: Gathering a dataset

Step 1.1: Getting Open Images label names

Open Images refers to classes by opaque label IDs rather than human-readable names, so we start by listing the IDs of the classes we want to detect:

labels = [
  "/m/01yrx",  # Cat
  "/m/0bt9lr", # Dog
  "/m/06mf6",  # Rabbit
  "/m/03qrc",  # Hamster
  "/m/0gv1x",  # Parrot
]
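If you want to look up IDs for other classes, they can be pulled from the Open Images class descriptions CSV. Here is a minimal sketch; the exact descriptions URL is an assumption and may differ between Open Images versions:

import csv
import urllib.request

# Assumed URL of the boxable class descriptions file; adjust for your Open Images version
DESCRIPTIONS_URL = "https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv"
wanted = {"Cat", "Dog", "Rabbit", "Hamster", "Parrot"}

with urllib.request.urlopen(DESCRIPTIONS_URL) as resp:
  rows = csv.reader(resp.read().decode("utf-8").splitlines())
  # Each row is (label_id, display_name), e.g. ("/m/01yrx", "Cat")
  found = {name: label_id for label_id, name in rows if name in wanted}

print(found)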

Step 1.2: Downloading images and annotations

Pick a directory for the dataset and download Open Images' bounding box annotation CSVs for the train and validation splits:

import os

dataset_root = "./dataset" # @param {type:"string"}
dataset_root = os.path.abspath(dataset_root)

os.makedirs(dataset_root, exist_ok=True)
!wget "https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv" -O "{dataset_root}/boxes-train.csv"
!wget "https://storage.googleapis.com/openimages/v5/validation-annotations-bbox.csv" -O "{dataset_root}/boxes-val.csv"
The annotation CSVs use Open Images' own format, while YOLO expects a .txt label file per image in which every line describes one bounding box with coordinates relative to the image size:

<class_id> <center_x> <center_y> <width> <height>
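For example, a single box of class 0 (Cat) roughly centered in the image could look like this (the numbers are made up for illustration):

0 0.45 0.50 0.30 0.40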
The script below scans the annotation CSVs, selects suitable images of our target classes, downloads them from the Open Images S3 bucket in parallel, and writes a YOLO label file next to each image. Install its dependencies first:

!pip install boto3 botocore tqdm

import os
import boto3
import botocore
from tqdm.notebook import tqdm
from concurrent import futures

class dataset:
  def __init__(self, root, name, s3_dir, labels, csv_path):
    self.root = root
    self.name = name

    self.images_dir = os.path.join(root, name, "images")
    self.labels_dir = os.path.join(root, name, "labels")

    os.makedirs(self.images_dir, exist_ok=True)
    os.makedirs(self.labels_dir, exist_ok=True)

    self.s3_dir = s3_dir
    self.s3_bucket = boto3.resource(
      's3',
      config=botocore.config.Config(signature_version=botocore.UNSIGNED, max_pool_connections=35)
    ).Bucket("open-images-dataset")

    self.labels = labels
    self.csv_path = csv_path


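  # Downloads a single image from the Open Images S3 bucket and writes its
  # bounding boxes to a YOLO-format label file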
  def download_image(self, image_id, boxes):
    path = os.path.join(self.images_dir, f"{image_id}.jpg")

    try:
      self.s3_bucket.download_file(f"{self.s3_dir}/{image_id}.jpg", path)
    except Exception as inst:
      print(f"Downloading error: {inst}")
      return

    labels_path = os.path.join(self.labels_dir, f"{image_id}.txt")

    with open(labels_path, "w") as labels_f:
      for box in boxes:
        cls, x1, y1, x2, y2 = box

        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        w = x2 - x1
        h = y2 - y1

        labels_f.write(f"{cls} {cx} {cy} {w} {h}\n")


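  # Scans the annotation CSV, selects images whose target-class boxes are all
  # usable (not group-of, not inside), and downloads them in parallel until
  # every class has at least max_images images or the CSV runs out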
  def download_dataset(self, max_images):
    f = open(self.csv_path, "r")

    num_images = 0
    num_cls_images = {}

    image_id = None
    boxes = []
    has_good_boxes = False
    has_bad_boxes = False

    # Skip header
    f.readline()

    pbar = tqdm(total=max_images * len(self.labels), desc=f"Selecting {self.name} images", leave=True)

    executor = futures.ThreadPoolExecutor(max_workers=30)
    all_futures = []

    while True:
      new_image_id = None

      line = f.readline()

      if line != "":
        split = line.strip().split(",")

        new_image_id = split[0]
        label_name = split[2]
        x_min = float(split[4])
        x_max = float(split[5])
        y_min = float(split[6])
        y_max = float(split[7])
        group_of = split[10]
        inside = split[12]

      # The image ID changed, it's time to download the image and reset the state
      if new_image_id != image_id:
        if has_good_boxes and not has_bad_boxes:
          all_futures.append(executor.submit(self.download_image, image_id, boxes))

          pbar.update(len(boxes))

          num_images += 1

          for class_id in set(b[0] for b in boxes):
            num_cls_images[class_id] = num_cls_images.get(class_id, 0) + 1

          had_enough = True
          # Check if we've found enough images for each class
          for class_id in range(len(self.labels)):
            had_enough = had_enough and num_cls_images.get(class_id, 0) >= max_images
          # If we've found enough images, stop
          if had_enough:
            break

        image_id = new_image_id
        boxes = []
        has_good_boxes = False
        has_bad_boxes = False

      if image_id is None:
        break

      if label_name in self.labels:
        if inside == "0" and group_of == "0":
          class_id = self.labels.index(label_name)

          has_good_boxes = has_good_boxes or num_cls_images.get(class_id, 0) < max_images

          boxes.append((class_id, x_min, y_min, x_max, y_max))
        else:
          has_bad_boxes = True

    f.close()
    pbar.close()

    print(f"Selected {self.name} images:")
    for class_id in range(len(self.labels)):
      print(f"{self.labels[class_id]}: {num_cls_images.get(class_id, 0)}")

    pbar = tqdm(total=num_images, desc=f"Downloading {self.name} images", leave=True)

    for future in futures.as_completed(all_futures):
      future.result()
      pbar.update(1)

    pbar.close()
After downloading, the dataset follows the directory layout that Ultralytics YOLO expects:

<dataset_root>
  train/
    images/
      <image_id>.jpg
    labels/
      <image_id>.txt
  val/
    images/
      <image_id>.jpg
    labels/
      <image_id>.txt
Now download both splits, aiming for 5,000 images per class for training and 1,000 per class for validation (classes with fewer suitable images in Open Images will simply end up with less):

train_ds = dataset(
  dataset_root,
  "train",
  "train",
  labels,
  os.path.join(dataset_root, "boxes-train.csv"),
)
train_ds.download_dataset(5000)

val_ds = dataset(
  dataset_root,
  "val",
  "validation",
  labels,
  os.path.join(dataset_root, "boxes-val.csv"),
)
val_ds.download_dataset(1000)
The selection step will report something like this; the exact numbers depend on how many suitable images Open Images has for each class:

Selected train images:
/m/01yrx: 5012
/m/0bt9lr: 5036
/m/06mf6: 1133
/m/03qrc: 448
/m/0gv1x: 1396

Selected val images:
/m/01yrx: 344
/m/0bt9lr: 1002
/m/06mf6: 64
/m/03qrc: 28
/m/0gv1x: 74
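As an optional sanity check (not part of the original flow), you can count what actually landed on disk:

import os

# Count downloaded images and label files for each split
for split in ("train", "val"):
  images = os.listdir(os.path.join(dataset_root, split, "images"))
  lbls = os.listdir(os.path.join(dataset_root, split, "labels"))
  print(f"{split}: {len(images)} images, {len(lbls)} label files")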
Finally, describe the dataset in a data.yaml file for Ultralytics. The class names must appear in the same order as in the labels list above, because their position defines the class ID:

%%writefile {dataset_root}/data.yaml

train: ../train/images
val: ../val/images
test:

names:
  - Cat     # /m/01yrx
  - Dog     # /m/0bt9lr
  - Rabbit  # /m/06mf6
  - Hamster # /m/03qrc
  - Parrot  # /m/0gv1x
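If you want to double-check that the file parses and that the class order matches the labels list used for downloading, here's a quick optional check (it assumes PyYAML, which Colab ships with):

import os
import yaml

with open(os.path.join(dataset_root, "data.yaml")) as f:
  cfg = yaml.safe_load(f)

# The index in this list is the class ID, so it must match the order of `labels`
print(cfg["names"])  # expected: ['Cat', 'Dog', 'Rabbit', 'Hamster', 'Parrot']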

Step 2: Training the model

We'll fine-tune the small YOLO11 model (yolo11s) with Ultralytics. Install the package, load TensorBoard to watch the run, and start training:

!pip install ultralytics
import ultralytics
ultralytics.checks()
%load_ext tensorboard
%tensorboard --logdir ./OID-pets
!yolo train \
  model=yolo11s.pt \
  data={dataset_root}/data.yaml \
  epochs=300 \
  patience=50 \
  imgsz=640 \
  batch=0.7 \
  cache="disk" \
  project="OID-pets" \
  name="train" \
  exist_ok=True \
  degrees=45 \
  flipud=0.5 \
  fliplr=0.5
Each epoch prints progress like this:

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
1/300      34.6G      1.627       2.64      1.979         55        640: 100% 95/95 [00:59<00:00,  1.60it/s]
           Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% 6/6 [00:08<00:00,  1.46s/it]
             all       1508       1782      0.623      0.705      0.693      0.366
TensorBoard showing the training process
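If you prefer the Ultralytics Python API over the CLI, roughly the same run can be started like this (a sketch using the same parameters as the command above):

from ultralytics import YOLO

model = YOLO("yolo11s.pt")
model.train(
  data=f"{dataset_root}/data.yaml",
  epochs=300,
  patience=50,
  imgsz=640,
  batch=0.7,    # fraction of GPU memory to target
  cache="disk",
  project="OID-pets",
  name="train",
  exist_ok=True,
  degrees=45,   # random rotation augmentation
  flipud=0.5,   # vertical flip probability
  fliplr=0.5,   # horizontal flip probability
)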

Step 3: Testing the model

When training finishes, the best weights land in OID-pets/train/weights/best.pt; you can download them through Colab's file browser to keep a copy. Upload a test image the same way, set its path below, and run a prediction:
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

test_image_path = "./test.jpg" # @param {type:"string"}
test_image_name = os.path.basename(os.path.realpath(test_image_path))
result_path = "OID-pets/predict/" + test_image_name

!yolo predict \
  model="OID-pets/train/weights/best.pt" \
  source=$test_image_path \
  imgsz=640 \
  project="OID-pets" \
  name="predict" \
  exist_ok=True

img = mpimg.imread(result_path)
plt.figure(figsize=(8, 8))
plt.imshow(img)
plt.tight_layout()
plt.axis('off')
plt.show()
A hamster detected on the image
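To inspect the raw detections instead of (or in addition to) the rendered image, the Ultralytics Python API exposes them directly; a small sketch:

from ultralytics import YOLO

model = YOLO("OID-pets/train/weights/best.pt")
results = model(test_image_path, imgsz=640)

# Print class name, confidence, and pixel coordinates for each detected box
for box in results[0].boxes:
  cls_name = results[0].names[int(box.cls)]
  print(f"{cls_name}: {float(box.conf):.2f} at {box.xyxy[0].tolist()}")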

Step 4: Configuring imgproxy

Step 4.1: Exporting the model to the ONNX format

imgproxy consumes object detection models in the ONNX format, so export the trained weights and copy both the PyTorch and ONNX files under shorter names:

!yolo export \
  model="OID-pets/train/weights/best.pt" \
  format=onnx \
  imgsz=640 \
  simplify=True \
  half=True \
  device=0

!cp OID-pets/train/weights/best.pt ./oid-pets.pt
!cp OID-pets/train/weights/best.onnx ./oid-pets.onnx
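Optionally, verify that the exported file loads and has the expected 640×640 input (this assumes the onnxruntime package is installed):

import onnxruntime as ort

session = ort.InferenceSession("./oid-pets.onnx", providers=["CPUExecutionProvider"])
inp = session.get_inputs()[0]
print(inp.name, inp.shape)  # something like: images [1, 3, 640, 640]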

Step 4.2: Creating a class names file

Create an oid-pets.names file with one class name per line, in the same order as the classes in data.yaml (the line number determines the class ID):

cat
dog
rabbit
hamster
parrot
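You can write this file by hand, or generate it from data.yaml so the order can't drift; a small optional sketch:

import os
import yaml

with open(os.path.join(dataset_root, "data.yaml")) as f:
  names = yaml.safe_load(f)["names"]

# One lowercase class name per line, in class-ID order
with open("oid-pets.names", "w") as f:
  f.write("\n".join(n.lower() for n in names) + "\n")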

Step 4.3: Building a Docker image

Copy the exported model and the class names file into an image based on imgproxy's ML-enabled build, and point imgproxy at them via environment variables:

ARG BASE_IMAGE="docker.imgproxy.pro/imgproxy:latest-ml"

FROM ${BASE_IMAGE}

COPY oid-pets.onnx /opt/imgproxy/share/models/oid-pets.onnx
COPY oid-pets.names /opt/imgproxy/share/models/oid-pets.names

ENV IMGPROXY_OBJECT_DETECTION_NET=/opt/imgproxy/share/models/oid-pets.onnx
ENV IMGPROXY_OBJECT_DETECTION_NET_TYPE=yolov11
ENV IMGPROXY_OBJECT_DETECTION_CLASSES=/opt/imgproxy/share/models/oid-pets.names
ENV IMGPROXY_OBJECT_DETECTION_NET_SIZE=640
ENV IMGPROXY_OBJECT_DETECTION_CONFIDENCE_THRESHOLD=0.5
Then build the image:

docker build -t imgproxy:latest-ml-pets .

Step 5: Testing imgproxy with the model

Run the container:

docker run -p 8080:8080 imgproxy:latest-ml-pets
Then open a URL that resizes a sample image with object-oriented gravity (g:obj) and draws the detected boxes on the result (dd:1):

http://localhost:8080/unsafe/rs:fill:500:500/g:obj/dd:1/plain/https://assets.imgproxy.net/sample-corgi.jpg
A corgi with a bounding box around it
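If you'd rather check the result from code than from a browser, fetching the same URL from Python works too (this assumes the container from the previous step is running locally):

import urllib.request

url = (
  "http://localhost:8080/unsafe/rs:fill:500:500/g:obj/dd:1/plain/"
  "https://assets.imgproxy.net/sample-corgi.jpg"
)
# Save the processed image with the detection boxes drawn on it
urllib.request.urlretrieve(url, "corgi-detected.jpg")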
