Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Navigeer naar deze notebook op GitHub: book/ml_principles/labs/imgrec_taco_train

Via bovenstaande link kan je deze notebook openen in Google Colaboratory. In die omgeving kunnen we gebruik maken van gratis quota voor GPUs (en TPUs). GPU acceleratie is hier sterk aanbevolen voor zowel model training als model inference.

Colab setup

  • Als je de notebook voor het eerst opent op Colab, kies je in het menu rechts bovenaan Connect > Change runtime type: Python 3 en T4 GPU.

  • Pas nadat de GPU Runtime is opgestart ga je verder met onderstaande installatie van het ultralytics package.

  • ⚠️ Bij deze waarschuwing Warning: This notebook was not authored by Google selecteer je Run anyway.

# Install the Ultralytics package (YOLO models) into the Colab runtime.
!pip install ultralytics
import glob
import json
import os
import shutil
from pprint import pprint

import kagglehub
import matplotlib.pyplot as plt
import pandas as pd
import yaml
from PIL import Image, ImageDraw
from ultralytics import YOLO
# Point the Ultralytics datasets directory at the location used below,
# and enable TensorBoard logging for training runs.
!yolo settings datasets_dir=/content/taco
!yolo settings tensorboard=True

⚠️ Om bovenstaande aanpassingen in werking te stellen moet je nu de notebook runtime herstarten

Dataset

# Fetch the TACO dataset (YOLO format) from Kaggle; kagglehub returns its
# local cache location.
download_dir = kagglehub.dataset_download("vencerlanz09/taco-dataset-yolo-format")
print(download_dir)

# Work on a local copy instead of moving the cached files, which would run
# into root-permission issues on the kagglehub cache directory.
local_dir = "./taco"
if not os.path.exists(local_dir):
    shutil.copytree(download_dir, local_dir)
path = local_dir
print(path)
# Update the dataset metadata file (see taco#1.ipynb for background).
yaml_path = path + "/data.yaml"
with open(yaml_path) as f:
    meta = yaml.safe_load(f)

# Point each split entry at its image folder.
for split, folder in (
    ("train", "./train/images"),
    ("val", "./valid/images"),
    ("test", "./test/images"),
):
    meta[split] = folder

# Swap the "Bottle cap" and "Bottle" class names (see taco#1.ipynb —
# presumably these two labels are flipped in the shipped metadata).
i = meta["names"].index("Bottle cap")
j = meta["names"].index("Bottle")
meta["names"][i], meta["names"][j] = meta["names"][j], meta["names"][i]

# Persist the corrected metadata back to disk.
with open(yaml_path, "w") as f:
    yaml.dump(meta, f)

pprint(meta)

Training (hyper) Parameters

# Training hyper-parameters used by model.train() below.
epochs = 30               # number of training epochs
batch_size = 32           # images per mini-batch
imgsz = 640               # square input image size in pixels
optimizer_type = "AdamW"  # AdamW optimizer (recommended for better regularization)
lr = 1e-4                 # initial learning rate (passed as lr0)
weight_decay = 1e-4       # L2 weight-decay strength

Model Training

# Initialize a YOLOv8-small model from pre-trained weights.
# (Fix: the original cell loaded the identical checkpoint twice and the
# comments called it "YOLOv11" although the weights file is yolov8s.pt.)
model = YOLO("yolov8s.pt")

# Uncomment to inspect the network architecture:
# print(model)

Start Tensorboard voor live monitoring.

# Launch TensorBoard inside the notebook for live training monitoring.
%load_ext tensorboard
%tensorboard --logdir /content/runs
# Train the model with the hyper-parameters defined above.
# NOTE(review): the original comment claimed a cosine-annealing scheduler,
# but no cos_lr=True argument is passed here — confirm which LR schedule
# is actually intended.
model.train(
    data=path + "/data.yaml",
    epochs=epochs,
    batch=batch_size,
    imgsz=imgsz,
    optimizer=optimizer_type,
    lr0=lr,  # Initial learning rate
    weight_decay=weight_decay,
    save=True,  # Save the best model
    save_period=1,  # Save a checkpoint after every epoch
    # val=True,  # Evaluate on validation set
)
# Reload the best checkpoint produced by training and score it on the
# validation split.
best_model = YOLO("./runs/detect/train/weights/best.pt")
val_results = best_model.val()

print("Best Validation Metrics from Best Model:")
for metric_name, metric_value in (
    ("Precision", val_results.box.mp),
    ("Recall", val_results.box.mr),
    ("mAP@50", val_results.box.map50),
    ("mAP@50-95", val_results.box.map),
):
    print(f"{metric_name}: {metric_value:.4f}")

log file testing

# Path of the per-epoch CSV log that Ultralytics writes during training.
log_file = "./runs/detect/train/results.csv"

# Load it into a DataFrame and peek at its structure.
log_data = pd.read_csv(log_file)
print(log_data.columns)
print(log_data.head())

Graphs

# Loss columns needed for the curves plotted below.
# (Fix: replaces six copy-pasted pd.to_numeric lines with one loop.)
_loss_columns = [
    "train/box_loss",
    "train/cls_loss",
    "train/dfl_loss",
    "val/box_loss",
    "val/cls_loss",
    "val/dfl_loss",
]

# Coerce everything to numeric; unparsable entries become NaN.
log_data["epoch"] = pd.to_numeric(log_data["epoch"], errors="coerce").astype(int)
for _col in _loss_columns:
    log_data[_col] = pd.to_numeric(log_data[_col], errors="coerce")

# Drop rows where any plotted value is missing.
log_data = log_data.dropna(subset=["epoch"] + _loss_columns)

# Plot training vs. validation losses per epoch.
# (Fix: replaces six copy-pasted plt.plot calls with one data-driven loop.)
plt.figure(figsize=(12, 8))

# (column, legend label, linestyle, marker): solid lines are training
# curves, dashed lines validation; one marker shape per loss component.
_curves = [
    ("train/box_loss", "Train Box Loss", "-", "o"),
    ("train/cls_loss", "Train Class Loss", "-", "x"),
    ("train/dfl_loss", "Train DFL Loss", "-", "s"),
    ("val/box_loss", "Val Box Loss", "--", "o"),
    ("val/cls_loss", "Val Class Loss", "--", "x"),
    ("val/dfl_loss", "Val DFL Loss", "--", "s"),
]
for _col, _label, _style, _marker in _curves:
    plt.plot(
        log_data["epoch"],
        log_data[_col],
        label=_label,
        linestyle=_style,
        marker=_marker,
    )

# Customize the plot
plt.title("Training and Validation Losses over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.xticks(log_data["epoch"])  # Show every epoch as an integer tick
plt.legend(loc="upper right")

# Show the plot
plt.show()

Validation results

# Evaluate the last-trained model on the validation set and report the
# headline detection metrics.
results = model.val()
print(f"Validation Results: {results}")
print("Validation Results:")
for _label, _value in (
    ("Mean Precision:", results.box.mp),    # mean precision over classes
    ("Mean Recall:", results.box.mr),       # mean recall over classes
    ("mAP 50:", results.box.map50),         # mAP at IoU threshold 0.5
    ("mAP 50-95:", results.box.map),        # mAP averaged over IoU 0.5-0.95
):
    print(_label, _value)

Validation Metrics plot

# Pull the four headline metrics out of the results dictionary.
_snapshot = {
    "Precision": results.results_dict["metrics/precision(B)"],
    "Recall": results.results_dict["metrics/recall(B)"],
    "mAP50": results.results_dict["metrics/mAP50(B)"],
    "mAP50-95": results.results_dict["metrics/mAP50-95(B)"],
}
precision = _snapshot["Precision"]
recall = _snapshot["Recall"]
map50 = _snapshot["mAP50"]
map50_95 = _snapshot["mAP50-95"]

# Bar-chart snapshot of the metric values.
metrics = list(_snapshot)
values = list(_snapshot.values())

plt.figure(figsize=(8, 6))
plt.bar(metrics, values, color=["b", "r", "g", "purple"])
plt.title("Model Evaluation Metrics")
plt.ylabel("Values")
plt.show()

Testing

# Load the best checkpoint and evaluate it on the held-out test split.
best_model = YOLO("./runs/detect/train/weights/best.pt")
# Bug fix: point at the dataset's data.yaml under `path`, not a bare
# "data.yaml" relative to the working directory — every other cell loads
# the config as path + "/data.yaml".
test_results = best_model.val(data=path + "/data.yaml", split="test")

# Print test metrics
print(f"Test Precision: {test_results.box.mp:.4f}")
print(f"Test Recall: {test_results.box.mr:.4f}")
print(f"Test mAP@50: {test_results.box.map50:.4f}")
print(f"Test mAP@50-95: {test_results.box.map:.4f}")
# Function to parse ground truth annotations in YOLO format
def parse_annotation(annotation_path):
    """Parse a YOLO-style annotation file.

    Each non-empty line has the form:
        class_id x_center y_center width height
    with the box coordinates normalized to [0, 1].

    Args:
        annotation_path: Path to the .txt label file.

    Returns:
        A ``(labels, boxes)`` tuple where ``labels`` is a list of int class
        IDs and ``boxes`` is a list of ``[x_center, y_center, width,
        height]`` float lists. Both are empty when the file is missing.
    """
    if not os.path.exists(annotation_path):
        print(f"Annotation file {annotation_path} not found.")
        return [], []  # Treat a missing label file as "no objects"

    with open(annotation_path, "r") as file:
        lines = file.readlines()

    labels = []
    boxes = []
    for line in lines:
        parts = line.strip().split()
        if not parts:
            # Fix: skip blank lines (e.g. a trailing newline); the original
            # crashed here with an IndexError on parts[0].
            continue
        labels.append(int(parts[0]))  # Class ID
        boxes.append([float(x) for x in parts[1:]])  # [x_c, y_c, w, h]
    return labels, boxes


# Load the best trained weights for inference.
model = YOLO("./runs/detect/train/weights/best.pt")

# Locations of the held-out test images and their YOLO-format labels.
test_image_dir = path + "/test/images/"
test_label_dir = path + "/test/labels/"

# Collect the test images. Fix: the annotation lookup below also handles
# ".JPG", but the original glob only matched "*.jpg", silently skipping
# upper-case files; match both casings (set() dedupes on case-insensitive
# filesystems) and sort for a deterministic order.
test_images = sorted(
    set(
        glob.glob(os.path.join(test_image_dir, "*.jpg"))
        + glob.glob(os.path.join(test_image_dir, "*.JPG"))
    )
)

# Output directory to save inference results
output_dir = "./inference_results/"
os.makedirs(output_dir, exist_ok=True)

# Class names, indexed by YOLO class id.
# NOTE(review): assumed to match the "names" order in data.yaml — verify
# against the metadata written earlier in this notebook.
class_names = [
    "Aluminium foil",
    "Bottle cap",
    "Bottle",
    "Broken glass",
    "Can",
    "Carton",
    "Cigarette",
    "Cup",
    "Lid",
    "Other litter",
    "Other plastic",
    "Paper",
    "Plastic bag - wrapper",
    "Plastic container",
    "Pop tab",
    "Straw",
    "Styrofoam piece",
    "Unlabeled litter",
]

# Convert one normalized YOLO box [x_c, y_c, w, h] into integer pixel
# corner coordinates (x1, y1, x2, y2) for the given image size.
def _norm_box_to_corners(box, img_width, img_height):
    x_center, y_center, width, height = box
    x1 = int((x_center - width / 2) * img_width)
    y1 = int((y_center - height / 2) * img_height)
    x2 = int((x_center + width / 2) * img_width)
    y2 = int((y_center + height / 2) * img_height)
    return x1, y1, x2, y2


# Loop through each image and perform inference
for img_path in test_images:
    # Matching YOLO-format annotation file (same basename, .txt extension).
    annotation_path = os.path.join(
        test_label_dir, os.path.basename(img_path).replace(".jpg", ".txt").replace(".JPG", ".txt")
    )

    # Perform inference without verbose output
    results = model(img_path, verbose=False)

    # Ground truth: class ids and normalized boxes from the label file.
    actual_labels, actual_boxes = parse_annotation(annotation_path)
    actual_labels_names = [class_names[label] for label in actual_labels]

    # Save the result image with predictions rendered by Ultralytics.
    img_name = os.path.basename(img_path)
    result_img_path = os.path.join(output_dir, img_name)
    results[0].save(result_img_path)

    # Extract predicted labels and bounding boxes.
    # Bug fix: use .xywhn (coordinates normalized to [0, 1]) instead of
    # .xywh (pixel units) — the drawing code below multiplies by the image
    # size, which is only correct for normalized boxes.
    if results[0].boxes is None or len(results[0].boxes.cls) == 0:
        predicted_labels = ["No prediction"]
        predicted_boxes = []
    else:
        predicted_labels = [results[0].names[int(cls)] for cls in results[0].boxes.cls]
        predicted_boxes = results[0].boxes.xywhn.cpu().numpy()

    # Open the saved prediction image and the untouched original.
    img_predicted = Image.open(result_img_path)
    img_actual = Image.open(img_path)

    # Create drawing objects
    draw_predicted = ImageDraw.Draw(img_predicted)
    draw_actual = ImageDraw.Draw(img_actual)

    # Draw predicted bounding boxes (blue) on the predicted image.
    img_width, img_height = img_predicted.size
    if len(predicted_boxes) == 0:
        draw_predicted.text((10, 10), "No prediction", fill="red")
    else:
        for i, box in enumerate(predicted_boxes):
            x1, y1, x2, y2 = _norm_box_to_corners(box, img_width, img_height)
            draw_predicted.rectangle([x1, y1, x2, y2], outline="blue", width=2)
            draw_predicted.text(
                (x1, y1),
                predicted_labels[i] if i < len(predicted_labels) else "Unknown",
                fill="blue",
            )

    # Draw ground-truth boxes (green) on the original image, scaled by
    # that image's own dimensions (the saved prediction image could in
    # principle differ in size).
    actual_width, actual_height = img_actual.size
    for i, box in enumerate(actual_boxes):
        x1, y1, x2, y2 = _norm_box_to_corners(box, actual_width, actual_height)
        draw_actual.rectangle([x1, y1, x2, y2], outline="green", width=2)
        draw_actual.text((x1, y1), actual_labels_names[i], fill="white")

    # Display prediction and ground truth side by side.
    fig, axes = plt.subplots(1, 2, figsize=(15, 7))

    axes[0].imshow(img_predicted)
    axes[0].set_title("\n".join(predicted_labels), fontsize=14, wrap=True)
    axes[0].axis("off")

    axes[1].imshow(img_actual)
    axes[1].set_title("\n".join(actual_labels_names), fontsize=14, wrap=True)
    axes[1].axis("off")

    plt.show()