Navigeer naar deze notebook op GitHub: book
Via bovenstaande link kan je deze notebook openen in Google Colaboratory. In die omgeving kunnen we gebruik maken van gratis quota voor GPUs (en TPUs). GPU acceleratie is hier sterk aanbevolen voor zowel model training als model inference.
Colab setup¶
Als je de notebook voor het eerst opent op Colab, kies je in het menu rechts bovenaan
Connect > Change runtime type: Python 3 en T4 GPU. Pas nadat de GPU Runtime is opgestart ga je verder met onderstaande installatie van het ultralytics package.
⚠️ Bij deze waarschuwing
Warning: This notebook was not authored by Google selecteer je Run anyway.
!pip install ultralytics

import glob
import json
import os
import shutil
from pprint import pprint
import kagglehub
import matplotlib.pyplot as plt
import pandas as pd
import yaml
from PIL import Image, ImageDraw
from ultralytics import YOLO
# Enable TensorBoard logging
!yolo settings datasets_dir=/content/taco
!yolo settings tensorboard=True

⚠️ Om bovenstaande aanpassingen in werking te stellen moet je nu de notebook runtime herstarten
Dataset¶
# Fetch the TACO dataset (YOLO format) from Kaggle via kagglehub.
download_path = kagglehub.dataset_download("vencerlanz09/taco-dataset-yolo-format")
print(download_path)

# Work from a local copy instead of moving the cached download,
# which avoids root permission issues on the kagglehub cache dir.
destination = "./taco"
if not os.path.exists(destination):
    shutil.copytree(download_path, destination)
path = destination
print(path)

# Update the dataset metadata file (see taco#1.ipynb).
with open(path + "/data.yaml") as f:
    meta = yaml.safe_load(f)

# Point the split image paths at the local dataset copy.
meta["train"] = "./train/images"
meta["val"] = "./valid/images"
meta["test"] = "./test/images"

# Swap the "Bottle cap" and "Bottle" entries (see taco#1.ipynb —
# presumably the upstream names list has these two reversed relative
# to the annotations; verify against that notebook).
bottle_cap_idx = meta["names"].index("Bottle cap")
bottle_idx = meta["names"].index("Bottle")
meta["names"][bottle_cap_idx], meta["names"][bottle_idx] = (
    meta["names"][bottle_idx],
    meta["names"][bottle_cap_idx],
)

# Write the corrected metadata back to disk so training picks it up.
with open(path + "/data.yaml", "w") as f:
    yaml.dump(meta, f)
pprint(meta)
# Training hyper-parameters.
epochs = 30               # number of training epochs
batch_size = 32           # images per batch
imgsz = 640               # square input image size (pixels)
optimizer_type = "AdamW"  # AdamW optimizer (recommended for better regularization)
lr = 1e-4                 # initial learning rate
weight_decay = 1e-4       # L2 regularization strength
# Initialize a YOLOv8-small model from pre-trained weights.
# NOTE(review): the original comment said "YOLOv11", but "yolov8s.pt"
# is a v8 checkpoint; the duplicated second initialization was removed.
model = YOLO("yolov8s.pt")

# Uncomment to inspect the architecture:
# print(model)

# Start TensorBoard for live monitoring (next cell).
%load_ext tensorboard
%tensorboard --logdir /content/runs# Train model with Cosine Annealing learning rate scheduler
# Train the model with the hyper-parameters defined above.
model.train(
    data=path + "/data.yaml",
    epochs=epochs,
    batch=batch_size,
    imgsz=imgsz,
    optimizer=optimizer_type,
    lr0=lr,               # initial learning rate
    weight_decay=weight_decay,
    save=True,            # keep the best checkpoint
    save_period=1,        # save a checkpoint every epoch
    # val=True,           # evaluate on validation set each epoch
)

# Reload the best checkpoint and evaluate it on the validation split.
best_model = YOLO("./runs/detect/train/weights/best.pt")
val_results = best_model.val()
print("Best Validation Metrics from Best Model:")
print(f"Precision: {val_results.box.mp:.4f}")
print(f"Recall: {val_results.box.mr:.4f}")
print(f"mAP@50: {val_results.box.map50:.4f}")
print(f"mAP@50-95: {val_results.box.map:.4f}")
log file testing¶
# Load the per-epoch training log that Ultralytics writes during training.
log_data = pd.read_csv("./runs/detect/train/results.csv")

# Quick sanity check: column names and the first few rows.
print(log_data.columns)
print(log_data.head())
Graphs¶
# Loss columns to clean and plot.
loss_cols = [
    "train/box_loss",
    "train/cls_loss",
    "train/dfl_loss",
    "val/box_loss",
    "val/cls_loss",
    "val/dfl_loss",
]

# Coerce everything to numeric; unparsable entries become NaN.
log_data["epoch"] = pd.to_numeric(log_data["epoch"], errors="coerce")
for col in loss_cols:
    log_data[col] = pd.to_numeric(log_data[col], errors="coerce")

# Drop incomplete rows BEFORE the int cast: the original called
# astype(int) first, which raises if coercion produced any NaN epoch.
log_data = log_data.dropna(subset=["epoch"] + loss_cols)
log_data["epoch"] = log_data["epoch"].astype(int)

# Plot training (solid) and validation (dashed) losses over epochs.
plt.figure(figsize=(12, 8))
plot_styles = [
    ("train/box_loss", "Train Box Loss", "-", "o"),
    ("train/cls_loss", "Train Class Loss", "-", "x"),
    ("train/dfl_loss", "Train DFL Loss", "-", "s"),
    ("val/box_loss", "Val Box Loss", "--", "o"),
    ("val/cls_loss", "Val Class Loss", "--", "x"),
    ("val/dfl_loss", "Val DFL Loss", "--", "s"),
]
for col, label, linestyle, marker in plot_styles:
    plt.plot(
        log_data["epoch"],
        log_data[col],
        label=label,
        linestyle=linestyle,
        marker=marker,
    )

# Customize the plot.
plt.title("Training and Validation Losses over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.xticks(log_data["epoch"])  # ensure epoch ticks are shown as integers
plt.legend(loc="upper right")
plt.show()
Validation results¶
# Evaluate the (last-trained) model on the validation set.
results = model.val()
print(f"Validation Results: {results}")
print("Validation Results:")
print("Mean Precision:", results.box.mp)  # Mean Precision
print("Mean Recall:", results.box.mr)  # Mean Recall
print("mAP 50:", results.box.map50)  # Mean Average Precision at IoU 0.5
print("mAP 50-95:", results.box.map)  # Mean Average Precision at IoU 0.5-0.95
Validation Metrics plot¶
# Scalar metric values from the validation results dict.
precision = results.results_dict["metrics/precision(B)"]
recall = results.results_dict["metrics/recall(B)"]
map50 = results.results_dict["metrics/mAP50(B)"]
map50_95 = results.results_dict["metrics/mAP50-95(B)"]

# Bar-chart snapshot of the headline metrics.
metrics = ["Precision", "Recall", "mAP50", "mAP50-95"]
values = [precision, recall, map50, map50_95]
plt.figure(figsize=(8, 6))
plt.bar(metrics, values, color=["b", "r", "g", "purple"])
plt.title("Model Evaluation Metrics")
plt.ylabel("Values")
plt.show()
# Evaluate the best checkpoint on the held-out test split.
best_model = YOLO("./runs/detect/train/weights/best.pt")
# Use the same updated data.yaml as training: the original passed the bare
# "data.yaml", which resolves against the working directory instead of the
# local dataset copy (and misses the metadata fixes made above).
test_results = best_model.val(data=path + "/data.yaml", split="test")

# Print test metrics
print(f"Test Precision: {test_results.box.mp:.4f}")
print(f"Test Recall: {test_results.box.mr:.4f}")
print(f"Test mAP@50: {test_results.box.map50:.4f}")
print(f"Test mAP@50-95: {test_results.box.map:.4f}")
def parse_annotation(annotation_path):
    """Parse a YOLO-style annotation file.

    Each non-empty line has the form:
        class_id x_center y_center width height
    with box coordinates normalized to [0, 1].

    Args:
        annotation_path: Path to the .txt annotation file.

    Returns:
        A ``(labels, boxes)`` tuple: ``labels`` is a list of int class IDs,
        ``boxes`` a parallel list of ``[x_center, y_center, width, height]``
        float lists. Both lists are empty when the file is missing.
    """
    if not os.path.exists(annotation_path):
        print(f"Annotation file {annotation_path} not found.")
        return [], []  # Return empty lists if annotation file is missing

    labels = []
    boxes = []
    with open(annotation_path, "r") as file:
        for line in file:
            parts = line.strip().split()
            if not parts:
                # Skip blank lines — the original raised IndexError on them.
                continue
            labels.append(int(parts[0]))  # Class ID
            boxes.append([float(x) for x in parts[1:]])  # [x_c, y_c, w, h]
    return labels, boxes
# Load the best trained weights for inference.
model = YOLO("./runs/detect/train/weights/best.pt")

# Locations of the test images and their YOLO-format label files.
test_image_dir = path + "/test/images/"
test_label_dir = path + "/test/labels/"

# Collect the test images (adjust the pattern if another extension is used).
test_images = glob.glob(os.path.join(test_image_dir, "*.jpg"))

# Directory where annotated inference results are written.
output_dir = "./inference_results/"
os.makedirs(output_dir, exist_ok=True)

# TACO class names, index-aligned with the model's integer class IDs.
class_names = [
    "Aluminium foil",
    "Bottle cap",
    "Bottle",
    "Broken glass",
    "Can",
    "Carton",
    "Cigarette",
    "Cup",
    "Lid",
    "Other litter",
    "Other plastic",
    "Paper",
    "Plastic bag - wrapper",
    "Plastic container",
    "Pop tab",
    "Straw",
    "Styrofoam piece",
    "Unlabeled litter",
]
# Run inference on every test image and display the model's predictions
# side by side with the ground-truth annotations.
for img_path in test_images:
    # Matching YOLO-format annotation file for this image.
    annotation_path = os.path.join(
        test_label_dir, os.path.basename(img_path).replace(".jpg", ".txt").replace(".JPG", ".txt")
    )

    # Run the detector without verbose output.
    results = model(img_path, verbose=False)

    # Ground-truth class IDs and normalized boxes.
    actual_labels, actual_boxes = parse_annotation(annotation_path)
    actual_labels_names = [class_names[label] for label in actual_labels]

    # Save the rendered prediction image produced by Ultralytics.
    img_name = os.path.basename(img_path)
    result_img_path = os.path.join(output_dir, img_name)
    results[0].save(result_img_path)

    # Extract predicted labels and NORMALIZED boxes. The original used
    # .xywh (pixel coordinates) but the drawing code below multiplies by
    # the image size, which expects normalized values — .xywhn matches it.
    if results[0].boxes is None or len(results[0].boxes.cls) == 0:
        predicted_labels = ["No prediction"]
        predicted_boxes = []
    else:
        predicted_labels = [results[0].names[int(cls)] for cls in results[0].boxes.cls]
        predicted_boxes = results[0].boxes.xywhn.cpu().numpy()

    # Open the rendered prediction image and the original for ground truth.
    img_predicted = Image.open(result_img_path)
    img_actual = Image.open(img_path)
    draw_predicted = ImageDraw.Draw(img_predicted)
    draw_actual = ImageDraw.Draw(img_actual)

    img_width, img_height = img_predicted.size

    # Draw predicted bounding boxes (blue) on the prediction image.
    if len(predicted_boxes) == 0:
        draw_predicted.text((10, 10), "No prediction", fill="red")
    else:
        for i, box in enumerate(predicted_boxes):
            x_center, y_center, width, height = box
            # Convert normalized center/size to pixel corner coordinates.
            x1 = int((x_center - width / 2) * img_width)
            y1 = int((y_center - height / 2) * img_height)
            x2 = int((x_center + width / 2) * img_width)
            y2 = int((y_center + height / 2) * img_height)
            draw_predicted.rectangle([x1, y1, x2, y2], outline="blue", width=2)
            draw_predicted.text(
                (x1, y1),
                predicted_labels[i] if i < len(predicted_labels) else "Unknown",
                fill="blue",
            )

    # Draw ground-truth bounding boxes (green) on the original image.
    # NOTE(review): assumes the rendered and original images share the
    # same dimensions — confirm if Ultralytics ever resizes the output.
    for i, box in enumerate(actual_boxes):
        x_center, y_center, width, height = box
        x1 = int((x_center - width / 2) * img_width)
        y1 = int((y_center - height / 2) * img_height)
        x2 = int((x_center + width / 2) * img_width)
        y2 = int((y_center + height / 2) * img_height)
        draw_actual.rectangle([x1, y1, x2, y2], outline="green", width=2)
        draw_actual.text((x1, y1), actual_labels_names[i], fill="white")

    # Display prediction and ground truth side by side.
    fig, axes = plt.subplots(1, 2, figsize=(15, 7))
    axes[0].imshow(img_predicted)
    axes[0].set_title("\n".join(predicted_labels), fontsize=14, wrap=True)
    axes[0].axis("off")
    axes[1].imshow(img_actual)
    axes[1].set_title("\n".join(actual_labels_names), fontsize=14, wrap=True)
    axes[1].axis("off")
    plt.show()