Complete YOLO Object Detection Code Guide
1. Introduction to YOLO
YOLO (You Only Look Once) is a state-of-the-art real-time object detection algorithm that revolutionized computer vision by treating object detection as a single regression problem. Unlike traditional methods that apply classifiers to different parts of an image, YOLO looks at the entire image once and predicts bounding boxes and class probabilities directly.
Key Advantages:
- Speed: Real-time detection (30+ FPS)
- Global Context: Sees entire image during training and testing
- Unified Architecture: Single neural network for end-to-end training
- Versatility: Works well across different object types
YOLO Evolution:
- YOLOv1 (2016): Original paper, 45 FPS
- YOLOv2/YOLO9000 (2016): Better accuracy, 40+ FPS
- YOLOv3 (2018): Multi-scale detection, Darknet-53
- YOLOv4 (2020): Improved accuracy and speed
- YOLOv5 (2020): PyTorch implementation, user-friendly
- YOLOv8 (2023): Latest Ultralytics version, best performance
2. YOLO Architecture Overview
Core Concept
YOLO divides an image into an S×S grid. Each grid cell predicts:
- B bounding boxes, each described by (x, y, width, height, confidence)
- C class probabilities
Network Architecture (YOLOv8)
Input Image (640×640×3)
↓
Backbone (CSPDarknet53)
↓
Neck (PANet)
↓
Head (Detection layers)
↓
Output (Predictions)
Loss Function Components:
- Localization Loss: Bounding box coordinate errors
- Confidence Loss: Object presence confidence
- Classification Loss: Class prediction errors
3. Setting Up the Environment
Prerequisites
# Python 3.8+
python --version
# Create virtual environment
python -m venv yolo_env
source yolo_env/bin/activate # Linux/Mac
# or
yolo_env\Scripts\activate # Windows
Install Dependencies
# Install PyTorch (check pytorch.org for your system)
pip install torch torchvision torchaudio
# Install Ultralytics YOLOv8
pip install ultralytics
# Additional dependencies
pip install opencv-python pillow matplotlib numpy pandas
pip install jupyter notebook # For interactive development
Verify Installation
import torch
import ultralytics
from ultralytics import YOLO
import cv2
import numpy as np
# Report the installed library versions and whether PyTorch can see a CUDA device
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Ultralytics version: {ultralytics.__version__}")
4. YOLOv8 Implementation
Basic Object Detection
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
# Load pre-trained model (shared by the detection helpers below)
model = YOLO('yolov8n.pt')  # nano version for speed
# Larger checkpoints can be substituted for the nano one:
# model = YOLO('yolov8s.pt')  # small
# model = YOLO('yolov8m.pt')  # medium
# model = YOLO('yolov8l.pt')  # large
# model = YOLO('yolov8x.pt')  # extra large
# Single image inference
def detect_objects(image_path):
    """
    Detect objects in a single image and draw them onto an RGB copy of it.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        Tuple ``(img_rgb, results)``: the RGB image with boxes and labels
        drawn on it, and the result list returned by the model.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    results = model(image_path)

    # Load the image once; cv2.imread returns None instead of raising
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert for matplotlib display

    # Process results
    for result in results:
        # Get bounding boxes, confidence scores, and class IDs
        boxes = result.boxes.xyxy.cpu().numpy()  # x1, y1, x2, y2
        confidences = result.boxes.conf.cpu().numpy()
        class_ids = result.boxes.cls.cpu().numpy()

        # Draw bounding boxes
        for box, conf, cls_id in zip(boxes, confidences, class_ids):
            x1, y1, x2, y2 = map(int, box)
            class_name = model.names[int(cls_id)]

            # Draw rectangle and label (label sits 10 px above the box)
            cv2.rectangle(img_rgb, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(img_rgb, f'{class_name}: {conf:.2f}',
                        (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return img_rgb, results
# Example usage
image_path = 'path/to/your/image.jpg'
detected_img, results = detect_objects(image_path)

# Display results
plt.figure(figsize=(12, 8))
plt.imshow(detected_img)
plt.axis('off')
plt.title('YOLO Object Detection Results')
plt.show()
Video Processing
def process_video(video_path, output_path=None):
    """
    Process video for object detection

    Every frame is run through the model. Annotated frames are written to
    ``output_path`` when it is given; otherwise they are shown in a window
    until the video ends or 'q' is pressed.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.
        output_path: Optional path of the annotated output video.

    Raises:
        IOError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")

    out = None
    try:
        # Get video properties; some sources report 0 FPS, so fall back to 30
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Setup video writer if output path provided
        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run YOLO detection
            results = model(frame)

            # Draw results on frame
            annotated_frame = results[0].plot()

            # Save or display frame
            if out is not None:
                out.write(annotated_frame)
            else:
                cv2.imshow('YOLO Detection', annotated_frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release handles even when detection raises mid-stream
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
# Example usage
process_video('input_video.mp4', 'output_video.mp4')
Real-time Webcam Detection
def real_time_detection():
    """
    Real-time object detection from webcam

    Shows annotated frames in a window until the camera stops delivering
    frames or 'q' is pressed.

    Raises:
        IOError: If the default camera cannot be opened.
    """
    cap = cv2.VideoCapture(0)  # Use 0 for default camera
    if not cap.isOpened():
        raise IOError("Could not open default camera (index 0)")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run YOLO detection
            results = model(frame)

            # Draw results
            annotated_frame = results[0].plot()

            # Display frame
            cv2.imshow('Real-time YOLO Detection', annotated_frame)

            # Exit on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera even if detection raises
        cap.release()
        cv2.destroyAllWindows()
# Start real-time detection (uses camera index 0; press 'q' in the window to stop)
real_time_detection()
5. Custom Dataset Training
Dataset Preparation
import os
import shutil
from pathlib import Path
def create_dataset_structure(base_path):
    """
    Create YOLO dataset structure

    Creates ``images`` and ``labels`` folders for the train, val and test
    splits below ``base_path``. Existing folders are left untouched.

    Args:
        base_path: Dataset root, as a string or ``Path``.
    """
    paths = [
        'train/images',
        'train/labels',
        'val/images',
        'val/labels',
        'test/images',
        'test/labels',
    ]
    # Convert first so that plain string roots work with the / operator
    root = Path(base_path)
    for path in paths:
        (root / path).mkdir(parents=True, exist_ok=True)

    print(f"Dataset structure created at {base_path}")
# Create dataset structure
dataset_path = Path('custom_dataset')
create_dataset_structure(dataset_path)
Data Configuration File
# data.yaml
train: ../custom_dataset/train/images
val: ../custom_dataset/val/images
test: ../custom_dataset/test/images
nc: 3 # number of classes
names: ['person', 'car', 'bicycle'] # class names
Training Script
from ultralytics import YOLO
import torch
def train_custom_model():
    """
    Train YOLO model on custom dataset

    Starts from the pre-trained ``yolov8n.pt`` weights and trains on the
    dataset described by ``data.yaml``.

    Returns:
        The value returned by ``model.train``.
    """
    # Load a pre-trained model
    model = YOLO('yolov8n.pt')

    # Train the model
    results = model.train(
        data='data.yaml',  # dataset config file
        epochs=100,  # number of training epochs
        imgsz=640,  # image size
        batch=16,  # batch size (the Ultralytics argument is `batch`, not `batch_size`)
        device='cuda' if torch.cuda.is_available() else 'cpu',
        workers=4,  # number of data loader workers
        project='runs/train',  # project directory
        name='custom_model',  # experiment name
        save=True,  # save model checkpoints
        save_period=10,  # save checkpoint every N epochs
        cache=True,  # cache images for faster training
        augment=True,  # use data augmentation
        lr0=0.01,  # initial learning rate
        weight_decay=0.0005,  # weight decay
        warmup_epochs=3,  # warmup epochs
        patience=50,  # early stopping patience
        verbose=True,  # verbose output
    )
    return results
# Start training
if __name__ == "__main__":
    results = train_custom_model()
    print("Training completed!")
Data Augmentation
# Custom augmentation configuration
# NOTE(review): the keys look like Ultralytics training hyperparameter names,
# so the dict can presumably be unpacked into model.train(**augmentation_config) - confirm
augmentation_config = {
    'hsv_h': 0.015,  # HSV-Hue augmentation
    'hsv_s': 0.7,  # HSV-Saturation augmentation
    'hsv_v': 0.4,  # HSV-Value augmentation
    'degrees': 10.0,  # rotation degrees
    'translate': 0.1,  # translation
    'scale': 0.5,  # scale
    'shear': 2.0,  # shear degrees
    'perspective': 0.0,  # perspective
    'flipud': 0.0,  # flip up-down probability
    'fliplr': 0.5,  # flip left-right probability
    'mosaic': 1.0,  # mosaic probability
    'mixup': 0.1,  # mixup probability
    'copy_paste': 0.1,  # copy-paste probability
}
6. Advanced Features
Model Validation and Metrics
def validate_model(model_path, data_config):
    """
    Validate trained model and get metrics

    Args:
        model_path: Path of the trained weights file.
        data_config: Path of the dataset YAML file.

    Returns:
        The metrics object returned by ``model.val``.
    """
    model = YOLO(model_path)

    # Validate the model
    results = model.val(
        data=data_config,
        imgsz=640,
        batch=16,  # the Ultralytics argument is `batch`, not `batch_size`
        device='cuda' if torch.cuda.is_available() else 'cpu',
        plots=True,
        save_json=True,
    )

    # Print metrics
    print(f"mAP50: {results.box.map50:.4f}")
    print(f"mAP50-95: {results.box.map:.4f}")
    print(f"Precision: {results.box.mp:.4f}")
    print(f"Recall: {results.box.mr:.4f}")
    return results
# Validate model
validation_results = validate_model('runs/train/custom_model/weights/best.pt', 'data.yaml')
Model Export and Optimization
def export_model(model_path, export_format='onnx'):
    """
    Export model to different formats

    Only the requested format is exported; building a dict of
    ``model.export(...)`` calls would run every export up front.

    Args:
        model_path: Path of the weights file to export.
        export_format: One of 'onnx', 'torchscript', 'tflite', 'tensorrt',
            'openvino' or 'coreml'.

    Returns:
        The value returned by ``model.export`` for the chosen format.

    Raises:
        KeyError: If ``export_format`` is not one of the supported names.
    """
    # Export options: friendly name -> value of the `format` argument
    export_formats = {
        'onnx': 'onnx',
        'torchscript': 'torchscript',
        'tflite': 'tflite',
        'tensorrt': 'engine',  # TensorRT
        'openvino': 'openvino',
        'coreml': 'coreml',
    }
    # Look up before loading the model so a bad name fails fast
    target_format = export_formats[export_format]

    model = YOLO(model_path)
    return model.export(format=target_format)
# Export to ONNX
onnx_model = export_model('runs/train/custom_model/weights/best.pt', 'onnx')
Hyperparameter Tuning
def hyperparameter_tuning():
    """
    Automated hyperparameter tuning

    Runs ``model.tune`` on ``data.yaml`` starting from the pre-trained
    ``yolov8n.pt`` weights.
    """
    model = YOLO('yolov8n.pt')

    # Tune hyperparameters
    model.tune(
        data='data.yaml',
        epochs=30,
        iterations=300,
        optimizer='AdamW',
        plots=True,
        save=True,
    )
# Run hyperparameter tuning (configured above for 300 iterations of 30 epochs each)
hyperparameter_tuning()
7. Performance Optimization
Multi-GPU Training
def multi_gpu_training():
    """
    Training with multiple GPUs

    Uses every CUDA device PyTorch reports instead of assuming exactly four.

    Returns:
        The value returned by ``model.train``, or ``None`` when fewer than
        two GPUs are available.
    """
    gpu_count = torch.cuda.device_count()
    if gpu_count < 2:
        print("Multiple GPUs not available")
        return None

    model = YOLO('yolov8n.pt')

    # Multi-GPU training
    return model.train(
        data='data.yaml',
        epochs=100,
        imgsz=640,
        batch=8 * gpu_count,  # 8 images per GPU (32 in total on four GPUs)
        device=list(range(gpu_count)),  # GPU IDs 0..gpu_count-1
        workers=8,
    )
# Run multi-GPU training (only prints a message unless more than one GPU is visible)
multi_gpu_training()
Inference Optimization
import time
import numpy as np
def benchmark_model(model_path, test_images):
    """
    Benchmark model performance

    Args:
        model_path: Path of the weights file to benchmark.
        test_images: Non-empty sequence of image paths to time.

    Returns:
        Tuple ``(avg_time, fps)``: mean seconds per image and its reciprocal.

    Raises:
        ValueError: If ``test_images`` is empty.
    """
    test_images = list(test_images)
    if not test_images:
        raise ValueError("test_images must contain at least one image path")

    model = YOLO(model_path)

    # Warm up on a real test image so the first timed call is not an outlier
    for _ in range(10):
        model(test_images[0])

    # Benchmark
    times = []
    for image_path in test_images:
        start_time = time.perf_counter()  # monotonic, high-resolution clock
        model(image_path)
        times.append(time.perf_counter() - start_time)

    avg_time = float(np.mean(times))
    fps = 1 / avg_time

    print(f"Average inference time: {avg_time:.4f} seconds")
    print(f"FPS: {fps:.2f}")
    return avg_time, fps
# Benchmark your model
test_images = ['test1.jpg', 'test2.jpg', 'test3.jpg']
avg_time, fps = benchmark_model('yolov8n.pt', test_images)
Memory Optimization
def memory_efficient_inference(model_path, image_path):
    """
    Memory efficient inference for large images

    Images larger than one tile are split into overlapping tiles, each tile
    is run through the model, and box coordinates are shifted by the tile
    offset.

    Args:
        model_path: Path of the weights file.
        image_path: Path of the image to analyse.

    Returns:
        List of model results: one entry for a small image, otherwise the
        results of every tile in row-major order.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    model = YOLO(model_path)

    # Process image in tiles for large images
    def process_large_image(image_path, tile_size=640, overlap=0.1):
        img = cv2.imread(image_path)
        if img is None:  # cv2.imread signals failure with None
            raise FileNotFoundError(f"Could not read image: {image_path}")
        h, w = img.shape[:2]

        if h <= tile_size and w <= tile_size:
            # Small image, process normally
            return model(img)

        # Split into tiles
        results = []
        # Keep the stride positive even for overlap values close to 1
        step = max(1, int(tile_size * (1 - overlap)))

        for y in range(0, h, step):
            for x in range(0, w, step):
                # Extract tile (edge tiles may be smaller than tile_size)
                tile = img[y:y + tile_size, x:x + tile_size]

                # Process tile
                tile_results = model(tile)

                # Adjust coordinates
                # NOTE(review): this relies on in-place edits of boxes.xyxy
                # reaching the stored detections - confirm with the installed
                # Ultralytics/PyTorch versions.
                for result in tile_results:
                    if result.boxes is not None:
                        result.boxes.xyxy[:, [0, 2]] += x
                        result.boxes.xyxy[:, [1, 3]] += y

                # Objects inside an overlap region can appear once per tile;
                # the per-tile results are not merged here.
                results.extend(tile_results)
        return results

    return process_large_image(image_path)
# Process large image
large_image_results = memory_efficient_inference('yolov8n.pt', 'large_image.jpg')
8. Real-world Applications
Security Camera System
class SecuritySystem:
    """Watch camera feeds and record an alert whenever a person is detected."""

    def __init__(self, model_path, camera_sources):
        """
        Args:
            model_path: Path of the weights file.
            camera_sources: Mapping of camera id to a ``cv2.VideoCapture`` source.
        """
        self.model = YOLO(model_path)
        self.cameras = camera_sources
        self.alerts = []  # alert dicts appended by trigger_alert

    def monitor_cameras(self):
        """
        Monitor multiple camera feeds

        Cameras are handled one after another: each feed is followed until
        it stops delivering frames or 'q' is pressed, then the next one starts.
        """
        for camera_id, source in self.cameras.items():
            cap = cv2.VideoCapture(source)
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # Detect objects
                    results = self.model(frame)

                    # Check for specific objects (e.g., person)
                    for result in results:
                        if result.boxes is not None:
                            classes = result.boxes.cls.cpu().numpy()
                            if 0 in classes:  # Person detected
                                self.trigger_alert(camera_id, frame)

                    # Display frame
                    annotated_frame = results[0].plot()
                    cv2.imshow(f'Camera {camera_id}', annotated_frame)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Release the feed even if detection raises
                cap.release()
                cv2.destroyAllWindows()

    def trigger_alert(self, camera_id, frame):
        """
        Trigger security alert

        Appends a dict with the camera id, a local timestamp and the frame
        to ``self.alerts`` and prints a notice.
        """
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
        alert = {
            'camera_id': camera_id,
            'timestamp': timestamp,
            'frame': frame,
        }
        self.alerts.append(alert)
        print(f"ALERT: Person detected on Camera {camera_id} at {timestamp}")
# Setup security system
cameras = {
    'cam1': 0,  # Webcam
    'cam2': 'rtsp://camera2/stream',  # IP camera
}
security = SecuritySystem('yolov8n.pt', cameras)
Traffic Monitoring
class TrafficMonitor:
    """Count vehicles per frame in a video and track the busiest frame."""

    # Class ids counted as vehicles: car, motorcycle, bus, truck
    VEHICLE_CLASS_IDS = frozenset({2, 3, 5, 7})

    def __init__(self, model_path):
        """
        Args:
            model_path: Path of the weights file.
        """
        self.model = YOLO(model_path)
        self.vehicle_count = 0  # highest per-frame vehicle count seen so far
        self.speed_violations = []

    def analyze_traffic(self, video_path):
        """
        Analyze traffic from video feed

        Shows each annotated frame with the current vehicle count and prints
        the maximum count once the video ends or 'q' is pressed.
        """
        cap = cv2.VideoCapture(video_path)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Detect vehicles
                results = self.model(frame)

                # Count vehicles
                current_vehicles = 0
                for result in results:
                    if result.boxes is not None:
                        classes = result.boxes.cls.cpu().numpy()
                        # int() so float class ids compare against the int set
                        current_vehicles += sum(
                            1 for cls in classes
                            if int(cls) in self.VEHICLE_CLASS_IDS
                        )

                self.vehicle_count = max(self.vehicle_count, current_vehicles)

                # Display results
                annotated_frame = results[0].plot()
                cv2.putText(annotated_frame, f'Vehicles: {current_vehicles}',
                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                cv2.imshow('Traffic Monitor', annotated_frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the video even if detection raises
            cap.release()
            cv2.destroyAllWindows()

        print(f"Maximum vehicles detected: {self.vehicle_count}")
# Monitor traffic
traffic_monitor = TrafficMonitor('yolov8n.pt')
traffic_monitor.analyze_traffic('traffic_video.mp4')
Quality Control System
class QualityControl:
    """Inspect product images with a detection model and log the outcome."""

    def __init__(self, model_path):
        """
        Args:
            model_path: Path of the weights file.
        """
        self.model = YOLO(model_path)
        self.defect_log = []  # one dict per inspected image

    def inspect_products(self, image_paths, conf_threshold=0.5):
        """
        Inspect products for defects

        Args:
            image_paths: Iterable of image paths to inspect.
            conf_threshold: Detections must score strictly above this value
                to count as a defect.

        Returns:
            ``self.defect_log``, extended by one entry per inspected image.
        """
        for image_path in image_paths:
            results = self.model(image_path)

            # Analyze results for defects
            defects_found = []
            for result in results:
                if result.boxes is not None:
                    classes = result.boxes.cls.cpu().numpy()
                    confidences = result.boxes.conf.cpu().numpy()

                    for cls, conf in zip(classes, confidences):
                        if conf > conf_threshold:  # Confidence threshold
                            defect_type = self.model.names[int(cls)]
                            defects_found.append(defect_type)

            # Log results
            inspection_result = {
                'image': image_path,
                'defects': defects_found,
                'status': 'FAIL' if defects_found else 'PASS',
                'timestamp': time.strftime("%Y-%m-%d %H:%M:%S"),
            }
            self.defect_log.append(inspection_result)
            print(f"Inspected {image_path}: {inspection_result['status']}")

        return self.defect_log
# Quality control inspection
qc = QualityControl('custom_defect_model.pt')
product_images = ['product1.jpg', 'product2.jpg', 'product3.jpg']
inspection_results = qc.inspect_products(product_images)
9. Best Practices and Tips
Performance Tips
- Choose the right model size: Use YOLOv8n for speed, YOLOv8x for accuracy
- Optimize image size: Use 640x640 for balance, smaller for speed
- Use appropriate batch size: Maximize GPU utilization
- Enable model compilation: Use TorchScript or TensorRT for production
- Implement model caching: Load models once and reuse
Training Tips
- Data quality over quantity: Focus on high-quality, diverse training data
- Proper data augmentation: Use appropriate augmentations for your domain
- Monitor training metrics: Watch for overfitting and adjust accordingly
- Use transfer learning: Start with pre-trained weights
- Regular validation: Validate on held-out data during training
Deployment Tips
- Model versioning: Keep track of model versions and performance
- A/B testing: Test different models in production
- Monitoring: Track inference time and accuracy in production
- Fallback mechanisms: Have backup models for critical applications
- Documentation: Document model performance and limitations
This comprehensive guide covers the essential aspects of working with YOLO for object detection. Start with the basic implementations and gradually explore advanced features as your needs grow. Remember to always validate your models thoroughly before deploying them in production environments.