MobileNetV2 PyTorch Docker Deployment Guide
This guide walks you through deploying a pre-trained MobileNetV2 model with PyTorch and Docker, exposing it as a REST API for image classification.
Project Structure
mobilenetv2-pytorch-docker/
├── app/
│ ├── __init__.py
│ ├── main.py
│ ├── model_handler.py
│ └── utils.py
├── requirements.txt
├── Dockerfile
├── docker-compose.yml
├── .dockerignore
└── README.md
1. Application Code
app/main.py - FastAPI Application
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import uvicorn
import io
from PIL import Image
import numpy as np
from .model_handler import MobileNetV2Handler
from .utils import preprocess_image, decode_predictions
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="MobileNetV2 PyTorch Image Classification API",
    description="Deploy MobileNetV2 using PyTorch for image classification",
    version="1.0.0"
)

# Initialize model handler
model_handler = MobileNetV2Handler()

@app.on_event("startup")
async def startup_event():
    """Load model on startup"""
    try:
        model_handler.load_model()
        logger.info("Model loaded successfully")
    except Exception as e:
        logger.error(f"Failed to load model: {e}")
        raise

@app.get("/")
async def root():
    return {"message": "MobileNetV2 PyTorch Classification API", "status": "running"}

@app.get("/health")
async def health_check():
    return {"status": "healthy", "model_loaded": model_handler.is_loaded()}

@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Predict image class using MobileNetV2"""
    if not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image")

    try:
        # Read and preprocess image
        image_data = await file.read()
        image = Image.open(io.BytesIO(image_data))

        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Preprocess for MobileNetV2
        processed_image = preprocess_image(image)

        # Make prediction
        predictions = model_handler.predict(processed_image)

        # Decode predictions
        decoded_predictions = decode_predictions(predictions, top=5)

        return JSONResponse(content={
            "predictions": decoded_predictions,
            "success": True
        })
    except Exception as e:
        logger.error(f"Prediction error: {e}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")

@app.post("/batch_predict")
async def batch_predict(files: list[UploadFile] = File(...)):
    """Batch prediction for multiple images"""
    if len(files) > 10:  # Limit batch size
        raise HTTPException(status_code=400, detail="Maximum 10 images allowed per batch")

    results = []

    for file in files:
        if not file.content_type.startswith("image/"):
            results.append({
                "filename": file.filename,
                "error": "File must be an image"
            })
            continue

        try:
            image_data = await file.read()
            image = Image.open(io.BytesIO(image_data))

            if image.mode != 'RGB':
                image = image.convert('RGB')

            processed_image = preprocess_image(image)
            predictions = model_handler.predict(processed_image)
            decoded_predictions = decode_predictions(predictions, top=3)

            results.append({
                "filename": file.filename,
                "predictions": decoded_predictions,
                "success": True
            })
        except Exception as e:
            results.append({
                "filename": file.filename,
                "error": str(e),
                "success": False
            })

    return JSONResponse(content={"results": results})

@app.get("/model_info")
async def model_info():
    """Get model information"""
    return {
        "model_name": "MobileNetV2",
        "framework": "PyTorch",
        "input_size": [224, 224],
        "num_classes": 1000,
        "pretrained": True
    }

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
app/model_handler.py - PyTorch Model Management
import torch
from torchvision import models
import numpy as np
import logging

logger = logging.getLogger(__name__)

class MobileNetV2Handler:
    def __init__(self):
        self.model = None
        self.device = None
        self._loaded = False

    def load_model(self):
        """Load pre-trained MobileNetV2 model"""
        try:
            logger.info("Loading MobileNetV2 PyTorch model...")

            # Determine device (CPU/GPU)
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            logger.info(f"Using device: {self.device}")

            # Load pre-trained MobileNetV2 (weights= replaces the
            # deprecated pretrained=True in current torchvision)
            self.model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
            self.model.eval()  # Set to evaluation mode
            self.model.to(self.device)

            # Warm up the model with a dummy prediction
            dummy_input = torch.randn(1, 3, 224, 224).to(self.device)
            with torch.no_grad():
                _ = self.model(dummy_input)

            self._loaded = True
            logger.info("Model loaded and warmed up successfully")
        except Exception as e:
            logger.error(f"Failed to load model: {e}")
            raise

    def predict(self, image_tensor):
        """Make prediction on preprocessed image tensor"""
        if not self._loaded:
            raise RuntimeError("Model not loaded")

        try:
            # Convert numpy input to a tensor if needed
            if isinstance(image_tensor, np.ndarray):
                image_tensor = torch.from_numpy(image_tensor)

            # Ensure tensor is on the correct device
            image_tensor = image_tensor.to(self.device)

            # Ensure batch dimension
            if len(image_tensor.shape) == 3:
                image_tensor = image_tensor.unsqueeze(0)

            # Make prediction
            with torch.no_grad():
                outputs = self.model(image_tensor)
                # Apply softmax to get probabilities
                probabilities = torch.nn.functional.softmax(outputs, dim=1)

            return probabilities.cpu().numpy()
        except Exception as e:
            logger.error(f"Prediction failed: {e}")
            raise

    def predict_batch(self, image_tensors):
        """Make batch predictions"""
        if not self._loaded:
            raise RuntimeError("Model not loaded")

        try:
            # Convert to tensor if numpy array
            if isinstance(image_tensors, np.ndarray):
                image_tensors = torch.from_numpy(image_tensors)

            image_tensors = image_tensors.to(self.device)

            # Make batch prediction
            with torch.no_grad():
                outputs = self.model(image_tensors)
                probabilities = torch.nn.functional.softmax(outputs, dim=1)

            return probabilities.cpu().numpy()
        except Exception as e:
            logger.error(f"Batch prediction failed: {e}")
            raise

    def is_loaded(self):
        """Check if model is loaded"""
        return self._loaded

    def get_device(self):
        """Get current device"""
        return str(self.device) if self.device else "not initialized"
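The handler also works on its own, which is handy for debugging outside the API. A minimal sketch, assuming some local image (sample.jpg is a placeholder path):

# Standalone check of the handler; run from the project root
from PIL import Image

from app.model_handler import MobileNetV2Handler
from app.utils import preprocess_image, decode_predictions

handler = MobileNetV2Handler()
handler.load_model()
print(handler.get_device())  # "cpu" or "cuda"

image = Image.open("sample.jpg").convert("RGB")  # placeholder path
probabilities = handler.predict(preprocess_image(image))  # shape (1, 1000)
for p in decode_predictions(probabilities, top=3):
    print(f'{p["class_name"]}: {p["confidence"]:.4f}')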
app/utils.py - Utility Functions
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
import os
import requests
import logging

logger = logging.getLogger(__name__)

# ImageNet class labels
IMAGENET_CLASSES_URL = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"

def get_imagenet_classes():
    """Download and cache ImageNet class labels"""
    try:
        if os.path.exists("imagenet_classes.txt"):
            with open("imagenet_classes.txt", "r") as f:
                classes = [line.strip() for line in f.readlines()]
        else:
            logger.info("Downloading ImageNet class labels...")
            response = requests.get(IMAGENET_CLASSES_URL, timeout=10)
            classes = response.text.strip().split('\n')

            # Cache the classes
            with open("imagenet_classes.txt", "w") as f:
                for class_name in classes:
                    f.write(f"{class_name}\n")

        return classes
    except Exception as e:
        logger.warning(f"Could not load ImageNet classes: {e}")
        return [f"class_{i}" for i in range(1000)]

# Load ImageNet classes
IMAGENET_CLASSES = get_imagenet_classes()

def preprocess_image(image: Image.Image, target_size=(224, 224)):
    """Preprocess image for MobileNetV2 PyTorch model"""
    try:
        # Define transforms
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(target_size),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],  # ImageNet normalization
                std=[0.229, 0.224, 0.225]
            )
        ])

        # Apply transforms
        image_tensor = transform(image)

        return image_tensor
    except Exception as e:
        raise ValueError(f"Image preprocessing failed: {e}")

def preprocess_batch(images: list, target_size=(224, 224)):
    """Preprocess batch of images"""
    try:
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(target_size),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])

        batch_tensors = []
        for image in images:
            if isinstance(image, str):  # If path
                image = Image.open(image).convert('RGB')
            elif not isinstance(image, Image.Image):
                raise ValueError("Invalid image type")

            tensor = transform(image)
            batch_tensors.append(tensor)

        return torch.stack(batch_tensors)
    except Exception as e:
        raise ValueError(f"Batch preprocessing failed: {e}")

def decode_predictions(predictions, top=5):
    """Decode model predictions to human-readable labels"""
    try:
        if isinstance(predictions, torch.Tensor):
            predictions = predictions.numpy()

        # Handle batch predictions (take first sample)
        if len(predictions.shape) > 1:
            predictions = predictions[0]

        # Get top-k indices
        top_indices = np.argsort(predictions)[-top:][::-1]

        # Format results
        results = []
        for idx in top_indices:
            confidence = float(predictions[idx])
            class_name = IMAGENET_CLASSES[idx] if idx < len(IMAGENET_CLASSES) else f"class_{idx}"

            results.append({
                "class_id": int(idx),
                "class_name": class_name,
                "confidence": confidence
            })

        return results
    except Exception as e:
        raise ValueError(f"Prediction decoding failed: {e}")

def validate_image(image_data):
    """Validate image data"""
    try:
        image = Image.open(image_data)
        return image.format in ['JPEG', 'PNG', 'BMP', 'TIFF', 'WEBP']
    except Exception:
        return False

def tensor_to_numpy(tensor):
    """Convert PyTorch tensor to numpy array"""
    if isinstance(tensor, torch.Tensor):
        return tensor.detach().cpu().numpy()
    return tensor

def numpy_to_tensor(array, device='cpu'):
    """Convert numpy array to PyTorch tensor"""
    if isinstance(array, np.ndarray):
        return torch.from_numpy(array).to(device)
    return array

class ModelProfiler:
    """Simple profiler for model performance"""
    def __init__(self):
        self.inference_times = []
        self.preprocessing_times = []

    def record_inference_time(self, time_ms):
        self.inference_times.append(time_ms)

    def record_preprocessing_time(self, time_ms):
        self.preprocessing_times.append(time_ms)

    def get_stats(self):
        if not self.inference_times:
            return {"message": "No inference data recorded"}

        return {
            "avg_inference_time_ms": np.mean(self.inference_times),
            "avg_preprocessing_time_ms": np.mean(self.preprocessing_times) if self.preprocessing_times else 0,
            "total_inferences": len(self.inference_times),
            "min_inference_time_ms": np.min(self.inference_times),
            "max_inference_time_ms": np.max(self.inference_times)
        }

# Global profiler instance
profiler = ModelProfiler()
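You can verify the preprocessing and decoding round-trip without running the model at all by decoding a synthetic probability vector. A quick sketch:

import numpy as np
from PIL import Image

from app.utils import preprocess_image, decode_predictions

# A solid-gray image stands in for a real upload
image = Image.new("RGB", (640, 480), color=(128, 128, 128))
tensor = preprocess_image(image)
print(tensor.shape)  # torch.Size([3, 224, 224])

# Synthetic "probabilities": class 281 dominates, so it should decode first
fake_probs = np.full(1000, 1e-4, dtype=np.float32)
fake_probs[281] = 0.9
print(decode_predictions(fake_probs, top=2))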
app/__init__.py
# Empty file to make app a Python package
2. Configuration Files
requirements.txt
fastapi==0.104.1
uvicorn[standard]==0.24.0
torch==2.1.0
torchvision==0.16.0
Pillow==10.1.0
python-multipart==0.0.6
numpy==1.24.3
requests==2.31.0
Dockerfile
# Use official Python runtime as base image
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    wget \
    curl \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements first for better caching
COPY requirements.txt .
# Install PyTorch CPU version (smaller image)
RUN pip install --no-cache-dir --upgrade pip && \
pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY app/ ./app/
# Create non-root user for security
RUN useradd --create-home --shell /bin/bash app && \
chown -R app:app /app
USER app
# Pre-download ImageNet classes
RUN python -c "from app.utils import get_imagenet_classes; get_imagenet_classes()"
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
CMD curl -f http://localhost:8000/health || exit 1
# Command to run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
Dockerfile.gpu (For GPU Support)
# Use NVIDIA PyTorch base image
FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
    curl \
    wget \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements first for better caching
COPY requirements.txt .
# Install additional dependencies
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY app/ ./app/
# Create non-root user for security
RUN useradd --create-home --shell /bin/bash app && \
chown -R app:app /app
USER app
# Pre-download ImageNet classes
RUN python -c "from app.utils import get_imagenet_classes; get_imagenet_classes()"
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
CMD curl -f http://localhost:8000/health || exit 1
# Command to run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
docker-compose.yml
version: '3.8'

services:
  mobilenetv2-pytorch-api:
    build: .
    ports:
      - "8000:8000"
    environment:
      - PYTHONPATH=/app
      - TORCH_HOME=/app/.torch
    volumes:
      - ./logs:/app/logs
      - torch_cache:/app/.torch
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 1G

  # GPU version (uncomment and modify as needed)
  # mobilenetv2-pytorch-gpu:
  #   build:
  #     context: .
  #     dockerfile: Dockerfile.gpu
  #   ports:
  #     - "8000:8000"
  #   environment:
  #     - PYTHONPATH=/app
  #     - TORCH_HOME=/app/.torch
  #     - NVIDIA_VISIBLE_DEVICES=all
  #   volumes:
  #     - ./logs:/app/logs
  #     - torch_cache:/app/.torch
  #   restart: unless-stopped
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]

  # Optional: Add nginx for production
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - mobilenetv2-pytorch-api
    restart: unless-stopped

volumes:
  torch_cache:
.dockerignore
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env/
pip-log.txt
pip-delete-this-directory.txt
.git
.gitignore
README.md
.pytest_cache
.coverage
.nyc_output
node_modules
.DS_Store
*.log
logs/
*.pth
*.pt
.torch/
nginx.conf (Optional - for production)
events {
    worker_connections 1024;
}

http {
    upstream api {
        server mobilenetv2-pytorch-api:8000;
    }

    server {
        listen 80;
        client_max_body_size 10M;

        location / {
            proxy_pass http://api;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_read_timeout 300;
            proxy_connect_timeout 300;
            proxy_send_timeout 300;
        }
    }
}
3. Deployment Commands
Build and Run with Docker
# Build the CPU image
docker build -t mobilenetv2-pytorch-api .
# Build the GPU image (if you have NVIDIA GPU)
docker build -f Dockerfile.gpu -t mobilenetv2-pytorch-gpu .
# Run CPU version
docker run -p 8000:8000 mobilenetv2-pytorch-api
# Run GPU version
docker run --gpus all -p 8000:8000 mobilenetv2-pytorch-gpu
# Run with environment variables
docker run -p 8000:8000 -e TORCH_HOME=/tmp/.torch mobilenetv2-pytorch-api
Using Docker Compose
# Build and start services
docker-compose up --build
# Run in background
docker-compose up -d
# View logs
docker-compose logs -f mobilenetv2-pytorch-api
# Stop services
docker-compose down
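On first start the container downloads the MobileNetV2 weights, so it can take a minute before /health reports the model as loaded. A small polling sketch you can run before sending traffic (the timeout values are arbitrary):

import time
import requests

def wait_for_api(url="http://localhost:8000/health", timeout_s=120):
    """Poll the health endpoint until the model is loaded or we give up."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            if requests.get(url, timeout=2).json().get("model_loaded"):
                return True
        except requests.RequestException:
            pass  # container still starting
        time.sleep(2)
    return False

if __name__ == "__main__":
    print("ready" if wait_for_api() else "API did not become ready in time")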
4. Usage Examples
Test the API
# Health check
curl http://localhost:8000/health
# Model info
curl http://localhost:8000/model_info
# Single image prediction
curl -X POST "http://localhost:8000/predict" \
-H "accept: application/json" \
-H "Content-Type: multipart/form-data" \
-F "file=@path/to/your/image.jpg"
# Batch prediction
curl -X POST "http://localhost:8000/batch_predict" \
-H "accept: application/json" \
-H "Content-Type: multipart/form-data" \
-F "files=@image1.jpg" \
-F "files=@image2.jpg"
Python Client Example
import requests
import json

# Single prediction
def predict_image(image_path, api_url="http://localhost:8000"):
    url = f"{api_url}/predict"
    with open(image_path, "rb") as f:
        response = requests.post(url, files={"file": f})
    return response.json()

# Batch prediction
def predict_batch(image_paths, api_url="http://localhost:8000"):
    url = f"{api_url}/batch_predict"
    files = [("files", open(path, "rb")) for path in image_paths]
    try:
        response = requests.post(url, files=files)
    finally:
        for _, f in files:
            f.close()
    return response.json()

# Usage
result = predict_image("cat.jpg")
print(json.dumps(result, indent=2))

batch_result = predict_batch(["cat.jpg", "dog.jpg"])
print(json.dumps(batch_result, indent=2))
Response Format
{
  "predictions": [
    {
      "class_id": 281,
      "class_name": "tabby",
      "confidence": 0.8234567
    },
    {
      "class_id": 282,
      "class_name": "tiger_cat",
      "confidence": 0.1234567
    }
  ],
  "success": true
}
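Client code often only needs the top label. One way to pull it out of this payload, reusing the predict_image helper above:

# Extract the top-1 label and confidence from a /predict response
result = predict_image("cat.jpg")
if result.get("success"):
    top = result["predictions"][0]  # predictions are sorted by confidence
    print(f'{top["class_name"]} ({top["confidence"]:.1%})')
else:
    print("Prediction failed:", result)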
5. Performance Optimization
Model Optimization
# Add to model_handler.py for optimization
import torch.jit

class OptimizedMobileNetV2Handler(MobileNetV2Handler):
    def __init__(self, use_jit=True, use_half_precision=False):
        super().__init__()
        self.use_jit = use_jit
        self.use_half_precision = use_half_precision

    def load_model(self):
        super().load_model()

        if self.use_jit:
            # TorchScript compilation for faster inference
            self.model = torch.jit.script(self.model)
            logger.info("Model compiled with TorchScript")

        if self.use_half_precision and self.device.type == 'cuda':
            # Half precision for GPU; note that input tensors must then
            # also be converted with .half() before calling predict()
            self.model = self.model.half()
            logger.info("Model converted to half precision")
Docker Optimization
# Multi-stage build for smaller image
FROM python:3.11-slim as builder
WORKDIR /app
COPY requirements.txt .
RUN pip install --user --no-cache-dir -r requirements.txt
FROM python:3.11-slim
WORKDIR /app
COPY --from=builder /root/.local /root/.local
COPY app/ ./app/
# Make sure scripts in .local are usable
ENV PATH=/root/.local/bin:$PATH
EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
6. Monitoring and Logging
Enhanced Logging
# Add to main.py
import time
from app.utils import profiler

@app.middleware("http")
async def log_requests(request, call_next):
    start_time = time.time()
    response = await call_next(request)
    process_time = (time.time() - start_time) * 1000

    logger.info(f"{request.method} {request.url.path} - {process_time:.2f}ms")

    if request.url.path == "/predict":
        profiler.record_inference_time(process_time)

    return response

@app.get("/stats")
async def get_stats():
    """Get performance statistics"""
    return profiler.get_stats()
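With the middleware in place, /stats aggregates timings across requests. A quick way to exercise it against a running container, assuming a local cat.jpg:

import requests

# Fire a few predictions, then read back the aggregated timings
for _ in range(5):
    with open("cat.jpg", "rb") as f:
        requests.post("http://localhost:8000/predict", files={"file": f})

print(requests.get("http://localhost:8000/stats").json())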
7. Cloud Deployment
AWS ECS Task Definition
{
  "family": "mobilenetv2-pytorch-task",
  "networkMode": "awsvpc",
  "requiresCompatibilities": ["FARGATE"],
  "cpu": "1024",
  "memory": "2048",
  "containerDefinitions": [
    {
      "name": "mobilenetv2-pytorch-api",
      "image": "your-registry/mobilenetv2-pytorch-api:latest",
      "portMappings": [
        {
          "containerPort": 8000,
          "protocol": "tcp"
        }
      ],
      "environment": [
        {
          "name": "TORCH_HOME",
          "value": "/tmp/.torch"
        }
      ],
      "essential": true,
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/ecs/mobilenetv2-pytorch",
          "awslogs-region": "us-east-1",
          "awslogs-stream-prefix": "ecs"
        }
      }
    }
  ]
}
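If you would rather script the deployment than use the console, the same task definition can be registered with boto3 (this assumes AWS credentials are configured and the JSON above is saved as task-definition.json):

import json
import boto3

ecs = boto3.client("ecs", region_name="us-east-1")

with open("task-definition.json") as f:  # the JSON shown above
    task_def = json.load(f)

# The JSON keys match register_task_definition's keyword arguments
response = ecs.register_task_definition(**task_def)
print(response["taskDefinition"]["taskDefinitionArn"])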
Kubernetes Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mobilenetv2-pytorch-api
spec:
  replicas: 3
  selector:
    matchLabels:
      app: mobilenetv2-pytorch-api
  template:
    metadata:
      labels:
        app: mobilenetv2-pytorch-api
    spec:
      containers:
      - name: mobilenetv2-pytorch-api
        image: mobilenetv2-pytorch-api:latest
        ports:
        - containerPort: 8000
        env:
        - name: TORCH_HOME
          value: /tmp/.torch
        resources:
          requests:
            memory: "1Gi"
            cpu: "500m"
          limits:
            memory: "2Gi"
            cpu: "1000m"
        volumeMounts:
        - name: torch-cache
          mountPath: /tmp/.torch
      volumes:
      - name: torch-cache
        emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: mobilenetv2-pytorch-service
spec:
  selector:
    app: mobilenetv2-pytorch-api
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8000
  type: LoadBalancer
This PyTorch-based guide provides the same functionality as the TensorFlow version but uses PyTorch's ecosystem: torchvision for the pre-trained model, torchvision transforms for preprocessing, and native tensor handling throughout the application.