0% found this document useful (0 votes)
14 views3 pages

Lab (Bounding Box)

Deep Learning

Uploaded by

enssifan
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
14 views3 pages

Lab (Bounding Box)

Deep Learning

Uploaded by

enssifan
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

11/27/24, 8:29 AM Untitled6.ipynb - Colab

Bounding box prediction is a fundamental task in object detection in which the model predicts the coordinates of a rectangular box enclosing
an object in an image.

Objective

Train a Convolutional Neural Network (CNN) to predict bounding box coordinates for a single object in an image.

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt

Purpose: These libraries support dataset loading, neural network construction, and visualization.

2. Dataset Preparation

We will use the MNIST dataset, which contains images of handwritten digits, and generate bounding boxes around each digit.

class MNISTWithBoundingBoxes:
    """MNIST wrapper that also yields a tight bounding box for each digit.

    Each item is ``(img, label, bbox)``. ``bbox`` is a float32 tensor
    ``[x_min, y_min, x_max, y_max]`` holding the outermost non-zero pixel
    indices, divided by the image width (x values) and height (y values).
    """

    def __init__(self, train=True):
        """Load the MNIST split selected by ``train``, downloading if needed."""
        self.dataset = datasets.MNIST(
            root='./data',
            train=train,
            download=True,
            transform=transforms.ToTensor(),
        )

    def __getitem__(self, idx):
        """Return ``(img, label, bbox)`` for the sample at ``idx``.

        An image with no non-zero pixel yields an all-zero ``bbox``.
        """
        img, label = self.dataset[idx]
        img_np = img.squeeze(0).numpy()  # Convert to numpy for bbox calculation
        height, width = img_np.shape[-2:]

        # Row/column indices of every foreground (non-zero) pixel.
        rows, cols = np.where(img_np > 0)
        if rows.size == 0:
            # min()/max() on an empty array raise ValueError; report an
            # empty box instead of crashing in the middle of a batch.
            return img, label, torch.zeros(4, dtype=torch.float32)

        y_min, x_min = rows.min(), cols.min()
        y_max, x_max = rows.max(), cols.max()

        # Normalize by the actual image size rather than a hard-coded 28.
        bbox = torch.tensor(
            [x_min / width, y_min / height, x_max / width, y_max / height],
            dtype=torch.float32,
        )
        return img, label, bbox

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

# Build the train and test splits, then wrap each one in a DataLoader.
# Only the training loader shuffles its samples.
train_dataset = MNISTWithBoundingBoxes(train=True)
test_dataset = MNISTWithBoundingBoxes(train=False)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

3. Define the Model

The model takes the image as input and outputs the bounding box coordinates: $[x_{\min}, y_{\min}, x_{\max}, y_{\max}]$.

class BoundingBoxModel(nn.Module):
    """Small CNN that regresses one bounding box per input image.

    The first linear layer expects ``32 * 7 * 7`` features, which is what the
    backbone produces for single-channel 28x28 inputs (two 2x2 max-pools
    reduce 28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()
        # Two conv/ReLU/pool stages; padding=1 keeps the spatial size through
        # each convolution, so only the pooling layers downsample.
        self.backbone = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Regression head mapping the flattened features to four coordinates.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 4),  # 4 outputs: [x_min, y_min, x_max, y_max]
        )

    def forward(self, x):
        """Return the predicted box coordinates, four values per input image."""
        features = self.backbone(x)
        bbox = self.fc(features)
        return bbox

# Pick the GPU when one is available, otherwise fall back to the CPU,
# then construct the model and move it onto that device.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = BoundingBoxModel()
model = model.to(device)

4. Define the Loss Function and Optimizer

Bounding box prediction uses Mean Squared Error (MSE) as the loss function since it involves regression.

# Mean squared error between predicted and target box coordinates.
criterion = nn.MSELoss()
# Adam over every model parameter with a learning rate of 0.001.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

5. Train the Model

# Training loop: one full pass over train_loader per epoch, printing the
# mean per-batch loss once each epoch finishes.
epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for imgs, _, bboxes in train_loader:
        imgs = imgs.to(device)
        bboxes = bboxes.to(device)

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        # Forward pass and regression loss against the target boxes.
        pred_bboxes = model(imgs)
        loss = criterion(pred_bboxes, bboxes)

        # Backpropagate and update the weights.
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(train_loader):.4f}")

6. Test the Model

Evaluate the model on unseen test data to check its performance.

# Run the model on the first test batch only, with gradient tracking
# disabled, and print the first sample's predicted and actual boxes.
model.eval()
with torch.no_grad():
    for imgs, _, bboxes in test_loader:
        imgs = imgs.to(device)
        bboxes = bboxes.to(device)

        pred_bboxes = model(imgs)

        print("Predicted BBox:", pred_bboxes[0].cpu().numpy())
        print("Ground Truth BBox:", bboxes[0].cpu().numpy())
        break  # Stop after the first batch

7. Visualize Results

Visualize the bounding box predictions for better understanding.

def visualize_bbox(img, bbox, pred_bbox=None):
    """Plot an image with its ground-truth box and, optionally, a predicted box.

    Args:
        img: Image tensor; a leading singleton dimension is squeezed away
            before plotting.
        bbox: Ground-truth ``[x_min, y_min, x_max, y_max]`` in normalized
            coordinates; scaled back to pixels by the image width/height.
        pred_bbox: Optional predicted box in the same format. Drawn in red
            when provided.
    """
    img = img.squeeze(0).numpy()  # Convert tensor to numpy for plotting
    height, width = img.shape[:2]
    plt.imshow(img, cmap='gray')
    axes = plt.gca()

    # Ground truth bounding box, scaled from normalized to pixel coordinates.
    x_min, y_min, x_max, y_max = bbox
    x_min, y_min, x_max, y_max = x_min * width, y_min * height, x_max * width, y_max * height
    axes.add_patch(
        plt.Rectangle(
            (x_min, y_min),
            x_max - x_min,
            y_max - y_min,
            edgecolor='green',
            facecolor='none',
            lw=2,
            label="Ground Truth",
        )
    )

    # Predicted bounding box (if provided)
    if pred_bbox is not None:
        px_min, py_min, px_max, py_max = pred_bbox
        px_min, py_min, px_max, py_max = px_min * width, py_min * height, px_max * width, py_max * height
        axes.add_patch(
            plt.Rectangle(
                (px_min, py_min),
                px_max - px_min,
                py_max - py_min,
                edgecolor='red',
                facecolor='none',
                lw=2,
                label="Prediction",
            )
        )

    plt.legend()
    plt.show()

# Take the first training sample, predict its box with a batch of one,
# and plot the prediction against the ground truth.
img, _, bbox = train_dataset[0]
pred_bbox = model(img.unsqueeze(0).to(device)).detach().cpu().numpy()[0]
visualize_bbox(img, bbox.numpy(), pred_bbox)

Expected Results

Predicted Bounding Box: Model predicts bounding box coordinates for a digit in the test set.

Ground Truth Bounding Box: Actual bounding box coordinates for comparison.

Visualization:

Green box: Ground truth.

Red box: Prediction.

https://colab.research.google.com/drive/1uyLIsxk740OFLmhYmj27Jfeqk9PgdLkC#scrollTo=bYJ61DzZpw-E&printMode=true 3/3

You might also like