0% found this document useful (0 votes)

14 views38 pages

BLDD VIT ResNet50v2 CustomCNN

The document outlines a comprehensive approach to training a Vision Transformer (ViT) model for banana disease recognition using a dataset of images. It includes data preprocessing, model training, validation, and evaluation steps, along with visualizations of training metrics and a confusion matrix for test results. The model achieves high accuracy in classifying various banana diseases based on the provided images.

Uploaded by

shibilbasith4u

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

14 views38 pages

BLDD VIT ResNet50v2 CustomCNN

Uploaded by

shibilbasith4u

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 38

import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from torchinfo import summary
from torchvision.models import vit_b_16, ViT_B_16_Weights
from sklearn.metrics import accuracy_score, confusion_matrix
import os
from pathlib import Path
from tqdm.auto import tqdm
from collections import OrderedDict
import random
import warnings

warnings.filterwarnings("ignore")

# Setting image path and getting list of images

# Root folder that holds one sub-directory of .jpg files per class.
IMAGE_PATH = Path(
    "/kaggle/input/banana-disease-recognition-dataset/"
    "Banana Disease Recognition Dataset/Original Images/Original Images"
)
IMAGE_PATH_LIST = list(IMAGE_PATH.glob("*/*.jpg"))
print(f'Total Images = {len(IMAGE_PATH_LIST)}')

# Only directories are class folders: a stray file next to them must not
# become a class name, because `classes` later sizes the label map and the
# models' output layers.
classes = sorted(entry.name for entry in IMAGE_PATH.iterdir() if entry.is_dir())
print('==' * 20)
print(' ' * 10, f'Total Classes = {len(classes)}')
print('==' * 20)

# Per-class image counts.
for c in classes:
    total_images_class = list((IMAGE_PATH / c).glob("*.jpg"))
    print(f'* {c}: {len(total_images_class)} images')

# Visualize a few sample images

# Draw up to NUM_IMAGES distinct random pictures per class, one row per class.
NUM_IMAGES = 3
# squeeze=False keeps `ax` two-dimensional even with a single class, so the
# ax[row, col] indexing below always works.
fig, ax = plt.subplots(nrows=len(classes), ncols=NUM_IMAGES,
                       figsize=(10, 30), squeeze=False)
for p, c in enumerate(classes):
    total_images_class = list((IMAGE_PATH / c).glob("*.jpg"))
    # Sample without replacement so the same picture is not shown twice;
    # k is capped because random.sample raises when k exceeds the population.
    images_selected = random.sample(
        total_images_class, k=min(NUM_IMAGES, len(total_images_class)))
    # Hide every axis of the row up front so unused cells stay blank.
    for cell in ax[p]:
        cell.axis('off')
    for i, img_path in enumerate(images_selected):
        img_bgr = cv2.imread(str(img_path))
        if img_bgr is None:
            # cv2.imread signals an unreadable file by returning None.
            continue
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        ax[p, i].imshow(img_rgb)
        ax[p, i].set_title(f'Class: {c}\nShape: {img_rgb.shape}')
fig.tight_layout()
fig.show()

# Prepare dataframe with image paths and labels

# One row per image: its file path and the name of the folder it sits in,
# which serves as the class label.
images_path = list(IMAGE_PATH_LIST)
labels = [image_file.parent.stem for image_file in IMAGE_PATH_LIST]

df_path_and_label = pd.DataFrame({'path': images_path, 'label': labels})
print(df_path_and_label.head())

# Split the data

SEED = 42
# First hold out 30 % of the rows, then halve that hold-out into validation
# and test; both splits are stratified on the class label.
df_train, df_rest = train_test_split(
    df_path_and_label,
    test_size=0.3,
    random_state=SEED,
    stratify=df_path_and_label["label"],
)
df_valid, df_test = train_test_split(
    df_rest,
    test_size=0.5,
    random_state=SEED,
    stratify=df_rest["label"],
)

# Define label mapping: class string to numerical value
label_map = {class_name: index for index, class_name in enumerate(classes)}
print(label_map)

# Define transforms from ViT_B_16_Weights
weights = ViT_B_16_Weights.DEFAULT
auto_transforms = weights.transforms()
print(auto_transforms)

# Updated Custom dataset class: precompute label mapping during initialization
class CustomImageDataset(Dataset):
    """Image-classification dataset backed by a DataFrame of paths and labels.

    Args:
        df: Frame whose first column holds the image file paths and whose
            ``label`` column holds the class names.
        label_map: Maps every class name found in ``df["label"]`` to its
            integer id.
        transforms: Callable applied to each RGB ``PIL.Image`` before it is
            returned.

    Raises:
        ValueError: If ``df`` contains a label that ``label_map`` lacks.
    """

    def __init__(self, df: pd.DataFrame, label_map: dict, transforms):
        # Work on a copy so the caller's frame is not mutated.
        df = df.copy()
        df["numeric_label"] = df["label"].map(label_map)
        # An unmapped label becomes NaN; fail here with a clear message
        # instead of feeding NaN targets to the loss later on.
        unknown = df.loc[df["numeric_label"].isna(), "label"].unique()
        if len(unknown):
            raise ValueError(
                f"labels missing from label_map: {sorted(map(str, unknown))}")
        df["numeric_label"] = df["numeric_label"].astype("int64")
        self.df = df.reset_index(drop=True)
        self.transforms = transforms

    def __len__(self):
        """Return the number of samples."""
        return len(self.df)

    # Must be the dunder name: DataLoader indexes the dataset through
    # ``dataset[idx]``, which a method called ``getitem`` never serves.
    def __getitem__(self, idx):
        """Return ``(transformed_image, integer_label)`` for row ``idx``."""
        image_path = self.df.iloc[idx, 0]
        label = int(self.df["numeric_label"].iloc[idx])
        image = Image.open(image_path).convert("RGB")
        image = self.transforms(image)
        return image, label

# Create dataset instances

train_dataset = CustomImageDataset(df_train, label_map, auto_transforms)
valid_dataset = CustomImageDataset(df_valid, label_map, auto_transforms)

# Use fewer workers for DataLoader (e.g., 4)
BATCH_SIZE = 8
NUM_WORKERS = 4

train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, num_workers=NUM_WORKERS)
# Validation data is only evaluated, never trained on, so it keeps a fixed
# order like the test loader. valid_step averages per-batch scores, so a
# reshuffled, uneven last batch would make the reported numbers fluctuate
# for identical weights.
valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE,
                              shuffle=False, num_workers=NUM_WORKERS)

# Pull one batch to confirm the tensor shapes the model will receive.
batch_images, batch_labels = next(iter(train_dataloader))
print("Batch images shape:", batch_images.shape, "and Batch labels shape:", batch_labels.shape)

# Check GPU availability and set device

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f'Using device: {device}')

# Load ViT-16 model and modify last layer for current number of classes
model = vit_b_16(weights=weights)

# The same report is printed three times (pretrained, frozen, new head),
# so the display options are defined once.
summary_options = dict(
    input_size=[8, 3, 224, 224],
    col_width=15,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    row_settings=["var_names"],
)
summary(model=model, **summary_options)

# Freeze the patch-embedding projection and the transformer encoder;
# parameters outside these two modules are left untouched.
for frozen_module in (model.conv_proj, model.encoder):
    frozen_module.requires_grad_(False)

# Confirm parameters have been frozen
summary(model=model, **summary_options)

# Replace the classification head with a fresh linear layer sized for our
# classes; being newly created, it is trainable.
output_shape = len(classes)
model.heads = nn.Sequential(OrderedDict([('head',
    nn.Linear(in_features=768, out_features=output_shape))]))

# Place the whole model, including the new head, on the target device
# explicitly: train_step/valid_step move every batch to `device`.
# NOTE(review): this previously appeared to work only because
# torchinfo.summary relocates the model as a side effect -- confirm.
model = model.to(device)
summary(model=model, **summary_options)

# Define loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer):
    """Run one training epoch and return ``(mean_loss, mean_accuracy)``.

    Both values are the per-batch loss / accuracy averaged over
    ``len(dataloader)`` batches. Each batch is moved to the module-level
    ``device`` before the forward pass.
    """
    model.train()
    loss_total = 0.
    accuracy_total = 0.
    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)
        loss_total += batch_loss.item()
        batch_loss.backward()
        optimizer.step()
        # Predicted class = index of the largest softmax probability.
        probabilities = torch.softmax(logits, dim=1)
        predicted = torch.argmax(probabilities, dim=1)
        accuracy_total += accuracy_score(targets.cpu().numpy(),
                                         predicted.detach().cpu().numpy())
    mean_loss = loss_total / len(dataloader)
    mean_accuracy = accuracy_total / len(dataloader)
    return mean_loss, mean_accuracy

def save_checkpoint(filename, model, epoch, loss, optimizer, metric):
    """Write a training checkpoint to ``filename`` with ``torch.save``.

    Args:
        filename: Destination path; also stored inside the checkpoint.
        model: Module whose ``state_dict()`` is saved under ``'model'``.
        epoch: Epoch index stored under ``'epoch'``.
        loss: Loss value stored under ``'loss'``.
        optimizer: Optimizer whose ``state_dict()`` is saved under
            ``'optimizer'``.
        metric: Metric value stored under ``'metric'``.
    """
    state = {'filename': filename,
             'model': model.state_dict(),
             'epoch': epoch,
             # Store plain Python floats: NumPy scalars are pickled as
             # NumPy objects, which torch.load(..., weights_only=True)
             # refuses to unpickle.
             'loss': float(loss),
             'optimizer': optimizer.state_dict(),
             'metric': float(metric)}
    torch.save(state, filename)

def valid_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module):
    """Evaluate ``model`` on ``dataloader`` and return ``(mean_loss, mean_accuracy)``.

    Runs in eval mode under ``torch.inference_mode()``. Both values are the
    per-batch loss / accuracy averaged over ``len(dataloader)`` batches.
    Each batch is moved to the module-level ``device``.
    """
    model.eval()
    loss_total = 0.
    accuracy_total = 0.
    with torch.inference_mode():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            loss_total += loss_fn(logits, targets).item()
            # Predicted class = index of the largest softmax probability.
            probabilities = torch.softmax(logits, dim=1)
            predicted = torch.argmax(probabilities, dim=1)
            accuracy_total += accuracy_score(targets.cpu().numpy(),
                                             predicted.detach().cpu().numpy())
    mean_loss = loss_total / len(dataloader)
    mean_accuracy = accuracy_total / len(dataloader)
    return mean_loss, mean_accuracy

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          valid_dataloader: torch.utils.data.DataLoader,
          loss_fn: torch.nn.Module,
          optimizer: torch.optim.Optimizer,
          epochs: int = 10,
          checkpoint_path: str = "best_model.pth"):
    """Train ``model`` for ``epochs`` epochs, checkpointing the best one.

    After every epoch the model is validated; whenever the validation loss
    improves on the best seen so far, a checkpoint is written through
    ``save_checkpoint``.

    Args:
        model: Network to train.
        train_dataloader: Batches used by ``train_step``.
        valid_dataloader: Batches used by ``valid_step``.
        loss_fn: Loss passed to both steps.
        optimizer: Optimizer passed to ``train_step`` and stored in the
            checkpoint.
        epochs: Number of epochs to run.
        checkpoint_path: File the best checkpoint is written to. Give each
            model its own path; otherwise successive runs overwrite one
            another's checkpoint. The default keeps the previous fixed
            file name.

    Returns:
        Dict with the per-epoch lists ``train_loss``, ``train_accuracy``,
        ``valid_loss`` and ``valid_accuracy``.
    """
    results = {"train_loss": [], "train_accuracy": [],
               "valid_loss": [], "valid_accuracy": []}
    best_valid_loss = float('inf')
    for epoch in tqdm(range(epochs)):
        train_loss, train_accuracy = train_step(
            model=model, dataloader=train_dataloader,
            loss_fn=loss_fn, optimizer=optimizer)
        valid_loss, valid_accuracy = valid_step(
            model=model, dataloader=valid_dataloader, loss_fn=loss_fn)
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            # The stored epoch is the zero-based loop index, one less than
            # the number printed below.
            save_checkpoint(checkpoint_path, model, epoch,
                            best_valid_loss, optimizer, valid_accuracy)
        print(f'Epoch: {epoch + 1} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f} | Valid Loss: {valid_loss:.4f} | Valid Accuracy: {valid_accuracy:.4f}')
        results["train_loss"].append(train_loss)
        results["train_accuracy"].append(train_accuracy)
        results["valid_loss"].append(valid_loss)
        results["valid_accuracy"].append(valid_accuracy)
    return results

# Training parameters and seeds

EPOCHS = 100
# Seed the CUDA and the default torch random generators before training.
torch.cuda.manual_seed(SEED)
torch.manual_seed(SEED)
MODEL_RESULTS = train(
    model,
    train_dataloader,
    valid_dataloader,
    loss_fn,
    optimizer,
    EPOCHS,
)

def loss_metric_curve_plot(model_results: dict):
    """Plot train/validation loss and accuracy per epoch, side by side.

    Args:
        model_results: Dict with the per-epoch sequences ``train_loss``,
            ``valid_loss``, ``train_accuracy`` and ``valid_accuracy``.
    """
    loss_curves = (model_results["train_loss"], model_results["valid_loss"])
    # Accuracies are converted to plain floats before plotting.
    accuracy_curves = tuple(
        [float(value) for value in model_results[key]]
        for key in ("train_accuracy", "valid_accuracy")
    )
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
    label_style = dict(fontsize=10, fontweight="bold", color="black")
    panels = zip(axes.flat,
                 (loss_curves, accuracy_curves),
                 ("CrossEntropyLoss", "Accuracy"),
                 ("Loss", "Score"))
    for axis, (train_curve, valid_curve), title, y_label in panels:
        axis.plot(train_curve, color="red", label="Train")
        axis.plot(valid_curve, color="blue", label="Valid", linestyle='--')
        for side in ("top", "right"):
            axis.spines[side].set_visible(False)
        axis.set_title(title, fontsize=12, fontweight="bold", color="black")
        axis.set_xlabel("Epochs", **label_style)
        axis.set_ylabel(y_label, **label_style)
        axis.legend()
    fig.tight_layout()
    fig.show()


loss_metric_curve_plot(MODEL_RESULTS)

def predictions(test_dataloader: torch.utils.data.DataLoader):
    """Predict class ids for ``test_dataloader`` with the saved best ViT.

    Rebuilds the ViT-B/16 architecture with the custom head, restores the
    weights stored in ``best_model.pth`` and runs inference on the
    module-level ``device``.

    Args:
        test_dataloader: Loader yielding ``(images, labels)`` batches; the
            labels are ignored.

    Returns:
        1-D CPU tensor of predicted class indices, in loader order.
    """
    # map_location lets a checkpoint written on a GPU be restored on
    # whatever device is in use now (e.g. a CPU-only session).
    checkpoint = torch.load("best_model.pth", map_location=device)
    # weights=None: load_state_dict below overwrites every parameter, so
    # fetching the pretrained weights first would be wasted work.
    loaded_model = vit_b_16(weights=None)
    loaded_model.heads = nn.Sequential(OrderedDict([('head',
        nn.Linear(in_features=768, out_features=output_shape))]))
    loaded_model.load_state_dict(checkpoint["model"])
    loaded_model.to(device)
    loaded_model.eval()
    y_pred_test = []
    with torch.inference_mode():
        for X, _ in tqdm(test_dataloader):
            X = X.to(device)
            y_pred_logit = loaded_model(X)
            y_pred_prob = torch.softmax(y_pred_logit, dim=1)
            y_pred_class = torch.argmax(y_pred_prob, dim=1)
            y_pred_test.append(y_pred_class.detach().cpu())
    y_pred_test = torch.cat(y_pred_test)
    return y_pred_test
# Prepare test DataLoader
test_dataset = CustomImageDataset(df_test, label_map, auto_transforms)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)
y_pred_test = predictions(test_dataloader)

# Ground-truth ids and predictions, in the same (unshuffled) row order.
test_targets = df_test["label"].map(label_map)
test_predictions = y_pred_test.numpy()
print(f'Accuracy Test = {round(accuracy_score(test_targets, test_predictions), 4)}')

confusion_matrix_test = confusion_matrix(test_targets, test_predictions)
fig, ax = plt.subplots(figsize=(10, 4.5))
heatmap_options = dict(
    cmap='Oranges',
    annot=True,
    annot_kws={"fontsize": 9, "fontweight": "bold"},
    linewidths=1.2,
    fmt=' ',
    linecolor="white",
    square=True,
    xticklabels=classes,
    yticklabels=classes,
    cbar=False,
    ax=ax,
)
sns.heatmap(confusion_matrix_test, **heatmap_options)
ax.set_title("Confusion Matrix Test", fontsize=15, fontweight="bold",
             color="darkblue")
ax.tick_params('x', rotation=90)
fig.show()

Total Images = 408

========================================
Total Classes = 7
========================================
* Banana Black Sigatoka Disease: 67 images
* Banana Bract Mosaic Virus Disease: 50 images
* Banana Healthy Leaf: 86 images
* Banana Insect Pest Disease: 86 images
* Banana Moko Disease: 55 images
* Banana Panama Disease: 41 images
* Banana Yellow Sigatoka Disease: 23 images
path
label
0 /kaggle/input/banana-disease-recognition-datas... Banana Panama
Disease
1 /kaggle/input/banana-disease-recognition-datas... Banana Panama
Disease
2 /kaggle/input/banana-disease-recognition-datas... Banana Panama
Disease
3 /kaggle/input/banana-disease-recognition-datas... Banana Panama
Disease
4 /kaggle/input/banana-disease-recognition-datas... Banana Panama
Disease
{'Banana Black Sigatoka Disease': 0, 'Banana Bract Mosaic Virus
Disease': 1, 'Banana Healthy Leaf': 2, 'Banana Insect Pest Disease':
3, 'Banana Moko Disease': 4, 'Banana Panama Disease': 5, 'Banana
Yellow Sigatoka Disease': 6}
ImageClassification(
crop_size=[224]
resize_size=[256]
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
interpolation=InterpolationMode.BILINEAR
)
Batch images shape: torch.Size([8, 3, 224, 224]) and Batch labels
shape: torch.Size([8])
Using device: cuda

Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:01<00:00, 210MB/s]

{"model_id":"e97a19f7213640598339fe3c82118a19","version_major":2,"vers
ion_minor":0}

Epoch: 1 | Train Loss: 0.7721 | Train Accuracy: 0.7569 | Valid Loss:

{"model_id":"3fbdaf5bc856475697b142497520467b","version_major":2,"vers
ion_minor":0}

Accuracy Test = 0.9516

from sklearn.metrics import accuracy_score, precision_score,
recall_score, f1_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Map ground truth labels to numerical values

true_labels = df_test["label"].map(label_map).values
predicted_labels = y_pred_test.numpy()

# Calculate overall accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Test Accuracy: {accuracy:.4f}")

# Calculate precision, recall, and f1-score with different averaging methods
scorers = (precision_score, recall_score, f1_score)
precision_macro, recall_macro, f1_macro = (
    scorer(true_labels, predicted_labels, average='macro') for scorer in scorers
)
precision_micro, recall_micro, f1_micro = (
    scorer(true_labels, predicted_labels, average='micro') for scorer in scorers
)
precision_weighted, recall_weighted, f1_weighted = (
    scorer(true_labels, predicted_labels, average='weighted') for scorer in scorers
)

# Print the three score triples; only the first heading has no leading
# blank line.
score_sections = (
    ("Metric Scores (Macro Average):",
     (precision_macro, recall_macro, f1_macro)),
    ("\nMetric Scores (Micro Average):",
     (precision_micro, recall_micro, f1_micro)),
    ("\nMetric Scores (Weighted Average):",
     (precision_weighted, recall_weighted, f1_weighted)),
)
for heading, scores in score_sections:
    print(heading)
    for score_name, score in zip(("Precision", "Recall", "F1 Score"), scores):
        print(f"{score_name}: {score:.4f}")

# Detailed classification report
print("\nClassification Report:")
print(classification_report(true_labels, predicted_labels,
                            target_names=classes))

# Compute and visualize the confusion matrix

cm = confusion_matrix(true_labels, predicted_labels)

# Rows are the true classes, columns the predicted ones.
heatmap_options = dict(
    cmap='Oranges',
    annot=True,
    annot_kws={"fontsize": 9, "fontweight": "bold"},
    linewidths=1.2,
    fmt='d',
    linecolor="white",
    square=True,
    xticklabels=classes,
    yticklabels=classes,
    cbar=False,
)
axis_label_style = dict(fontsize=12, fontweight="bold", color="black")

plt.figure(figsize=(10, 4.5))
sns.heatmap(cm, **heatmap_options)
plt.title("Confusion Matrix Test", fontsize=15, fontweight="bold",
          color="darkblue")
plt.xlabel("Predicted Classes", **axis_label_style)
plt.ylabel("True Classes", **axis_label_style)
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

Test Accuracy: 0.9516

Metric Scores (Macro Average):
Precision: 0.9280
Recall: 0.9604
F1 Score: 0.9403

Metric Scores (Micro Average):

Precision: 0.9516
Recall: 0.9516
F1 Score: 0.9516

Metric Scores (Weighted Average):

Precision: 0.9562
Recall: 0.9516
F1 Score: 0.9518

Classification Report:
precision recall f1-score
support

Banana Black Sigatoka Disease 0.89 0.80 0.84

10
Banana Bract Mosaic Virus Disease 1.00 1.00 1.00
8
Banana Healthy Leaf 1.00 1.00 1.00
13
Banana Insect Pest Disease 1.00 0.92 0.96
13
Banana Moko Disease 1.00 1.00 1.00
9
Banana Panama Disease 0.86 1.00 0.92
6
Banana Yellow Sigatoka Disease 0.75 1.00 0.86
3

accuracy 0.95
62
macro avg 0.93 0.96 0.94
62
weighted avg 0.96 0.95 0.95
62

# 1. ResNet50V2 Implementation
# Import necessary libraries
import torch
from torch import nn
import torchvision.models as models
from collections import OrderedDict

# Create ResNet50 model

# NOTE(review): this builds torchvision's `resnet50`, while the prints and
# plot labels in this notebook call the model "ResNet50V2" -- confirm which
# architecture is intended.
resnet_model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# Freeze base layers
resnet_model.requires_grad_(False)

# Swap the final layer for a new linear head sized for our classes; being
# freshly created, it is trainable.
in_features = resnet_model.fc.in_features
resnet_model.fc = nn.Linear(in_features, len(classes))
resnet_model = resnet_model.to(device)

# Define optimizer for ResNet50
resnet_optimizer = optim.Adam(params=resnet_model.parameters(), lr=0.001)

# 2. Custom CNN Implementation

class CustomCNN(nn.Module):
    """Four-block convolutional classifier for RGB images.

    Each block is Conv3x3 -> BatchNorm -> ReLU -> 2x2 max-pool, widening the
    channels 3 -> 32 -> 64 -> 128 -> 256. The feature map is then brought to
    14x14 and classified by two fully connected layers.

    Args:
        num_classes: Size of the output (logit) vector.
    """

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return the layers of one Conv -> BatchNorm -> ReLU -> MaxPool block."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]

    def __init__(self, num_classes):
        super().__init__()

        # Feature extractor - convolutional layers. The blocks are
        # flattened into one Sequential so the layer indices (and thus the
        # state_dict keys) match the hand-written layer list.
        widths = (3, 32, 64, 128, 256)
        layers = []
        for in_channels, out_channels in zip(widths, widths[1:]):
            layers.extend(self._conv_block(in_channels, out_channels))
        self.features = nn.Sequential(*layers)

        # Brings any feature map to the 14x14 grid the classifier expects.
        # For 224x224 inputs the map is already 14x14 and this is an
        # identity; other input sizes no longer break the first Linear.
        # It has no parameters, so existing state_dicts still load.
        self.pool = nn.AdaptiveAvgPool2d((14, 14))

        # Classifier - fully connected layers
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 14 * 14, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.pool(x)
        x = self.classifier(x)
        return x
# Build the custom CNN with one output per class and place it on the
# training device.
cnn_model = CustomCNN(len(classes)).to(device)

# Adam optimizer over all of the CNN's parameters.
cnn_optimizer = optim.Adam(params=cnn_model.parameters(), lr=0.001)

# 3. Train ResNet50V2 model

banner = "=" * 50

print(banner)
print("Training ResNet50V2 model")
print(banner)

# NOTE: as called here, train() saves its best checkpoint to
# "best_model.pth", so this run and the next one replace the file written
# while training the ViT.
RESNET_RESULTS = train(
    resnet_model,
    train_dataloader,
    valid_dataloader,
    loss_fn,
    resnet_optimizer,
    EPOCHS,
)

# 4. Train Custom CNN model

print(banner)
print("Training Custom CNN model")
print(banner)

CNN_RESULTS = train(
    cnn_model,
    train_dataloader,
    valid_dataloader,
    loss_fn,
    cnn_optimizer,
    EPOCHS,
)

# 5. Evaluate ResNet50V2 on test set

def evaluate_model(model, test_dataloader, model_name):
    """Score ``model`` on the test split and draw its confusion matrix.

    Predictions are compared against the labels of the module-level
    ``df_test`` mapped through ``label_map``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_dataloader: Loader yielding ``(images, labels)`` batches; the
            labels are ignored.
        model_name: Name used in the printed line and the plot title.

    Returns:
        ``(accuracy, predictions)`` where ``predictions`` is a 1-D CPU
        tensor of class indices.
    """
    model.eval()
    batch_predictions = []
    with torch.inference_mode():
        for inputs, _ in tqdm(test_dataloader):
            logits = model(inputs.to(device))
            probabilities = torch.softmax(logits, dim=1)
            predicted = torch.argmax(probabilities, dim=1)
            batch_predictions.append(predicted.detach().cpu())
    y_pred_test = torch.cat(batch_predictions)

    expected = df_test["label"].map(label_map)
    accuracy = accuracy_score(expected, y_pred_test.numpy())
    print(f'{model_name} Test Accuracy = {round(accuracy, 4)}')

    conf_matrix = confusion_matrix(df_test["label"].map(label_map),
                                   y_pred_test.numpy())
    fig, ax = plt.subplots(figsize=(10, 4.5))
    heatmap_options = dict(
        cmap='Blues',
        annot=True,
        annot_kws={"fontsize": 9, "fontweight": "bold"},
        linewidths=1.2,
        fmt=' ',
        linecolor="white",
        square=True,
        xticklabels=classes,
        yticklabels=classes,
        cbar=False,
        ax=ax,
    )
    sns.heatmap(conf_matrix, **heatmap_options)
    ax.set_title(f"{model_name} Confusion Matrix", fontsize=15,
                 fontweight="bold", color="darkblue")
    ax.tick_params('x', rotation=90)
    plt.tight_layout()
    plt.show()

    return accuracy, y_pred_test

# Evaluate ResNet50V2
resnet_accuracy, resnet_preds = evaluate_model(
    resnet_model, test_dataloader, "ResNet50V2")

# Evaluate Custom CNN
cnn_accuracy, cnn_preds = evaluate_model(
    cnn_model, test_dataloader, "Custom CNN")

# 6. Compare all models' performance
# The ViT accuracy is recomputed from the predictions made earlier.
vit_accuracy = accuracy_score(df_test["label"].map(label_map),
                              y_pred_test.numpy())

# Plot comparison
model_names = ['ViT-B/16', 'ResNet50V2', 'Custom CNN']
accuracies = [vit_accuracy, resnet_accuracy, cnn_accuracy]
bar_colors = ['orange', 'skyblue', 'lightgreen']

plt.figure(figsize=(10, 6))
bars = plt.bar(model_names, accuracies, color=bar_colors)

# Write each accuracy just above the centre of its bar.
for bar, acc in zip(bars, accuracies):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.01
    plt.text(label_x, label_y, f'{acc:.4f}', ha='center', fontweight='bold')

plt.title('Model Performance Comparison', fontsize=15, fontweight='bold')
plt.ylabel('Test Accuracy', fontsize=12)
# Leave headroom above the tallest bar for its label.
plt.ylim(0, max(accuracies) + 0.1)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# 7. Plot training curves for all models

plt.figure(figsize=(15, 10))

# (history dict, line colour, legend label) for each trained model.
histories = (
    (MODEL_RESULTS, 'orange', 'ViT'),
    (RESNET_RESULTS, 'skyblue', 'ResNet50V2'),
    (CNN_RESULTS, 'lightgreen', 'Custom CNN'),
)
# (history key, panel title, y-axis label) for each cell of the 2x2 grid,
# in subplot order.
panels = (
    ("train_loss", "Training Loss", "Loss"),
    ("valid_loss", "Validation Loss", "Loss"),
    ("train_accuracy", "Training Accuracy", "Accuracy"),
    ("valid_accuracy", "Validation Accuracy", "Accuracy"),
)

for position, (key, panel_title, y_label) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    for history, line_color, legend_label in histories:
        plt.plot(history[key], 'o-', color=line_color, label=legend_label)
    plt.title(panel_title, fontsize=12, fontweight="bold")
    plt.xlabel("Epochs", fontsize=10)
    plt.ylabel(y_label, fontsize=10)
    plt.legend()
    plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 178MB/s]

==================================================
Training ResNet50V2 model
==================================================

{"model_id":"949cdceee7e340a38b38db2cf219f103","version_major":2,"vers
ion_minor":0}

Epoch: 1 | Train Loss: 1.5718 | Train Accuracy: 0.4806 | Valid Loss:

{"model_id":"afb7427359e84896b6b6ead4b9b8de68","version_major":2,"vers
ion_minor":0}

Epoch: 1 | Train Loss: 18.4033 | Train Accuracy: 0.3090 | Valid Loss:

{"model_id":"d1e86ab2cc074314b9cf63fce9314e88","version_major":2,"vers
ion_minor":0}

ResNet50V2 Test Accuracy = 0.9032

{"model_id":"caa2e57f58bb4cc2bfedf6effcb7930b","version_major":2,"vers
ion_minor":0}

Custom CNN Test Accuracy = 0.629

from sklearn.metrics import accuracy_score, precision_score,
recall_score, f1_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Convert ground truth labels and model predictions to numpy arrays

true_labels = df_test["label"].map(label_map).values


def _report_metrics(model_name, y_true, y_pred):
    """Print macro-averaged scores and the class report, then plot the confusion matrix.

    Returns ``(accuracy, precision, recall, f1, confusion_matrix)``.
    """
    print(f"{model_name} Evaluation Metrics:")

    accuracy_value = accuracy_score(y_true, y_pred)
    precision_value = precision_score(y_true, y_pred, average="macro")
    recall_value = recall_score(y_true, y_pred, average="macro")
    f1_value = f1_score(y_true, y_pred, average="macro")

    print(f"Accuracy: {accuracy_value:.4f}")
    print(f"Precision: {precision_value:.4f}")
    print(f"Recall: {recall_value:.4f}")
    print(f"F1 Score: {f1_value:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, target_names=classes))

    matrix = confusion_matrix(y_true, y_pred)

    axis_label_style = dict(fontsize=12, fontweight="bold", color="black")
    plt.figure(figsize=(10, 4.5))
    sns.heatmap(matrix, cmap="Blues", annot=True,
                annot_kws={"fontsize": 9, "fontweight": "bold"},
                linewidths=1.2, fmt="d", linecolor="white", square=True,
                xticklabels=classes, yticklabels=classes, cbar=False)
    plt.title(f"{model_name} Confusion Matrix", fontsize=15,
              fontweight="bold", color="darkblue")
    plt.xlabel("Predicted Classes", **axis_label_style)
    plt.ylabel("True Classes", **axis_label_style)
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()

    return accuracy_value, precision_value, recall_value, f1_value, matrix


# ----------------------------- ResNet50V2 Evaluation -----------------------------
resnet_pred = resnet_preds.numpy()
(accuracy_resnet, precision_resnet, recall_resnet,
 f1_resnet, cm_resnet) = _report_metrics("ResNet50V2", true_labels, resnet_pred)

# ----------------------------- Custom CNN Evaluation -----------------------------
cnn_pred = cnn_preds.numpy()
(accuracy_cnn, precision_cnn, recall_cnn,
 f1_cnn, cm_cnn) = _report_metrics("Custom CNN", true_labels, cnn_pred)

ResNet50V2 Evaluation Metrics:

Accuracy: 0.9032
Precision: 0.8813
Recall: 0.9144
F1 Score: 0.8923

Classification Report:
precision recall f1-score
support

Banana Black Sigatoka Disease 0.88 0.70 0.78

10
Banana Bract Mosaic Virus Disease 0.89 1.00 0.94
8
Banana Healthy Leaf 1.00 1.00 1.00
13
Banana Insect Pest Disease 0.92 0.92 0.92
13
Banana Moko Disease 0.88 0.78 0.82
9
Banana Panama Disease 0.86 1.00 0.92
6
Banana Yellow Sigatoka Disease 0.75 1.00 0.86
3

accuracy 0.90
62
macro avg 0.88 0.91 0.89
62
weighted avg 0.91 0.90 0.90
62
Custom CNN Evaluation Metrics:
Accuracy: 0.6290
Precision: 0.6514
Recall: 0.6237
F1 Score: 0.6073

Classification Report:
precision recall f1-score
support

Banana Black Sigatoka Disease 0.43 0.30 0.35

10
Banana Bract Mosaic Virus Disease 0.40 0.75 0.52
8
Banana Healthy Leaf 0.76 1.00 0.87
13
Banana Insect Pest Disease 1.00 0.54 0.70
13
Banana Moko Disease 0.80 0.44 0.57
9
Banana Panama Disease 0.50 0.67 0.57
6
Banana Yellow Sigatoka Disease 0.67 0.67 0.67
3
accuracy 0.63
62
macro avg 0.65 0.62 0.61
62
weighted avg 0.69 0.63 0.62
62

Lean IT Enabling and Sustaining Your Lean Transformation
100% (6)
Lean IT Enabling and Sustaining Your Lean Transformation
372 pages
Debugging Essay
100% (1)
Debugging Essay
9 pages
Skill 7
No ratings yet
Skill 7
11 pages
Code
No ratings yet
Code
4 pages
Val
No ratings yet
Val
9 pages
CV Lab Final AwaisKhan EE A
No ratings yet
CV Lab Final AwaisKhan EE A
7 pages
Lab 9
No ratings yet
Lab 9
29 pages
Assignment 3 DS5620
No ratings yet
Assignment 3 DS5620
11 pages
Notebook - Agave Plant Maturation Model Inference and Testing
No ratings yet
Notebook - Agave Plant Maturation Model Inference and Testing
7 pages
Softmax Regression Mnist
No ratings yet
Softmax Regression Mnist
3 pages
CIFAR - 10 - Dataset - Using - CNN - Aniiiii - HTML
No ratings yet
CIFAR - 10 - Dataset - Using - CNN - Aniiiii - HTML
8 pages
MobileNetV2 Code
No ratings yet
MobileNetV2 Code
3 pages
Ccnet Only
No ratings yet
Ccnet Only
6 pages
Week 7 - Mnist-Mlp
No ratings yet
Week 7 - Mnist-Mlp
7 pages
Ilovepdf Merged
No ratings yet
Ilovepdf Merged
10 pages
Presentation
No ratings yet
Presentation
31 pages
PyTorch Made Easy A Quick Overview
No ratings yet
PyTorch Made Easy A Quick Overview
55 pages
Lab 2-Image-Classification-Using-NNs
No ratings yet
Lab 2-Image-Classification-Using-NNs
6 pages
NN From Scratch
No ratings yet
NN From Scratch
5 pages
HW4ML Project Code
No ratings yet
HW4ML Project Code
24 pages
Document 2
No ratings yet
Document 2
8 pages
Assignment3 AL
No ratings yet
Assignment3 AL
23 pages
MLP Pytorch Sigmoid Mse
No ratings yet
MLP Pytorch Sigmoid Mse
20 pages
MLP Pytorch Softmax Crossentr
No ratings yet
MLP Pytorch Softmax Crossentr
20 pages
Keras
No ratings yet
Keras
4 pages
Intro To Pytorch
No ratings yet
Intro To Pytorch
12 pages
Predicting Stock Prices With Deep Neural Networks 1723509410
No ratings yet
Predicting Stock Prices With Deep Neural Networks 1723509410
5 pages
Train
No ratings yet
Train
4 pages
Credit Card Clustering Autoencoder
No ratings yet
Credit Card Clustering Autoencoder
6 pages
Train
No ratings yet
Train
13 pages
Transfer Learning For Image Classification in Pytorch
No ratings yet
Transfer Learning For Image Classification in Pytorch
13 pages
Dinushasan Courseproject04: Sign in
No ratings yet
Dinushasan Courseproject04: Sign in
19 pages
This Python Script Implements A Single
No ratings yet
This Python Script Implements A Single
6 pages
Adaline SGD
No ratings yet
Adaline SGD
4 pages
Experiment No 13 Final
No ratings yet
Experiment No 13 Final
9 pages
NN From Scratch PDF 1735495327
No ratings yet
NN From Scratch PDF 1735495327
19 pages
Mlp-Fromscratch Sigmoid-Mse
No ratings yet
Mlp-Fromscratch Sigmoid-Mse
13 pages
AM19 ADL U-Net-Model
No ratings yet
AM19 ADL U-Net-Model
37 pages
VGG16 Estudo
No ratings yet
VGG16 Estudo
10 pages
Big Data Assignment - 7
No ratings yet
Big Data Assignment - 7
7 pages
Fibercablelength Understanding
No ratings yet
Fibercablelength Understanding
5 pages
Assignment No 13
No ratings yet
Assignment No 13
3 pages
Debugging
No ratings yet
Debugging
9 pages
Training Code
No ratings yet
Training Code
27 pages
Deep Learning With PyTorch 1
No ratings yet
Deep Learning With PyTorch 1
1 page
Wild Fire CNN Accuracy 95
No ratings yet
Wild Fire CNN Accuracy 95
15 pages
Softmax Regression Scratch
No ratings yet
Softmax Regression Scratch
5 pages
Keras - Datasets Keras - Datasets: "X - Train Shape" "Y - Train Shape" "X - Test Shape" "Y - Test Shape"
No ratings yet
Keras - Datasets Keras - Datasets: "X - Train Shape" "Y - Train Shape" "X - Test Shape" "Y - Test Shape"
6 pages
CNN SVM
No ratings yet
CNN SVM
4 pages
Train Py
No ratings yet
Train Py
4 pages
ML Code Analysis
No ratings yet
ML Code Analysis
6 pages
Effects of Batches - Jupyter Notebook
No ratings yet
Effects of Batches - Jupyter Notebook
73 pages
HW4ML Project Starter Code Template
No ratings yet
HW4ML Project Starter Code Template
6 pages
DLV Lab Manual Print
No ratings yet
DLV Lab Manual Print
29 pages
Hand Written
No ratings yet
Hand Written
13 pages
6 Neural Network
No ratings yet
6 Neural Network
4 pages
361 Project Code
No ratings yet
361 Project Code
10 pages
Raw Nitex
No ratings yet
Raw Nitex
5 pages
PES1PG24CS018 Debjit DLTP Assignment-2 BERT Report
No ratings yet
PES1PG24CS018 Debjit DLTP Assignment-2 BERT Report
10 pages
Deep Learning
No ratings yet
Deep Learning
46 pages
ANN Detection Technique
No ratings yet
ANN Detection Technique
20 pages
Advanced C Concepts and Programming: First Edition
From Everand
Advanced C Concepts and Programming: First Edition
Gayatri
3/5 (1)
Title: Echoes of The Forgotten World: Starfire
No ratings yet
Title: Echoes of The Forgotten World: Starfire
2 pages
Explainable Ai Driven Rainfall Prediction Using DL
No ratings yet
Explainable Ai Driven Rainfall Prediction Using DL
66 pages
Weather Report
No ratings yet
Weather Report
7 pages
Huffman Coding
No ratings yet
Huffman Coding
9 pages
Image Compression
No ratings yet
Image Compression
11 pages
Color Fundamentals
No ratings yet
Color Fundamentals
12 pages
STM32 Ipod Iphone Accessories Library - Presentation v0.2
No ratings yet
STM32 Ipod Iphone Accessories Library - Presentation v0.2
27 pages
WEB ASSA ABLOY ALCEA Telecom Industry Brochure
No ratings yet
WEB ASSA ABLOY ALCEA Telecom Industry Brochure
8 pages
Building Blocks of Visual Hierarchy
No ratings yet
Building Blocks of Visual Hierarchy
39 pages
Control Panel vs. Settings
No ratings yet
Control Panel vs. Settings
20 pages
Top 50 Microservices Interview Questions
No ratings yet
Top 50 Microservices Interview Questions
16 pages
C.V Sandu Cristian
No ratings yet
C.V Sandu Cristian
6 pages
Lab 02
No ratings yet
Lab 02
4 pages
CS504 GRAND QUIZ MEGA FILE (VUAnswer - Com) - 1
No ratings yet
CS504 GRAND QUIZ MEGA FILE (VUAnswer - Com) - 1
40 pages
Researchdemo 1
No ratings yet
Researchdemo 1
11 pages
Long TMK
No ratings yet
Long TMK
2 pages
LABEX3
No ratings yet
LABEX3
10 pages
Practicing Netiquettes
No ratings yet
Practicing Netiquettes
21 pages
Library Manager Manual Version 2.30 Human Font
No ratings yet
Library Manager Manual Version 2.30 Human Font
26 pages
Living in The IT Era Module 1 - Introduction To Information and Communication
No ratings yet
Living in The IT Era Module 1 - Introduction To Information and Communication
8 pages
Template Bus Scenario
No ratings yet
Template Bus Scenario
5 pages
Learning Objectives: While Loops: Explain Loop Syntax
No ratings yet
Learning Objectives: While Loops: Explain Loop Syntax
10 pages
Charles Bruyerre Resume
No ratings yet
Charles Bruyerre Resume
1 page
Online Cloud Engineering Bootcamp - QuickStart
No ratings yet
Online Cloud Engineering Bootcamp - QuickStart
8 pages
What's The Main Benefit of A Three-Tier Architecture?
No ratings yet
What's The Main Benefit of A Three-Tier Architecture?
2 pages
Declarative & Programmatic Security
No ratings yet
Declarative & Programmatic Security
12 pages
Ctwist: Circumferential Tread Wear Imaging System
No ratings yet
Ctwist: Circumferential Tread Wear Imaging System
2 pages
Aadhaar Verification Authentication
No ratings yet
Aadhaar Verification Authentication
61 pages
WK 4 Data Flow Diagram (DFD)
No ratings yet
WK 4 Data Flow Diagram (DFD)
26 pages
Cross Functional Requirements: (MM-Module)
No ratings yet
Cross Functional Requirements: (MM-Module)
3 pages
Slurm Guide
No ratings yet
Slurm Guide
78 pages
Mid Term Exam Questioner
No ratings yet
Mid Term Exam Questioner
4 pages
IBM Power10 Scale-Out L2 Quiz - Attempt Review
No ratings yet
IBM Power10 Scale-Out L2 Quiz - Attempt Review
13 pages
Veracity VSPAN-PLUS-12 VIEWSPAN-Plus-12 Datasheet DV1.5EN
No ratings yet
Veracity VSPAN-PLUS-12 VIEWSPAN-Plus-12 Datasheet DV1.5EN
4 pages

BLDD VIT ResNet50v2 CustomCNN

Uploaded by

BLDD VIT ResNet50v2 CustomCNN

Uploaded by

import pandas as pd

# Setting image path and getting list of images

# Visualize a few sample images

# Prepare dataframe with image paths and labels

for img_path in IMAGE_PATH_LIST:

df_path_and_label = pd.DataFrame({'path': images_path, 'label':

# Split the data

# Define label mapping: class string to numerical value

# Define transforms from ViT_B_16_Weights

# Updated Custom dataset class: precompute label mapping during

def __getitem__(self, idx):

# Create dataset instances

# Use fewer workers for DataLoader (e.g., 4)

# Check GPU availability and set device

for param in model.conv_proj.parameters():

# Confirm parameters have been frozen

# Define loss and optimizer

def train_step(model: torch.nn.Module, dataloader:

def save_checkpoint(filename, model, epoch, loss, optimizer, metric):

def valid_step(model: torch.nn.Module, dataloader:

def train(model: torch.nn.Module, train_dataloader:

# Training parameters and seeds

def loss_metric_curve_plot(model_results: dict):

def predictions(test_dataloader: torch.utils.data.DataLoader):

Total Images = 408

Epoch: 1 | Train Loss: 0.7721 | Train Accuracy: 0.7569 | Valid Loss:

Accuracy Test = 0.9516

# Map ground truth labels to numerical values

# Calculate overall accuracy

precision_micro = precision_score(true_labels, predicted_labels,

precision_weighted = precision_score(true_labels, predicted_labels,

print("Metric Scores (Macro Average):")

print("\nMetric Scores (Micro Average):")

print("\nMetric Scores (Weighted Average):")

# Detailed classification report

# Compute and visualize the confusion matrix

Test Accuracy: 0.9516

Metric Scores (Micro Average):

Metric Scores (Weighted Average):

Banana Black Sigatoka Disease 0.89 0.80 0.84

# Create ResNet50 model

# Freeze base layers

# Modify the final layer for our classification task

# 2. Custom CNN Implementation

# Feature extractor - convolutional layers

# Classifier - fully connected layers

def forward(self, x):

# Define optimizer for Custom CNN

# 3. Train ResNet50V2 model

RESNET_RESULTS = train(resnet_model, train_dataloader,

# 4. Train Custom CNN model

CNN_RESULTS = train(cnn_model, train_dataloader, valid_dataloader,

# 5. Evaluate ResNet50V2 on test set

return accuracy, y_pred_test

# Evaluate Custom CNN

# 6. Compare all models' performance

# Add accuracy values on top of bars

plt.title('Model Performance Comparison', fontsize=15,

# 7. Plot training curves for all models

Epoch: 1 | Train Loss: 1.5718 | Train Accuracy: 0.4806 | Valid Loss:

Epoch: 1 | Train Loss: 18.4033 | Train Accuracy: 0.3090 | Valid Loss:

ResNet50V2 Test Accuracy = 0.9032

Custom CNN Test Accuracy = 0.629

# Convert ground truth labels and model predictions to numpy arrays

# ----------------------------- ResNet50V2 Evaluation

print("ResNet50V2 Evaluation Metrics:")

cm_resnet = confusion_matrix(true_labels, resnet_pred)

# ----------------------------- Custom CNN Evaluation

print("Custom CNN Evaluation Metrics:")

cm_cnn = confusion_matrix(true_labels, cnn_pred)

ResNet50V2 Evaluation Metrics:

Banana Black Sigatoka Disease 0.88 0.70 0.78

Banana Black Sigatoka Disease 0.43 0.30 0.35

You might also like

def getitem(self, idx):