# %% [markdown]
# # Debugging
#
# This notebook contains code for training a siamese neural network model for
# audio-visual matching. It loads audio and image embeddings, creates
# training/test/validation datasets, defines the siamese network architecture,
# trains the model using triplet loss, and evaluates the trained model on a
# validation set.

# %%
# `classes` and `functions` are local modules providing SiameseNetwork,
# TripletDataset, triplet_loss, and identification_accuracy.
from classes import *
from functions import *

import os
import pickle
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# torch, nn, and optim are used in the cells below; imported explicitly here
# in case the star imports above do not re-export them
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# %%
from unidecode import unidecode

# %%
audio_files = os.listdir('datasets/audio')

with open('datasets/audio_embeddings.pickle', 'rb') as file:
    audio_embeddings = pickle.load(file)

audio_names = [key.split('/')[0] for key in audio_embeddings.keys()]

audio_df = pd.DataFrame([{'name': key.split('/')[0], 'audio_embedding': value}
                         for key, value in audio_embeddings.items()])

audio_df['name'] = audio_df['name'].apply(unidecode)

# L2-normalize each audio embedding to unit length
audio_df['audio_embedding'] = audio_df['audio_embedding'].apply(
    lambda x: x / np.linalg.norm(x))
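
# %% [markdown]
# Unit-normalizing the audio embeddings puts them on a common scale: for unit
# vectors $u$ and $v$, $\|u - v\|^2 = 2 - 2\,u^\top v$, so Euclidean distance
# on normalized embeddings is a monotone function of cosine similarity.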

# %%
with open('datasets/image_embeddings.pickle', 'rb') as file:
    image_embeddings = pickle.load(file)

image_df = pd.DataFrame([{'name': key.split('/')[0], 'image_embedding': value}
                         for key, value in image_embeddings.items()])

image_df['name'] = image_df['name'].apply(unidecode)

# %%
# Pair each image embedding (anchor) with the matching audio embedding (positive)
matches_df = pd.merge(image_df, audio_df, on='name', how='outer')
matches_df.columns = ['name', 'anchor', 'positive']

# %%
# For each (anchor, positive) pair, sample a negative audio embedding from a
# different identity; `coincidences` counts rejected same-identity draws.
coincidences = 0
negatives = []
for i, row in matches_df.iterrows():
    while True:
        sample = matches_df.sample(n=1)
        sample.reset_index(inplace=True, drop=True)
        if sample['name'][0] != row['name']:
            negatives.append(sample['positive'][0])
            break
        else:
            coincidences += 1

matches_df['negative'] = negatives
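
# %% [markdown]
# A vectorized alternative to the rejection loop above (a sketch, not part of
# the original pipeline): permute the positives, then re-draw only the rows
# whose sampled negative shares the row's own identity.

# %%
def sample_negatives(df, seed=None):
    """Hypothetical helper: one negative per row, never from the same identity."""
    rng = np.random.default_rng(seed)
    names = df['name'].to_numpy()
    idx = rng.permutation(len(df))
    while True:
        bad = np.flatnonzero(names[idx] == names)  # rows that drew their own identity
        if bad.size == 0:
            break
        idx[bad] = rng.integers(0, len(df), size=bad.size)  # re-draw collisions only
    return list(df['positive'].to_numpy()[idx])

# e.g. matches_df['negative'] = sample_negatives(matches_df)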

# %%
# 80/10/10 train/test/validation split
train_set, test_set, _, _ = train_test_split(matches_df, matches_df['name'],
                                             test_size=0.2)
test_set, validation_set, _, _ = train_test_split(test_set, test_set['name'],
                                                  test_size=0.5)

# %% [markdown]
# ### Siamese Network Model With Triplet Loss Training
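#
# `triplet_loss` is imported from `functions`; presumably it implements the
# standard triplet loss
#
# $$L(a, p, n) = \max\bigl(0,\; d(f(a), f(p)) - d(f(a), f(n)) + \mathrm{margin}\bigr),$$
#
# averaged over the batch: each anchor embedding $f(a)$ is pushed at least
# `margin` closer to its positive $f(p)$ than to its negative $f(n)$.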

# %%
siamese_model = SiameseNetwork([512, 192], [[256, 512, 256], [256, 512, 256]], 256)
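
# %% [markdown]
# `SiameseNetwork` is defined in `classes`, so its exact architecture is not
# shown here. A minimal sketch consistent with the constructor call above (two
# input dims, presumably 512-d image and 192-d audio; one hidden-layer spec per
# tower; a shared 256-d output space) might look like this hypothetical class;
# the real implementation may differ.

# %%
class SiameseNetworkSketch(nn.Module):
    """Hypothetical two-tower network projecting both modalities into one space."""

    def __init__(self, input_dims, hidden_dims, output_dim):
        super().__init__()
        towers = []
        for in_dim, hidden in zip(input_dims, hidden_dims):
            layers, prev = [], in_dim
            for h in hidden:
                layers += [nn.Linear(prev, h), nn.ReLU()]
                prev = h
            layers.append(nn.Linear(prev, output_dim))
            towers.append(nn.Sequential(*layers))
        self.towers = nn.ModuleList(towers)

    def forward(self, x_a, x_b):
        # One tower per modality; the real model may instead return a pairwise score
        return self.towers[0](x_a), self.towers[1](x_b)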

# %%
training_triplet_dataset = TripletDataset(train_set)
testing_triplet_dataset = TripletDataset(test_set)
validation_triplet_dataset = TripletDataset(validation_set)
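
# %% [markdown]
# `TripletDataset` also comes from `classes`; a plausible sketch, assuming it
# wraps the `anchor`/`positive`/`negative` columns as float tensors:

# %%
class TripletDatasetSketch(Dataset):
    """Hypothetical dataset yielding (anchor, positive, negative) float tensors."""

    def __init__(self, df):
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        return (torch.as_tensor(row['anchor'], dtype=torch.float32),
                torch.as_tensor(row['positive'], dtype=torch.float32),
                torch.as_tensor(row['negative'], dtype=torch.float32))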

# %%
train_triplet_dataloader = DataLoader(training_triplet_dataset, batch_size=32,
shuffle=True)
test_triplet_dataloader = DataLoader(testing_triplet_dataset, batch_size=32,
shuffle=True)
validation_triplet_dataloader = DataLoader(validation_triplet_dataset,
batch_size=32, shuffle=True)

# %% [markdown]
# ##### Training with Early Stopping
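#
# The stopping rule below compares the mean test loss over the ten most recent
# epochs against the mean over the ten epochs before that: training stops once
# the recent window is no longer the lower of the two.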

# %%
optimizer = optim.SGD(siamese_model.parameters(), lr=0.1)
# optimizer = optim.Adam(siamese_model.parameters(), lr=0.001)
epochs = 150

# %%
training_losses = []
testing_losses = []
for epoch in range(epochs):

    total_loss = 0.0
    total_testing_loss = 0.0
    training_batches = 0
    testing_batches = 0
    for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
        training_batches += 1
        optimizer.zero_grad()
        loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                            negative_batch, margin=1.0)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # validation_losses.append(validation_loss)

    # Evaluate on the test set without tracking gradients
    with torch.no_grad():
        for anchor_batch, positive_batch, negative_batch in test_triplet_dataloader:
            testing_batches += 1
            testing_loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                                        negative_batch, margin=1.0)
            total_testing_loss += testing_loss.item()

    total_training_loss_per_batch = total_loss / training_batches
    training_losses.append(total_training_loss_per_batch)
    total_testing_loss_per_batch = total_testing_loss / testing_batches
    testing_losses.append(total_testing_loss_per_batch)
    print(f"epoch: {epoch + 1} Training Loss per batch: "
          f"{total_training_loss_per_batch}, Testing Loss per batch: "
          f"{total_testing_loss_per_batch}\n")
    # Stop once the mean test loss over the last 10 epochs is no longer lower
    # than the mean over the 10 epochs before that
    if (epoch > 20) and (np.mean(testing_losses[-20:-10]) < np.mean(testing_losses[-10:])):
        print('Early stopping')
        break

# %% [markdown]
# ##### A little extra training

# %%
# # for epoch in range(epochs):
# for epoch in range(50, 100):
#     total_loss = 0.0
#     total_testing_loss = 0.0
#     training_batches = 0
#     testing_batches = 0
#     for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
#         training_batches += 1
#         optimizer.zero_grad()
#         loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
#                             negative_batch, margin=1.0)
#         loss.backward()
#         optimizer.step()
#         total_loss += loss.item()
#
#     # validation_losses.append(validation_loss)
#
#     for anchor_batch, positive_batch, negative_batch in test_triplet_dataloader:
#         testing_batches += 1
#         testing_loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
#                                     negative_batch, margin=1.0)
#         total_testing_loss += testing_loss.item()
#
#     total_training_loss_per_batch = total_loss / training_batches
#     training_losses.append(total_training_loss_per_batch)
#     total_testing_loss_per_batch = total_testing_loss / testing_batches
#     testing_losses.append(total_testing_loss_per_batch)
#     print(f"epoch: {epoch + 1} Training Loss per batch: "
#           f"{total_training_loss_per_batch}, Testing Loss per batch: "
#           f"{total_testing_loss_per_batch}\n")
#     if (epoch > 20) and (np.mean(testing_losses[-20:-10]) < np.mean(testing_losses[-10:])):
#         print('Early stopping')
#         break

# %%

# One x-axis point per recorded epoch (robust to early stopping)
epoch_list = [i + 1 for i in range(len(training_losses))]

# %%
plt.plot(epoch_list, training_losses, label='Training Loss')
plt.plot(epoch_list, testing_losses, label='Testing Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# %% [markdown]
# ### 1:2 Identification Accuracy

# %%
ia = identification_accuracy(siamese_model, validation_triplet_dataloader)
print(f"1:2 identification accuracy: {ia}")
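
# %% [markdown]
# `identification_accuracy` is imported from `functions`. A plausible reading
# of "1:2 identification": a triplet is counted correct when the anchor
# embedding lies closer to the positive than to the negative. A hypothetical
# sketch, assuming the model's forward returns the two embeddings:

# %%
def identification_accuracy_sketch(model, dataloader):
    """Fraction of triplets where the anchor is nearer its positive than its negative."""
    correct, total = 0, 0
    with torch.no_grad():
        for anchor, positive, negative in dataloader:
            emb_a, emb_p = model(anchor, positive)
            _, emb_n = model(anchor, negative)
            d_pos = torch.norm(emb_a - emb_p, dim=1)
            d_neg = torch.norm(emb_a - emb_n, dim=1)
            correct += (d_pos < d_neg).sum().item()
            total += anchor.size(0)
    return correct / total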

# %%
# i = 0
# for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
#     ab = anchor_batch
#     break

# %%
# ab

# %%
# siamese_model(anchor_batch, )

# # %% [markdown]
# # ### Building A Classifier On Top Of The Siamese Network

# # %%
# class SimpleBinaryClassifier(nn.Module):
#     def __init__(self):
#         super(SimpleBinaryClassifier, self).__init__()
#         self.fc = nn.Linear(1, 1)  # Input size and output size are both 1 for a single number input
#
#     def forward(self, x):
#         x = self.fc(x)
#         return x

# # %%
# binary_classifier = SimpleBinaryClassifier()

# # %%
# optimizer = optim.Adam(binary_classifier.parameters(), lr=0.001)

# # %%
# criterion = nn.BCEWithLogitsLoss()
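
# # %% [markdown]
# # `nn.BCEWithLogitsLoss` fuses a sigmoid with binary cross-entropy, so the
# # classifier should output raw logits, and predictions come from thresholding
# # `torch.sigmoid(logits)` at 0.5, as in the training loop below.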

# # %%
# # freezing the trained siamese model
# for param in siamese_model.parameters():
#     param.requires_grad = False

# # %%
# binary_classifier = SiameseBinaryClassifier(siamese_model)

# # %%
# total_params = sum([param.numel() for param in binary_classifier.parameters()])

# # %%
# total_params

# # %%
# trainable_params = sum([param.numel() for param in binary_classifier.parameters()
#                         if param.requires_grad])

# # %%
# trainable_params

# # %% [markdown]
# # ### Training The Classifier

# # %%
# optimizer = optim.Adam(binary_classifier.parameters(), lr=0.001)
# criterion = nn.BCEWithLogitsLoss()
# epochs = 50

# # %%

# for epoch in range(epochs):
#
#     total_loss = 0.0
#     total_correct = 0
#     total_samples = 0
#
#     for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
#
#         optimizer.zero_grad()
#
#         # Forward pass for anchor-positive and anchor-negative pairs
#         anchor_positive_outputs = siamese_model(anchor_batch, positive_batch)
#         anchor_negative_outputs = siamese_model(anchor_batch, negative_batch)
#
#         # Create labels: 1 for positive pairs, 0 for negative pairs
#         positive_labels = torch.ones(anchor_batch.size(0), 1)
#         negative_labels = torch.zeros(anchor_batch.size(0), 1)
#         labels = torch.cat((positive_labels, negative_labels), dim=0)
#
#         # Detach the siamese outputs so gradients stay inside the classifier
#         inputs = torch.cat((anchor_positive_outputs.detach(),
#                             anchor_negative_outputs.detach()), dim=0)
#
#         # Classify each pair and compute the loss on the raw logits
#         outputs = binary_classifier(inputs)
#         loss = criterion(outputs.squeeze(), labels.squeeze())
#         loss.backward()
#         optimizer.step()
#
#         total_loss += loss.item()
#
#         # Predictions: threshold the sigmoid of the logits at 0.5
#         predicted_labels = (torch.sigmoid(outputs) > 0.5).float()
#         total_correct += (predicted_labels == labels).sum().item()
#         total_samples += labels.size(0)
#
#     print(f"Epoch [{epoch+1}/{epochs}], "
#           f"Loss: {total_loss / len(train_triplet_dataloader)}, "
#           f"Accuracy: {total_correct / total_samples}")
