
# %% [markdown]
# # Siamese Network Assignment
#
# This notebook trains a Siamese neural network for audio-visual matching using
# triplet loss. It loads precomputed audio and image embeddings, builds
# training/test/validation splits, trains the model for up to 500 epochs with
# early stopping, and reports the 1:2 identification accuracy on the validation set.


# %%

! pip install scikit-learn

# %%
! pip install unidecode matplotlib torch

# %%
from classes import *
from functions import *
import pickle
import pandas as pd
import numpy as np
import os
import random
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
# torch and optim are used below; imported explicitly in case the wildcard
# imports above do not re-export them.
import torch
from torch import optim

# %%
from unidecode import unidecode

# %%
audio_files = os.listdir('datasets/audio')

# Load the precomputed audio embeddings; keys look like '<name>/<file>', so the
# identity name is the part before the first '/'.
with open('datasets/audio_embeddings.pickle', 'rb') as file:
    audio_embeddings = pickle.load(file)

audio_names = [key.split('/')[0] for key in audio_embeddings.keys()]

audio_df = pd.DataFrame([{'name': key.split('/')[0], 'audio_embedding': value}
                         for key, value in audio_embeddings.items()])

# Normalize names to ASCII and L2-normalize each audio embedding.
audio_df['name'] = audio_df['name'].apply(unidecode)
audio_df['audio_embedding'] = audio_df['audio_embedding'].apply(
    lambda x: x / np.linalg.norm(x))

# %%
with open('datasets/image_embeddings.pickle', 'rb') as file:
    image_embeddings = pickle.load(file)

image_df = pd.DataFrame([{'name': key.split('/')[0], 'image_embedding': value}
                         for key, value in image_embeddings.items()])

image_df['name'] = image_df['name'].apply(unidecode)

# %%
# Pair each image embedding (anchor) with the matching audio embedding
# (positive) by identity name. Note that an outer merge keeps names that
# appear in only one modality; such rows will carry NaN values.
matches_df = pd.merge(image_df, audio_df, on='name', how='outer')
matches_df.columns = ['name', 'anchor', 'positive']

# %%
# For each (anchor, positive) pair, draw a random audio embedding from a
# different identity to serve as the negative.
coincidences = 0
negatives = []
for i, row in matches_df.iterrows():
    while True:
        sample = matches_df.sample(n=1)
        sample.reset_index(inplace=True, drop=True)
        if sample['name'][0] != row['name']:
            negatives.append(sample['positive'][0])
            break
        else:
            coincidences += 1

matches_df['negative'] = negatives

# %%
# 80/10/10 split into training, test, and validation sets.
train_set, test_set, _, _2 = train_test_split(matches_df, matches_df['name'],
                                              test_size=0.2)
test_set, validation_set, _, _2 = train_test_split(test_set, test_set['name'],
                                                   test_size=0.5)

# %% [markdown]
# ### Siamese Network Model With Triplet Loss Training

# %%
# Constructor arguments presumably give the two branch input sizes (512-d image,
# 192-d audio), per-branch hidden-layer widths, and the shared 256-d output
# embedding; the class itself is defined in classes.py.
siamese_model = SiameseNetwork([512, 192], [[256, 512, 256], [256, 512, 256]], 256)
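
# %% [markdown]
# `SiameseNetwork` is defined in `classes.py`, which is not shown here. As a
# rough illustration of the two-tower design the constructor arguments suggest,
# a minimal sketch might look like the following; the layer structure and the
# interpretation of the arguments are assumptions, not the actual implementation.

# %%
import torch.nn as nn

class TwoTowerSketch(nn.Module):
    """Hypothetical two-branch network: one MLP per modality, shared output dim."""
    def __init__(self, input_dims, hidden_dims, output_dim):
        super().__init__()
        towers = []
        for in_dim, hidden in zip(input_dims, hidden_dims):
            layers, prev = [], in_dim
            for h in hidden:
                layers += [nn.Linear(prev, h), nn.ReLU()]
                prev = h
            layers.append(nn.Linear(prev, output_dim))
            towers.append(nn.Sequential(*layers))
        # Attribute assignment registers both towers as submodules.
        self.image_tower, self.audio_tower = towers

    def forward(self, image_x, audio_x):
        # Project both modalities into the shared embedding space.
        return self.image_tower(image_x), self.audio_tower(audio_x)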

# %%
training_triplet_dataset = TripletDataset(train_set)
testing_triplet_dataset = TripletDataset(test_set)
validation_triplet_dataset = TripletDataset(validation_set)
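
# %% [markdown]
# `TripletDataset` also comes from `classes.py`. A plausible minimal sketch is
# shown below: it wraps the DataFrame and yields (anchor, positive, negative)
# tensors per row. The real class may differ in details.

# %%
class TripletDatasetSketch(Dataset):
    """Hypothetical dataset yielding (anchor, positive, negative) float tensors."""
    def __init__(self, df):
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        return (torch.as_tensor(row['anchor'], dtype=torch.float32),
                torch.as_tensor(row['positive'], dtype=torch.float32),
                torch.as_tensor(row['negative'], dtype=torch.float32))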

# %%
train_triplet_dataloader = DataLoader(training_triplet_dataset, batch_size=32,
                                      shuffle=True)
test_triplet_dataloader = DataLoader(testing_triplet_dataset, batch_size=32,
                                     shuffle=True)
validation_triplet_dataloader = DataLoader(validation_triplet_dataset,
                                           batch_size=32, shuffle=True)

# %% [markdown]
# ##### Training with Early Stopping

# %%
# optimizer = optim.SGD(siamese_model.parameters(), lr=0.1)
optimizer = optim.Adam(siamese_model.parameters(), lr=0.0005)
epochs = 500
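
# %% [markdown]
# `triplet_loss` is imported from `functions.py`, which is not shown. Given the
# call signature used below, a standard formulation would be
# max(0, d(a, p) - d(a, n) + margin) averaged over the batch. A minimal sketch
# under that assumption follows; the actual function may differ in its distance
# metric, reduction, or how it calls the model.

# %%
def triplet_loss_sketch(model, anchor, positive, negative, margin=1.0):
    """Hypothetical triplet loss: pull matching audio toward the image anchor,
    push non-matching audio at least `margin` farther away."""
    a, p = model(anchor, positive)   # assumed forward: (image, audio) -> embeddings
    _, n = model(anchor, negative)
    d_pos = torch.norm(a - p, dim=1)  # anchor-positive distances
    d_neg = torch.norm(a - n, dim=1)  # anchor-negative distances
    return torch.clamp(d_pos - d_neg + margin, min=0).mean()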

# %%
training_losses = []
testing_losses = []
for epoch in range(epochs):
    total_loss = 0.0
    total_testing_loss = 0.0
    training_batches = 0
    testing_batches = 0

    # Training pass
    for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
        training_batches += 1
        optimizer.zero_grad()
        loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                            negative_batch, margin=1.0)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Evaluation pass on the test set (no gradients needed)
    with torch.no_grad():
        for anchor_batch, positive_batch, negative_batch in test_triplet_dataloader:
            testing_batches += 1
            testing_loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                                        negative_batch, margin=1.0)
            total_testing_loss += testing_loss.item()

    total_training_loss_per_batch = total_loss / training_batches
    training_losses.append(total_training_loss_per_batch)
    total_testing_loss_per_batch = total_testing_loss / testing_batches
    testing_losses.append(total_testing_loss_per_batch)
    print(f"epoch: {epoch + 1} Training loss per batch: "
          f"{total_training_loss_per_batch}, Testing loss per batch: "
          f"{total_testing_loss_per_batch}\n")

    # Early stopping: halt once the mean test loss of the last 3 epochs
    # exceeds the mean of the 3 epochs before them.
    if (epoch > 8) and (np.mean(testing_losses[-6:-3]) < np.mean(testing_losses[-3:])):
        print('Early stopping')
        break

# %%
epoch_list = [i+1 for i in range(epoch + 1)]

# %%
plt.plot(epoch_list, training_losses, label='Training Loss')
plt.plot(epoch_list, testing_losses, label='Testing Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# %%
from classes import *

# %% [markdown]
# ### 1:2 Identification Accuracy

# %%
ia = identification_accuracy(siamese_model, validation_triplet_dataloader)

# %%
print(f"1:2 Identification Accuracy for validation set is {ia*100}%")
