
assignment-9

November 12, 2024

[2]: import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from collections import Counter
from itertools import chain

# Sample parallel sentences for demonstration (replace with actual data)
source_sentences = ["hello", "how are you", "good morning"]
target_sentences = ["hola", "cómo estás", "buenos días"]

# Vocabulary building function
def build_vocab(sentences):
    counter = Counter(chain.from_iterable(s.split() for s in sentences))
    vocab = {word: idx + 3 for idx, (word, _) in enumerate(counter.most_common())}
    vocab["<pad>"] = 0
    vocab["<sos>"] = 1
    vocab["<eos>"] = 2
    return vocab

# Build vocabulary for source and target languages
source_vocab = build_vocab(source_sentences)
target_vocab = build_vocab(target_sentences)

# Tokenize function
def tokenize(sentence, vocab):
    tokens = ["<sos>"] + sentence.split() + ["<eos>"]
    return [vocab[token] if token in vocab else vocab["<pad>"] for token in tokens]

# Prepare data for training
train_data = [(torch.tensor(tokenize(src, source_vocab)),
               torch.tensor(tokenize(tgt, target_vocab)))
              for src, tgt in zip(source_sentences, target_sentences)]

# Define Dataset and DataLoader
class TranslationDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

dataset = TranslationDataset(train_data)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: x)

# Encoder model
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim, n_layers):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, n_layers, batch_first=True)

    def forward(self, src):
        embedded = self.embedding(src)
        outputs, (hidden, cell) = self.lstm(embedded)
        return outputs, hidden, cell

# Attention model
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attn = nn.Linear(hidden_dim * 2, hidden_dim)
        self.v = nn.Linear(hidden_dim, 1, bias=False)

    def forward(self, hidden, encoder_outputs):
        src_len = encoder_outputs.shape[1]
        hidden = hidden[-1].unsqueeze(1).repeat(1, src_len, 1)
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        attention = self.v(energy).squeeze(2)
        return torch.softmax(attention, dim=1)

# Decoder model with attention
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hidden_dim, n_layers, attention):
        super(Decoder, self).__init__()
        self.output_dim = output_dim
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.lstm = nn.LSTM(emb_dim + hidden_dim, hidden_dim, n_layers, batch_first=True)
        self.fc_out = nn.Linear(hidden_dim * 2, output_dim)
        self.attention = attention

    def forward(self, tgt, hidden, cell, encoder_outputs):
        tgt = tgt.unsqueeze(1)
        embedded = self.embedding(tgt)
        attn_weights = self.attention(hidden, encoder_outputs)
        context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs)
        lstm_input = torch.cat((embedded, context), dim=2)
        output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
        prediction = self.fc_out(torch.cat((output, context), dim=2).squeeze(1))
        return prediction, hidden, cell

# Seq2Seq model combining encoder and decoder
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, tgt):
        encoder_outputs, hidden, cell = self.encoder(src)
        outputs = torch.zeros(tgt.shape[0], tgt.shape[1], self.decoder.output_dim).to(self.device)
        input = tgt[:, 0]
        for t in range(1, tgt.shape[1]):
            output, hidden, cell = self.decoder(input, hidden, cell, encoder_outputs)
            outputs[:, t] = output
            input = output.argmax(1)
        return outputs

# Hyperparameters and model initialization
INPUT_DIM = len(source_vocab)
OUTPUT_DIM = len(target_vocab)
EMB_DIM = 256
HIDDEN_DIM = 512
N_LAYERS = 2

encoder = Encoder(INPUT_DIM, EMB_DIM, HIDDEN_DIM, N_LAYERS)
attention = Attention(HIDDEN_DIM)
decoder = Decoder(OUTPUT_DIM, EMB_DIM, HIDDEN_DIM, N_LAYERS, attention)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Seq2Seq(encoder, decoder, device).to(device)
# Training setup
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss(ignore_index=target_vocab["<pad>"])

# Training loop
def train(model, dataloader, optimizer, criterion):
    model.train()
    epoch_loss = 0
    for batch in dataloader:
        src, tgt = zip(*batch)
        src = torch.nn.utils.rnn.pad_sequence(src, padding_value=source_vocab["<pad>"], batch_first=True)
        tgt = torch.nn.utils.rnn.pad_sequence(tgt, padding_value=target_vocab["<pad>"], batch_first=True)
        src, tgt = src.to(device), tgt.to(device)

        optimizer.zero_grad()
        output = model(src, tgt)

        output_dim = output.shape[-1]
        output = output[:, 1:].reshape(-1, output_dim)
        tgt = tgt[:, 1:].reshape(-1)

        loss = criterion(output, tgt)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(dataloader)

# Training epochs
for epoch in range(10):
    loss = train(model, dataloader, optimizer, criterion)
    print(f'Epoch {epoch+1}, Loss: {loss:.4f}')

Epoch 1, Loss: 2.0400
Epoch 2, Loss: 1.8393
Epoch 3, Loss: 1.5252
Epoch 4, Loss: 1.2470
Epoch 5, Loss: 0.8623
Epoch 6, Loss: 0.5609
Epoch 7, Loss: 0.4295
Epoch 8, Loss: 0.1870
Epoch 9, Loss: 0.2638
Epoch 10, Loss: 0.1014
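The notebook stops after training, so here is a minimal greedy-decoding sketch showing how the trained model could be used to translate one of the source sentences. It reuses the objects defined above (model, source_vocab, target_vocab, tokenize, device); the translate helper, the max_len parameter, and the inverse vocabulary inv_target_vocab are illustrative additions, not part of the original assignment.

# Illustrative inference sketch: greedy decoding with the trained Seq2Seq model
def translate(sentence, max_len=10):
    model.eval()
    # Invert the target vocabulary to map predicted indices back to words
    inv_target_vocab = {idx: word for word, idx in target_vocab.items()}
    with torch.no_grad():
        # Encode the source sentence (batch size 1)
        src = torch.tensor(tokenize(sentence, source_vocab)).unsqueeze(0).to(device)
        encoder_outputs, hidden, cell = model.encoder(src)
        # Start decoding from <sos> and feed back the argmax prediction each step
        token = torch.tensor([target_vocab["<sos>"]]).to(device)
        words = []
        for _ in range(max_len):
            output, hidden, cell = model.decoder(token, hidden, cell, encoder_outputs)
            token = output.argmax(1)
            word = inv_target_vocab[token.item()]
            if word == "<eos>":
                break
            words.append(word)
    return " ".join(words)

print(translate("good morning"))  # after training, expected to produce something like "buenos días"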
