
Seq2Seq Encoder Decoder Deep LSTM

PyTorch
Defining Models and Custom Datasets:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim, n_layers, dropout):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.lstm1 = nn.LSTM(emb_dim, hidden_dim, n_layers, dropout=dropout, batch_first=True,
                             bidirectional=True)
        self.lstm2 = nn.LSTM(hidden_dim * 2, hidden_dim, n_layers, dropout=dropout, batch_first=True)

    def forward(self, src):
        # src: (batch, src_len)
        embedded = self.embedding(src)
        outputs, (hidden, cell) = self.lstm1(embedded)
        outputs, (hidden, cell) = self.lstm2(outputs)
        return outputs, (hidden, cell)

class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hidden_dim, n_layers, dropout):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, n_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, input, hidden, cell):
        # input: (batch, 1) -- one target token per decoding step
        embedded = self.embedding(input)
        output, (hidden, cell) = self.lstm(embedded, (hidden, cell))
        prediction = self.fc(output)
        return prediction, (hidden, cell)

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        # src: (batch, src_len), trg: (batch, trg_len)
        batch_size = trg.size(0)
        trg_len = trg.size(1)
        vocab_size = self.decoder.fc.out_features
        outputs = torch.zeros(batch_size, trg_len, vocab_size).to(self.device)

        encoder_outputs, (hidden, cell) = self.encoder(src)

        # The first decoder input is the first target token
        input = trg[:, 0]

        for t in range(1, trg_len):
            output, (hidden, cell) = self.decoder(input.unsqueeze(1), hidden, cell)
            outputs[:, t, :] = output.squeeze(1)

            # Teacher forcing: sometimes feed the ground-truth token, otherwise the model's own prediction
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            input = trg[:, t] if teacher_force else output.argmax(2).squeeze(1)

        return outputs

class Seq2SeqDataset(Dataset):
    def __init__(self, input_length, output_length, vocab_size, size):
        self.input_length = input_length
        self.output_length = output_length
        self.vocab_size = vocab_size
        self.size = size

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        # Random integer sequences stand in for tokenized source/target sentences
        src = torch.randint(1, self.vocab_size, (self.input_length,))
        trg = torch.randint(1, self.vocab_size, (self.output_length,))
        return src, trg
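
Since Seq2Seq.forward needs the target tensor for its length and first token, it cannot generate sequences on its own at inference time. Below is a minimal greedy-decoding sketch, assuming a hypothetical start-of-sequence index sos_idx and a chosen maximum length max_len (neither exists in the random dataset used here):

def greedy_decode(model, src, sos_idx, max_len):
    # Encode the source once, then feed the model's own predictions back step by step
    model.eval()
    with torch.no_grad():
        _, (hidden, cell) = model.encoder(src)
        input = torch.full((src.size(0),), sos_idx, dtype=torch.long, device=src.device)
        tokens = []
        for _ in range(max_len):
            output, (hidden, cell) = model.decoder(input.unsqueeze(1), hidden, cell)
            input = output.argmax(2).squeeze(1)  # greedy choice at each step
            tokens.append(input)
    return torch.stack(tokens, dim=1)  # (batch, max_len)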

Define hyperparameters and model parameters, and initialize the models:


# Hyperparameters
input_length = 10
output_length = 12
vocab_size = 20
size = 1000

dataset = Seq2SeqDataset(input_length, output_length, vocab_size, size)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Model parameters
input_dim = vocab_size
emb_dim = 16
hidden_dim = 32
n_layers = 2
dropout = 0.1
output_dim = vocab_size

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


encoder = Encoder(input_dim, emb_dim, hidden_dim, n_layers, dropout).to(device)
decoder = Decoder(output_dim, emb_dim, hidden_dim, n_layers, dropout).to(device)
model = Seq2Seq(encoder, decoder, device).to(device)

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()
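
Before starting training, it can help to push one batch through the model to confirm the output shape; this is just a quick sanity check, not part of the original script:

src_batch, trg_batch = next(iter(dataloader))
out = model(src_batch.to(device), trg_batch.to(device))
print(out.shape)  # expected: torch.Size([32, output_length, vocab_size]) = [32, 12, 20]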

Training and plotting:


# Initialize lists to store loss values
train_losses = []

# Training loop
n_epochs = 20  # Adjust for more epochs if needed
for epoch in range(n_epochs):
    model.train()
    epoch_loss = 0
    for src, trg in dataloader:
        src, trg = src.to(device), trg.to(device)
        optimizer.zero_grad()

        output = model(src, trg)

        trg = trg[:, 1:]  # Exclude <sos> token
        output_dim = output.shape[-1]
        output = output[:, 1:].reshape(-1, output_dim)
        trg = trg.reshape(-1)

        loss = criterion(output, trg)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(dataloader)
    train_losses.append(avg_loss)
    print(f'Epoch {epoch + 1}, Loss: {avg_loss}')

# Plot the training loss
plt.plot(range(1, n_epochs + 1), train_losses, marker='o')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Curve')
plt.show()
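
Deep LSTMs can suffer from exploding gradients, so if the loss curve spikes, gradient norm clipping is a common remedy. It is not part of the loop above; if added, it would sit between the backward pass and the optimizer step:

loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # cap the global gradient norm
optimizer.step()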

The following code evaluates the model on one batch from the dataloader to obtain a sample output, then plots the source, target, and predicted sequences using a dummy vocabulary for visualization.
def get_sample_output(model, dataloader):
    model.eval()
    src, trg = next(iter(dataloader))
    src, trg = src.to(device), trg.to(device)
    with torch.no_grad():
        output = model(src, trg, teacher_forcing_ratio=0)
    return src, trg, output

# Get sample output
src, trg, output = get_sample_output(model, dataloader)

# Function to convert indices to words (dummy implementation here)
def indices_to_words(indices, vocab):
    return [vocab[idx] for idx in indices]

# Dummy vocab
vocab = {i: str(i) for i in range(vocab_size)}

# Plot sample sequences
def plot_samples(src, trg, output, vocab):
    src = src[0].cpu().numpy()
    trg = trg[0].cpu().numpy()
    output = output[0].cpu().numpy()
    output = output.argmax(axis=-1)

    plt.figure(figsize=(12, 4))
    plt.subplot(1, 3, 1)
    plt.title('Source')
    plt.bar(range(len(src)), src, tick_label=indices_to_words(src, vocab))

    plt.subplot(1, 3, 2)
    plt.title('Target')
    plt.bar(range(len(trg)), trg, tick_label=indices_to_words(trg, vocab))

    plt.subplot(1, 3, 3)
    plt.title('Output')
    plt.bar(range(len(output)), output, tick_label=indices_to_words(output, vocab))

    plt.show()

plot_samples(src, trg, output, vocab)
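
The bar plots only show one sequence, so a token-level accuracy over the whole sampled batch is a useful complement. This reuses the output produced above with teacher forcing disabled (position 0 is skipped because the model never predicts it):

pred = output.argmax(dim=-1)                  # (batch, trg_len)
correct = (pred[:, 1:] == trg[:, 1:]).float()
print(f'Token accuracy on the sampled batch: {correct.mean().item():.3f}')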

TensorFlow
Defining Models and Custom Datasets:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Custom Dataset Class


class Seq2SeqDataset(tf.data.Dataset):
    @staticmethod
    def _generator(num_samples, input_seq_length, output_seq_length, input_dim, output_dim):
        # Random float sequences stand in for real (encoder input, decoder input, decoder target) triples
        for _ in range(num_samples):
            encoder_input = tf.random.uniform((input_seq_length, input_dim))
            decoder_input = tf.random.uniform((output_seq_length, output_dim))
            decoder_output = tf.random.uniform((output_seq_length, output_dim))
            yield (encoder_input, decoder_input), decoder_output

    def __new__(cls, num_samples, input_seq_length, output_seq_length, input_dim, output_dim):
        return tf.data.Dataset.from_generator(
            cls._generator,
            output_signature=(
                (tf.TensorSpec(shape=(input_seq_length, input_dim), dtype=tf.float32),
                 tf.TensorSpec(shape=(output_seq_length, output_dim), dtype=tf.float32)),
                tf.TensorSpec(shape=(output_seq_length, output_dim), dtype=tf.float32)
            ),
            args=(num_samples, input_seq_length, output_seq_length, input_dim, output_dim)
        )

# Encoder Model
class Encoder(tf.keras.Model):
    def __init__(self, num_layers, lstm_units, dropout_rate, input_dim):
        super(Encoder, self).__init__()
        # input_dim is inferred from the data at the first call, so no explicit InputLayer is needed
        self.lstm_layers = [tf.keras.layers.LSTM(lstm_units, return_sequences=True, return_state=True,
                                                 dropout=dropout_rate) for _ in range(num_layers)]

    def call(self, inputs):
        # Pass the sequence through the stacked LSTMs and collect each layer's final (h, c) state
        x = inputs
        states = []
        for lstm in self.lstm_layers:
            x, state_h, state_c = lstm(x)
            states.append([state_h, state_c])
        return states

# Decoder Model
class Decoder(tf.keras.Model):
    def __init__(self, num_layers, lstm_units, dropout_rate, output_dim):
        super(Decoder, self).__init__()
        self.lstm_layers = [tf.keras.layers.LSTM(lstm_units, return_sequences=True, return_state=True,
                                                 dropout=dropout_rate) for _ in range(num_layers)]
        self.output_layer = tf.keras.layers.Dense(output_dim)

    def call(self, inputs, initial_states):
        # Initialise each LSTM layer with the matching encoder state, then project to output_dim
        x = inputs
        states = initial_states
        for i, lstm in enumerate(self.lstm_layers):
            x, state_h, state_c = lstm(x, initial_state=states[i])
            states[i] = [state_h, state_c]
        outputs = self.output_layer(x)
        return outputs, states

# Seq2Seq Model
class Seq2Seq(tf.keras.Model):
    def __init__(self, num_layers, lstm_units, dropout_rate, input_dim, output_dim):
        super(Seq2Seq, self).__init__()
        self.encoder = Encoder(num_layers, lstm_units, dropout_rate, input_dim)
        self.decoder = Decoder(num_layers, lstm_units, dropout_rate, output_dim)

    def call(self, encoder_input, decoder_input):
        # Teacher forcing: the full decoder input sequence is fed in one shot
        encoder_states = self.encoder(encoder_input)
        decoder_output, _ = self.decoder(decoder_input, encoder_states)
        return decoder_output
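
As in the PyTorch version, the model is trained with teacher forcing: the full decoder_input sequence is supplied. To generate predictions without target vectors, the decoder has to be run one step at a time, feeding each output back in. A minimal sketch, assuming a zero vector as a hypothetical start-of-sequence input:

def autoregressive_decode(model, encoder_input, steps, output_dim):
    # Encode once, then feed each prediction back as the next decoder input
    states = model.encoder(encoder_input)
    step_input = tf.zeros([tf.shape(encoder_input)[0], 1, output_dim])
    outputs = []
    for _ in range(steps):
        step_output, states = model.decoder(step_input, states)
        outputs.append(step_output)
        step_input = step_output
    return tf.concat(outputs, axis=1)  # (batch, steps, output_dim)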

Define hyperparameters and model parameters, and initialize the models:


# Parameters for the dataset
num_samples = 1000
input_seq_length = 20
output_seq_length = 20
input_dim = 10
output_dim = 10

dataset = Seq2SeqDataset(num_samples, input_seq_length, output_seq_length, input_dim, output_dim)
dataset = dataset.batch(32).prefetch(tf.data.AUTOTUNE)

# Define the model


num_layers = 3
lstm_units = 256
dropout_rate = 0.5

seq2seq = Seq2Seq(num_layers, lstm_units, dropout_rate, input_dim, output_dim)


optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MeanSquaredError()
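
As with the PyTorch model, one batch can be pushed through once to confirm shapes before training; a quick sanity check, not part of the original script:

(enc_in, dec_in), target = next(iter(dataset))
pred = seq2seq(enc_in, dec_in)
print(pred.shape)  # expected: (32, output_seq_length, output_dim) = (32, 20, 10)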

Training and plotting:


# Custom training step; mean absolute error doubles as a simple accuracy metric
@tf.function
def train_step(seq2seq_model, optimizer, loss_fn, inputs, targets):
    with tf.GradientTape() as tape:
        # training=True so the LSTM dropout is active during the custom training loop
        predictions = seq2seq_model(inputs[0], inputs[1], training=True)
        loss = loss_fn(targets, predictions)

    gradients = tape.gradient(loss, seq2seq_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, seq2seq_model.trainable_variables))

    return loss, predictions

# Training loop
epochs = 10
train_loss_results = []
train_accuracy_results = []

for epoch in range(epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.Mean()

    for (batch, (inputs, targets)) in enumerate(dataset):
        loss, predictions = train_step(seq2seq, optimizer, loss_fn, inputs, targets)
        epoch_loss_avg.update_state(loss)

        # Calculate accuracy as mean absolute error between targets and predictions
        batch_accuracy = tf.reduce_mean(tf.abs(targets - predictions))
        epoch_accuracy.update_state(batch_accuracy)

    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())

    print(f"Epoch {epoch+1}: Loss: {epoch_loss_avg.result()}, Accuracy: {epoch_accuracy.result()}")


# Visualize the results
fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
fig.suptitle('Training Metrics')

axes[0].set_ylabel("Loss")
axes[0].plot(train_loss_results)

axes[1].set_ylabel("Accuracy")
axes[1].set_xlabel("Epoch")
axes[1].plot(train_accuracy_results)
plt.show()
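
To see the trained model decode without teacher forcing, the hypothetical autoregressive_decode helper sketched earlier can be applied to one batch; again an illustrative add-on, not part of the original script:

(encoder_batch, _), _ = next(iter(dataset))
preds = autoregressive_decode(seq2seq, encoder_batch, output_seq_length, output_dim)
print(preds.shape)  # (32, 20, 10)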
