Seq2Seq Encoder Decoder Deep LSTM

PyTorch
Defining Models and Custom Datasets:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim, n_layers, dropout):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        # First stack: bidirectional LSTM over the embedded source tokens
        self.lstm1 = nn.LSTM(emb_dim, hidden_dim, n_layers, dropout=dropout,
                             batch_first=True, bidirectional=True)
        # Second stack: unidirectional LSTM over the concatenated forward/backward
        # features; its final states are what the decoder is initialized with
        self.lstm2 = nn.LSTM(hidden_dim * 2, hidden_dim, n_layers, dropout=dropout,
                             batch_first=True)

    def forward(self, src):
        embedded = self.embedding(src)
        outputs, (hidden, cell) = self.lstm1(embedded)
        outputs, (hidden, cell) = self.lstm2(outputs)
        return outputs, (hidden, cell)

class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hidden_dim, n_layers, dropout):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, n_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, input, hidden, cell):
        # input: (batch, 1) -- a single target token per sequence
        embedded = self.embedding(input)
        output, (hidden, cell) = self.lstm(embedded, (hidden, cell))
        prediction = self.fc(output)
        return prediction, (hidden, cell)

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        batch_size = trg.size(0)
        trg_len = trg.size(1)
        vocab_size = self.decoder.fc.out_features
        outputs = torch.zeros(batch_size, trg_len, vocab_size).to(self.device)

        # encoder_outputs would only be needed for attention; here the decoder
        # is initialized from the encoder's final hidden and cell states
        encoder_outputs, (hidden, cell) = self.encoder(src)

        # The first decoder input is the first target token (acting as <sos>)
        input = trg[:, 0]

        for t in range(1, trg_len):
            output, (hidden, cell) = self.decoder(input.unsqueeze(1), hidden, cell)
            outputs[:, t, :] = output.squeeze(1)

            # Teacher forcing: with the given probability, feed the ground-truth
            # token as the next input instead of the model's own prediction
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            input = trg[:, t] if teacher_force else output.argmax(2).squeeze(1)

        return outputs

class Seq2SeqDataset(Dataset):
    def __init__(self, input_length, output_length, vocab_size, size):
        self.input_length = input_length
        self.output_length = output_length
        self.vocab_size = vocab_size
        self.size = size

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        # Random token sequences; index 0 is left unused (e.g. reserved for padding)
        src = torch.randint(1, self.vocab_size, (self.input_length,))
        trg = torch.randint(1, self.vocab_size, (self.output_length,))
        return src, trg

Define hyperparameters and model parameters, and initialize the models:

# Hyperparameters
input_length = 10
output_length = 12
vocab_size = 20
size = 1000

dataset = Seq2SeqDataset(input_length, output_length, vocab_size, size)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Model parameters
input_dim = vocab_size
emb_dim = 16
hidden_dim = 32
n_layers = 2
dropout = 0.1
output_dim = vocab_size

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

encoder = Encoder(input_dim, emb_dim, hidden_dim, n_layers, dropout).to(device)
decoder = Decoder(output_dim, emb_dim, hidden_dim, n_layers, dropout).to(device)
model = Seq2Seq(encoder, decoder, device).to(device)

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()
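
Before training, a quick forward pass on one batch (a minimal sanity-check sketch, not part of the original listing) confirms the output shape; with real, padded data you would typically also pass ignore_index to CrossEntropyLoss so padding tokens are excluded from the loss:

# Sanity check (sketch): run one dummy batch through the model
src_batch, trg_batch = next(iter(dataloader))
src_batch, trg_batch = src_batch.to(device), trg_batch.to(device)
with torch.no_grad():
    out = model(src_batch, trg_batch)
print(out.shape)  # expected: torch.Size([32, 12, 20]) -> (batch, output_length, vocab_size)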

Training and plotting:

# Initialize lists to store loss values
train_losses = []

# Training loop
n_epochs = 20  # Adjust for more epochs if needed
for epoch in range(n_epochs):
    model.train()
    epoch_loss = 0
    for src, trg in dataloader:
        src, trg = src.to(device), trg.to(device)
        optimizer.zero_grad()

        output = model(src, trg)

        # Drop position 0 (the <sos> step) from both target and output,
        # then flatten for token-level cross-entropy
        trg = trg[:, 1:]
        output_dim = output.shape[-1]
        output = output[:, 1:].reshape(-1, output_dim)
        trg = trg.reshape(-1)

        loss = criterion(output, trg)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(dataloader)
    train_losses.append(avg_loss)
    print(f'Epoch {epoch + 1}, Loss: {avg_loss:.4f}')

# Plot the training loss
plt.plot(range(1, n_epochs + 1), train_losses, marker='o')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Curve')
plt.show()

Evaluation and plotting: the code below evaluates the model on one batch from the dataloader to get a sample output, then plots the source, target, and output sequences using a dummy vocabulary for visualization.
def get_sample_output(model, dataloader):
    model.eval()
    src, trg = next(iter(dataloader))
    src, trg = src.to(device), trg.to(device)
    with torch.no_grad():
        # teacher_forcing_ratio=0: the decoder always feeds back its own predictions
        output = model(src, trg, teacher_forcing_ratio=0)
    return src, trg, output

# Get sample output
src, trg, output = get_sample_output(model, dataloader)

# Function to convert indices to words (dummy implementation here)
def indices_to_words(indices, vocab):
    return [vocab[idx] for idx in indices]

# Dummy vocab: each index maps to its own string representation
vocab = {i: str(i) for i in range(vocab_size)}

# Plot sample sequences
def plot_samples(src, trg, output, vocab):
    # Take the first sample in the batch and convert logits to token indices
    src = src[0].cpu().numpy()
    trg = trg[0].cpu().numpy()
    output = output[0].cpu().numpy()
    output = output.argmax(axis=-1)

    plt.figure(figsize=(12, 4))
    plt.subplot(1, 3, 1)
    plt.title('Source')
    plt.bar(range(len(src)), src, tick_label=indices_to_words(src, vocab))

    plt.subplot(1, 3, 2)
    plt.title('Target')
    plt.bar(range(len(trg)), trg, tick_label=indices_to_words(trg, vocab))

    plt.subplot(1, 3, 3)
    plt.title('Output')
    plt.bar(range(len(output)), output, tick_label=indices_to_words(output, vocab))

    plt.show()

plot_samples(src, trg, output, vocab)
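
Note that get_sample_output still consumes the target sequence: its first token seeds the decoder and its length fixes the number of decoding steps. For target-free inference you would decode greedily from a start token. Below is a minimal sketch, assuming (hypothetically) that token 1 acts as <sos> and that a fixed maximum length is acceptable:

def greedy_decode(model, src, sos_idx=1, max_len=12):
    # Hypothetical helper: decode without access to the target sequence
    model.eval()
    with torch.no_grad():
        _, (hidden, cell) = model.encoder(src)
        # Seed every sequence in the batch with the assumed <sos> token
        input = torch.full((src.size(0),), sos_idx, dtype=torch.long, device=src.device)
        tokens = []
        for _ in range(max_len):
            output, (hidden, cell) = model.decoder(input.unsqueeze(1), hidden, cell)
            input = output.argmax(2).squeeze(1)  # greedy: most likely next token
            tokens.append(input)
    return torch.stack(tokens, dim=1)  # (batch, max_len)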

TensorFlow
Defining Models and Custom Datasets:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Custom Dataset Class
class Seq2SeqDataset(tf.data.Dataset):
    def _generator(num_samples, input_seq_length, output_seq_length, input_dim, output_dim):
        # Yield random continuous sequences: ((encoder input, decoder input), decoder target)
        for _ in range(num_samples):
            encoder_input = tf.random.uniform((input_seq_length, input_dim))
            decoder_input = tf.random.uniform((output_seq_length, output_dim))
            decoder_output = tf.random.uniform((output_seq_length, output_dim))
            yield (encoder_input, decoder_input), decoder_output

    def __new__(cls, num_samples, input_seq_length, output_seq_length, input_dim, output_dim):
        return tf.data.Dataset.from_generator(
            cls._generator,
            output_signature=(
                (tf.TensorSpec(shape=(input_seq_length, input_dim), dtype=tf.float32),
                 tf.TensorSpec(shape=(output_seq_length, output_dim), dtype=tf.float32)),
                tf.TensorSpec(shape=(output_seq_length, output_dim), dtype=tf.float32)
            ),
            args=(num_samples, input_seq_length, output_seq_length, input_dim, output_dim)
        )

# Encoder Model
class Encoder(tf.keras.Model):
    def __init__(self, num_layers, lstm_units, dropout_rate, input_dim):
        super(Encoder, self).__init__()
        # input_dim is kept for interface symmetry but unused: Keras infers
        # input shapes on the first call, so no InputLayer is needed here
        # Stack of LSTMs; each returns its full output sequence plus final (h, c) states
        self.lstm_layers = [tf.keras.layers.LSTM(lstm_units, return_sequences=True,
                                                 return_state=True, dropout=dropout_rate)
                            for _ in range(num_layers)]

    def call(self, inputs):
        x = inputs
        states = []
        for lstm in self.lstm_layers:
            x, state_h, state_c = lstm(x)
            states.append([state_h, state_c])
        return states

# Decoder Model
class Decoder(tf.keras.Model):
    def __init__(self, num_layers, lstm_units, dropout_rate, output_dim):
        super(Decoder, self).__init__()
        self.lstm_layers = [tf.keras.layers.LSTM(lstm_units, return_sequences=True,
                                                 return_state=True, dropout=dropout_rate)
                            for _ in range(num_layers)]
        self.output_layer = tf.keras.layers.Dense(output_dim)

    def call(self, inputs, initial_states):
        x = inputs
        # Copy the list so the caller's state list is not mutated in place
        states = list(initial_states)
        for i, lstm in enumerate(self.lstm_layers):
            # Seed each decoder layer with the matching encoder layer's final states
            x, state_h, state_c = lstm(x, initial_state=states[i])
            states[i] = [state_h, state_c]
        outputs = self.output_layer(x)
        return outputs, states

# Seq2Seq Model
class Seq2Seq(tf.keras.Model):
    def __init__(self, num_layers, lstm_units, dropout_rate, input_dim, output_dim):
        super(Seq2Seq, self).__init__()
        self.encoder = Encoder(num_layers, lstm_units, dropout_rate, input_dim)
        self.decoder = Decoder(num_layers, lstm_units, dropout_rate, output_dim)

    def call(self, encoder_input, decoder_input):
        # Encode the source, then run the full (teacher-forced) decoder input
        # through the decoder seeded with the encoder's final states
        encoder_states = self.encoder(encoder_input)
        decoder_output, _ = self.decoder(decoder_input, encoder_states)
        return decoder_output

Define hyperparameters and model parameters, and initialize the models:

# Parameters for the dataset
num_samples = 1000
input_seq_length = 20
output_seq_length = 20
input_dim = 10
output_dim = 10

dataset = Seq2SeqDataset(num_samples, input_seq_length, output_seq_length, input_dim, output_dim)
dataset = dataset.batch(32).prefetch(tf.data.AUTOTUNE)

# Define the model
num_layers = 3
lstm_units = 256
dropout_rate = 0.5

seq2seq = Seq2Seq(num_layers, lstm_units, dropout_rate, input_dim, output_dim)

optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MeanSquaredError()
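
As in the PyTorch section, a one-batch shape check (a sketch, not part of the original listing) is a cheap way to catch wiring mistakes before training:

# Sanity check (sketch): one batch through the untrained model
(enc_in, dec_in), target = next(iter(dataset))
pred = seq2seq(enc_in, dec_in)
print(pred.shape)  # expected: (32, output_seq_length, output_dim) -> (32, 20, 10)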

Training and plotting:

# Custom training step; also returns predictions so a simple metric can be computed
@tf.function
def train_step(seq2seq_model, optimizer, loss_fn, inputs, targets):
    with tf.GradientTape() as tape:
        predictions = seq2seq_model(inputs[0], inputs[1])
        loss = loss_fn(targets, predictions)
    gradients = tape.gradient(loss, seq2seq_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, seq2seq_model.trainable_variables))
    return loss, predictions

# Training loop
epochs = 10
train_loss_results = []
train_accuracy_results = []

for epoch in range(epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.Mean()

    for batch, (inputs, targets) in enumerate(dataset):
        loss, predictions = train_step(seq2seq, optimizer, loss_fn, inputs, targets)
        epoch_loss_avg.update_state(loss)

        # "Accuracy" here is really the mean absolute error (lower is better),
        # since the toy targets are continuous values rather than class labels
        batch_accuracy = tf.reduce_mean(tf.abs(targets - predictions))
        epoch_accuracy.update_state(batch_accuracy)

    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())

    print(f"Epoch {epoch+1}: Loss: {epoch_loss_avg.result()}, Accuracy: {epoch_accuracy.result()}")

# Visualize the results
fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
fig.suptitle('Training Metrics')

axes[0].set_ylabel("Loss")
axes[0].plot(train_loss_results)

axes[1].set_ylabel("Accuracy (MAE)")
axes[1].set_xlabel("Epoch")
axes[1].plot(train_accuracy_results)
plt.show()
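
Because this toy task regresses random continuous targets, a per-timestep plot can complement the aggregate curves. A short sketch (feature index 0 is an arbitrary choice) comparing one target sequence with the model's prediction:

# Compare one feature of target vs. prediction for a single sample (sketch)
(enc_in, dec_in), target = next(iter(dataset))
pred = seq2seq(enc_in, dec_in)
plt.plot(target[0, :, 0].numpy(), marker='o', label='target (feature 0)')
plt.plot(pred[0, :, 0].numpy(), marker='x', label='prediction (feature 0)')
plt.xlabel('Time step')
plt.legend()
plt.show()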
