Encoder-Decoder Seq2Seq with Deep LSTM
PyTorch
Defining Models and Custom Datasets:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
class Encoder(nn.Module):
    """Two-stage deep LSTM encoder for a seq2seq model.

    Token indices are embedded, passed through a bidirectional LSTM stack,
    and the concatenated forward/backward features are then compressed back
    to ``hidden_dim`` by a second, unidirectional LSTM stack.

    NOTE(review): this extract lost the class's indentation (restored here)
    and its ``forward`` method — only the layer definitions are visible.
    Recover ``forward`` from the original source.
    """

    def __init__(self, input_dim, emb_dim, hidden_dim, n_layers, dropout):
        """
        Args:
            input_dim: source vocabulary size.
            emb_dim: embedding dimension.
            hidden_dim: hidden size of each LSTM.
            n_layers: stacked layers per LSTM module.
            dropout: inter-layer dropout probability (per ``nn.LSTM``
                semantics, only applied when n_layers > 1).
        """
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        # Bidirectional first stage: its output feature size is hidden_dim * 2.
        self.lstm1 = nn.LSTM(emb_dim, hidden_dim, n_layers, dropout=dropout,
                             batch_first=True, bidirectional=True)
        # Second stage consumes the concatenated forward/backward features.
        self.lstm2 = nn.LSTM(hidden_dim * 2, hidden_dim, n_layers,
                             dropout=dropout, batch_first=True)
class Decoder(nn.Module):
    """LSTM decoder that maps embedded target tokens to vocabulary logits.

    NOTE(review): this extract lost the class's indentation (restored here)
    and its ``forward`` method; only the layer definitions are visible.
    Recover ``forward`` from the original source.
    """

    def __init__(self, output_dim, emb_dim, hidden_dim, n_layers, dropout):
        """
        Args:
            output_dim: target vocabulary size (also the logit dimension).
            emb_dim: embedding dimension for target tokens.
            hidden_dim: hidden size of the LSTM.
            n_layers: number of stacked LSTM layers.
            dropout: inter-layer dropout probability.
        """
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, n_layers,
                            dropout=dropout, batch_first=True)
        # Projects each hidden state to a distribution over the vocabulary.
        self.fc = nn.Linear(hidden_dim, output_dim)
# Wrapper that wires an Encoder and a Decoder together for seq2seq training.
# NOTE(review): indentation was stripped from this snippet and the body of
# the `forward` method is missing — only two orphaned lines of it survive
# below (`input = trg[:, 0]` and `return outputs`). It is called later as
# `model(src, trg, teacher_forcing_ratio=0)`, so `forward` presumably
# implemented the standard teacher-forcing decode loop — recover it from
# the original source rather than guessing here.
class Seq2Seq(nn.Module):
def __init__(self, encoder, decoder, device):
super(Seq2Seq, self).__init__()
self.encoder = encoder  # encoder module (Encoder defined above)
self.decoder = decoder  # decoder module (Decoder defined above)
self.device = device  # device the model/tensors are expected to live on
# --- orphaned fragments of the lost forward() method ---
input = trg[:, 0]  # first decoder input: the <sos> column of the targets
return outputs  # per-step vocabulary logits accumulated by the lost loop
class Seq2SeqDataset(Dataset):
    """Synthetic dataset of (source, target) index sequences.

    NOTE(review): this extract lost the class's indentation (restored here)
    and its ``__getitem__`` method — presumably it generated random index
    tensors of the configured lengths; recover it from the original source.
    """

    def __init__(self, input_length, output_length, vocab_size, size):
        """
        Args:
            input_length: length of each source sequence.
            output_length: length of each target sequence.
            vocab_size: number of distinct token ids.
            size: number of samples the dataset exposes.
        """
        self.input_length = input_length
        self.output_length = output_length
        self.vocab_size = vocab_size
        self.size = size

    def __len__(self):
        """Return the fixed number of samples in the dataset."""
        return self.size
# ----- Model hyperparameters -----
# Both ends of the model share the vocabulary size.
# (`vocab_size` and `model` are defined elsewhere in the original script.)
input_dim = vocab_size
output_dim = vocab_size

# Embedding / LSTM sizes.
emb_dim = 16
hidden_dim = 32
n_layers = 2
dropout = 0.1

# Optimiser over all model parameters, cross-entropy over vocabulary logits.
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()
# Training loop
# NOTE(review): indentation was stripped from this extract and the core of
# the inner loop is missing — there is no forward pass, no
# `loss = criterion(...)`, no `loss.backward()` and no `optimizer.step()`,
# yet `loss.item()` is used below. Recover the lost lines from the
# original source.
n_epochs =20 # number of passes over the dataset; adjust as needed
for epoch in range(n_epochs):
model.train()  # enable dropout for training
epoch_loss = 0  # running sum of per-batch losses for this epoch
for src, trg in dataloader:
src, trg = src.to(device), trg.to(device)  # move batch to the training device
optimizer.zero_grad()  # reset gradients before the (missing) backward pass
epoch_loss += loss.item()  # `loss` is undefined here — its computation was lost
Evaluates the model on a batch of data from the dataloader to get a sample output, then plots the source, target,
and output sequences using a dummy vocabulary for visualization.
def get_sample_output(model, dataloader):
    """Run the model on one batch without teacher forcing.

    Fetches a single ``(src, trg)`` batch from ``dataloader``, moves it to
    the module-level ``device`` (defined elsewhere in the original script),
    and decodes with ``teacher_forcing_ratio=0`` so the output reflects the
    model's own predictions rather than the ground truth.

    NOTE(review): the original extraction lost this function's indentation;
    structure restored here.

    Returns:
        Tuple ``(src, trg, output)`` of tensors on ``device``.
    """
    model.eval()  # disable dropout for deterministic evaluation
    src, trg = next(iter(dataloader))
    src, trg = src.to(device), trg.to(device)
    with torch.no_grad():  # no gradients needed for inspection
        output = model(src, trg, teacher_forcing_ratio=0)
    return src, trg, output
# Dummy vocab: map each token index to its string form for tick labels.
vocab = {i: str(i) for i in range(vocab_size)}

# Plot the source, target and model-output sequences side by side.
# (`src`, `trg`, `output` and `indices_to_words` come from earlier in the
# original script.)
plt.figure(figsize=(12, 4))
panels = [('Source', src), ('Target', trg), ('Output', output)]
for position, (title, sequence) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.title(title)
    plt.bar(range(len(sequence)), sequence,
            tick_label=indices_to_words(sequence, vocab))
plt.show()
TensorFlow
Defining Models and Custom Datasets:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
# Encoder Model
class Encoder(tf.keras.Model):
    """Stacked-LSTM encoder (TensorFlow version).

    NOTE(review): this extract lost the class's indentation (restored here)
    and its ``call`` method — only the layer definitions are visible. Also
    note it reuses the name ``Encoder`` from the PyTorch section; if both
    sections live in one module the later definition shadows the earlier.
    """

    def __init__(self, num_layers, lstm_units, dropout_rate, input_dim):
        """
        Args:
            num_layers: number of stacked LSTM layers.
            lstm_units: hidden units per LSTM layer.
            dropout_rate: input dropout applied inside each LSTM.
            input_dim: feature dimension of the input sequences.
        """
        super(Encoder, self).__init__()
        # Each layer returns the full sequence plus its final states so the
        # states can seed the decoder.
        self.lstm_layers = [tf.keras.layers.LSTM(lstm_units,
                                                 return_sequences=True,
                                                 return_state=True,
                                                 dropout=dropout_rate)
                            for _ in range(num_layers)]
        # NOTE(review): `input_shape=` on InputLayer is deprecated in Keras 3
        # (use `shape=`); kept as-is to match the original.
        self.input_layer = tf.keras.layers.InputLayer(input_shape=(None, input_dim))
# Decoder Model
class Decoder(tf.keras.Model):
    """Stacked-LSTM decoder with a dense projection to output logits (TF).

    NOTE(review): this extract lost the class's indentation (restored here)
    and its ``call`` method — only the layer definitions are visible. It
    also shadows the PyTorch ``Decoder`` if both sections share a module.
    """

    def __init__(self, num_layers, lstm_units, dropout_rate, output_dim):
        """
        Args:
            num_layers: number of stacked LSTM layers.
            lstm_units: hidden units per LSTM layer.
            dropout_rate: input dropout applied inside each LSTM.
            output_dim: dimensionality of the per-step output projection.
        """
        super(Decoder, self).__init__()
        self.lstm_layers = [tf.keras.layers.LSTM(lstm_units,
                                                 return_sequences=True,
                                                 return_state=True,
                                                 dropout=dropout_rate)
                            for _ in range(num_layers)]
        # Per-timestep projection to the output dimension.
        self.output_layer = tf.keras.layers.Dense(output_dim)
        # NOTE(review): `input_shape=` on InputLayer is deprecated in Keras 3
        # (use `shape=`); kept as-is to match the original.
        self.input_layer = tf.keras.layers.InputLayer(input_shape=(None, output_dim))
# Seq2Seq Model
class Seq2Seq(tf.keras.Model):
    """Encoder-decoder wrapper (TensorFlow version).

    NOTE(review): this extract lost the class's indentation (restored here)
    and its ``call`` method; only the constructor is visible. Recover
    ``call`` from the original source.
    """

    def __init__(self, num_layers, lstm_units, dropout_rate, input_dim, output_dim):
        """
        Args:
            num_layers: stacked LSTM layers in both encoder and decoder.
            lstm_units: hidden units per LSTM layer.
            dropout_rate: dropout used inside each LSTM.
            input_dim: encoder input feature dimension.
            output_dim: decoder output dimension.
        """
        super(Seq2Seq, self).__init__()
        self.encoder = Encoder(num_layers, lstm_units, dropout_rate, input_dim)
        self.decoder = Decoder(num_layers, lstm_units, dropout_rate, output_dim)
# Training loop
# NOTE(review): the body of the epoch loop was lost in extraction — the
# metric objects `epoch_loss_avg` / `epoch_accuracy` used below are never
# created or updated here. Recover the loop from the original source.
epochs = 10
train_loss_results = []  # one averaged loss value per epoch
train_accuracy_results = []  # one accuracy value per epoch
# Orphaned lines: these appends belong inside the missing per-epoch loop.
train_loss_results.append(epoch_loss_avg.result())
train_accuracy_results.append(epoch_accuracy.result())
# Plot the loss and accuracy curves recorded during training.
# (`axes` — a pair of subplot axes — is created earlier in the original
# script; its creation did not survive extraction.)
curves = [("Loss", train_loss_results), ("Accuracy", train_accuracy_results)]
for axis, (label, history) in zip(axes, curves):
    axis.set_ylabel(label)
    axis.plot(history)
axes[1].set_xlabel("Epoch")
plt.show()