0% found this document useful (0 votes)
13 views3 pages

RNN Text Generation

Rnn

Uploaded by

kishoranbu14
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
13 views3 pages

RNN Text Generation

Rnn

Uploaded by

kishoranbu14
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 3

RNN Language Model for Text Generation

Python Code:

# Step 1: Preprocessing Text


def preprocess_text(text):
    """Normalize *text* into a list of lowercase word tokens.

    Strips every character that is not alphanumeric or whitespace,
    lowercases the result, and splits on whitespace.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return cleaned.lower().split()

# Sample text (use a larger corpus for better results)
corpus = """
Joseph Vijay Chandrasekhar (born 22 June 1974), known professionally as Vijay, is an
Indian actor and playback singer who works in Tamil cinema. ...
"""

# Tokenize the corpus and build the two word<->index lookup tables.
words = preprocess_text(corpus)
vocab = sorted(set(words))
word_to_idx = {}
idx_to_word = {}
for position, token in enumerate(vocab):
    word_to_idx[token] = position
    idx_to_word[position] = token

# Represent the whole corpus as a sequence of vocabulary indices.
encoded = [word_to_idx[token] for token in words]

# Step 2: Dataset Preparation


# Size of the sliding context window: the model sees this many words
# and is trained to predict the one that follows.
sequence_length = 5 # Number of words in input sequence

class TextDataset(Dataset):
    """Sliding-window next-word dataset.

    Every item pairs a window of ``sequence_length`` consecutive word
    indices with the index of the word that immediately follows it.
    """

    def __init__(self, encoded_text, sequence_length):
        # Precompute all (context, target) pairs up front.
        self.data = [
            (encoded_text[start:start + sequence_length],
             encoded_text[start + sequence_length])
            for start in range(len(encoded_text) - sequence_length)
        ]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        context, target = self.data[idx]
        return torch.tensor(context), torch.tensor(target)
# Materialize all (context, next-word) pairs and wrap them in shuffled
# mini-batches of 16 for training.
dataset = TextDataset(encoded, sequence_length)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

# Step 3: Define the RNN Model


class RNNLanguageModel(nn.Module):
    """Word-level RNN language model: embedding -> vanilla RNN -> linear head.

    Given a (batch, seq_len) tensor of word indices, ``forward`` returns
    logits over the vocabulary for the word following the sequence, plus
    the updated hidden state.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(RNNLanguageModel, self).__init__()
        # Bug fix: remember hidden_dim on the instance. init_hidden()
        # previously read the module-level global `hidden_dim`, which
        # breaks if the model is built with a different size (or before
        # that global exists).
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        """Return (logits, new_hidden) for input indices x of shape (batch, seq_len)."""
        embedded = self.embedding(x)              # (batch, seq_len, embedding_dim)
        out, hidden = self.rnn(embedded, hidden)  # (batch, seq_len, hidden_dim)
        out = self.fc(out[:, -1, :])              # logits from the last time step only
        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero hidden state shaped (num_layers=1, batch, hidden_dim)."""
        return torch.zeros(1, batch_size, self.hidden_dim)

# Hyperparameters
embedding_dim = 64
hidden_dim = 128
learning_rate = 0.01
epochs = 20
# Bug fix: `vocab_size` was used below but never defined anywhere in the
# script (NameError at runtime). Derive it from the vocabulary built above.
vocab_size = len(vocab)

# Initialize model, loss criterion, and optimizer.
model = RNNLanguageModel(vocab_size, embedding_dim, hidden_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 4: Training the Model


# Train for `epochs` full passes over the dataloader, reporting the mean
# batch loss after each epoch.
for epoch in range(epochs):
    total_loss = 0
    for inputs, targets in dataloader:
        # Fresh zero hidden state sized to this mini-batch.
        hidden = model.init_hidden(inputs.size(0))

        # Forward: logits for the word following each context window.
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets)

        # Backward + parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(dataloader):.4f}")

# Step 5: Text Generation


def generate_text(model, start_sequence, num_words):
    """Greedily extend *start_sequence* by *num_words* predicted words.

    The seed is tokenized with the same preprocessing as training data,
    fed through the model once, and then each argmax prediction is fed
    back in one token at a time while the hidden state is carried along.
    Raises KeyError if a seed word is not in the training vocabulary.
    """
    model.eval()
    seed_indices = [word_to_idx[w] for w in preprocess_text(start_sequence)]
    generated = start_sequence

    hidden = model.init_hidden(1)
    current = torch.tensor(seed_indices).unsqueeze(0)  # (1, seq_len) batch

    for _ in range(num_words):
        with torch.no_grad():
            logits, hidden = model(current, hidden)
        best_idx = torch.argmax(logits, dim=1).item()

        generated += " " + idx_to_word[best_idx]
        current = torch.tensor([[best_idx]])  # feed the prediction back in

    return generated

# Generate a 20-word continuation from the seed word and print it.
start_sequence = "Vijay"
num_words = 20
generated_text = generate_text(model, start_sequence, num_words)
print("\nGenerated Text:", generated_text, sep="\n")

You might also like