EX 7: Machine Translation using Encoder-Decoder model

This exercise implements machine translation with an Encoder-Decoder (sequence-to-sequence) model in TensorFlow/Keras. The program below defines the hyperparameters, a small sample vocabulary, dummy training data, the model architecture, the training step, and functions for encoding and decoding sentences; it demonstrates translating the input sentence "I am learning" into French as "J'apprends".

Program:

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
import numpy as np

# ------------------- HYPERPARAMETERS -------------------
latent_dim = 256             # Size of the LSTM hidden state
num_encoder_tokens = 5       # Small vocab for demo purposes
num_decoder_tokens = 5
max_encoder_seq_length = 5   # Small sequence length
max_decoder_seq_length = 5

# ------------------- SAMPLE VOCABULARY -------------------
input_token_index = {'I': 0, 'am': 1, 'learning': 2, '<PAD>': 3}
target_token_index = {'<START>': 0, "J'apprends": 1, '<END>': 2, '<PAD>': 3}
reverse_target_token_index = {0: '<START>', 1: "J'apprends", 2: '<END>', 3: '<PAD>'}

# ------------------- DUMMY DATA (Simple Training Example) -------------------
encoder_input_data = np.zeros((1, max_encoder_seq_length, num_encoder_tokens))
decoder_input_data = np.zeros((1, max_decoder_seq_length, num_decoder_tokens))
decoder_target_data = np.zeros((1, max_decoder_seq_length, num_decoder_tokens))

# Example input: "I am learning"
encoder_input_data[0, 0, 0] = 1   # "I"
encoder_input_data[0, 1, 1] = 1   # "am"
encoder_input_data[0, 2, 2] = 1   # "learning"

# Example output: "<START> J'apprends <END>"
decoder_input_data[0, 0, 0] = 1    # "<START>"
decoder_target_data[0, 0, 1] = 1   # "J'apprends"
decoder_target_data[0, 1, 2] = 1   # "<END>"
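Note: the decoder input and target above follow the teacher-forcing convention: during training the decoder is fed the target sequence starting from <START>, while the training target is the same sequence shifted one step earlier. A minimal sketch (not part of the program above, reusing target_token_index) of how a longer target pair would be vectorised:

# Sketch only: teacher-forcing vectorisation for a hypothetical target sequence
target_tokens = [0, 1, 2]   # "<START>", "J'apprends", "<END>" from target_token_index
for t, tok in enumerate(target_tokens):
    decoder_input_data[0, t, tok] = 1.0           # decoder input starts at <START>
    if t > 0:
        decoder_target_data[0, t - 1, tok] = 1.0  # targets are the same tokens, one step ahead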

# ------------------- MODEL ARCHITECTURE -------------------
# Encoder
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder_lstm = LSTM(latent_dim, return_state=True)
_, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Full training model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
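Optionally (not in the original listing), the assembled graph can be inspected before training; the summary lists the two Input layers, the encoder LSTM, the decoder LSTM and the softmax Dense output layer with their shapes and parameter counts.

model.summary()   # optional: print layer shapes and parameter counts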

# ------------------- TRAINING -------------------
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=1, epochs=10)
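If the trained weights are needed in a later session (optional; the file name below is only an illustration), recent Keras versions allow saving and reloading the full model:

model.save('seq2seq_demo.keras')                          # hypothetical file name
model = tf.keras.models.load_model('seq2seq_demo.keras')

The inference models below are built from the same in-memory layers, so saving is only required when the weights must survive across runs.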

# ------------------- INFERENCE MODE -------------------
# Encoder model for inference
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder model for inference
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_outputs] + decoder_states)

# ------------------- ENCODING FUNCTION -------------------
def encode_input_sentence(sentence):
    """Encodes an input sentence into a padded sequence of one-hot tokens."""
    tokens = [input_token_index.get(word, 3) for word in sentence.split()]  # Default to <PAD>
    encoder_input = np.zeros((1, max_encoder_seq_length, num_encoder_tokens))
    for t, token in enumerate(tokens):
        encoder_input[0, t, token] = 1.0
    return encoder_input
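A quick sanity check (not part of the original program): the array returned for the sample sentence has shape (1, max_encoder_seq_length, num_encoder_tokens), and its first three time steps are one-hot vectors for "I", "am" and "learning".

vec = encode_input_sentence("I am learning")
print(vec.shape)                    # (1, 5, 5)
print(vec[0, :3].argmax(axis=-1))   # [0 1 2]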

# ------------------- DECODING FUNCTION -------------------
def decode_sequence(input_seq):
    """Generates the output sentence word-by-word using the decoder."""
    states_value = encoder_model.predict(input_seq)

    # Start with the <START> token
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    target_seq[0, 0, target_token_index['<START>']] = 1.0

    decoded_sentence = ''
    stop_condition = False
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word = reverse_target_token_index.get(sampled_token_index, '<UNK>')

        if sampled_word == '<END>' or len(decoded_sentence.split()) > max_decoder_seq_length:
            stop_condition = True
        else:
            decoded_sentence += ' ' + sampled_word

        # Update the target sequence and states
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.0
        states_value = [h, c]

    return decoded_sentence.strip()

# ------------------- TEST TRANSLATION -------------------
input_sentence = "I am learning"
input_seq = encode_input_sentence(input_sentence)
translated_sentence = decode_sequence(input_seq)
print(f"Input: {input_sentence}")
print(f"Translated: {translated_sentence}")

Output:
