Prac 5
(3).
import numpy as np
import re
(4).
data = """Deep learning (also known as deep structured learning) is part of a
broader family of machine learning methods based on artificial
neural networks with representation learning. Learning can be supervised, semi-
supervised or unsupervised. Deep-learning
architectures such as deep neural networks, deep belief networks, deep
reinforcement learning, recurrent neural networks,
convolutional neural networks and Transformers have been applied to fields
including computer vision, speech recognition, natural
language processing, machine translation, bioinformatics, drug design, medical
image analysis, climate science, material inspection
and board game programs, where they have produced results comparable to and in some
cases surpassing human expert performance"""
(5).
data
(6).
sentences = data.split('.')
sentences
(7).
clean_sent = []
for sentence in sentences:
    if sentence == "":
        continue
    sentence = re.sub('[^A-Za-z0-9]+', ' ', sentence)              # keep letters and digits only
    sentence = re.sub(r'(?:^| )\w(?:$| )', ' ', sentence).strip()  # drop single-character words
    sentence = sentence.lower()
    clean_sent.append(sentence)
clean_sent
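To see what the two substitutions actually do, here is a quick check on a made-up sample string (not from the paragraph above):

# Quick check of the two cleaning regexes on a sample string:
sample = "Deep-learning is a sub-field of ML."
step1 = re.sub('[^A-Za-z0-9]+', ' ', sample)             # "Deep learning is a sub field of ML "
step2 = re.sub(r'(?:^| )\w(?:$| )', ' ', step1).strip()  # "Deep learning is sub field of ML"
print(step2.lower())                                     # "deep learning is sub field of ml"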
(8).
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(clean_sent)
sequences = tokenizer.texts_to_sequences(clean_sent)
print(sequences)
(9).
index_to_word = {}
word_to_index = {}
(10).
for i, sequence in enumerate(sequences):
    words_in_sentence = clean_sent[i].split()
    for j, value in enumerate(sequence):
        # token j of the sequence corresponds to word j of the sentence
        index_to_word[value] = words_in_sentence[j]
        word_to_index[words_in_sentence[j]] = value
print(index_to_word, "\n")
print(word_to_index)
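Note that the fitted Tokenizer already exposes both mappings directly, so the loop above is mainly illustrative; a quick comparison, assuming the standard tf.keras Tokenizer attributes:

# The tokenizer keeps the same two mappings internally:
print(list(tokenizer.word_index.items())[:5])  # word -> id
print(list(tokenizer.index_word.items())[:5])  # id -> word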
(11).
vocab_size = len(tokenizer.word_index) + 1
emb_size = 10
context_size = 2

contexts = []
targets = []
for sequence in sequences:
    for i in range(context_size, len(sequence) - context_size):
        target = sequence[i]
        # two words on each side of the target form the CBOW context
        context = [sequence[i - 2], sequence[i - 1], sequence[i + 1], sequence[i + 2]]
        contexts.append(context)
        targets.append(target)
print(contexts, "\n")
print(targets)
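The offsets above are hard-coded for context_size = 2; an equivalent sketch that respects context_size directly, producing identical pairs for this setting:

contexts2, targets2 = [], []
for sequence in sequences:
    for i in range(context_size, len(sequence) - context_size):
        # context_size words on each side of position i, excluding i itself
        window = sequence[i - context_size:i] + sequence[i + 1:i + context_size + 1]
        contexts2.append(window)
        targets2.append(sequence[i])
assert contexts2 == contexts and targets2 == targets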
(12).
# print a few (context words -> target word) training pairs
for i in range(5):
    words = []
    target = index_to_word.get(targets[i])
    for j in contexts[i]:
        words.append(index_to_word.get(j))
    print(words, " -> ", target)
(13).
# Convert the contexts and targets to numpy arrays
X = np.array(contexts)
Y = np.array(targets)
(14).
# print(X)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Lambda

model = Sequential([
    # map each of the 2*context_size context-word ids to a dense vector
    Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=2 * context_size),
    # CBOW: average the context embeddings into a single vector
    Lambda(lambda x: tf.reduce_mean(x, axis=1)),
    Dense(256, activation='relu'),
    Dense(512, activation='relu'),
    # predict a distribution over the vocabulary for the centre word
    Dense(vocab_size, activation='softmax')
])
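As a quick sanity check of the shape flow, a batch of B context windows goes (B, 4) -> Embedding -> (B, 4, 10) -> mean -> (B, 10) -> ... -> (B, vocab_size); on TF 2.x, where input_length fixes the input shape, the summary can be printed directly:

model.summary()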
(15).
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(X, Y, epochs=80)
(16).
import seaborn as sns
# plot the loss and accuracy curves recorded during training
sns.lineplot(data=history.history)
(17).
from sklearn.decomposition import PCA
embeddings = model.get_weights()[0]
pca = PCA(n_components=2)
reduced_embeddings = pca.fit_transform(embeddings)
print("'Deep learning (also known as deep structured learning) is part of a broader
family of machine learning methods based on artifici'")
(18).
# test the model: predict the missing centre word for a few contexts
# taken from the paragraph above (each context has 2*context_size words)
test_sentences = [
    "known as structured learning",
    "where they produced results",
    "cases surpassing expert performance"
]
for sent in test_sentences:
    test_words = sent.split(" ")
    x_test = []
    for word in test_words:
        x_test.append(word_to_index.get(word))
    x_test = np.array([x_test])
    pred = model.predict(x_test)
    pred = np.argmax(pred[0])
    print("pred ", test_words, "\n=", index_to_word.get(pred), "\n\n")