Course 3 - Week 2 - Exercise - Answer - Ipynb - Colaboratory
import csv
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/bbc-text.csv \
    -O /tmp/bbc-text.csv
vocab_size = 1000
embedding_dim = 16
max_length = 120
trunc_type='post'
padding_type='post'
oov_tok = "<OOV>"
training_portion = .8
sentences = []
labels = []
stopwords = [ "a", "about", "above", "after", "again", "against", "all", "am", "an", "and",
print(len(stopwords))
# Expected Output
# 153
153
print(len(labels))
print(len(sentences))
print(sentences[0])
# Expected Output
# 2225
# 2225
# tv future hands viewers home theatre systems plasma high-definition tvs digital video r
2225
2225
tv future hands viewers home theatre systems plasma high-definition tvs digital vid
train_size = int(len(sentences) * training_portion)

train_sentences = sentences[:train_size]
train_labels = labels[:train_size]
validation_sentences = sentences[train_size:]
validation_labels = labels[train_size:]
print(train_size)
print(len(train_sentences))
print(len(train_labels))
print(len(validation_sentences))
print(len(validation_labels))
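The cell that creates the Tokenizer is missing from this export; reconstructed from the vocab_size and oov_tok hyperparameters above and the word_index referenced further down:

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_sentences)
word_index = tokenizer.word_index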
train_sequences = tokenizer.texts_to_sequences(train_sentences)
train_padded = pad_sequences(train_sequences, padding=padding_type, maxlen=max_length)
print(len(train_sequences[0]))
print(len(train_padded[0]))
print(len(train_sequences[1]))
print(len(train_padded[1]))
print(len(train_sequences[10]))
print(len(train_padded[10]))
# Expected Output
# 449
# 120
# 200
# 120
# 192
# 120
449
120
200
120
192
120
validation_sequences = tokenizer.texts_to_sequences(validation_sentences)
validation_padded = pad_sequences(validation_sequences, padding=padding_type, maxlen=max_length)
print(len(validation_sequences))
print(validation_padded.shape)
# Expected output
# 445
# (445, 120)
445
(445, 120)
label_tokenizer = Tokenizer()
label_tokenizer.fit_on_texts(labels)
training_label_seq = np.array(label_tokenizer.texts_to_sequences(train_labels))
validation_label_seq = np.array(label_tokenizer.texts_to_sequences(validation_labels))
print(training_label_seq[0])
print(training_label_seq[1])
print(training_label_seq[2])
print(training_label_seq.shape)
print(validation_label_seq[0])
print(validation_label_seq[1])
print(validation_label_seq[2])
print(validation_label_seq.shape)
# Expected output
# [4]
# [2]
# [1]
# (1780, 1)
# [5]
# [4]
# [3]
# (445, 1)
[4]
[2]
[1]
(1780, 1)
[5]
[4]
[3]
(445, 1)
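The label indices start at 1 because the Tokenizer reserves index 0, which is why the classifier below needs 6 output units for 5 categories. Inspecting the mapping makes this concrete; the exact ordering depends on label frequency in the dataset, so the mapping shown here is illustrative:

print(label_tokenizer.word_index)
# e.g. {'sport': 1, 'business': 2, 'politics': 3, 'tech': 4, 'entertainment': 5}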
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(6, activation='softmax')  # 6 units: label indices 1-5 plus the unused 0
])
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
# Expected Output
# Layer (type)                 Output Shape              Param #
# =================================================================
# embedding (Embedding)        (None, 120, 16)           16000
# _________________________________________________________________
# global_average_pooling1d (Gl (None, 16)                0
# _________________________________________________________________
# dense (Dense)                (None, 24)                408
# _________________________________________________________________
# dense_1 (Dense)              (None, 6)                 150
# =================================================================
# Total params: 16,558
# Trainable params: 16,558
# Non-trainable params: 0
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 120, 16) 16000
_________________________________________________________________
global_average_pooling1d (Gl (None, 16) 0
_________________________________________________________________
dense (Dense) (None, 24) 408
_________________________________________________________________
dense_1 (Dense) (None, 6) 150
=================================================================
num_epochs
Total = 30
params: 16,558
Trainable
history params: 16,558
= model.fit(train_padded, training_label_seq, epochs=num_epochs, validation_data=(v
Non-trainable params: 0
_________________________________________________________________
Epoch 1/30
56/56 - 0s - loss: 1.7632 - accuracy: 0.2315 - val_loss: 1.7279 - val_accuracy: 0.229
Epoch 2/30
56/56 - 0s - loss: 1.6797 - accuracy: 0.2326 - val_loss: 1.6301 - val_accuracy: 0.269
Epoch 3/30
56/56 - 0s - loss: 1.5684 - accuracy: 0.4124 - val_loss: 1.5137 - val_accuracy: 0.429
Epoch 4/30
56/56 - 0s - loss: 1.4253 - accuracy: 0.4809 - val_loss: 1.3512 - val_accuracy: 0.525
Epoch 5/30
56/56 - 0s - loss: 1.2274 - accuracy: 0.5961 - val_loss: 1.1528 - val_accuracy: 0.671
Epoch 6/30
56/56 - 0s - loss: 1.0137 - accuracy: 0.7511 - val_loss: 0.9600 - val_accuracy: 0.813
Epoch 7/30
56/56 - 0s - loss: 0.8240 - accuracy: 0.8579 - val_loss: 0.8014 - val_accuracy: 0.860
Epoch 8/30
56/56 - 0s - loss: 0.6696 - accuracy: 0.9107 - val_loss: 0.6733 - val_accuracy: 0.885
Epoch 9/30
56/56 - 0s - loss: 0.5459 - accuracy: 0.9281 - val_loss: 0.5711 - val_accuracy: 0.901
Epoch 10/30
56/56 - 0s - loss: 0.4440 - accuracy: 0.9438 - val_loss: 0.4865 - val_accuracy: 0.921
Epoch 11/30
56/56 - 0s - loss: 0.3640 - accuracy: 0.9567 - val_loss: 0.4199 - val_accuracy: 0.921
Epoch 12/30
56/56 - 0s - loss: 0.3000 - accuracy: 0.9596 - val_loss: 0.3700 - val_accuracy: 0.921
Epoch 13/30
56/56 - 0s - loss: 0.2512 - accuracy: 0.9691 - val_loss: 0.3320 - val_accuracy: 0.923
Epoch 14/30
56/56 - 0s - loss: 0.2149 - accuracy: 0.9725 - val_loss: 0.3016 - val_accuracy: 0.928
Epoch 15/30
56/56 - 0s - loss: 0.1848 - accuracy: 0.9747 - val_loss: 0.2825 - val_accuracy: 0.928
Epoch 16/30
56/56 - 0s - loss: 0.1620 - accuracy: 0.9781 - val_loss: 0.2639 - val_accuracy: 0.932
Epoch 17/30
56/56 - 0s - loss: 0.1425 - accuracy: 0.9815 - val_loss: 0.2504 - val_accuracy: 0.934
Epoch 18/30
56/56 - 0s - loss: 0.1274 - accuracy: 0.9815 - val_loss: 0.2400 - val_accuracy: 0.932
Epoch 19/30
56/56 - 0s - loss: 0.1130 - accuracy: 0.9848 - val_loss: 0.2305 - val_accuracy: 0.932
Epoch 20/30
56/56 - 0s - loss: 0.1015 - accuracy: 0.9871 - val_loss: 0.2225 - val_accuracy: 0.932
Epoch 21/30
56/56 - 0s - loss: 0.0916 - accuracy: 0.9865 - val_loss: 0.2196 - val_accuracy: 0.934
Epoch 22/30
56/56 - 0s - loss: 0.0827 - accuracy: 0.9899 - val_loss: 0.2122 - val_accuracy: 0.934
Epoch 23/30
56/56 - 0s - loss: 0.0741 - accuracy: 0.9916 - val_loss: 0.2092 - val_accuracy: 0.932
Epoch 24/30
56/56 - 0s - loss: 0.0678 - accuracy: 0.9921 - val_loss: 0.2040 - val_accuracy: 0.934
Epoch 25/30
56/56 - 0s - loss: 0.0610 - accuracy: 0.9955 - val_loss: 0.2015 - val_accuracy: 0.937
Epoch 26/30
56/56 - 0s - loss: 0.0555 - accuracy: 0.9961 - val_loss: 0.1992 - val_accuracy: 0.939
Epoch 27/30
56/56 - 0s - loss: 0.0504 - accuracy: 0.9966 - val_loss: 0.1952 - val_accuracy: 0.941
Epoch 28/30
56/56 - 0s - loss: 0.0460 - accuracy: 0.9983 - val_loss: 0.1953 - val_accuracy: 0.939
Epoch 29/30
56/56 - 0s - loss: 0.0419 - accuracy: 0.9994 - val_loss: 0.1911 - val_accuracy: 0.948
Epoch 30/30
56/56 - 0s - loss: 0.0385 - accuracy: 0.9994 - val_loss: 0.1905 - val_accuracy: 0.939

import matplotlib.pyplot as plt

def plot_graphs(history, string):
    plt.plot(history.history[string])
    plt.plot(history.history['val_'+string])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, 'val_'+string])
    plt.show()

plot_graphs(history, "accuracy")
plot_graphs(history, "loss")
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
def decode_sentence(text):
return ' '.join([reverse_word_index.get(i, '?') for i in text])
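decode_sentence isn't exercised anywhere in this export; a quick sanity check, assuming the train_padded array built above:

print(decode_sentence(train_padded[0]))  # padding ids (0) have no dictionary entry and render as '?'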
e = model.layers[0]
weights = e.get_weights()[0]
print(weights.shape) # shape: (vocab_size, embedding_dim)
# Expected output
# (1000, 16)
(1000, 16)
import io
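# The cell that writes vecs.tsv and meta.tsv appears to be missing from this
# export; reconstructed here so the downloads below have files to fetch.
# These two files can be loaded into the TensorFlow Embedding Projector.
out_v = io.open('vecs.tsv', 'w', encoding='utf-8')
out_m = io.open('meta.tsv', 'w', encoding='utf-8')
for word_num in range(1, vocab_size):  # index 0 is reserved for padding
    word = reverse_word_index[word_num]
    embeddings = weights[word_num]
    out_m.write(word + "\n")                                     # one word per line
    out_v.write('\t'.join([str(x) for x in embeddings]) + "\n")  # tab-separated 16-dim vector
out_v.close()
out_m.close()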
try:
from google.colab import files
except ImportError:
pass
else:
files.download('vecs.tsv')
files.download('meta.tsv')