lip_read(Deep Learning).ipynb - Colab
import os
import cv2
import gdown
import tensorflow as tf
import numpy as np
from typing import List
from matplotlib import pyplot as plt
import imageio
%%capture
# Download the zip file containing the data from Google Drive using its ID
url = 'https://drive.google.com/uc?id=1YlvpDLix3S-U8fd-gqRwPcWXAXm8JwjL'
output = 'data.zip'
# Use the gdown library to download the file from the URL and save it to the output path
gdown.download(url,output,quiet=False)
gdown.extractall('data.zip')
# Load a video from the given path, convert each frame to grayscale, crop it to the lip region, and return the normalized frames.
def load_video(path:str) -> List[float]:
    cap = cv2.VideoCapture(path)
    frames = []
    for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):
        ret, frame = cap.read()
        frame = tf.image.rgb_to_grayscale(frame)
        # Crop the frame to focus only on the lips and append the preprocessed frame to the list
        frames.append(frame[190:236,80:220,:])
    cap.release()
    # Normalize the frames by subtracting the mean and dividing by the standard deviation
    mean = tf.math.reduce_mean(frames)
    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
    return tf.cast((frames - mean), tf.float32) / std
The vocabulary is: ['', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', …]
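char_to_num (used in the next cells) and its inverse num_to_char are not defined anywhere in this excerpt. A minimal sketch using tf.keras.layers.StringLookup, assuming the vocabulary whose prefix is printed above:

# Character set assumed; only the beginning of the printed vocabulary is visible above
vocab = [x for x in "abcdefghijklmnopqrstuvwxyz'?!123456789 "]
# Map characters to integer ids and back; the empty string acts as the OOV/padding token at index 0
char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")
num_to_char = tf.keras.layers.StringLookup(vocabulary=char_to_num.get_vocabulary(), oov_token="", invert=True)
print(f"The vocabulary is: {char_to_num.get_vocabulary()} (size ={char_to_num.vocabulary_size()})")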
char_to_num(['s','k','a','t','y'])
# Split the tokens into individual characters, convert them to numerical values using char_to_num,
# and return the result (this is the tail of load_alignments; a full sketch follows below)
return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]
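Only the final line of the alignment loader survives above. A minimal sketch of the full load_alignments, assuming GRID-style .align files in which each line reads `start end word` and silences are tagged 'sil':

def load_alignments(path: str) -> List[str]:
    with open(path, 'r') as f:
        lines = f.readlines()
    tokens = []
    for line in lines:
        line = line.split()
        # Skip silence markers; prepend a space before every spoken word
        if line[2] != 'sil':
            tokens = [*tokens, ' ', line[2]]
    # Split the tokens into characters, map them to ids, and drop the leading space token
    return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]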
test_path = './data/s1/bbal6n.mpg'
tf.convert_to_tensor(test_path).numpy().decode('utf-8').split('/')[-1].split('.')[0]
'bbal6n'
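The frames and alignments displayed in the next cells, and the load_data call wrapped by mappable_function further down, rely on a loader that is not shown in this excerpt. A minimal sketch, assuming the GRID layout ./data/s1/&lt;clip&gt;.mpg with alignments under ./data/alignments/s1/&lt;clip&gt;.align:

def load_data(path: str):
    # path arrives as a tf.string tensor when called through tf.py_function
    path = bytes.decode(path.numpy())
    file_name = path.split('/')[-1].split('.')[0]
    video_path = os.path.join('data', 's1', f'{file_name}.mpg')
    alignment_path = os.path.join('data', 'alignments', 's1', f'{file_name}.align')
    frames = load_video(video_path)
    alignments = load_alignments(alignment_path)
    return frames, alignments

frames, alignments = load_data(tf.convert_to_tensor(test_path))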
plt.imshow(frames[28])
<matplotlib.image.AxesImage at 0x7ec7ee23fb20>
alignments
def mappable_function(path:str) ->List[str]:
# Call the load_data function using TensorFlow's py_function, which can be used to wrap Python functions
# and make them usable inside a TensorFlow graph.
# This allows us to load and preprocess data in parallel with TensorFlow's data pipeline.
result = tf.py_function(load_data, [path], (tf.float32, tf.int64))
# Return the loaded and preprocessed data
return result
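data, train, and test are used below, but their construction is missing from this excerpt. A minimal sketch of the tf.data pipeline, assuming clips from speaker s1, a padded batch size of 2, and a 450/50 train/test split in batches (consistent with len(test) == 50 below):

data = tf.data.Dataset.list_files('./data/s1/*.mpg')
data = data.shuffle(500, reshuffle_each_iteration=False)
data = data.map(mappable_function)
# Pad every clip to 75 frames and every transcript to 40 tokens so batches stack cleanly
data = data.padded_batch(2, padded_shapes=([75, None, None, None], [40]))
data = data.prefetch(tf.data.AUTOTUNE)
train = data.take(450)
test = data.skip(450)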
len(test)
50
len(frames)
sample = data.as_numpy_iterator()
%%capture
val = sample.next(); val[0]
%%capture
# This code saves a gif animation from the frames of a video.
# val[0][0] likely contains float values. To convert to uint8, we multiply by 255
# and cast to the desired type. This ensures compatibility with imageio/PIL.
frames_uint8 = (val[0][0] * 255).astype(np.uint8)
# If frames_uint8 still has shape (T, H, W, 1), convert to (T, H, W, 3) or (T, H, W)
if frames_uint8.shape[-1] == 1:
    try:
        frames_uint8 = frames_uint8.repeat(3, axis=-1)  # Convert to RGB if grayscale
    except ValueError:
        frames_uint8 = frames_uint8.squeeze(-1)  # Remove the single channel dimension
# Write the clip out as a gif (the save call is missing from the cell above; imageio.mimsave is a standard choice)
imageio.mimsave('./animation.gif', frames_uint8, fps=10)
plt.imshow(val[0][0][35])
<matplotlib.image.AxesImage at 0x7ec7e155e5f0>
data.as_numpy_iterator().next()[0][0].shape
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv3D, LSTM, Dense, Dropout, Bidirectional,
                                     MaxPool3D, Activation, TimeDistributed, Flatten)

model = Sequential()
# Add a Conv3D layer with 128 filters, kernel size of 3, and padding of 'same', with input shape of (75,46,140,1)
model.add(Conv3D(128, 3, input_shape=(75,46,140,1), padding='same'))
model.add(Activation('relu'))
# Add a MaxPool3D layer with pool size of (1,2,2)
model.add(MaxPool3D((1,2,2)))
# Add another Conv3D layer with 256 filters, kernel size of 3, and padding of 'same'
model.add(Conv3D(256, 3, padding='same'))
model.add(Activation('relu'))
model.add(MaxPool3D((1,2,2)))
# Add another Conv3D layer with 75 filters, kernel size of 3, and padding of 'same'
model.add(Conv3D(75, 3, padding='same'))
model.add(Activation('relu'))
model.add(MaxPool3D((1,2,2)))
# Flatten the spatial dimensions of each time step so the recurrent layers see a (75, 6375) sequence
# (this layer and the second LSTM/Dense below are missing from the cell but appear in the summary)
model.add(TimeDistributed(Flatten()))
# Add two Bidirectional LSTM layers with 128 units, orthogonal kernel initializer, and return_sequences=True
model.add(Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)))
model.add(Dropout(.5))
model.add(Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)))
model.add(Dropout(.5))
# Dense output: 41 classes = vocabulary plus the CTC blank (initializer/activation assumed: he_normal, softmax)
model.add(Dense(41, kernel_initializer='he_normal', activation='softmax'))

model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv3d (Conv3D) │ (None, 75, 46, 140, 128) │ 3,584 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ activation (Activation) │ (None, 75, 46, 140, 128) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling3d (MaxPooling3D) │ (None, 75, 23, 70, 128) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv3d_1 (Conv3D) │ (None, 75, 23, 70, 256) │ 884,992 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ activation_1 (Activation) │ (None, 75, 23, 70, 256) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling3d_1 (MaxPooling3D) │ (None, 75, 11, 35, 256) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv3d_2 (Conv3D) │ (None, 75, 11, 35, 75) │ 518,475 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ activation_2 (Activation) │ (None, 75, 11, 35, 75) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling3d_2 (MaxPooling3D) │ (None, 75, 5, 17, 75) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ time_distributed (TimeDistributed) │ (None, 75, 6375) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ bidirectional (Bidirectional) │ (None, 75, 256) │ 6,660,096 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout (Dropout) │ (None, 75, 256) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ bidirectional_1 (Bidirectional) │ (None, 75, 256) │ 394,240 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_1 (Dropout) │ (None, 75, 256) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense (Dense) │ (None, 75, 41) │ 10,537 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 8,471,924 (32.32 MB)
Trainable params: 8,471,924 (32.32 MB)
Non-trainable params: 0 (0.00 B)
yhat = model.predict(val[0])
# This function computes the Connectionist Temporal Classification (CTC) loss between the true labels (y_true)
# and the predicted labels (y_pred). Only the length-broadcasting lines survive in this excerpt; the rest follows the standard Keras CTC pattern.
def CTCLoss(y_true, y_pred):
    batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
    input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
    label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
    # Create tensors of shape (batch_size, 1) with the input and label lengths
    input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
    label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
    return tf.keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)
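scheduler and ProduceExample are referenced in the compile/callback cell below but are not defined in this excerpt. A minimal sketch, assuming a constant-then-decaying learning-rate schedule and a callback that prints a greedy-decoded example after each epoch:

def scheduler(epoch, lr):
    # Keep the initial learning rate for the first 30 epochs, then decay exponentially (assumed schedule)
    return lr if epoch < 30 else lr * tf.math.exp(-0.1)

class ProduceExample(tf.keras.callbacks.Callback):
    def __init__(self, dataset) -> None:
        self.dataset = dataset.as_numpy_iterator()

    def on_epoch_end(self, epoch, logs=None) -> None:
        data = self.dataset.next()
        yhat = self.model.predict(data[0])
        # Greedy CTC decode of a batch of two 75-frame clips
        decoded = tf.keras.backend.ctc_decode(yhat, [75, 75], greedy=True)[0][0].numpy()
        for x in range(len(yhat)):
            print('Original:  ', tf.strings.reduce_join(num_to_char(data[1][x])).numpy().decode('utf-8'))
            print('Prediction:', tf.strings.reduce_join(num_to_char(decoded[x])).numpy().decode('utf-8'))
            print('~' * 100)

Both would typically be passed to model.fit via callbacks=[schedule_callback, example_callback]; the training call itself does not appear in this excerpt.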
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler

# Compile the model with the Adam optimizer (learning rate 0.0001) and the CTC loss defined above.
model.compile(optimizer=Adam(learning_rate=0.0001), loss=CTCLoss)
schedule_callback = LearningRateScheduler(scheduler)
example_callback = ProduceExample(test)
Downloading...
From (original): https://drive.google.com/uc?id=1vWscXs4Vt0a_1IH1-ct2TCgXAZT-N3_Y
From (redirected): https://drive.google.com/uc?id=1vWscXs4Vt0a_1IH1-ct2TCgXAZT-N3_Y&confirm=t&uuid=1ad794ff-6856-4ed2-8091-596705bf6
To: /content/checkpoints.zip
100%|██████████| 94.5M/94.5M [00:04<00:00, 20.6MB/s]
['models/checkpoint.index',
'models/__MACOSX/._checkpoint.index',
'models/checkpoint.data-00000-of-00001',
'models/__MACOSX/._checkpoint.data-00000-of-00001',
'models/checkpoint',
'models/__MACOSX/._checkpoint']
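The download log above presumably comes from fetching pretrained checkpoint weights with gdown; the cell itself is not in this excerpt. A minimal sketch, assuming the Drive ID shown in the log:

url = 'https://drive.google.com/uc?id=1vWscXs4Vt0a_1IH1-ct2TCgXAZT-N3_Y'
output = 'checkpoints.zip'
# Download the pretrained checkpoint archive and extract it (the archive contains a models/ directory, per the listing above)
gdown.download(url, output, quiet=False)
gdown.extractall('checkpoints.zip')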
try:
    # If the checkpoint contains the entire model structure
    model = tf.keras.models.load_model('models/checkpoint')
except Exception:
    # If the checkpoint only contains weights, reuse the model structure defined above
    # and restore the weights using tf.train.Checkpoint
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint.restore(tf.train.latest_checkpoint('models'))
    # model.load_weights('models/checkpoint')
<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7ec7ee4f6b60>
WARNING:tensorflow:Detecting that an object or model or tf.train.Checkpoint is being deleted with unrestored values. See the following logs for the specific values in question.
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).layer_with_weights-0.kernel
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).layer_with_weights-0.bias
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).layer_with_weights-1.kernel
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).layer_with_weights-1.bias
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).layer_with_weights-2.kernel
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).layer_with_weights-2.bias
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).layer_with_weights-5.kernel
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).layer_with_weights-5.bias
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).optimizer.beta_1
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).optimizer.beta_2
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).optimizer.decay
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).optimizer.learning_rate
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).optimizer.iter
[... similar truncated warnings follow for the kernel, recurrent kernel, and bias of the forward and backward LSTM cells in layer_with_weights-3 and layer_with_weights-4, and for the optimizer's slot variables ...]
<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7ec7ee4f5210>
test_data = test.as_numpy_iterator()
sample = test_data.next()
yhat = model.predict(sample[0])
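decoded, used in the prediction print below, is computed by a cell that is missing here. A minimal sketch using greedy CTC decoding, assuming a batch of two 75-frame clips:

# Collapse repeated characters and CTC blanks from the frame-level softmax outputs
decoded = tf.keras.backend.ctc_decode(yhat, input_length=[75, 75], greedy=True)[0][0].numpy()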
print('~'*100, 'PREDICTIONS')
[tf.strings.reduce_join([num_to_char(word) for word in sentence]) for sentence in decoded]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PREDICTIONS
[<tf.Tensor: shape=(), dtype=string, numpy=b'8!'>,
<tf.Tensor: shape=(), dtype=string, numpy=b'r!b'>]