0% found this document useful (0 votes)
4 views

vertopal.com_LSTM_Autoencoder_

Uploaded by

Boch
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views

vertopal.com_LSTM_Autoencoder_

Uploaded by

Boch
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 8

import tarfile

from google.colab import drive

# Make the Drive contents (including the dataset archive) visible under
# /content/drive.
drive.mount('/content/drive')

# Dataset archive on Drive. It is unpacked on every run; no check is made
# for a previous extraction.
gz_file_path = '/content/drive/MyDrive/UCSD_Anomaly_Dataset.tar.gz'

with tarfile.open(gz_file_path, 'r:gz') as tar:
    tar.extractall('/content/UCSD_Anomaly_Dataset')

!pip install tensorflow opencv-python

Drive already mounted at /content/drive; to attempt to forcibly


remount, call drive.mount("/content/drive", force_remount=True).
Requirement already satisfied: tensorflow in
/usr/local/lib/python3.10/dist-packages (2.17.0)
Requirement already satisfied: opencv-python in
/usr/local/lib/python3.10/dist-packages (4.10.0.84)
Requirement already satisfied: absl-py>=1.0.0 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (1.4.0)
Requirement already satisfied: astunparse>=1.6.0 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (1.6.3)
Requirement already satisfied: flatbuffers>=24.3.25 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (24.3.25)
Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1
in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.6.0)
Requirement already satisfied: google-pasta>=0.1.1 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (0.2.0)
Requirement already satisfied: h5py>=3.10.0 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (3.12.1)
Requirement already satisfied: libclang>=13.0.0 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (18.1.1)
Requirement already satisfied: ml-dtypes<0.5.0,>=0.3.1 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (0.4.1)
Requirement already satisfied: opt-einsum>=2.3.2 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (3.4.0)
Requirement already satisfied: packaging in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (24.1)
Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!
=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (3.20.3)
Requirement already satisfied: requests<3,>=2.21.0 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (2.32.3)
Requirement already satisfied: setuptools in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (75.1.0)
Requirement already satisfied: six>=1.12.0 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (1.16.0)
Requirement already satisfied: termcolor>=1.1.0 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (2.5.0)
Requirement already satisfied: typing-extensions>=3.6.6 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (4.12.2)
Requirement already satisfied: wrapt>=1.11.0 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (1.16.0)
Requirement already satisfied: grpcio<2.0,>=1.24.3 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (1.64.1)
Requirement already satisfied: tensorboard<2.18,>=2.17 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (2.17.0)
Requirement already satisfied: keras>=3.2.0 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (3.4.1)
Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (0.37.1)
Requirement already satisfied: numpy<2.0.0,>=1.23.5 in
/usr/local/lib/python3.10/dist-packages (from tensorflow) (1.26.4)
Requirement already satisfied: wheel<1.0,>=0.23.0 in
/usr/local/lib/python3.10/dist-packages (from astunparse>=1.6.0-
>tensorflow) (0.44.0)
Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-
packages (from keras>=3.2.0->tensorflow) (13.9.3)
Requirement already satisfied: namex in
/usr/local/lib/python3.10/dist-packages (from keras>=3.2.0-
>tensorflow) (0.0.8)
Requirement already satisfied: optree in
/usr/local/lib/python3.10/dist-packages (from keras>=3.2.0-
>tensorflow) (0.13.0)
Requirement already satisfied: charset-normalizer<4,>=2 in
/usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0-
>tensorflow) (3.4.0)
Requirement already satisfied: idna<4,>=2.5 in
/usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0-
>tensorflow) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in
/usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0-
>tensorflow) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in
/usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0-
>tensorflow) (2024.8.30)
Requirement already satisfied: markdown>=2.6.8 in
/usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17-
>tensorflow) (3.7)
Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0
in /usr/local/lib/python3.10/dist-packages (from
tensorboard<2.18,>=2.17->tensorflow) (0.7.2)
Requirement already satisfied: werkzeug>=1.0.1 in
/usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17-
>tensorflow) (3.0.6)
Requirement already satisfied: MarkupSafe>=2.1.1 in
/usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1-
>tensorboard<2.18,>=2.17->tensorflow) (3.0.2)
Requirement already satisfied: markdown-it-py>=2.2.0 in
/usr/local/lib/python3.10/dist-packages (from rich->keras>=3.2.0-
>tensorflow) (3.0.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in
/usr/local/lib/python3.10/dist-packages (from rich->keras>=3.2.0-
>tensorflow) (2.18.0)
Requirement already satisfied: mdurl~=0.1 in
/usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0-
>rich->keras>=3.2.0->tensorflow) (0.1.2)

import numpy as np
import os
from PIL import Image

def load_and_preprocess_frames(directory_path, frame_height=160,
                               frame_width=160):
    """Load every clip under *directory_path* as flattened, scaled frames.

    Each sub-directory of ``directory_path`` is treated as one clip. Its
    ``.tif`` files (case-insensitive extension) are read in sorted filename
    order, converted to 8-bit grayscale, resized to
    ``frame_width x frame_height``, flattened and divided by 255.

    Args:
        directory_path: Folder containing one sub-folder per clip.
        frame_height: Target frame height in pixels.
        frame_width: Target frame width in pixels.

    Returns:
        A list with one array per clip that produced at least one frame.
        Each array has shape ``(num_frames, frame_height * frame_width)``.
        Clips are ordered by sorted folder name.

    Note:
        A file that raises ``OSError`` while loading is reported with
        ``print`` and skipped, so the clip ends up with fewer frames than
        files on disk.
    """
    all_videos = []
    for video_folder in sorted(os.listdir(directory_path)):
        video_path = os.path.join(directory_path, video_folder)
        if not os.path.isdir(video_path):
            continue

        frames = []
        for filename in sorted(os.listdir(video_path)):
            if not filename.lower().endswith('.tif'):
                continue
            frame_path = os.path.join(video_path, filename)
            try:
                with Image.open(frame_path) as img:
                    # Force a single channel so every flattened frame has
                    # exactly frame_height * frame_width values; otherwise a
                    # multi-channel image would make the clip array ragged.
                    img = img.convert('L').resize((frame_width, frame_height))
                    frame = np.array(img).flatten()  # Flatten each frame
            except OSError as e:
                print(f"Error loading file {frame_path}: {e}")
                continue  # Skip problematic files
            frames.append(frame / 255.0)  # Normalize

        if frames:
            all_videos.append(np.array(frames))
    return all_videos

# Example usage
# Locations of the two Train folders inside the extracted archive. The
# literals are joined by implicit string concatenation to keep lines short.
train_directory_ped1 = (
    '/content/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset.v1p2/UCSDped1/'
    'Train'
)
train_directory_ped2 = (
    '/content/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset.v1p2/UCSDped2/'
    'Train'
)

train_videos_ped1 = load_and_preprocess_frames(train_directory_ped1)
train_videos_ped2 = load_and_preprocess_frames(train_directory_ped2)

# UCSDped1 clips first, then UCSDped2 clips.
train_videos = train_videos_ped1 + train_videos_ped2
def split_into_sequences(video_frames, sequence_length=16):
    """Cut a clip into consecutive, non-overlapping fixed-length sequences.

    Frames left over at the end that do not fill a whole sequence are
    dropped.

    Args:
        video_frames: Array-like whose first axis indexes frames.
        sequence_length: Number of frames per sequence; must be positive.

    Returns:
        Array of shape ``(num_sequences, sequence_length, *frame_shape)``.
        When the clip is shorter than ``sequence_length`` the result has
        ``num_sequences == 0`` but keeps the trailing dimensions, so it can
        still be passed to ``np.concatenate`` together with other clips.
        The result may share memory with ``video_frames`` when that is
        already an array.

    Raises:
        ValueError: If ``sequence_length`` is not positive.
    """
    if sequence_length <= 0:
        raise ValueError("sequence_length must be a positive integer")

    frames = np.asarray(video_frames)
    num_sequences = len(frames) // sequence_length
    usable_frames = num_sequences * sequence_length
    target_shape = (num_sequences, sequence_length) + frames.shape[1:]
    return frames[:usable_frames].reshape(target_shape)

# Prepare the training data


# Split each training clip into 16-frame sequences, then stack the
# sequences of all clips along the first axis.
per_video_sequences = []
for video in train_videos:
    per_video_sequences.append(split_into_sequences(video, sequence_length=16))
train_sequences = np.concatenate(per_video_sequences, axis=0)
print("Prepared training data shape:", train_sequences.shape)

Prepared training data shape: (562, 16, 25600)

from tensorflow.keras.layers import (
    Dense,
    Input,
    LSTM,
    RepeatVector,
    TimeDistributed,
)
from tensorflow.keras.models import Sequential

def build_lstm_autoencoder(sequence_length, input_dim, activation='relu'):
    """Build and compile a sequence-to-sequence LSTM autoencoder.

    The encoder is two LSTM layers (128 then 64 units), the second of which
    returns only its final output. ``RepeatVector`` copies that vector
    ``sequence_length`` times, and the decoder is two LSTM layers (64 then
    128 units) followed by a per-timestep ``Dense(input_dim)`` projection.

    Args:
        sequence_length: Number of timesteps per input sequence.
        input_dim: Number of features per timestep.
        activation: Activation used by all four LSTM layers. The default
            matches the value that was previously hard-coded.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss) mapping
        ``(batch, sequence_length, input_dim)`` to the same shape.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer rather than
        # `input_shape=` on the first LSTM, which Keras warns against.
        Input(shape=(sequence_length, input_dim)),
        LSTM(128, activation=activation, return_sequences=True),
        LSTM(64, activation=activation, return_sequences=False),
        RepeatVector(sequence_length),
        LSTM(64, activation=activation, return_sequences=True),
        LSTM(128, activation=activation, return_sequences=True),
        TimeDistributed(Dense(input_dim)),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Model dimensions: timesteps per sequence and values per flattened frame.
sequence_length = 16
input_dim = 160 ** 2
lstm_autoencoder = build_lstm_autoencoder(
    sequence_length=sequence_length,
    input_dim=input_dim,
)

/usr/local/lib/python3.10/dist-packages/keras/src/layers/rnn/
rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim`
argument to a layer. When using Sequential models, prefer using an
`Input(shape)` object as the first layer in the model instead.
super().__init__(**kwargs)

# Reshape train data to (num_sequences, sequence_length, input_dim)


num_train_sequences = train_sequences.shape[0]
train_sequences = train_sequences.reshape(
    (num_train_sequences, sequence_length, input_dim)
)

# Fit the autoencoder to reproduce its own input; the last 20% of the
# sequences are held out for validation.
lstm_autoencoder.fit(
    train_sequences,
    train_sequences,
    epochs=10,
    batch_size=2,
    validation_split=0.2,
)
Epoch 1/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 29s 89ms/step - loss: 630.3209 -
val_loss: 2.4493
Epoch 2/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 9s 40ms/step - loss: 1.4530 - val_loss:
0.8214
Epoch 3/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 9s 39ms/step - loss: 0.3946 - val_loss:
0.4026
Epoch 4/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 9s 41ms/step - loss: 0.1782 - val_loss:
0.2826
Epoch 5/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 9s 40ms/step - loss: 0.1101 - val_loss:
0.2217
Epoch 6/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 10s 39ms/step - loss: 0.0821 - val_loss:
0.1765
Epoch 7/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 10s 39ms/step - loss: 0.0619 - val_loss:
0.1615
Epoch 8/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 11s 41ms/step - loss: 0.0537 - val_loss:
0.1326
Epoch 9/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 10s 40ms/step - loss: 0.0462 - val_loss:
0.1367
Epoch 10/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 11s 42ms/step - loss: 0.0420 - val_loss:
0.1236

<keras.src.callbacks.history.History at 0x7c147a365bd0>

# Define ground truth for UCSDped1


# One entry per UCSDped1 test clip, in clip order. Each entry lists the frame
# numbers labelled as anomalous. range() excludes its end value, so
# range(60, 153) covers frames 60..152.
# The smallest number used is 1, so the numbering is presumably 1-based —
# TODO confirm against the dataset's annotation file.
ground_truth_frames_ped1 = [
list(range(60, 153)),
list(range(50, 176)),
list(range(91, 201)),
list(range(31, 169)),
list(range(5, 91)) + list(range(140, 201)),
list(range(1, 101)) + list(range(110, 201)),
list(range(1, 176)),
list(range(1, 95)),
list(range(1, 49)),
list(range(1, 141)),
list(range(70, 166)),
list(range(130, 201)),
list(range(1, 157)),
list(range(1, 201)),
list(range(138, 201)),
list(range(123, 201)),
list(range(1, 48)),
list(range(54, 121)),
list(range(64, 139)),
list(range(45, 176)),
list(range(31, 201)),
list(range(16, 108)),
list(range(8, 166)),
list(range(50, 172)),
list(range(40, 136)),
list(range(77, 145)),
list(range(10, 123)),
list(range(105, 201)),
list(range(1, 16)) + list(range(45, 114)),
list(range(175, 201)),
list(range(1, 181)),
list(range(1, 53)) + list(range(65, 116)),
list(range(5, 166)),
list(range(1, 122)),
list(range(86, 201)),
list(range(15, 109))
]

# Define ground truth for UCSDped2


# One entry per UCSDped2 test clip, in clip order. Each entry lists the frame
# numbers labelled as anomalous. range() excludes its end value, so
# range(61, 180) covers frames 61..179.
# NOTE(review): every interval that reaches the end stops at 180, i.e. frame
# 180 itself is never included. If the annotation intervals are inclusive
# and end at frame 180, these bounds should be 181 — verify against the
# dataset's annotation file.
ground_truth_frames_ped2 = [
list(range(61, 180)),
list(range(95, 180)),
list(range(1, 146)),
list(range(31, 180)),
list(range(1, 129)),
list(range(1, 162)),
list(range(46, 180)),
list(range(1, 180)),
list(range(1, 120)),
list(range(1, 150)),
list(range(1, 180)),
list(range(88, 180))
]

# Combine ground truth annotations for both datasets


# UCSDped1 entries first, then UCSDped2 entries. Parentheses keep the
# expression valid when it spans more than one line.
ground_truth_frames_combined = (
    ground_truth_frames_ped1 + ground_truth_frames_ped2
)

# Load and preprocess test data


# Locations of the two Test folders inside the extracted archive. The
# literals are joined by implicit string concatenation to keep lines short.
test_directory_ped1 = (
    '/content/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset.v1p2/UCSDped1/'
    'Test'
)
test_directory_ped2 = (
    '/content/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset.v1p2/UCSDped2/'
    'Test'
)

test_videos_ped1 = load_and_preprocess_frames(test_directory_ped1)
test_videos_ped2 = load_and_preprocess_frames(test_directory_ped2)

# UCSDped1 clips first, then UCSDped2 clips.
test_videos = test_videos_ped1 + test_videos_ped2

# Prepare test data sequences


# Split each test clip into fixed-length sequences, stack them, and give
# the result the (num_sequences, sequence_length, input_dim) layout.
per_video_test_sequences = []
for video in test_videos:
    per_video_test_sequences.append(split_into_sequences(video, sequence_length))
test_sequences = np.concatenate(per_video_test_sequences, axis=0)

num_test_sequences = test_sequences.shape[0]
test_sequences = test_sequences.reshape(
    (num_test_sequences, sequence_length, input_dim)
)

Error loading file


/content/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset.v1p2/UCSDped1/Test/
Test017/142.tif: -2

# Predict and calculate reconstruction errors


from sklearn.metrics import mean_squared_error

# Reconstruct every test sequence with the trained autoencoder.
reconstructed_test_data = lstm_autoencoder.predict(test_sequences)

# One error value per sequence: the MSE over all frames and pixels of that
# sequence.
reconstruction_errors = []
for original, reconstructed in zip(test_sequences, reconstructed_test_data):
    sequence_error = mean_squared_error(original.ravel(), reconstructed.ravel())
    reconstruction_errors.append(sequence_error)

18/18 ━━━━━━━━━━━━━━━━━━━━ 6s 164ms/step

import numpy as np

# Set the threshold for anomaly detection.
# The cut-off is the 38th percentile of the reconstruction errors, so roughly
# 62% of the sequences are flagged. (The earlier comment said "95th
# percentile", which did not match the value actually passed.)
# NOTE(review): the threshold is derived from the same errors it is then
# applied to — confirm this is intended rather than using held-out data.
THRESHOLD_PERCENTILE = 38
threshold = np.percentile(reconstruction_errors, THRESHOLD_PERCENTILE)
print("Anomaly threshold:", threshold)

# Generate binary predictions based on threshold: 1 = anomalous sequence.
predicted_labels = [
    1 if error > threshold else 0 for error in reconstruction_errors
]

Anomaly threshold: 0.0297560810270515

# Create ground truth labels for the test set based on
# ground_truth_frames_combined.
#
# predicted_labels holds one value per sequence, so the ground truth must be
# per sequence as well. A sequence is labelled anomalous (1) when at least
# one of its frames appears in the clip's annotation list. Sequences are
# enumerated with the same start/stop/step as split_into_sequences so labels
# and predictions line up one-to-one.
#
# Assumes annotated frame numbers are 1-based (the smallest value in the
# lists is 1) — TODO confirm. Frames skipped while loading (unreadable
# files) shift the numbering of the later frames in that clip.
if len(test_videos) != len(ground_truth_frames_combined):
    raise ValueError(
        f"{len(test_videos)} test clips but "
        f"{len(ground_truth_frames_combined)} ground truth entries"
    )

test_labels = []
for video, gt_frames in zip(test_videos, ground_truth_frames_combined):
    anomalous_frames = set(gt_frames)
    for start in range(0, len(video) - sequence_length + 1, sequence_length):
        # Array offset `start` corresponds to frame number `start + 1`.
        frame_numbers = range(start + 1, start + sequence_length + 1)
        is_anomalous = not anomalous_frames.isdisjoint(frame_numbers)
        test_labels.append(int(is_anomalous))

test_labels = np.array(test_labels)

# Fail loudly instead of silently truncating when the counts disagree.
if len(test_labels) != len(predicted_labels):
    raise ValueError(
        f"{len(test_labels)} ground truth labels but "
        f"{len(predicted_labels)} predictions"
    )

# Evaluate the model


from sklearn.metrics import precision_score, recall_score, f1_score

# Score the binary sequence predictions against the ground truth labels.
precision = precision_score(test_labels, predicted_labels)
recall = recall_score(test_labels, predicted_labels)
f1 = f1_score(test_labels, predicted_labels)

# Report each metric with four decimals.
for metric_name, metric_value in (
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_name}: {metric_value:.4f}")

Precision: 0.5364
Recall: 0.8519
F1 Score: 0.6583

INTERPRETATION

**Recall (0.8519):** A high recall means that the model identifies most of the actual
anomalies.

**Precision (0.5364):** Precision is lower, indicating that nearly half of the sequences
flagged as anomalies are actually normal.

The model is slightly too sensitive, flagging some normal sequences as anomalies.

**F1 score (0.6583):** Reflects the balance between the two metrics.

You might also like