
LSTM

May 6, 2024

1 LSTM From Scratch in Python


By Cristian Leo

[1]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Custom classes (built from scratch)
from src.model import WeightInitializer
from src.trainer import PlotManager, EarlyStopping

[2]: class LSTM:
    """
    Long Short-Term Memory (LSTM) network.

    Parameters:
    - input_size: int, dimensionality of input space
    - hidden_size: int, number of LSTM units
    - output_size: int, dimensionality of output space
    - init_method: str, weight initialization method (default: 'xavier')
    """
    def __init__(self, input_size, hidden_size, output_size, init_method='xavier'):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.weight_initializer = WeightInitializer(method=init_method)

        # Initialize gate weights; each gate sees the stacked [h_prev; x_t],
        # hence hidden_size + input_size columns
        self.wf = self.weight_initializer.initialize((hidden_size, hidden_size + input_size))
        self.wi = self.weight_initializer.initialize((hidden_size, hidden_size + input_size))
        self.wo = self.weight_initializer.initialize((hidden_size, hidden_size + input_size))
        self.wc = self.weight_initializer.initialize((hidden_size, hidden_size + input_size))

        # Initialize biases
        self.bf = np.zeros((hidden_size, 1))
        self.bi = np.zeros((hidden_size, 1))
        self.bo = np.zeros((hidden_size, 1))
        self.bc = np.zeros((hidden_size, 1))

        # Initialize output layer weights and biases
        self.why = self.weight_initializer.initialize((output_size, hidden_size))
        self.by = np.zeros((output_size, 1))

    @staticmethod
    def sigmoid(z):
        """
        Sigmoid activation function.

        Parameters:
        - z: np.ndarray, input to the activation function

        Returns:
        - np.ndarray, output of the activation function
        """
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def dsigmoid(y):
        """
        Derivative of the sigmoid activation function, expressed in terms of its output y.

        Parameters:
        - y: np.ndarray, output of the sigmoid activation function

        Returns:
        - np.ndarray, derivative of the sigmoid function
        """
        return y * (1 - y)

    @staticmethod
    def dtanh(y):
        """
        Derivative of the hyperbolic tangent activation function, expressed in terms of its output y.

        Parameters:
        - y: np.ndarray, output of the hyperbolic tangent activation function

        Returns:
        - np.ndarray, derivative of the hyperbolic tangent function
        """
        return 1 - y * y

    def forward(self, x):
        """
        Forward pass through the LSTM network.

        Parameters:
        - x: np.ndarray, input sequence of shape (time_steps, input_size)

        Returns:
        - np.ndarray, output of the network
        - list, caches containing intermediate values for backpropagation
        """
        caches = []
        h_prev = np.zeros((self.hidden_size, 1))
        c_prev = np.zeros((self.hidden_size, 1))
        h = h_prev
        c = c_prev

        for t in range(x.shape[0]):
            x_t = x[t].reshape(-1, 1)
            combined = np.vstack((h_prev, x_t))

            f = self.sigmoid(np.dot(self.wf, combined) + self.bf)   # forget gate
            i = self.sigmoid(np.dot(self.wi, combined) + self.bi)   # input gate
            o = self.sigmoid(np.dot(self.wo, combined) + self.bo)   # output gate
            c_ = np.tanh(np.dot(self.wc, combined) + self.bc)       # candidate cell state

            c = f * c_prev + i * c_
            h = o * np.tanh(c)

            cache = (h_prev, c_prev, f, i, o, c_, x_t, combined, c, h)
            caches.append(cache)

            h_prev, c_prev = h, c

        # The output layer is applied to the last hidden state only
        y = np.dot(self.why, h) + self.by
        return y, caches

    def backward(self, dy, caches, clip_value=1.0):
        """
        Backward pass through the LSTM network (backpropagation through time).

        Parameters:
        - dy: np.ndarray, gradient of the loss with respect to the output
        - caches: list, caches from the forward pass
        - clip_value: float, value to clip gradients to (default: 1.0)

        Returns:
        - tuple, gradients of the loss with respect to the parameters
        """
        dWf, dWi, dWo, dWc = [np.zeros_like(w) for w in (self.wf, self.wi, self.wo, self.wc)]
        dbf, dbi, dbo, dbc = [np.zeros_like(b) for b in (self.bf, self.bi, self.bo, self.bc)]
        dWhy = np.zeros_like(self.why)
        dby = np.zeros_like(self.by)

        # Ensure dy is reshaped to match output size
        dy = dy.reshape(self.output_size, -1)

        # The output layer only sees the last hidden state, so its gradients are
        # computed once and the resulting gradient is injected at the last time step.
        h_last = caches[-1][-1]
        dWhy += np.dot(dy, h_last.T)
        dby += dy

        dh_next = np.dot(self.why.T, dy)   # shape must match hidden_size
        dc_next = np.zeros_like(dh_next)

        for cache in reversed(caches):
            h_prev, c_prev, f, i, o, c_, x_t, combined, c, h = cache

            # Gradient flowing into this step's hidden and cell states
            dh = dh_next
            dc = dc_next + dh * o * self.dtanh(np.tanh(c))

            df = dc * c_prev * self.dsigmoid(f)
            di = dc * c_ * self.dsigmoid(i)
            do = dh * np.tanh(c) * self.dsigmoid(o)   # output gate: h = o * tanh(c)
            dc_ = dc * i * self.dtanh(c_)

            dcombined_f = np.dot(self.wf.T, df)
            dcombined_i = np.dot(self.wi.T, di)
            dcombined_o = np.dot(self.wo.T, do)
            dcombined_c = np.dot(self.wc.T, dc_)

            dcombined = dcombined_f + dcombined_i + dcombined_o + dcombined_c

            dh_next = dcombined[:self.hidden_size]
            dc_next = f * dc

            dWf += np.dot(df, combined.T)
            dWi += np.dot(di, combined.T)
            dWo += np.dot(do, combined.T)
            dWc += np.dot(dc_, combined.T)

            dbf += df.sum(axis=1, keepdims=True)
            dbi += di.sum(axis=1, keepdims=True)
            dbo += do.sum(axis=1, keepdims=True)
            dbc += dc_.sum(axis=1, keepdims=True)

        gradients = (dWf, dWi, dWo, dWc, dbf, dbi, dbo, dbc, dWhy, dby)

        # Gradient clipping (in place)
        for grad in gradients:
            np.clip(grad, -clip_value, clip_value, out=grad)

        return gradients

    def update_params(self, grads, learning_rate):
        """
        Update the parameters of the network using the gradients.

        Parameters:
        - grads: tuple, gradients of the loss with respect to the parameters
        - learning_rate: float, learning rate
        """
        dWf, dWi, dWo, dWc, dbf, dbi, dbo, dbc, dWhy, dby = grads

        self.wf -= learning_rate * dWf
        self.wi -= learning_rate * dWi
        self.wo -= learning_rate * dWo
        self.wc -= learning_rate * dWc

        self.bf -= learning_rate * dbf
        self.bi -= learning_rate * dbi
        self.bo -= learning_rate * dbo
        self.bc -= learning_rate * dbc

        self.why -= learning_rate * dWhy
        self.by -= learning_rate * dby
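For reference, the forward loop above implements the standard LSTM cell equations, where [h_{t-1}; x_t] is the vertical stack built by np.vstack and \odot denotes element-wise multiplication:

    f_t = \sigma(W_f [h_{t-1}; x_t] + b_f)
    i_t = \sigma(W_i [h_{t-1}; x_t] + b_i)
    o_t = \sigma(W_o [h_{t-1}; x_t] + b_o)
    \tilde{c}_t = \tanh(W_c [h_{t-1}; x_t] + b_c)
    c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t
    h_t = o_t \odot \tanh(c_t)
    y = W_{hy} h_T + b_y

The backward pass relies on the identities \sigma'(z) = \sigma(z)(1 - \sigma(z)) and \tanh'(z) = 1 - \tanh^2(z), which is why dsigmoid and dtanh take the activations' outputs rather than their inputs.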

[3]: class LSTMTrainer:
    """
    Trainer for the LSTM network.

    Parameters:
    - model: LSTM, the LSTM network to train
    - learning_rate: float, learning rate for the optimizer
    - patience: int, number of epochs to wait before early stopping
    - verbose: bool, whether to print training information
    - delta: float, minimum change in validation loss to qualify as an improvement
    """
    def __init__(self, model, learning_rate=0.01, patience=7, verbose=True, delta=0):
        self.model = model
        self.learning_rate = learning_rate
        self.train_losses = []
        self.val_losses = []
        self.early_stopping = EarlyStopping(patience, verbose, delta)

    def train(self, X_train, y_train, X_val=None, y_val=None, epochs=10, batch_size=1, clip_value=1.0):
        """
        Train the LSTM network.

        Parameters:
        - X_train: np.ndarray, training data
        - y_train: np.ndarray, training labels
        - X_val: np.ndarray, validation data
        - y_val: np.ndarray, validation labels
        - epochs: int, number of training epochs
        - batch_size: int, size of mini-batches
        - clip_value: float, value to clip gradients to
        """
        for epoch in range(epochs):
            epoch_losses = []

            for i in range(0, len(X_train), batch_size):
                batch_X = X_train[i:i + batch_size]
                batch_y = y_train[i:i + batch_size]
                losses = []

                # Parameters are updated after every sample; batch_size only
                # groups samples for loss reporting.
                for x, y_true in zip(batch_X, batch_y):
                    y_pred, caches = self.model.forward(x)
                    loss = self.compute_loss(y_pred, y_true.reshape(-1, 1))
                    losses.append(loss)

                    # Backpropagation to get gradients
                    dy = y_pred - y_true.reshape(-1, 1)
                    grads = self.model.backward(dy, caches, clip_value=clip_value)
                    self.model.update_params(grads, self.learning_rate)

                batch_loss = np.mean(losses)
                epoch_losses.append(batch_loss)

            avg_epoch_loss = np.mean(epoch_losses)
            self.train_losses.append(avg_epoch_loss)

            if X_val is not None and y_val is not None:
                val_loss = self.validate(X_val, y_val)
                self.val_losses.append(val_loss)

                if epoch % 10 == 0:
                    print(f'Epoch {epoch + 1}/{epochs} - Loss: {avg_epoch_loss:.5f}, Val Loss: {val_loss:.5f}')

                # Check early stopping condition
                self.early_stopping(val_loss)
                if self.early_stopping.early_stop:
                    print("Early stopping")
                    break
            else:
                print(f'Epoch {epoch + 1}/{epochs} - Loss: {avg_epoch_loss:.5f}')

    def compute_loss(self, y_pred, y_true):
        """
        Compute mean squared error loss.
        """
        return np.mean((y_pred - y_true) ** 2)

    def validate(self, X_val, y_val):
        """
        Validate the model on a separate set of data.
        """
        val_losses = []
        for x, y_true in zip(X_val, y_val):
            y_pred, _ = self.model.forward(x)
            loss = self.compute_loss(y_pred, y_true.reshape(-1, 1))
            val_losses.append(loss)
        return np.mean(val_losses)
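A note on the gradient the trainer feeds into backward: compute_loss is the mean squared error, whose exact derivative with respect to a prediction carries a factor of 2/m,

    L = \frac{1}{m} \sum_{k=1}^{m} (\hat{y}_k - y_k)^2, \qquad
    \frac{\partial L}{\partial \hat{y}_k} = \frac{2}{m} (\hat{y}_k - y_k).

The training loop uses dy = y_pred - y_true instead; with a single output this only drops the constant factor of 2, which is effectively absorbed into the learning rate.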

[11]: class TimeSeriesDataset:
    """
    Dataset class for time series data.

    Parameters:
    - start_date: str, start date for data retrieval
    - end_date: str, end date for data retrieval
    - look_back: int, number of previous time steps to include in each sample
    - train_size: float, proportion of data to use for training
    """
    def __init__(self, start_date, end_date, look_back=1, train_size=0.67):
        self.start_date = start_date
        self.end_date = end_date
        self.look_back = look_back
        self.train_size = train_size

    def load_data(self):
        """
        Load the Google stock data from data/google.csv.

        Returns:
        - np.ndarray, training data
        - np.ndarray, testing data
        """
        df = pd.read_csv('data/google.csv')
        df = df[(df['Date'] >= self.start_date) & (df['Date'] <= self.end_date)]
        df = df.sort_values('Date')
        df = df[['Close']].astype(float)      # Use closing price
        data = self.MinMaxScaler(df.values)   # Convert the DataFrame to a scaled numpy array
        train_size = int(len(data) * self.train_size)
        train, test = data[0:train_size, :], data[train_size:len(data), :]
        return train, test

    def MinMaxScaler(self, data):
        """
        Min-max scaling of the data.

        Parameters:
        - data: np.ndarray, input data
        """
        numerator = data - np.min(data, 0)
        denominator = np.max(data, 0) - np.min(data, 0)
        return numerator / (denominator + 1e-7)

    def create_dataset(self, dataset):
        """
        Create the dataset for time series prediction.

        Parameters:
        - dataset: np.ndarray, input data

        Returns:
        - np.ndarray, input data
        - np.ndarray, output data
        """
        dataX, dataY = [], []
        for i in range(len(dataset) - self.look_back):
            a = dataset[i:(i + self.look_back), 0]
            dataX.append(a)
            dataY.append(dataset[i + self.look_back, 0])
        return np.array(dataX), np.array(dataY)

    def get_train_test(self):
        """
        Get the training and testing data.

        Returns:
        - np.ndarray, training input
        - np.ndarray, training output
        - np.ndarray, testing input
        - np.ndarray, testing output
        """
        train, test = self.load_data()
        trainX, trainY = self.create_dataset(train)
        testX, testY = self.create_dataset(test)
        return trainX, trainY, testX, testY
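To make the sliding window in create_dataset concrete, here is a small sketch on a toy array; the dates and values are placeholders for illustration only, not taken from the Google data.

import numpy as np

# Toy series of 5 already-scaled prices (illustrative values only)
toy = np.array([[0.1], [0.2], [0.3], [0.4], [0.5]])

ds = TimeSeriesDataset('2005-01-01', '2020-12-31', look_back=2)
X, y = ds.create_dataset(toy)

# Each row of X is a window of `look_back` values; y is the value that follows it:
# X = [[0.1, 0.2], [0.2, 0.3], [0.3, 0.4]]
# y = [0.3, 0.4, 0.5]
print(X, y)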

[12]: # Instantiate the dataset
dataset = TimeSeriesDataset('2005-01-01', '2020-12-31', train_size=0.7, look_back=1)
trainX, trainY, testX, testY = dataset.get_train_test()

# Combine train and test targets and plot them
combined = np.concatenate((trainY, testY))

plt.figure(figsize=(14, 5))
plt.plot(combined, label='Google Stock Price', linewidth=2, color='dodgerblue')
plt.title('Google Stock Price', fontsize=20)
plt.xlabel('Time', fontsize=16)
plt.ylabel('Normalized Stock Price', fontsize=16)
plt.grid(True)
plt.legend(fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()

[13]: # Reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

look_back = 1       # Number of previous time steps to include in each sample
hidden_size = 256   # Number of LSTM units
output_size = 1     # Dimensionality of the output space

lstm = LSTM(input_size=1, hidden_size=hidden_size, output_size=output_size)

# Create and train the LSTM using LSTMTrainer
trainer = LSTMTrainer(lstm, learning_rate=1e-3, patience=50, verbose=True, delta=0.001)
trainer.train(trainX, trainY, testX, testY, epochs=1000, batch_size=32)

Epoch 1/1000 - Loss: 0.24629, Val Loss: 0.53735
Epoch 11/1000 - Loss: 0.07889, Val Loss: 0.11416
Epoch 21/1000 - Loss: 0.06242, Val Loss: 0.05693
Epoch 31/1000 - Loss: 0.05286, Val Loss: 0.03841
Epoch 41/1000 - Loss: 0.04575, Val Loss: 0.02845
Epoch 51/1000 - Loss: 0.04046, Val Loss: 0.02222
Epoch 61/1000 - Loss: 0.03655, Val Loss: 0.01831
Epoch 71/1000 - Loss: 0.03364, Val Loss: 0.01598
Epoch 81/1000 - Loss: 0.03149, Val Loss: 0.01472
Epoch 91/1000 - Loss: 0.02989, Val Loss: 0.01420
Epoch 101/1000 - Loss: 0.02870, Val Loss: 0.01417
Epoch 111/1000 - Loss: 0.02782, Val Loss: 0.01444
Epoch 121/1000 - Loss: 0.02717, Val Loss: 0.01490
Epoch 131/1000 - Loss: 0.02668, Val Loss: 0.01546
Early stopping

[15]: plot_manager = PlotManager()

# Plot the training and validation losses recorded by the trainer
plot_manager.plot_losses(trainer.train_losses, trainer.val_losses)

# Display the figure
plot_manager.show_plots()
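The three helper classes imported at the top of the notebook (WeightInitializer, EarlyStopping, PlotManager) live in the author's src package, which is not reproduced here. The sketch below is a hypothetical stand-in written only to match how they are called in this notebook; the internals are assumptions, not the author's implementation.

import numpy as np
import matplotlib.pyplot as plt

class WeightInitializer:
    """Hypothetical stand-in: Xavier/Glorot-style initialization for a (fan_out, fan_in) matrix."""
    def __init__(self, method='xavier'):
        self.method = method

    def initialize(self, shape):
        fan_out, fan_in = shape
        if self.method == 'xavier':
            limit = np.sqrt(6.0 / (fan_in + fan_out))
            return np.random.uniform(-limit, limit, size=shape)
        return np.random.randn(*shape) * 0.01   # fallback: small random weights

class EarlyStopping:
    """Hypothetical stand-in: stop when val loss has not improved by `delta` for `patience` epochs."""
    def __init__(self, patience=7, verbose=True, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None or val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

class PlotManager:
    """Hypothetical stand-in: plot training and validation loss curves."""
    def plot_losses(self, train_losses, val_losses):
        plt.figure(figsize=(10, 4))
        plt.plot(train_losses, label='Train Loss')
        plt.plot(val_losses, label='Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('MSE Loss')
        plt.legend()

    def show_plots(self):
        plt.show()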
