
rnn

June 1, 2024

[ ]: ! kaggle datasets download -d robikscube/hourly-energy-consumption #data from kaggle

Dataset URL: https://www.kaggle.com/datasets/robikscube/hourly-energy-consumption
License(s): CC0-1.0
Downloading hourly-energy-consumption.zip to /content
44% 5.00M/11.4M [00:00<00:00, 50.0MB/s]
100% 11.4M/11.4M [00:00<00:00, 88.4MB/s]

[ ]: !unzip "/content/hourly-energy-consumption.zip"

Archive: /content/hourly-energy-consumption.zip
inflating: AEP_hourly.csv
inflating: COMED_hourly.csv
inflating: DAYTON_hourly.csv
inflating: DEOK_hourly.csv
inflating: DOM_hourly.csv
inflating: DUQ_hourly.csv
inflating: EKPC_hourly.csv
inflating: FE_hourly.csv
inflating: NI_hourly.csv
inflating: PJME_hourly.csv
inflating: PJMW_hourly.csv
inflating: PJM_Load_hourly.csv
inflating: est_hourly.paruqet
inflating: pjm_hourly_est.csv

[ ]: import sys
import copy
import torch
import torch.nn as nn

[ ]: #creating a sliding window to convert the time series into a supervised learning problem
import numpy as np


def sliding_window(ts,features): #features: number of elements in a sliding window

    x=[]
    y=[]
    for i in range(features,len(ts)):
        x.append(ts[i-features:i]) #the previous `features` values
        y.append([ts[i]])          #the value that follows the window
    return np.array(x),np.array(y)
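
A tiny worked example (illustrative only, not part of the original notebook) makes the windowing explicit: each row of x holds the previous `features` values and the corresponding row of y holds the value that immediately follows them.

[ ]: # illustrative sketch: sliding_window on a toy series with features=3
toy = [1, 2, 3, 4, 5, 6]
x_toy, y_toy = sliding_window(toy, 3)
print(x_toy)  # [[1 2 3] [2 3 4] [3 4 5]]
print(y_toy)  # [[4] [5] [6]]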

[ ]: from sklearn.preprocessing import MinMaxScaler


def scale(ts):
    global scaler
    scaler=MinMaxScaler()
    #MinMaxScaler expects a 2D array of shape (n_samples, n_features)
    scaled_ts=scaler.fit_transform(np.array(ts).reshape(-1,1))
    return scaled_ts[:,0]
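
A quick illustrative check (toy values assumed, not from the original notebook) shows the helper mapping a series into [0, 1], with the fitted global scaler available for an inverse transform later.

[ ]: # illustrative sketch: scaling a toy series to [0, 1]
scaled = scale([10.0, 20.0, 30.0])
print(scaled)                             # [0.  0.5 1. ]
print(scaler.inverse_transform([[0.5]]))  # [[20.]]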

[ ]: from sklearn.model_selection import train_test_split


def get_train_val_test_data(ts,features,test_size):

    #converting the time series into supervised-learning samples
    x,y=sliding_window(ts,features)

    #the last test_size samples are held out for testing
    x_temp,x_test,y_temp,y_test=x[0:-test_size],x[-test_size:],y[0:-test_size],y[-test_size:]
    #60% of the series length (minus the test size), rounded, is used for training; the rest for validation
    train_size=round((len(ts)-test_size)*0.6)
    x_train,x_val,y_train,y_val=x_temp[0:train_size],x_temp[train_size:],y_temp[0:train_size],y_temp[train_size:]

    #converting to tensors
    x_train=torch.tensor(x_train,dtype=torch.float32)
    x_val=torch.tensor(x_val,dtype=torch.float32)
    x_test=torch.tensor(x_test,dtype=torch.float32)
    y_train=torch.tensor(y_train,dtype=torch.float32)
    y_val=torch.tensor(y_val,dtype=torch.float32)
    y_test=torch.tensor(y_test,dtype=torch.float32)
    return x_train,x_val,x_test,y_train,y_val,y_test

[ ]: class Rnn(nn.Module):
    def __init__(self,in_size,hidden_size,out_size,num_layers):
        """
        in_size: Number of features of your input vector
        hidden_size: Number of hidden neurons
        out_size: Number of features of your output vector
        num_layers: Number of layers in model
        """
        super().__init__() #calling parent class constructor
        self.num_layers=num_layers
        self.hidden_size=hidden_size
        #creating the recurrent neural network
        #(num_layers is stored but not passed on, so nn.RNN uses a single recurrent layer)
        self.rnn=nn.RNN(input_size=in_size,hidden_size=hidden_size,batch_first=True)
        #creating the fully connected output layer
        self.fc=nn.Linear(hidden_size,out_size)

    def forward(self,x,h=None):
        out,h=self.rnn(x,h)
        #flatten the per-step hidden states and map each one to a single output value
        out = out.contiguous().view(-1, self.hidden_size)
        out=self.fc(out)
        return out,h
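
As a quick sanity check (an illustrative sketch, not part of the original notebook), a dummy forward pass shows the shapes the model produces for an unbatched input of 10 windows with 500 lagged values each.

[ ]: # illustrative sketch: dummy forward pass to inspect output shapes
check_model = Rnn(in_size=500, hidden_size=50, out_size=1, num_layers=2)
dummy = torch.zeros(10, 500)  # 10 windows, 500 lagged values each (unbatched)
out, h = check_model(dummy)
print(out.shape, h.shape)     # torch.Size([10, 1]) torch.Size([1, 50])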

[ ]: import pandas as pd
import numpy as np
#note: parse_dates=True only parses the index, so Datetime remains an object column (see info() below)
raw_df=pd.read_csv("/content/AEP_hourly.csv",parse_dates=True)
raw_df

[ ]: Datetime AEP_MW
0 2004-12-31 01:00:00 13478.0
1 2004-12-31 02:00:00 12865.0
2 2004-12-31 03:00:00 12577.0
3 2004-12-31 04:00:00 12517.0
4 2004-12-31 05:00:00 12670.0
… … …
121268 2018-01-01 20:00:00 21089.0
121269 2018-01-01 21:00:00 20999.0
121270 2018-01-01 22:00:00 20820.0
121271 2018-01-01 23:00:00 20415.0
121272 2018-01-02 00:00:00 19993.0

[121273 rows x 2 columns]

[ ]: raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121273 entries, 0 to 121272
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Datetime 121273 non-null object
1 AEP_MW 121273 non-null float64
dtypes: float64(1), object(1)
memory usage: 1.9+ MB

[ ]: raw_df.describe()

[ ]: AEP_MW
count 121273.000000
mean 15499.513717
std 2591.399065
min 9581.000000
25% 13630.000000
50% 15310.000000
75% 17200.000000
max 25695.000000

[ ]: import matplotlib.pyplot as plt


plt.plot(raw_df["AEP_MW"][0:1000])

[ ]: [<matplotlib.lines.Line2D at 0x7e1784aba650>]

[181]: features=500
test_size=30000
x_train,x_val,x_test,y_train,y_val,y_test=get_train_val_test_data(raw_df["AEP_MW"],features,test_size)

[182]: x_train.shape

[182]: torch.Size([54764, 500])

[183]: x_val.shape

[183]: torch.Size([36009, 500])

[184]: x_test.shape

[184]: torch.Size([30000, 500])
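
These shapes follow directly from get_train_val_test_data: 121,273 readings give 120,773 windows of length 500, the last 30,000 are held out for testing, train_size is round((len(ts) - test_size) * 0.6) = 54,764, and the remaining 36,009 windows go to validation. A short illustrative check of that arithmetic (not part of the original notebook):

[ ]: # illustrative check of the split arithmetic behind the shapes above
n, features, test_size = 121273, 500, 30000
n_windows = n - features                       # 120773 sliding-window samples
train_size = round((n - test_size) * 0.6)      # 54764 training samples
val_size = n_windows - test_size - train_size  # 36009 validation samples
print(n_windows, train_size, val_size)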

[185]: #Training the model


model=Rnn(in_size=500,hidden_size=50,out_size=1,num_layers=2)
model.train()

[185]: Rnn(
(rnn): RNN(500, 50, batch_first=True)
(fc): Linear(in_features=50, out_features=1, bias=True)
)

[192]: from torch.optim import Adam


optimizer=Adam(params=model.parameters(),lr=0.01)
best_model=None
training_loss=[]
validation_loss=[]
min_val_loss=sys.maxsize
epochs=150
h=None
for i in range(epochs):
    output,h=model(x_train)
    loss=nn.L1Loss()(output,y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    training_loss.append(loss.item())

    #validating the model
    output,h=model(x_val)
    val_loss=nn.L1Loss()(output,y_val)
    validation_loss.append(val_loss.item())
    #keep a copy of the model with the lowest validation loss seen so far
    if min_val_loss>val_loss.item():
        best_model=copy.deepcopy(model)
        min_val_loss=val_loss.item()

[195]: # examine the training process


plt.title("Training")
plt.plot(training_loss,label="training")
plt.plot(validation_loss,label="validation")
plt.xlabel("epochs")
plt.ylabel("Mean Absolute Error")
plt.legend()

[195]: <matplotlib.legend.Legend at 0x7e1779877e50>

[194]: training_loss

[194]: [16887.40625,
16886.892578125,
16886.388671875,
16885.873046875,
16885.361328125,
16884.857421875,
16884.341796875,
16883.828125,
16883.330078125,
16882.814453125,
16882.3125,
16881.796875,
16881.283203125,
16880.779296875,
16880.265625,
16879.751953125,
16879.25,
16878.736328125,
16878.220703125,
16877.720703125,
16877.205078125,
16876.69140625,
16876.1875,
16875.673828125,
16875.16015625,
16874.654296875,
16874.140625,
16873.640625,
16873.126953125,
16872.611328125,
16872.111328125,
16871.595703125,
16871.08203125,
16870.578125,
16870.064453125,
16869.5625,
16869.048828125,
16868.53515625,
16868.033203125,
16867.517578125,
16867.00390625,
16866.5,
16865.986328125,
16865.47265625,
16864.96875,
16864.455078125,
16863.939453125,
16863.439453125,
16862.921875,
16862.412109375,
16861.91015625,
16861.39453125,
16860.890625,
16860.376953125,
16859.86328125,
16859.359375,
16858.845703125,
16858.330078125,
16857.830078125,
16857.314453125,
16856.814453125,
16856.298828125,
16855.78515625,
16855.28125,
16854.767578125,
16854.251953125,
16853.75,
16853.236328125,
16852.720703125,
16852.220703125,
16851.70703125,
16851.19140625,
16850.689453125,
16850.17578125,
16849.662109375,
16849.158203125,
16848.64453125,
16848.14453125,
16847.625,
16847.115234375,
16846.611328125,
16846.09765625,
16845.583984375,
16845.080078125,
16844.56640625,
16844.0625,
16843.548828125,
16843.03515625,
16842.53515625,
16842.017578125,
16841.505859375,
16841.00390625,
16840.48828125,
16839.974609375,
16839.470703125,
16838.955078125,
16838.44140625,
16837.9375,
16837.42578125,
16836.91015625,
16836.41015625,
16835.896484375,
16835.39453125,
16834.87890625,
16834.365234375,
16833.861328125,
16833.34765625,
16832.833984375,
16832.330078125,
16831.8203125,
16831.31640625,
16830.80078125,
16830.287109375,
16829.78515625,
16829.26953125,
16828.755859375,
16828.251953125,
16827.73828125,
16827.224609375,
16826.720703125,
16826.208984375,
16825.693359375,
16825.193359375,
16824.677734375,
16824.162109375,
16823.66015625,
16823.146484375,
16822.64453125,
16822.126953125,
16821.61328125,
16821.11328125,
16820.599609375,
16820.083984375,
16819.583984375,
16819.068359375,
16818.564453125,
16818.05078125,
16817.537109375,
16817.03515625,
16816.517578125,
16816.005859375,
16815.50390625,
16814.990234375,
16814.4765625,
16813.97265625,
16813.458984375,
16812.943359375,
16812.439453125,
16811.92578125,
16811.412109375]

[ ]: #evaluating the model


best_model.eval()
_,h_list=best_model(x_val)
h=(h_list[-1,:]).unsqueeze(-2) #final hidden state from the validation pass, shape (1, hidden_size)
predicted=[]
for i in x_test.tolist():
    x=torch.tensor(data=[i],dtype=torch.float32)
    y,h=best_model(x,h) #carry the hidden state forward from one test window to the next
    predicted.append(y.item())
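
The notebook stops after collecting the predictions, so as an illustrative follow-up (assumed, not part of the original) the forecasts could be compared against the held-out targets, for example by computing the test mean absolute error and plotting both series over the first 500 hours.

[ ]: # illustrative sketch: compare predictions with the held-out test targets
predicted_t = torch.tensor(predicted, dtype=torch.float32).reshape(-1, 1)
test_mae = nn.L1Loss()(predicted_t, y_test).item()
print("test MAE:", test_mae)

plt.figure()
plt.plot(y_test[:500].flatten(), label="actual")
plt.plot(predicted_t[:500].flatten(), label="predicted")
plt.xlabel("hour")
plt.ylabel("AEP_MW")
plt.legend()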
