Time Series
1. Data Ingestion
2. EDA of the Data
3. Data Preprocessing
4. Model Building
5. Model Evaluation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import warnings
warnings.filterwarnings('ignore')
df=pd.read_csv('TSLA.csv')
df
df.isnull().sum()
Date 0
Open 0
High 0
Low 0
Close 0
Volume 0
Dividends 0
Stock Splits 0
dtype: int64
df = df[['Date','Close']]
df
Date Close
0 2023-01-01 102.375100
1 2023-01-02 103.268399
2 2023-01-03 104.661726
3 2023-01-04 107.514532
4 2023-01-05 109.147197
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 365 non-null object
1 Close 365 non-null float64
dtypes: float64(1), object(1)
memory usage: 5.8+ KB
df["Date"]=pd.to_datetime(df.Date)
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 365 non-null datetime64[ns]
1 Close 365 non-null float64
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.8 KB
stock_df=df.set_index("Date")
stock_df
Close
Date
2023-01-01 102.375100
2023-01-02 103.268399
2023-01-03 104.661726
2023-01-04 107.514532
2023-01-05 109.147197
... ...
2023-12-27 274.681922
2023-12-28 275.070082
2023-12-29 277.099232
2023-12-30 277.716507
2023-12-31 277.682775
stock_df.describe()
Close
count 365.000000
mean 199.661626
std 51.101389
min 102.375100
25% 147.327615
50% 205.663111
75% 238.942848
max 277.716507
stock_df.head()
Close
Date
2023-01-01 102.375100
2023-01-02 103.268399
2023-01-03 104.661726
2023-01-04 107.514532
2023-01-05 109.147197
plt.plot(stock_df)
plt.show()
plt.hist(stock_df)
plt.show()
sns.histplot(stock_df['Close'], kde=True)  # distplot is deprecated in recent seaborn versions
plt.show()
# plotting close price
plt.style.use('ggplot')
plt.figure(figsize=(18,8))
plt.plot(stock_df['Close'])
plt.grid(True)
# Labeling
plt.xlabel('Dates', fontsize=20)
plt.ylabel('Close Prices', fontsize=20)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Title
plt.title('Tesla Stock Closing Price', fontsize=30)
plt.show()
stock_df["Close"]
Close
Date
2023-01-01 102.375100
2023-01-02 103.268399
2023-01-03 104.661726
2023-01-04 107.514532
2023-01-05 109.147197
... ...
2023-12-27 274.681922
2023-12-28 275.070082
2023-12-29 277.099232
2023-12-30 277.716507
2023-12-31 277.682775
dtype: float64
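The cell that computed the rolling statistics is missing from the export; a minimal sketch, assuming a 48-day window to match the test_stationarity helper defined later. The two outputs that follow are the rolling mean and the rolling standard deviation.
# rolling statistics of the close price (window size of 48 is an assumption)
rolemean = stock_df['Close'].rolling(48).mean()  # rolling mean
rolestd = stock_df['Close'].rolling(48).std()    # rolling standard deviation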
Date
2023-01-01 NaN
2023-01-02 NaN
2023-01-03 NaN
2023-01-04 NaN
2023-01-05 NaN
... ...
2023-12-27 254.024091
2023-12-28 254.389435
2023-12-29 254.769750
2023-12-30 255.152673
2023-12-31 255.535278
dtype: float64
Close
Date
2023-01-01 NaN
2023-01-02 NaN
2023-01-03 NaN
2023-01-04 NaN
2023-01-05 NaN
... ...
2023-12-27 14.628715
2023-12-28 14.602063
2023-12-29 14.594201
2023-12-30 14.588376
2023-12-31 14.572035
dtype: float64
plt.plot(stock_df.Close, label='Close')
plt.plot(rolemean, label='Rolling Mean')
plt.plot(rolestd, label='Rolling Std')
plt.legend()
plt.show()
from statsmodels.tsa.stattools import adfuller
adft=adfuller(stock_df['Close'])
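The formatting cell that produced the output below is not shown; presumably it pulled the p-value and lag count out of the adfuller result, roughly:
# adft = (test statistic, p-value, #lags used, #observations, critical values, ...)
pd.Series({'p value': adft[1], 'lag': adft[2]})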
p value 0.335269
lag 0.000000
dtype: float64
# p-value > 0.05
# fail to reject the null hypothesis: the series is non-stationary
def test_stationarity(timeseries):
    # Determining rolling statistics
    rolmean = timeseries.rolling(48).mean()  # rolling mean
    rolstd = timeseries.rolling(48).std()    # rolling standard deviation
    # Plot rolling statistics alongside the original series
    plt.plot(timeseries, label='Original')
    plt.plot(rolmean, label='Rolling Mean')
    plt.plot(rolstd, label='Rolling Std')
    plt.legend()
    plt.show()
    # Dickey-Fuller test
    print('Results of Dickey-Fuller Test:')
    adft = adfuller(timeseries)
    output = pd.Series(adft[0:4], index=['Test Statistic', 'p-value',
                                         '#Lags Used', 'Number of Observations Used'])
    for key, value in adft[4].items():
        output['Critical Value (%s)' % key] = value
    print(output)
test_stationarity(stock_df.Close)
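The decomposition cell is missing from the export; a minimal sketch, assuming an additive model and a 30-day seasonal period (the exact settings used originally are not recoverable):
from statsmodels.tsa.seasonal import seasonal_decompose

# decompose the close price into trend, seasonal and residual components
# (model and period are assumptions)
result = seasonal_decompose(stock_df['Close'], model='additive', period=30)
result.plot()
plt.show()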
result.seasonal
seasonal
Date
2023-01-01 -0.049962
2023-01-02 0.098094
2023-01-03 -0.012132
2023-01-04 0.071651
2023-01-05 0.282969
... ...
2023-12-27 -0.049962
2023-12-28 0.098094
2023-12-29 -0.012132
2023-12-30 0.071651
2023-12-31 0.282969
dtype: float64
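The cell that defined df_close is not in the export; presumably it simply took the close column:
# series used for differencing and modelling (assumed definition)
df_close = stock_df['Close']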
df_close
Close
Date
2023-01-01 102.375100
2023-01-02 103.268399
2023-01-03 104.661726
2023-01-04 107.514532
2023-01-05 109.147197
... ...
2023-12-27 274.681922
2023-12-28 275.070082
2023-12-29 277.099232
2023-12-30 277.716507
2023-12-31 277.682775
dtype: float64
df_close = df_close.diff()    # first-order differencing to remove the trend
df_close = df_close.dropna()  # drop the NaN introduced by differencing
test_stationarity(df_close)
Results of Dickey-Fuller Test:
Test Statistic -5.281090
p-value 0.000006
#Lags Used 7.000000
Number of Observations Used 356.000000
Critical Value (1%) -3.448853
Critical Value (5%) -2.869693
Critical Value (10%) -2.571114
dtype: float64
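The train/test split cell is not shown; given the 60-observation test slice used below (and the 365 - 60 arithmetic later on), it was presumably:
# hold out the last 60 differenced observations for testing
train_data = df_close[:-60]
test_data = df_close[-60:]
train_data  # training data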
Close
Date
2023-01-02 0.893299
2023-01-03 1.393326
2023-01-04 2.852806
2023-01-05 1.632665
2023-01-06 0.547885
... ...
2023-10-28 -0.800922
2023-10-29 2.113884
2023-10-30 0.623045
2023-10-31 -0.615555
2023-11-01 1.324551
dtype: float64
df_close[-60:]#testing data
Close
Date
2023-11-02 -0.166372
2023-11-03 -1.044071
2023-11-04 -0.820504
2023-11-05 1.426642
2023-11-06 0.892228
2023-11-07 -0.178966
2023-11-08 1.583613
2023-11-09 -0.351853
2023-11-10 -0.247485
2023-11-11 -0.109094
2023-11-12 0.598929
2023-11-13 0.740941
2023-11-14 0.324071
2023-11-15 0.351657
2023-11-16 0.286591
2023-11-17 -0.604990
2023-11-18 0.541170
2023-11-19 0.030313
2023-11-20 -0.329250
2023-11-21 -0.608250
2023-11-22 0.342926
2023-11-23 0.701763
2023-11-24 1.912550
2023-11-25 0.010383
2023-11-26 1.710726
2023-11-27 1.372687
2023-11-28 -0.686699
2023-11-29 0.954674
2023-11-30 -0.466640
2023-12-01 -2.042970
2023-12-02 0.638001
2023-12-03 -1.164504
2023-12-04 1.029772
2023-12-05 0.085211
2023-12-06 1.994400
2023-12-07 2.125879
2023-12-08 -0.086252
2023-12-09 -0.577820
2023-12-10 -0.268866
2023-12-11 -0.369897
2023-12-12 -0.074961
2023-12-13 0.421395
2023-12-14 0.703201
2023-12-15 1.114858
2023-12-16 0.921257
2023-12-17 -0.380721
2023-12-18 -1.068875
2023-12-19 1.768739
2023-12-20 0.322271
2023-12-21 -0.512357
2023-12-22 0.072897
2023-12-23 -1.323132
2023-12-24 0.106720
2023-12-25 -0.330315
2023-12-26 1.021908
2023-12-27 1.472730
2023-12-28 0.388160
2023-12-29 2.029151
2023-12-30 0.617275
2023-12-31 -0.033732
dtype: float64
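The cell that visualized the split is also missing; a minimal sketch, assuming the train_data/test_data split defined above:
# visualize the train/test split of the differenced series
plt.plot(train_data, label='Training data')
plt.plot(test_data, label='Testing data')
plt.legend()
plt.show()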
365-60
305
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
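The cell that seeded the model-fitting history is missing; presumably it copied the training observations into a plain list (the truncated output below shows that list):
# seed the walk-forward history with the 304 training observations
history = [x for x in train_data]
history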
[0.8932991355088831,
 1.3933263239742928,
 2.852806278873757,
 ...
 0.6230446831372092,
 -0.6155548195604865,
 1.3245506334627066]
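The model-fitting cell is also missing; a minimal sketch, where the (1, 1, 1) order is only an assumption (the original order is not recoverable from the truncated summary below):
# fit an ARIMA model on the history and inspect the summary (order is an assumption)
model = ARIMA(history, order=(1, 1, 1)).fit()
model.summary()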
SARIMAX Results
Dep. Variable: y    No. Observations: 304
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
len(history)
304
model.forecast()
array([0.56016095])
mean_squared_error([test_data[0]],model.forecast())
0.527849948848813
np.sqrt(mean_squared_error([test_data[0]],model.forecast()))
0.7265328270964864
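evaluate_models below relies on a train_arima_model helper that is not present in the export; a minimal walk-forward sketch of what it presumably does (refit the model at each step and score the one-step-ahead forecasts with RMSE):
# walk-forward validation for a given (p, d, q) order
def train_arima_model(train, test, order):
    history = list(train)
    actuals = list(test)
    predictions = []
    for t in range(len(actuals)):
        model = ARIMA(history, order=order).fit()
        yhat = model.forecast()[0]     # one-step-ahead forecast
        predictions.append(yhat)
        history.append(actuals[t])     # roll forward with the observed value
    return np.sqrt(mean_squared_error(actuals, predictions))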
# evaluate different combinations of p, d and q values to find the best order for the ARIMA model
def evaluate_models(dataset, test, p_values, d_values, q_values):
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    rmse = train_arima_model(dataset, test, order)
                    if rmse < best_score:
                        best_score, best_cfg = rmse, order
                    print('ARIMA%s RMSE=%.3f' % (order, rmse))
                except:
                    continue
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
p_values=range(0,3)
d_values=range(0,3)
q_values=range(0,3)
evaluate_models(train_data,test_data,p_values,d_values,q_values)
ARIMA(0, 0, 0) RMSE=0.932
ARIMA(0, 0, 1) RMSE=0.940
ARIMA(0, 0, 2) RMSE=0.940
ARIMA(0, 1, 0) RMSE=1.237
ARIMA(0, 1, 1) RMSE=0.933
ARIMA(0, 1, 2) RMSE=0.958
ARIMA(0, 2, 0) RMSE=2.140
ARIMA(0, 2, 1) RMSE=1.239
ARIMA(0, 2, 2) RMSE=0.938
ARIMA(1, 0, 0) RMSE=0.941
ARIMA(1, 0, 1) RMSE=0.941
ARIMA(1, 0, 2) RMSE=0.953
ARIMA(1, 1, 0) RMSE=1.097
ARIMA(1, 1, 1) RMSE=0.955
ARIMA(1, 1, 2) RMSE=0.968
ARIMA(1, 2, 0) RMSE=1.604
ARIMA(1, 2, 1) RMSE=1.098
ARIMA(1, 2, 2) RMSE=0.959
ARIMA(2, 0, 0) RMSE=0.940
ARIMA(2, 0, 1) RMSE=0.953
ARIMA(2, 0, 2) RMSE=0.913
ARIMA(2, 1, 0) RMSE=1.045
ARIMA(2, 1, 1) RMSE=0.960
ARIMA(2, 1, 2) RMSE=0.957
ARIMA(2, 2, 0) RMSE=1.303
ARIMA(2, 2, 1) RMSE=1.047
ARIMA(2, 2, 2) RMSE=0.965
Best ARIMA(2, 0, 2) RMSE=0.913
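The cell that generated the predictions plotted below (and printed the RMSE figure that follows) is missing; a minimal walk-forward sketch. The (2, 0, 0) order is only an assumption, chosen because the grid search above reports RMSE=0.940 for it, matching the printed figure; it is not necessarily the order actually used.
# walk-forward forecast over the test window with a fixed order (assumed (2, 0, 0))
history = [x for x in train_data]
predictions = []
for t in range(len(test_data)):
    model = ARIMA(history, order=(2, 0, 0)).fit()
    yhat = model.forecast()[0]
    predictions.append(yhat)
    history.append(test_data.iloc[t])  # append the true value before the next refit
rmse = np.sqrt(mean_squared_error(test_data, predictions))
print('my RMSE', rmse)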
my RMSE 0.9404476463697088
plt.figure(figsize=(18,8))
plt.grid(True)
plt.plot(range(len(test_data)), test_data,label='True Test Close Value',linewidth = 5)
plt.plot(range(len(predictions)), predictions, label = 'Predictions on test data', linewidth = 5)
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)
plt.legend(fontsize = 20, shadow=True, facecolor='lightpink', edgecolor = 'k')
plt.show()
fc_series=pd.Series(predictions,index=test_data.index)
#plot
plt.figure(figsize=(12,5), dpi=100)
plt.plot(train_data, label='Training', color = 'blue')
plt.plot(test_data, label='Test', color = 'green', linewidth = 3)
plt.plot(fc_series, label='Forecast', color = 'red')
plt.title('Forecast vs Actuals on test data')
plt.legend(loc='upper left', fontsize=8)
plt.show()