Amazon ARMA model
Loading and Visualizing Data
# Loads the Amazon stock data and set its frequency
import pandas as pd
df = pd.read_csv("AMZN.csv", parse_dates=['Date'], index_col='Date')
df = df.asfreq('D').fillna(method='ffill')
closing_prices = df['Close']
# Visualizing the closing prices over time using matplotlib.
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 6))
plt.plot(closing_prices, label='Closing Prices')
plt.title('Amazon Stock Closing Prices Over Time')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()
Splitting Data
train_size = int(len(closing_prices) * 0.8)
train, test = closing_prices[:train_size], closing_prices[train_size:]
Time Series Analysis
# Plot ACF and PACF of the original time series to see if the data is stationary
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
acf_original = plot_acf(train)
pacf_original = plot_pacf(train)
# Perform Augmented Dickey-Fuller test for stationarity
from statsmodels.tsa.stattools import adfuller
adf_test = adfuller(train)
print(f'p-value: {adf_test[1]}')
p-value: 0.6863921026685542
Making Data stationary
# Differencing to make the series stationary
train_diff = train.diff().dropna()
# Visualizing the differenced data
plt.figure(figsize=(12, 6))
plt.plot(train_diff, label='1st Order Differenced Closing Prices')
plt.title('1st Order Differenced Amazon Stock Closing Prices')
plt.xlabel('Date')
plt.ylabel('Differenced Closing Price')
plt.legend()
plt.show()
# Again using ACF and Pacf to check its stationarity
acf_diff = plot_acf(train_diff)
pacf_diff = plot_pacf(train_diff)
# Perform Augmented Dickey-Fuller test for stationarity
adf_test_diff = adfuller(train_diff)
print(f'p-value: {adf_test_diff[1]}')
p-value: 0.0
Auto ARMA Model
from pmdarima import auto_arima
# Use auto_arima to find the best model
model = auto_arima(train_diff, suppress_warnings=True, seasonal=False)
# Get the best order
best_order = model.get_params()['order']
print("Best Order:", best_order)
Best Order: (2, 0, 2)
ARMA Model Fitting
import itertools
from statsmodels.tsa.arima.model import ARIMA
# Fit ARMA model with the best order
p, q = 2, 2
model = ARIMA(train_diff, order=(p, 0, q))
results = model.fit()
# Check AIC and BIC values
aic = results.aic
bic = results.bic
print(f'AIC: {aic}')
print(f'BIC: {bic}')
AIC: 6244.2862338379855
BIC: 6275.995159313397
# Check the residuals for randomness and stationarity
residuals = results.resid
plt.figure(figsize=(12, 6))
plt.plot(residuals)
plt.title('Residuals of ARMA Model')
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.show()
Forecasting and Evaluation
# Forecast on the test values
forecast_steps = len(test)
forecast = results.forecast(steps=forecast_steps)
# Plot actual vs predicted values for the test set
plt.figure(figsize=(12, 6))
plt.plot(test.index, test.values, label='Actual')
plt.plot(test.index, forecast, label='Predicted')
plt.title('ARIMA Model - Actual vs Predicted on Test Set')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()
MSE and MAE
# Calculate Mean Squared Error (MSE) and Mean Absolute Error (MAE)
from sklearn.metrics import mean_squared_error, mean_absolute_error
mse = mean_squared_error(test, forecast)
mae = mean_absolute_error(test, forecast)
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
Mean Squared Error (MSE): 14503.066111204844
Mean Absolute Error (MAE): 118.31058324851284
Forecasting the closing value
# Predict the closing value of the next day
last_observation = closing_prices.index[-1]
next_day = last_observation + pd.Timedelta(days=1)
predicted_next_day = results.forecast(steps=1)[0]
print(f'Predicted Closing Value for {next_day.date()}: {predicted_next_day}')
Predicted Closing Value for 2023-02-18: 0.3408541885702173
PSD
import numpy as np
from scipy.fft import fft
import matplotlib.pyplot as plt
closing_prices = df['Close'].values # Convert pandas Series to NumPy array
# Compute the FFT
fft_values = fft(closing_prices)
# Compute the Power Spectral Density (PSD)
psd_values = np.abs(fft_values) ** 2
# Plot the PSD
plt.figure(figsize=(12, 6))
plt.plot(psd_values)
plt.title('Power Spectral Density (PSD)')
plt.xlabel('Frequency')
plt.ylabel('Power')
plt.show()
# it suggests that the data may not exhibit strong periodic behavior or
significant frequency components.
Spectrogram
from scipy.signal import spectrogram
closing_prices = df['Close']
# Compute the spectrogram
frequencies, times, spectrogram_values = spectrogram(closing_prices)
print("times shape:", times.shape)
print("frequencies shape:", frequencies.shape)
print("spectrogram_values.T shape:", spectrogram_values.T.shape)
times shape: (8,)
frequencies shape: (129,)
spectrogram_values.T shape: (8, 129)
# Plot the spectrogram
plt.figure(figsize=(12, 6))
plt.pcolormesh(times, frequencies, np.log(spectrogram_values + 1e-10),
shading='auto')
plt.title('Spectrogram')
plt.xlabel('Time')
plt.ylabel('Frequency')
plt.colorbar(label='Log Power')
plt.show()
AMAZON neural network
Import Libraries and Load Data
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
Load and Preprocess Data
# Load the data
data = pd.read_csv('AMZN.csv')
closing_prices = data['Close'].values.reshape(-1, 1)
# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
closing_prices_normalized = scaler.fit_transform(closing_prices)
# Create a function to prepare the data for the LSTM model
def create_dataset(dataset, look_back=1):
dataX, dataY = [], []
for i in range(len(dataset) - look_back):
a = dataset[i:(i + look_back), 0]
dataX.append(a)
dataY.append(dataset[i + look_back, 0])
return np.array(dataX), np.array(dataY)
Create LSTM Dataset
# Define the look-back period
look_back = 10
# Prepare the dataset
X, Y = create_dataset(closing_prices_normalized, look_back)
Split Data into Training and Testing Sets
# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
Reshape Data for LSTM Model
# Reshape input to be [samples, time steps, features] for LSTM model
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
Build and Train the LSTM Model
# Build the LSTM model
model = Sequential()
model.add(LSTM(units=50, input_shape=(look_back, 1)))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
model.fit(X_train, Y_train, epochs=50, batch_size=32, verbose=2)
Epoch 1/50
32/32 - 3s - loss: 0.0684 - 3s/epoch - 88ms/step
Epoch 2/50
32/32 - 0s - loss: 0.0053 - 268ms/epoch - 8ms/step
Epoch 3/50
32/32 - 0s - loss: 0.0023 - 277ms/epoch - 9ms/step
Epoch 4/50
32/32 - 0s - loss: 0.0021 - 266ms/epoch - 8ms/step
Epoch 5/50
32/32 - 0s - loss: 0.0021 - 267ms/epoch - 8ms/step
Epoch 6/50
32/32 - 0s - loss: 0.0020 - 267ms/epoch - 8ms/step
Epoch 7/50
32/32 - 0s - loss: 0.0020 - 278ms/epoch - 9ms/step
Epoch 8/50
32/32 - 0s - loss: 0.0020 - 262ms/epoch - 8ms/step
Epoch 9/50
32/32 - 0s - loss: 0.0020 - 277ms/epoch - 9ms/step
Epoch 10/50
32/32 - 0s - loss: 0.0019 - 261ms/epoch - 8ms/step
Epoch 11/50
32/32 - 0s - loss: 0.0019 - 273ms/epoch - 9ms/step
Epoch 12/50
32/32 - 0s - loss: 0.0019 - 267ms/epoch - 8ms/step
Epoch 13/50
32/32 - 0s - loss: 0.0019 - 267ms/epoch - 8ms/step
Epoch 14/50
32/32 - 0s - loss: 0.0019 - 284ms/epoch - 9ms/step
Epoch 15/50
32/32 - 0s - loss: 0.0018 - 314ms/epoch - 10ms/step
Epoch 16/50
32/32 - 0s - loss: 0.0018 - 264ms/epoch - 8ms/step
Epoch 17/50
32/32 - 0s - loss: 0.0017 - 247ms/epoch - 8ms/step
Epoch 18/50
32/32 - 0s - loss: 0.0017 - 281ms/epoch - 9ms/step
Epoch 19/50
32/32 - 0s - loss: 0.0016 - 326ms/epoch - 10ms/step
Epoch 20/50
32/32 - 0s - loss: 0.0016 - 289ms/epoch - 9ms/step
Epoch 21/50
32/32 - 0s - loss: 0.0018 - 285ms/epoch - 9ms/step
Epoch 22/50
32/32 - 0s - loss: 0.0016 - 262ms/epoch - 8ms/step
Epoch 23/50
32/32 - 0s - loss: 0.0015 - 311ms/epoch - 10ms/step
Epoch 24/50
32/32 - 0s - loss: 0.0016 - 263ms/epoch - 8ms/step
Epoch 25/50
32/32 - 0s - loss: 0.0015 - 315ms/epoch - 10ms/step
Epoch 26/50
32/32 - 0s - loss: 0.0014 - 270ms/epoch - 8ms/step
Epoch 27/50
32/32 - 0s - loss: 0.0014 - 270ms/epoch - 8ms/step
Epoch 28/50
32/32 - 0s - loss: 0.0014 - 266ms/epoch - 8ms/step
Epoch 29/50
32/32 - 0s - loss: 0.0014 - 265ms/epoch - 8ms/step
Epoch 30/50
32/32 - 0s - loss: 0.0014 - 280ms/epoch - 9ms/step
Epoch 31/50
32/32 - 0s - loss: 0.0014 - 275ms/epoch - 9ms/step
Epoch 32/50
32/32 - 0s - loss: 0.0013 - 291ms/epoch - 9ms/step
Epoch 33/50
32/32 - 0s - loss: 0.0012 - 292ms/epoch - 9ms/step
Epoch 34/50
32/32 - 0s - loss: 0.0013 - 293ms/epoch - 9ms/step
Epoch 35/50
32/32 - 0s - loss: 0.0013 - 308ms/epoch - 10ms/step
Epoch 36/50
32/32 - 0s - loss: 0.0013 - 293ms/epoch - 9ms/step
Epoch 37/50
32/32 - 0s - loss: 0.0012 - 301ms/epoch - 9ms/step
Epoch 38/50
32/32 - 0s - loss: 0.0013 - 285ms/epoch - 9ms/step
Epoch 39/50
32/32 - 0s - loss: 0.0012 - 287ms/epoch - 9ms/step
Epoch 40/50
32/32 - 0s - loss: 0.0011 - 282ms/epoch - 9ms/step
Epoch 41/50
32/32 - 0s - loss: 0.0012 - 263ms/epoch - 8ms/step
Epoch 42/50
32/32 - 0s - loss: 0.0011 - 254ms/epoch - 8ms/step
Epoch 43/50
32/32 - 0s - loss: 0.0011 - 260ms/epoch - 8ms/step
Epoch 44/50
32/32 - 0s - loss: 0.0011 - 265ms/epoch - 8ms/step
Epoch 45/50
32/32 - 0s - loss: 0.0010 - 264ms/epoch - 8ms/step
Epoch 46/50
32/32 - 0s - loss: 0.0010 - 267ms/epoch - 8ms/step
Epoch 47/50
32/32 - 0s - loss: 0.0011 - 271ms/epoch - 8ms/step
Epoch 48/50
32/32 - 0s - loss: 0.0010 - 289ms/epoch - 9ms/step
Epoch 49/50
32/32 - 0s - loss: 9.9199e-04 - 257ms/epoch - 8ms/step
Epoch 50/50
32/32 - 0s - loss: 9.7514e-04 - 258ms/epoch - 8ms/step
Out[37]:
<keras.src.callbacks.History at 0x16408a60110>
Evaluate Model Performance on Test Data
# Make predictions on the test data
predictions = model.predict(X_test)
8/8 [==============================] - 1s 4ms/step
# Inverse transform the predictions and actual values to the original scale
predictions = scaler.inverse_transform(predictions)
Y_test_original = scaler.inverse_transform(Y_test.reshape(-1, 1))
# Calculate the root mean squared error (RMSE)
rmse = np.sqrt(mean_squared_error(Y_test_original, predictions))
print('Root Mean Squared Error:', rmse)
Root Mean Squared Error: 3.300441897865308
Visualize Predictions
# Plot the predictions against actual values
plt.plot(Y_test_original, label='Actual')
plt.plot(predictions, label='Predicted')
plt.legend()
plt.show()
Predict Next Day's Closing Value
# Prepare the last 'look_back' days of data
last_data = closing_prices[-look_back:]
last_data_normalized = scaler.transform(last_data.reshape(-1, 1))
# Reshape and predict
last_data_reshaped = last_data_normalized.reshape(1, look_back, 1)
next_day_prediction_normalized = model.predict(last_data_reshaped)
1/1 [==============================] - 0s 31ms/step
# Inverse transform to get the predicted closing value
next_day_prediction = scaler.inverse_transform(next_day_prediction_normalized)
print('Predicted Next Day Closing Value:', next_day_prediction[0, 0])
Predicted Next Day Closing Value: 98.60197
J&J ARMA model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf,
plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,
mean_squared_error
Loading and Visualizing Data
# Step 1: Load the CSV file
df = pd.read_csv('jj.csv', parse_dates=['date'],
index_col='date')
# Step 2: Decompose the time series to understand trends,
patterns, and seasonality
result = seasonal_decompose(df['data'], model='additive',
period=4)
result.plot()
plt.show()
Check for Stationarity
# Step 3: Check for stationarity
def check_stationarity(timeseries):
adf_result = adfuller(timeseries, autolag='AIC')
kpss_result = kpss(timeseries)
print('ADF Statistic:', adf_result[0])
print('p-value (ADF):', adf_result[1])
print('Critical Values (ADF):', adf_result[4])
print('\nKPSS Statistic:', kpss_result[0])
print('p-value (KPSS):', kpss_result[1])
print('Critical Values (KPSS):', kpss_result[3])
check_stationarity(df['data'])
ADF Statistic: 2.7420165734574735
p-value (ADF): 1.0
Critical Values (ADF): {'1%': -3.524624466842421, '5%': -2.9026070739026064,
'10%': -2.5886785262345677}
KPSS Statistic: 1.3634938602266964
p-value (KPSS): 0.01
Critical Values (KPSS): {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
C:\Users\PMLS\AppData\Local\Temp\ipykernel_2036\3808783141.py:4:
InterpolationWarning: The test statistic is outside of the range of p-values
available in the
look-up table. The actual p-value is smaller than the p-value returned.
kpss_result = kpss(timeseries)
# apply differencing
df['data_diff'] = df['data'] - df['data'].shift(1)
check_stationarity(df['data_diff'].dropna())
ADF Statistic: -0.40740976363804116
p-value (ADF): 0.9088542416911313
Critical Values (ADF): {'1%': -3.524624466842421, '5%': -2.9026070739026064,
'10%': -2.5886785262345677}
KPSS Statistic: 0.22139627096747283
p-value (KPSS): 0.1
Critical Values (KPSS): {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
C:\Users\PMLS\AppData\Local\Temp\ipykernel_2036\3808783141.py:4:
InterpolationWarning: The test statistic is outside of the range of p-values
available in the
look-up table. The actual p-value is greater than the p-value returned.
kpss_result = kpss(timeseries)
# Drop rows with missing values
df = df.dropna()
Log differencing
# Apply log differencing
df['data_log_diff'] = np.log(df['data']) - np.log(df['data'].shift(1))
# Check stationarity of log-differenced data
check_stationarity(df['data_log_diff'].dropna())
df = df.dropna()
ADF Statistic: -4.210044890698209
p-value (ADF): 0.0006335725359760862
Critical Values (ADF): {'1%': -3.5194805351545413, '5%': -2.9003945086747343,
'10%': -2.5874984279778395}
KPSS Statistic: 0.20166745754525875
p-value (KPSS): 0.1
Critical Values (KPSS): {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
C:\Users\PMLS\AppData\Local\Temp\ipykernel_2036\3808783141.py:4:
InterpolationWarning: The test statistic is outside of the range of p-values
available in the
look-up table. The actual p-value is greater than the p-value returned.
kpss_result = kpss(timeseries)
ACF and PACF Plots
from statsmodels.tsa.arima.model import ARIMA
# Step 4: ACF and PACF analysis for log-differenced data
plot_acf(df['data_log_diff'].dropna(), lags=20)
plt.show()
plot_pacf(df['data_log_diff'].dropna(), lags=20)
plt.show()
Splitting Data
# Step 5: Split data into training and test sets
train_size = int(len(df) * 0.7)
train, test = df['data_log_diff'][:train_size], df['data_log_diff'][train_size:]
Auto Arima
from pmdarima import auto_arima
# Use auto-arima to find the best model
autoarima_model = auto_arima(train, seasonal=True, suppress_warnings=True)
print(autoarima_model.summary())
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 57
Model: SARIMAX(3, 0, 2) Log Likelihood 54.168
Date: Sat, 16 Mar 2024 AIC -94.335
Time: 11:24:33 BIC -80.034
Sample: 0 HQIC -88.777
- 57
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 0.1618 0.025 6.490 0.000 0.113 0.211
ar.L1 -0.9634 0.088 -10.895 0.000 -1.137 -0.790
ar.L2 -0.9728 0.066 -14.730 0.000 -1.102 -0.843
ar.L3 -0.9187 0.036 -25.298 0.000 -0.990 -0.848
ma.L1 0.2672 0.166 1.612 0.107 -0.058 0.592
ma.L2 0.3396 0.154 2.202 0.028 0.037 0.642
sigma2 0.0079 0.002 4.197 0.000 0.004 0.012
=================================================================================
==
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB):
0.06
Prob(Q): 0.93 Prob(JB):
0.97
Heteroskedasticity (H): 1.09 Skew:
0.08
Prob(H) (two-sided): 0.85 Kurtosis:
3.02
=================================================================================
==
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-
step).
Model Fitting to ARMA Model
print(autoarima_model.order)
(3, 0, 2)
# 6. Model Fitting
arima_model = ARIMA(train, order=autoarima_model.order)
arima_fit = arima_model.fit()
# 7. Check residuals
residuals = arima_fit.resid
plt.plot(residuals)
plt.show()
Forecast Accuracy
# 8. Evaluate forecast accuracy
arima_forecast = arima_fit.forecast(steps=len(test))
mae = mean_absolute_error(test, arima_forecast)
mse = mean_squared_error(test, arima_forecast)
rmse = np.sqrt(mse)
print(f'MAE: {mae:.2f}')
print(f'MSE: {mse:.2f}')
print(f'RMSE: {rmse:.2f}')
MAE: 0.14
MSE: 0.03
RMSE: 0.18
# 9. Plotting predictions on test data
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size:], df['data_log_diff'][train_size:], label='Actual
Log-Differenced')
plt.plot(df.index[train_size:], arima_forecast, label='ARIMA Forecast')
plt.legend()
plt.title('ARIMA Model Forecast on Test Data')
plt.show()
# 10. Forecast next 24 months
forecast_steps = 24
forecast_index = pd.date_range(df.index[-1], periods=forecast_steps + 1, freq='M')[1:]
arima_next_24_forecast = arima_fit.forecast(steps=forecast_steps)
# 11. Plotting forecasted values on log differenced data
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['data_log_diff'], label='Actual Log-Differenced')
plt.plot(forecast_index, arima_next_24_forecast, label='ARIMA Forecast Next 24 Months')
plt.legend()
plt.title('ARIMA Model Forecast for Next 24 Months')
plt.show()
# 10. Forecast next 24 months on actual data
n_forecast = 24
predict = arima_fit.get_prediction(end=arima_model.nobs +
n_forecast)
idx = np.arange(len(predict.predicted_mean))
fig, ax = plt.subplots()
ax.plot(df['data'], 'blue')
ax.plot(idx[-n_forecast:], predict.predicted_mean[-
n_forecast:], 'k--')
ax.set(title='Forecast of Johnson&Johnson Sales')
plt.show()
predictions_int = arima_fit.get_forecast(steps=24)
predictions_int.predicted_mean
57 -0.117143
58 0.126009
59 0.079427
60 0.070450
61 -0.099362
62 0.115915
63 0.081700
64 0.061709
65 -0.083857
66 0.107373
67 0.082908
68 0.054579
69 -0.070298
70 0.100121
71 0.083281
72 0.048791
73 -0.058409
74 0.093943
75 0.083007
76 0.044120
77 -0.047955
78 0.088657
79 0.082240
80 0.040376
Name: predicted_mean, dtype: float64
Power Spectral Density
import numpy as np
from scipy.fft import fft
import matplotlib.pyplot as plt
# Compute the FFT
fft_values = fft(df)
# Compute the Power Spectral Density (PSD)
psd_values = np.abs(fft_values) ** 2
# Plot the periodogram
plt.figure(figsize=(12, 6))
plt.plot(psd_values)
plt.title('Power Spectral Density (Periodogram)')
plt.xlabel('Frequency')
plt.ylabel('Power')
plt.show()
# Plot only the first half of the PSD (due to symmetry)
plt.figure(figsize=(12, 6))
plt.plot(psd_values[:len(df) // 2])
plt.title('Power Spectral Density (Periodogram)')
plt.xlabel('Frequency')
plt.ylabel('Power')
plt.show()
Spectrogram
from scipy.signal import spectrogram
# Compute the spectrogram
frequencies, times, spectrogram_values = spectrogram(df)
print("times shape:", times.shape)
print("frequencies shape:", frequencies.shape)
print("spectrogram_values.T shape:", spectrogram_values.T.shape)
times shape: (1,)
frequencies shape: (2,)
spectrogram_values.T shape: (1, 2, 82)
# Reshape the spectrogram_values to match the dimensions expected by pcolormesh
spectrogram_values_reshaped = spectrogram_values.T.squeeze()
times = np.arange(len(df))
# Plot the spectrogram
plt.figure(figsize=(12, 6))
plt.pcolormesh(times, frequencies, np.log(spectrogram_values_reshaped + 1e-10),
shading='auto')
plt.title('Spectrogram')
plt.xlabel('Time')
plt.ylabel('Frequency')
plt.colorbar(label='Log Power')
plt.show()
jj neural network
Importing Libraries:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import
MinMaxScaler
from sklearn.metrics import
mean_squared_error
from tensorflow.keras.models import
Sequential
from tensorflow.keras.layers import Dense,
LSTM
import matplotlib.pyplot as plt
Load and Preprocess Data:
# Load the data from the CSV file
data = pd.read_csv('jj.csv')
sales_data =
data['data'].values.astype(float).reshape(-1, 1)
# Normalize the data using Min-Max scaling
scaler = MinMaxScaler(feature_range=(0, 1))
sales_data_normalized =
scaler.fit_transform(sales_data)
# Function to create sequences for time series
forecasting
def create_sequences(data, seq_length):
sequences = []
for i in range(len(data) - seq_length):
seq = data[i:i + seq_length]
sequences.append(seq)
return np.array(sequences)
# Set the sequence length
sequence_length = 12
# Create sequences for training
training_sequences =
create_sequences(sales_data_normalized,
sequence_length)
Split Data for Training and Testing:
# Split data into training and testing sets
training_size = int(len(training_sequences) * 0.8)
training_data = training_sequences[:training_size]
testing_data = training_sequences[training_size:]
# Split the data into input (X) and output (y)
X_train, y_train = training_data[:, :-1], training_data[:,
-1]
X_test, y_test = testing_data[:, :-1], testing_data[:, -1]
Prepare Input and Output Data:
# Reshape input to be 3D [samples, timesteps,
features]
X_train = X_train.reshape((X_train.shape[0],
X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0],
X_test.shape[1], 1))
Build the LSTM Model:
# Build the LSTM model
model = Sequential()
model.add(LSTM(50, input_shape=(X_train.shape[1],
1)))
model.add(Dense(1))
model.compile(optimizer='adam',
loss='mean_squared_error')
Train the Model:
# Train the model
model_fit= model.fit(X_train, y_train, epochs=50,
batch_size=32, verbose=2)
Epoch 1/50
2/2 - 0s - loss: 0.0011 - 23ms/epoch - 11ms/step
Epoch 2/50
2/2 - 0s - loss: 0.0011 - 21ms/epoch - 10ms/step
Epoch 3/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 4/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 5/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 6/50
2/2 - 0s - loss: 0.0011 - 33ms/epoch - 16ms/step
Epoch 7/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 8/50
2/2 - 0s - loss: 0.0011 - 22ms/epoch - 11ms/step
Epoch 9/50
2/2 - 0s - loss: 0.0011 - 31ms/epoch - 15ms/step
Epoch 10/50
2/2 - 0s - loss: 0.0011 - 29ms/epoch - 14ms/step
Epoch 11/50
2/2 - 0s - loss: 0.0011 - 21ms/epoch - 10ms/step
Epoch 12/50
2/2 - 0s - loss: 0.0011 - 23ms/epoch - 11ms/step
Epoch 13/50
2/2 - 0s - loss: 0.0011 - 28ms/epoch - 14ms/step
Epoch 14/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 15/50
2/2 - 0s - loss: 0.0011 - 31ms/epoch - 15ms/step
Epoch 16/50
2/2 - 0s - loss: 0.0010 - 29ms/epoch - 14ms/step
Epoch 17/50
2/2 - 0s - loss: 0.0010 - 20ms/epoch - 10ms/step
Epoch 18/50
2/2 - 0s - loss: 0.0010 - 22ms/epoch - 11ms/step
Epoch 19/50
2/2 - 0s - loss: 0.0010 - 10ms/epoch - 5ms/step
Epoch 20/50
2/2 - 0s - loss: 0.0010 - 16ms/epoch - 8ms/step
Epoch 21/50
2/2 - 0s - loss: 0.0010 - 31ms/epoch - 16ms/step
Epoch 22/50
2/2 - 0s - loss: 0.0010 - 38ms/epoch - 19ms/step
Epoch 23/50
2/2 - 0s - loss: 0.0010 - 31ms/epoch - 16ms/step
Epoch 24/50
2/2 - 0s - loss: 0.0010 - 16ms/epoch - 8ms/step
Epoch 25/50
2/2 - 0s - loss: 0.0010 - 16ms/epoch - 8ms/step
Epoch 26/50
2/2 - 0s - loss: 0.0010 - 22ms/epoch - 11ms/step
Epoch 27/50
2/2 - 0s - loss: 0.0010 - 31ms/epoch - 16ms/step
Epoch 28/50
2/2 - 0s - loss: 0.0010 - 31ms/epoch - 16ms/step
Epoch 29/50
2/2 - 0s - loss: 0.0010 - 22ms/epoch - 11ms/step
Epoch 30/50
2/2 - 0s - loss: 0.0010 - 23ms/epoch - 11ms/step
Epoch 31/50
2/2 - 0s - loss: 0.0010 - 23ms/epoch - 12ms/step
Epoch 32/50
2/2 - 0s - loss: 0.0010 - 16ms/epoch - 8ms/step
Epoch 33/50
2/2 - 0s - loss: 9.9725e-04 - 33ms/epoch - 17ms/step
Epoch 34/50
2/2 - 0s - loss: 9.9585e-04 - 31ms/epoch - 16ms/step
Epoch 35/50
2/2 - 0s - loss: 9.9480e-04 - 16ms/epoch - 8ms/step
Epoch 36/50
2/2 - 0s - loss: 9.8967e-04 - 38ms/epoch - 19ms/step
Epoch 37/50
2/2 - 0s - loss: 9.8387e-04 - 31ms/epoch - 16ms/step
Epoch 38/50
2/2 - 0s - loss: 9.8935e-04 - 16ms/epoch - 8ms/step
Epoch 39/50
2/2 - 0s - loss: 9.7819e-04 - 16ms/epoch - 8ms/step
Epoch 40/50
2/2 - 0s - loss: 9.7617e-04 - 38ms/epoch - 19ms/step
Epoch 41/50
2/2 - 0s - loss: 9.7232e-04 - 16ms/epoch - 8ms/step
Epoch 42/50
2/2 - 0s - loss: 9.7557e-04 - 16ms/epoch - 8ms/step
Epoch 43/50
2/2 - 0s - loss: 9.8442e-04 - 22ms/epoch - 11ms/step
Epoch 44/50
2/2 - 0s - loss: 9.6747e-04 - 31ms/epoch - 16ms/step
Epoch 45/50
2/2 - 0s - loss: 9.6187e-04 - 31ms/epoch - 16ms/step
Epoch 46/50
2/2 - 0s - loss: 9.6344e-04 - 31ms/epoch - 16ms/step
Epoch 47/50
2/2 - 0s - loss: 9.5741e-04 - 22ms/epoch - 11ms/step
Epoch 48/50
2/2 - 0s - loss: 9.5320e-04 - 31ms/epoch - 16ms/step
Epoch 49/50
2/2 - 0s - loss: 9.4968e-04 - 31ms/epoch - 16ms/step
Epoch 50/50
2/2 - 0s - loss: 9.4901e-04 - 22ms/epoch - 11ms/step
Test the Model:
# Test the model
test_loss = model.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss: {test_loss}'
Test Loss: 0.01699160970747471
Make Future Predictions
# Make predictions for the next 24 months
future_predictions = []
current_sequence = list(X_test[-1].flatten())
for i in range(24):
input_sequence = np.array(current_sequence[-(sequence_length-1):]).reshape(1,
sequence_length-1, 1)
predicted_value = model.predict(input_sequence)[0][0]
future_predictions.append(predicted_value)
current_sequence.append(predicted_value)
1/1 [==============================] - 0s 23ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 29ms/step
1/1 [==============================] - 0s 33ms/step
1/1 [==============================] - 0s 23ms/step
1/1 [==============================] - 0s 23ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 28ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 23ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 29ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 23ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
Inverse Transform Predictions:
# Inverse transform the predictions to get actual sales values
future_predictions =
scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))
Display Predictions:
# Display the predictions for the next 24 months
print(f'Predictions for the next 24 months: {future_predictions}')
Predictions for the next 24 months: [[13.525593 ]
[13.94045 ]
[14.161354 ]
[14.346481 ]
[14.5128765]
[14.775693 ]
[14.8436775]
[14.963552 ]
[15.025752 ]
[15.252812 ]
[15.261148 ]
[15.34032 ]
[15.458272 ]
[15.556854 ]
[15.646535 ]
[15.729873 ]
[15.808032 ]
[15.8783865]
[15.947483 ]
[16.013062 ]
[16.07672 ]
[16.133318 ]
[16.190027 ]
[16.244 ]]
# Plotting training loss
plt.figure(figsize=(10, 6))
plt.plot(model_fit.history['loss'], label='Training Loss')
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()