Forecast Live Approach1
import pandas as pd
import numpy as np
import psycopg2
from datetime import datetime, timedelta
from sqlalchemy import create_engine
from sklearn.metrics import mean_absolute_error, mean_squared_error
from xgboost import XGBRegressor
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
# --------------------------
# Configuration
# --------------------------
DB_CONFIG = {
    'host': 'your_host',
    'port': '5432',
    'dbname': 'your_db',
    'user': 'your_user',
    'password': 'your_password'
}
TABLE_NAME = 'public.mrss_1'
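# --------------------------
# Optional: override credentials from the environment
# --------------------------
# Illustrative sketch, not part of the original workflow: any of the hard-coded
# connection values above can be overridden by environment variables at runtime.
# The variable names (PG_HOST, PG_PORT, PG_DB, PG_USER, PG_PASSWORD) are
# assumptions chosen for this example; use whatever names your deployment defines.
import os
for _key, _env in [('host', 'PG_HOST'), ('port', 'PG_PORT'), ('dbname', 'PG_DB'),
                   ('user', 'PG_USER'), ('password', 'PG_PASSWORD')]:
    if os.environ.get(_env):
        DB_CONFIG[_key] = os.environ[_env]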
# --------------------------
# Helper: Fetch dynamic date range
# --------------------------
def fetch_data(start_date, end_date):
    # Connect to PostgreSQL and pull the four EAF/LF feeder columns for the window.
    engine = create_engine(
        f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}"
        f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['dbname']}"
    )
    query = f'''
        SELECT "timestamp", eaf_1_lf_1, eaf_2_lf_2, eaf_3_lf_3, eaf_4_lf_4
        FROM {TABLE_NAME}
        WHERE "timestamp" BETWEEN '{start_date}' AND '{end_date}'
    '''
    df = pd.read_sql(query, engine)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df
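# A parameterized variant (a sketch using SQLAlchemy's text() bound parameters; the
# name fetch_data_parameterized is hypothetical and not used elsewhere in this notebook).
# Binding start/end avoids interpolating raw strings into the SQL statement.
from sqlalchemy import text

def fetch_data_parameterized(start_date, end_date):
    engine = create_engine(
        f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}"
        f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['dbname']}"
    )
    query = text(f'''
        SELECT "timestamp", eaf_1_lf_1, eaf_2_lf_2, eaf_3_lf_3, eaf_4_lf_4
        FROM {TABLE_NAME}
        WHERE "timestamp" BETWEEN :start_date AND :end_date
    ''')
    df = pd.read_sql(query, engine,
                     params={'start_date': start_date, 'end_date': end_date})
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df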
# --------------------------
# Data Aggregation - Approach 1
# --------------------------
def process_data_approach1(df):
    df.set_index('timestamp', inplace=True)
    # Total plant load = sum of the four EAF/LF feeders, resampled to 15-minute means.
    df['total_eaf'] = df[['eaf_1_lf_1', 'eaf_2_lf_2', 'eaf_3_lf_3', 'eaf_4_lf_4']].sum(axis=1)
    return df['total_eaf'].resample('15min').mean().dropna()
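# Quick sanity check of the aggregation on synthetic data (illustrative values only,
# not real plant readings): four feeder columns summed, then resampled to 15-minute means.
_demo = pd.DataFrame({
    'timestamp': pd.date_range('2024-01-01', periods=120, freq='min'),
    'eaf_1_lf_1': np.random.rand(120) * 10,
    'eaf_2_lf_2': np.random.rand(120) * 10,
    'eaf_3_lf_3': np.random.rand(120) * 10,
    'eaf_4_lf_4': np.random.rand(120) * 10,
})
print(process_data_approach1(_demo).head())  # 120 one-minute rows -> 8 fifteen-minute buckets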
# --------------------------
# Forecasting: rolling live forecast, one 15-minute step ahead,
# recomputed every 15 minutes over 24 hours (96 iterations)
# --------------------------
def rolling_forecast_dynamic(start_time, interval_minutes=15):
    results_arima, results_xgb, results_lstm = [], [], []
    for i in range(0, 96):
        current_time = start_time + timedelta(minutes=i * interval_minutes)
        # Pull the trailing 5 days of data up to the current simulated "live" time.
        df_raw = fetch_data(start_date=current_time - timedelta(days=5),
                            end_date=current_time)
        series = process_data_approach1(df_raw)
        if len(series) < 30 or current_time not in series.index:
            continue
        actual = series.loc[current_time]             # observed value at the forecast time
        train = series[series.index < current_time]   # history strictly before current_time

        # ARIMA: refit on the history and forecast one 15-minute step ahead
        try:
            model_arima = ARIMA(train, order=(2, 1, 2))
            fit_arima = model_arima.fit()
            pred_arima = fit_arima.forecast(steps=1).iloc[0]
            results_arima.append({'timestamp': current_time, 'actual': actual,
                                  'predicted': pred_arima})
        except Exception:
            continue

        # XGBoost: supervised regression on three lagged values
        df_feat = train.to_frame(name='total')
        df_feat['lag1'] = df_feat['total'].shift(1)
        df_feat['lag2'] = df_feat['total'].shift(2)
        df_feat['lag3'] = df_feat['total'].shift(3)
        df_feat.dropna(inplace=True)
        X_train = df_feat[['lag1', 'lag2', 'lag3']]
        y_train = df_feat['total']
        model_xgb = XGBRegressor(n_estimators=100)
        model_xgb.fit(X_train, y_train)
        # Most recent value first so the order matches [lag1, lag2, lag3].
        last_vals = train.iloc[-3:].values[::-1].reshape(1, -1)
        pred_xgb = model_xgb.predict(last_vals)[0]
        results_xgb.append({'timestamp': current_time, 'actual': actual,
                            'predicted': pred_xgb})

        # LSTM: 3-step lookback window on min-max scaled values
        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(train.to_frame())
        X_lstm, y_lstm = [], []
        for j in range(3, len(scaled)):
            X_lstm.append(scaled[j - 3:j])
            y_lstm.append(scaled[j])
        X_lstm, y_lstm = np.array(X_lstm), np.array(y_lstm)
        model_lstm = Sequential()
        model_lstm.add(LSTM(50, activation='relu', input_shape=(3, 1)))
        model_lstm.add(Dense(1))
        model_lstm.compile(optimizer='adam', loss='mse')
        model_lstm.fit(X_lstm, y_lstm, epochs=30, verbose=0)
        pred_lstm_scaled = model_lstm.predict(scaled[-3:].reshape(1, 3, 1), verbose=0)[0][0]
        pred_lstm = scaler.inverse_transform([[pred_lstm_scaled]])[0][0]
        results_lstm.append({'timestamp': current_time, 'actual': actual,
                             'predicted': pred_lstm})

    # Return one DataFrame per model so the caller can export and plot them.
    return (pd.DataFrame(results_arima),
            pd.DataFrame(results_xgb),
            pd.DataFrame(results_lstm))
# --------------------------
# Main Execution
# --------------------------
now = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
arima_df, xgb_df, lstm_df = rolling_forecast_dynamic(start_time=now)
arima_df.to_excel("arima_forecast_live_approach1.xlsx", index=False)
xgb_df.to_excel("xgboost_forecast_live_approach1.xlsx", index=False)
lstm_df.to_excel("lstm_forecast_live_approach1.xlsx", index=False)
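# Optionally, the same results can go into a single workbook with one sheet per model
# (a sketch using pandas' ExcelWriter; the file name combined_forecast_live_approach1.xlsx
# is an assumption, not an output the original notebook produces).
with pd.ExcelWriter("combined_forecast_live_approach1.xlsx") as writer:
    arima_df.to_excel(writer, sheet_name="ARIMA", index=False)
    xgb_df.to_excel(writer, sheet_name="XGBoost", index=False)
    lstm_df.to_excel(writer, sheet_name="LSTM", index=False)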
# Visualize
for model_name, df in zip(['ARIMA', 'XGBoost', 'LSTM'],
                          [arima_df, xgb_df, lstm_df]):
    plt.figure(figsize=(14, 4))
    plt.plot(df['timestamp'], df['actual'], label='Actual')
    plt.plot(df['timestamp'], df['predicted'], label='Predicted')
    plt.title(f'{model_name} - Actual vs Predicted')
    plt.xlabel('Time')
    plt.ylabel('EAF (MW)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()
# Accuracy
for name, df in zip(['ARIMA', 'XGBoost', 'LSTM'], [arima_df, xgb_df, lstm_df]):
    mae = mean_absolute_error(df['actual'], df['predicted'])
    rmse = np.sqrt(mean_squared_error(df['actual'], df['predicted']))
    mape = np.mean(np.abs((df['actual'] - df['predicted']) / df['actual'])) * 100
    print(f"{name} -> MAE: {mae:.2f}, RMSE: {rmse:.2f}, MAPE: {mape:.2f}%")
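# Side-by-side summary of the same metrics (a sketch; the summary DataFrame and its
# column names are illustrative, not part of the original output).
summary_rows = []
for name, df in zip(['ARIMA', 'XGBoost', 'LSTM'], [arima_df, xgb_df, lstm_df]):
    summary_rows.append({
        'model': name,
        'MAE': mean_absolute_error(df['actual'], df['predicted']),
        'RMSE': np.sqrt(mean_squared_error(df['actual'], df['predicted'])),
        'MAPE_%': np.mean(np.abs((df['actual'] - df['predicted']) / df['actual'])) * 100,
    })
print(pd.DataFrame(summary_rows).set_index('model').round(2))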