0% found this document useful (0 votes)
10 views9 pages

Forage 1

The document is a Python script that analyzes natural gas price data using ARIMA modeling and seasonal decomposition. It includes functions to load data, visualize historical prices, build an ARIMA model, forecast future prices, and predict prices for specific dates. The script also provides a main function that interacts with the user to input file paths and dates for predictions.

Uploaded by

Shwetank Pandey
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
10 views9 pages

Forage 1

The document is a Python script that analyzes natural gas price data using ARIMA modeling and seasonal decomposition. It includes functions to load data, visualize historical prices, build an ARIMA model, forecast future prices, and predict prices for specific dates. The script also provides a main function that interacts with the user to input file paths and dates for predictions.

Uploaded by

Shwetank Pandey
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 9

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from datetime import datetime, timedelta

from statsmodels.tsa.seasonal import seasonal_decompose

from statsmodels.tsa.arima.model import ARIMA

from sklearn.metrics import mean_squared_error

import warnings

warnings.filterwarnings('ignore')

# Function to load data

def load_data(filepath):
    """Load the natural gas price data from a CSV file.

    The first column is treated as the observation date and the second as the
    price, regardless of their header names; they are renamed to ``Date`` and
    ``Price``. Any further columns keep their original names.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame indexed by ``Date`` (sorted ascending) with a ``Price``
        column, or ``None`` when the file cannot be read, has fewer than two
        columns, has no rows, or its dates cannot be parsed. Failures are
        reported on stdout instead of being raised.
    """
    try:
        df = pd.read_csv(filepath)

        # Check if the DataFrame has at least 2 columns (date and price)
        if len(df.columns) < 2:
            print("Error: CSV file must have at least 2 columns (date and price)")
            return None

        if df.empty:
            print("Error: CSV file contains no data rows")
            return None

        # Always normalise the first two headers. Renaming only when the first
        # header was "unrecognised" left files headed e.g. "date,price" or
        # "Month,Price" without a 'Date' column, so the lookup below failed.
        df.columns = ['Date', 'Price'] + list(df.columns[2:])

        # Parse the dates, index by them and put the rows in time order
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.set_index('Date').sort_index()

        first_day = df.index.min().strftime('%Y-%m-%d')
        last_day = df.index.max().strftime('%Y-%m-%d')
        print(f"Successfully loaded {len(df)} data points from {first_day} to {last_day}")

        return df

    except Exception as e:
        # Deliberate best-effort boundary: the caller only checks for None.
        print(f"Error loading data: {e}")
        return None

# Visualization function

def visualize_data(df):
    """Visualize the historical gas prices and seasonal patterns.

    Shows two figures: the price history above its trend/seasonal
    decomposition, and a bar chart of the mean price per calendar month.

    Args:
        df: DataFrame with a datetime index and a ``Price`` column. The
            decomposition uses a period of 12, i.e. it assumes monthly
            observations with a yearly cycle.
    """
    seasonal_period = 12  # monthly data, yearly seasonality

    # Create a figure with two stacked subplots
    _, axes = plt.subplots(2, 1, figsize=(12, 10))

    # Plot 1: Historical prices, titled with the span the data really covers
    first_year = df.index.min().year
    last_year = df.index.max().year
    span = f"{first_year}" if first_year == last_year else f"{first_year}-{last_year}"
    df['Price'].plot(ax=axes[0], title=f'Historical Natural Gas Prices ({span})')
    axes[0].set_ylabel('Price')
    axes[0].grid(True)

    # Plot 2: Seasonal decomposition.
    # seasonal_decompose rejects series shorter than two complete cycles, so
    # the guard must be 2 * period (24 points), not a single period.
    if len(df) >= 2 * seasonal_period:
        decomposition = seasonal_decompose(
            df['Price'], model='multiplicative', period=seasonal_period
        )

        # Plot trend component
        decomposition.trend.plot(ax=axes[1], label='Trend')

        # Plot seasonal component with transparent fill
        seasonal = decomposition.seasonal
        axes[1].plot(seasonal.index, seasonal, label='Seasonal Pattern', color='green')
        axes[1].fill_between(seasonal.index, seasonal, alpha=0.3, color='green')

        axes[1].set_title('Trend and Seasonal Components')
        axes[1].legend()
        axes[1].grid(True)
    else:
        axes[1].set_title(
            f'Trend and Seasonal Components (need at least {2 * seasonal_period} observations)'
        )

    plt.tight_layout()
    plt.show()

    # Monthly pattern visualization. Reindex to all 12 months so the bar
    # heights always line up with the 12 x positions, even when some calendar
    # months are absent from the data (those bars are simply left empty).
    months = range(1, 13)
    monthly_avg = df.groupby(df.index.month)['Price'].mean().reindex(months)

    plt.figure(figsize=(10, 6))
    plt.bar(months, monthly_avg.values)
    plt.title('Average Natural Gas Prices by Month')
    plt.xlabel('Month')
    plt.ylabel('Average Price')
    plt.xticks(months, ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',
                        'Oct', 'Nov', 'Dec'])
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()

# ARIMA model for time series forecasting

def build_arima_model(df):
    """Build an ARIMA model for the natural gas price data.

    The ARIMA order is chosen by a small grid search (p in 0..2, d in 0..1,
    q in 0..2) that minimises AIC on the training portion. When a hold-out
    period is available, the selected model is scored on it (MSE is printed
    and actual vs predicted is plotted). The returned model is then refitted
    on the complete series so that its forecasts start after the last
    observation.

    Args:
        df: DataFrame with a datetime index and a ``Price`` column.

    Returns:
        The fitted ARIMA results object for the full series.

    Raises:
        ValueError: If none of the candidate orders could be fitted.
    """
    holdout = 6  # last 6 observations (months) are reserved for testing

    # Only split when there is something left to train on; a plain
    # len(df) - 6 goes non-positive on short series and mangles the slices.
    if len(df) > holdout:
        train, test = df.iloc[:-holdout], df.iloc[-holdout:]
    else:
        train, test = df, df.iloc[0:0]

    best_aic = float('inf')
    best_params = None
    best_fit = None

    # Grid search for best parameters
    for p in range(3):
        for d in range(2):
            for q in range(3):
                try:
                    candidate = ARIMA(train['Price'], order=(p, d, q)).fit()
                except Exception:
                    # Some orders fail to estimate on a given series; skip
                    # them. (Unlike a bare except, Ctrl-C still gets through.)
                    continue

                if candidate.aic < best_aic:
                    best_aic = candidate.aic
                    best_params = (p, d, q)
                    best_fit = candidate

    if best_params is None:
        raise ValueError("No ARIMA model could be fitted to the price data.")

    print(f"Best ARIMA parameters: {best_params}")

    # Evaluate on test data if available. The forecast must come from the
    # model fitted on the training data only: a model fitted on the whole
    # series forecasts the months *after* the test period, so comparing it
    # with the test prices measures nothing.
    if len(test) > 0:
        predictions = best_fit.forecast(steps=len(test))
        mse = mean_squared_error(test['Price'], predictions)
        print(f"Mean Squared Error on test data: {mse:.4f}")

        # Plot actual vs predicted for test period
        plt.figure(figsize=(12, 6))
        plt.plot(test.index, test['Price'], label='Actual')
        plt.plot(test.index, predictions, label='Predicted')
        plt.title('Natural Gas Price: Actual vs Predicted')
        plt.legend()
        plt.grid(True)
        plt.show()

    # Refit with the selected order on all data for real forecasting
    return ARIMA(df['Price'], order=best_params).fit()

# Function to predict price for a specific date

def predict_price(model, date, df):
    """Predict the natural gas price for a specific date.

    * A date present in the data returns the recorded price.
    * A date between two observations returns the time-weighted linear
      interpolation of its neighbours.
    * A date after the last observation returns the model forecast for the
      calendar month containing that date (at least one step ahead).

    Args:
        model: Fitted model exposing ``forecast(steps=...)`` that returns a
            pandas Series of monthly forecasts.
        date: Anything ``pandas.to_datetime`` can parse.
        df: DataFrame with a datetime index and a ``Price`` column.

    Returns:
        The price for ``date``.

    Raises:
        ValueError: If the date is invalid, the data is empty, or the date
            precedes the first observation (no back-casting is implemented).
    """
    date = pd.to_datetime(date)

    if pd.isna(date):
        raise ValueError("Invalid date.")
    if df.empty:
        raise ValueError("Cannot predict a price: the historical data is empty.")

    # If date is in the historical data, return the actual value
    if date in df.index:
        return df.loc[date, 'Price']

    first_date = df.index.min()
    last_date = df.index.max()

    # Before the first observation: fail loudly instead of silently
    # returning None, which callers cannot format or compute with.
    if date < first_date:
        raise ValueError(
            f"Date {date.strftime('%Y-%m-%d')} is before the earliest data point "
            f"({first_date.strftime('%Y-%m-%d')}); no prediction is available."
        )

    # After the last observation: forecast the required number of months.
    if date > last_date:
        # Exact calendar-month distance. The old days // 30 approximation
        # gains an extra step roughly every six years of horizon.
        months_ahead = (date.year - last_date.year) * 12 + (date.month - last_date.month)
        months_ahead = max(months_ahead, 1)

        forecast = model.forecast(steps=months_ahead)
        return forecast.iloc[-1]

    # Strictly inside the observed range but not an observation date:
    # interpolate linearly in time between the neighbouring observations.
    prices = df['Price'].sort_index()
    position = prices.index.searchsorted(date)
    prev_date, next_date = prices.index[position - 1], prices.index[position]
    prev_price, next_price = prices.iloc[position - 1], prices.iloc[position]
    weight = (date - prev_date) / (next_date - prev_date)
    return prev_price + weight * (next_price - prev_price)

# Function to generate forecast for the next year

def forecast_next_year(model, df):
    """Generate and visualize forecast for the next year.

    Forecasts 12 steps ahead, labels them with the 12 month-ends that follow
    the month of the last observation, and plots them after the historical
    prices with a rough +/- 1.96 standard-deviation band.

    Args:
        model: Fitted model exposing ``forecast(steps=...)``.
        df: DataFrame with a datetime index and a ``Price`` column.

    Returns:
        A DataFrame indexed by ``Date`` with one ``Forecasted_Price`` column
        holding the 12 forecasted values.
    """
    horizon = 12

    # Last date in the dataset
    last_date = df.index.max()

    # Month-ends of the next 12 months. Stepping to the start of the next
    # month first guarantees no month is skipped; "last_date + 30 days" jumps
    # over February when the data ends on 31 January. Offset objects are used
    # instead of the 'M' alias, which newer pandas versions deprecate.
    forecast_dates = pd.date_range(
        start=last_date + pd.offsets.MonthBegin(1),
        periods=horizon,
        freq=pd.offsets.MonthEnd(),
    )

    # Forecast prices
    forecast = model.forecast(steps=horizon)

    # Create forecast DataFrame. Values are taken positionally so the result
    # does not depend on whatever index the model attached to its forecast.
    forecast_df = pd.DataFrame(
        {'Forecasted_Price': np.asarray(forecast)},
        index=forecast_dates.rename('Date'),
    )

    # Visualize historical and forecasted prices. Each series is plotted on
    # its own dates; re-aligning the history onto a month-end grid would drop
    # every observation that is not dated exactly at a month end.
    plt.figure(figsize=(12, 6))
    plt.plot(df.index, df['Price'], label='Historical Prices')
    plt.plot(forecast_df.index, forecast_df['Forecasted_Price'],
             label='Forecasted Prices', color='red')

    # Add confidence intervals (simple approximation based on the spread of
    # the historical prices, not on the model's forecast variance)
    band = 1.96 * df['Price'].std()
    plt.fill_between(
        forecast_df.index,
        forecast_df['Forecasted_Price'] - band,
        forecast_df['Forecasted_Price'] + band,
        color='red',
        alpha=0.2,
    )

    plt.title('Natural Gas Price Forecast for Next 12 Months')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()

    return forecast_df

# Main function

def main():
    """Run the interactive workflow.

    Prompts for a CSV path, loads the data, prints summary statistics, shows
    the plots, fits the ARIMA model, prints the 12-month forecast and then
    answers price queries for user-supplied dates until 'q' is entered.
    Returns early when the data cannot be loaded.
    """
    # Get file path from user (strip stray whitespace from pasted paths)
    file_path = input("Enter the path to the natural gas price CSV file: ").strip()

    # Load data
    df = load_data(file_path)
    if df is None:
        return

    # Display basic statistics
    print("\nBasic Statistics:")
    print(df['Price'].describe())

    # Visualize the data
    print("\nVisualizing historical data...")
    visualize_data(df)

    # Build ARIMA model
    print("\nBuilding ARIMA model...")
    model = build_arima_model(df)

    # Forecast next year
    print("\nForecasting prices for the next 12 months...")
    forecast = forecast_next_year(model, df)
    print("\nForecasted Prices for Next 12 Months:")
    print(forecast)

    # Interactive price prediction
    while True:
        date_input = input(
            "\nEnter a date (YYYY-MM-DD) to predict gas price (or 'q' to quit): "
        ).strip()

        if date_input.lower() == 'q':
            break

        if not date_input:
            # Nothing typed; ask again instead of trying to parse an empty date
            continue

        try:
            predicted_price = predict_price(model, date_input, df)
            if predicted_price is None:
                # Formatting None with :.4f would only produce a cryptic
                # "unsupported format string" error; say what happened.
                print(f"No prediction available for {date_input}.")
            else:
                print(f"Predicted price for {date_input}: {predicted_price:.4f}")
        except Exception as e:
            # Keep the prompt loop alive on bad input or prediction failures
            print(f"Error: {e}")


if __name__ == "__main__":
    main()

You might also like