0% found this document useful (0 votes)
3 views

Code4

Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views

Code4

Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 6

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

# Load the price history and index it by date.
# NOTE(review): dates are assumed to be day-first (dd-mm-YYYY), matching the
# "%d-%m-%Y" format string used further down in this script -- confirm against
# the CSV. Without dayfirst=True, ambiguous dates such as 03-04-2020 can be
# read month-first, or the whole column can be left as plain strings.
df = pd.read_csv(
    '/content/drive/My Drive/Colab Notebooks/random.csv',
    parse_dates=['Date'],
    index_col='Date',
    dayfirst=True,
)

#Correlation

import matplotlib.pyplot as plt

# Overlay both price series on a single chart for a visual comparison.
plt.figure(figsize=(14, 7))
for column_name, legend_text in (
    ('CORN', 'Corn Prices'),
    ('Raw Sugar', 'Raw Sugar Prices'),
):
    plt.plot(df[column_name], label=legend_text)
plt.title('Corn vs. Raw Sugar Prices Over Time')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

# Correlation between the two price columns (Series.corr with its default
# method).
corn_prices = df['CORN']
sugar_prices = df['Raw Sugar']
correlation = corn_prices.corr(sugar_prices)
print(f"Correlation between Corn and Raw Sugar prices: {correlation}")

#Test Stationarity

from statsmodels.tsa.stattools import adfuller

# Define a function to perform the ADF test

def adf_test(series):
    """Run the Augmented Dickey-Fuller test on ``series`` and print the result.

    Prints the test statistic, the p-value and the critical values taken
    from the tuple returned by ``adfuller``.

    Args:
        series: The observations to test (e.g. a pandas Series).

    Returns:
        None. The results are only printed.
    """
    result = adfuller(series)
    print(f'ADF Statistic: {result[0]}')
    print(f'p-value: {result[1]}')

    # result[4] maps each significance level to its critical value. The
    # header is printed once, before the loop, instead of once per entry.
    print('Critical Values:')
    for key, value in result[4].items():
        print(f' {key}, {value}')

# ADF test on the raw price levels.
print("Corn Price Stationarity Test")
adf_test(df['CORN'])

print("\nRaw Sugar Price Stationarity Test")
adf_test(df['Raw Sugar'])

# First-difference both series. The leading NaN produced by diff() stays in
# the column: assigning a Series back to df re-aligns it on the index, so
# dropping the NaN before the assignment had no effect. It is dropped at the
# point of use instead.
df['Corn_diff'] = df['CORN'].diff()
df['Raw_Sugar_diff'] = df['Raw Sugar'].diff()

# Repeat the ADF test on the differenced series. The labels are distinct from
# the level tests so the two sets of results can be told apart in the output.
print("Corn Price Stationarity Test (first difference)")
adf_test(df['Corn_diff'].dropna())

print("\nRaw Sugar Price Stationarity Test (first difference)")
adf_test(df['Raw_Sugar_diff'].dropna())

# Select ARIMA orders automatically with auto_arima (instead of reading ACF/PACF plots)

from pmdarima import auto_arima

# Search for a non-seasonal ARIMA order on each differenced series.
# trace=True is passed so the search progress is reported.
corn_diff_series = df['Corn_diff'].dropna()
corn_auto_model = auto_arima(corn_diff_series, seasonal=False, trace=True)

sugar_diff_series = df['Raw_Sugar_diff'].dropna()
sugar_auto_model = auto_arima(sugar_diff_series, seasonal=False, trace=True)

#Forecast

from statsmodels.tsa.arima.model import ARIMA


# Fit an ARIMA model to each differenced series. The (p, d, q) order comes
# from the auto_arima search earlier in this script instead of a hard-coded
# (0, 0, 0) placeholder, which would only ever forecast the series mean.
corn_model = ARIMA(df['Corn_diff'].dropna(), order=corn_auto_model.order)
corn_fit = corn_model.fit()
print(corn_fit.summary())

# NOTE: the model is fitted on first differences, so the forecast values are
# expected price *changes*, not price levels.
corn_forecast = corn_fit.forecast(steps=12)
print("Corn Price Forecast:", corn_forecast)

# Same approach for Raw Sugar.
sugar_model = ARIMA(
    df['Raw_Sugar_diff'].dropna(),
    order=sugar_auto_model.order,
)
sugar_fit = sugar_model.fit()
print(sugar_fit.summary())

# As with corn, these are forecast changes of the differenced series.
sugar_forecast = sugar_fit.forecast(steps=100)
print("Raw Sugar Price Forecast:", sugar_forecast)

# Ensure the index holds datetimes; string dates are expected in
# dd-mm-YYYY form.
df.index = pd.to_datetime(df.index, format="%d-%m-%Y")

# One calendar date per forecast value, starting the day after the last
# observation. The length is taken from the forecast itself so the x and y
# data stay the same size if the forecast horizon is changed.
forecast_dates = pd.date_range(
    df.index[-1] + pd.Timedelta(days=1),
    periods=len(sugar_forecast),
    freq='D',
)

plt.figure(figsize=(14, 7))

# Last 500 observations of the differenced series (corn is disabled).
#plt.plot(df['Corn_diff'].iloc[-500:], label='Corn Prices', color='blue')
plt.plot(df['Raw_Sugar_diff'].iloc[-500:], label='Raw Sugar Prices', color='green')

# Forecast drawn as a dashed continuation (corn is disabled).
#plt.plot(forecast_dates, corn_forecast, label='Corn Forecast', color='blue', linestyle='--')
plt.plot(forecast_dates, sugar_forecast, label='Raw Sugar Forecast', color='green', linestyle='--')

plt.title('Corn and Raw Sugar Prices with Forecast')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

# VAR Modelling

from statsmodels.tsa.api import VAR

# Build a two-variable VAR on the price levels; the lag order is chosen by
# the AIC criterion passed to fit().
var_endog = df[['CORN', 'Raw Sugar']]
model = VAR(var_endog)
model_fitted = model.fit(maxlags=None, ic='aic')

print(model_fitted.summary())

# Number of lag coefficient matrices in the fitted model.
print(len(model_fitted.coefs))

# (A guard of the form `len(model_fitted.coefs) > 0` was sketched here but is
# disabled.)

# Forecast the next `forecast_steps` periods, seeding the model with the last
# k_ar observed rows.
forecast_steps = 100
lag_order = model_fitted.k_ar
seed_values = var_endog.values[-lag_order:]
forecast = model_fitted.forecast(seed_values, steps=forecast_steps)

# Wrap the forecast array in a DataFrame indexed by the dates that follow the
# last observation (daily frequency).
first_forecast_day = df.index[-1] + pd.Timedelta(days=1)
forecast_index = pd.date_range(
    start=first_forecast_day,
    periods=forecast_steps,
    freq='D',
)
forecast_df = pd.DataFrame(
    forecast,
    index=forecast_index,
    columns=['CORN', 'Raw Sugar'],
)
print(forecast_df)

# Historical sugar prices followed by the VAR forecast.
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Raw Sugar'], label='Historical Sugar Price', color='blue')
plt.plot(
    forecast_df.index,
    forecast_df['Raw Sugar'],
    label='Forecasted Sugar Price',
    color='red',
)
plt.title('Sugar Price Forecast Using VAR Model')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

#train model

# Columns modelled by the VAR. These match the two-variable model fitted on
# the full data set earlier in this script.
# NOTE(review): the original referenced a third column, 'USDBRL', here; add it
# to this list if the data contains it and a three-variable model is intended.
var_columns = ['CORN', 'Raw Sugar']

# Chronological 80/20 split: the first 80% of rows train the model and the
# remaining rows are held out for evaluation.
train_size = int(len(df) * 0.8)
train, test = df[:train_size], df[train_size:]

# Fit a fresh VAR on the training rows only. Re-fitting the model built on the
# full data would let the test period leak into the estimate.
train_model = VAR(train[var_columns])
model_fitted_train = train_model.fit(maxlags=None, ic='aic')

# Forecast over the whole test period, seeded with the last k_ar training rows.
test_forecast = model_fitted_train.forecast(
    train[var_columns].values[-model_fitted_train.k_ar:],
    steps=len(test),
)

# Align the forecast with the test dates so it can be compared with actuals.
test_forecast_df = pd.DataFrame(
    test_forecast,
    index=test.index,
    columns=var_columns,
)

# Actual versus predicted sugar prices over the test period.
plt.figure(figsize=(12, 6))
plt.plot(test.index, test['Raw Sugar'], label='Actual Sugar Price', color='blue')
plt.plot(
    test_forecast_df.index,
    test_forecast_df['Raw Sugar'],
    label='Predicted Sugar Price',
    color='red',
)
plt.title('Actual vs Predicted Sugar Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

#test model

from sklearn.metrics import mean_squared_error

# Root mean squared error of the sugar forecast on the test period. The square
# root is taken explicitly because the `squared=False` argument of
# mean_squared_error is deprecated and removed in recent scikit-learn releases.
rmse = np.sqrt(mean_squared_error(test['Raw Sugar'], test_forecast_df['Raw Sugar']))

print(f'Root Mean Squared Error (RMSE): {rmse}')

import numpy as np

# Define the function to calculate MAPE

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Args:
        y_true: Actual values (list, NumPy array or pandas Series).
        y_pred: Predicted values, in the same order as ``y_true``.

    Returns:
        The mean of ``|y_true - y_pred| / |y_true|`` multiplied by 100.
    """
    # Work on plain float arrays so the comparison is positional and pandas
    # index alignment cannot interfere.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Avoid division by zero: zeros in y_true are replaced by a tiny value.
    # Such entries contribute a very large percentage unless the prediction
    # is also (close to) that tiny value.
    epsilon = 1e-10
    y_true = np.where(y_true == 0, epsilon, y_true)

    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Score the test-period sugar forecast with MAPE (a percentage).
actual_sugar = test['Raw Sugar']
predicted_sugar = test_forecast_df['Raw Sugar']
mape = mean_absolute_percentage_error(actual_sugar, predicted_sugar)

print(mape)

You might also like