0% found this document useful (0 votes)
38 views6 pages

Output - Group - Work - Project - 4652 - GWP1.ipynb - Colaboratory

group project

Uploaded by

Gaurav
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
38 views6 pages

Output - Group - Work - Project - 4652 - GWP1.ipynb - Colaboratory

group project

Uploaded by

Gaurav
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

Group_Work_Project_4652_GWP1.ipynb - Colaboratory https://colab.research.google.com/drive/19j96PkHCJMh2HbVI3m763...

# Install the 'statsmodels' library if you haven't already


# pip install statsmodels
#SKEWNESS
# To mitigate the impact of skewness, consider employing volatility models
# such as GARCH (Generalized Autoregressive Conditional Heteroskedasticity).

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from statsmodels.tsa.stattools import acf
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Pull daily AAPL prices from Yahoo Finance for the chosen window and keep
# only the adjusted closing price column.
symbol, start_date, end_date = "AAPL", "2022-01-01", "2023-01-01"
price_table = yf.download(symbol, start=start_date, end=end_date)
data = price_table['Adj Close']

# Skewness of the adjusted-close series itself (price levels, not returns).
data_skewness = data.skew()
print("\n Skewness:", data_skewness)

# Day-over-day percentage change; rows without a valid value (at least the
# first one, which has no predecessor) are dropped.
returns = data.pct_change().dropna()

# Squared returns are used three times below, so name them once.
squared_returns = returns**2

# Pick the moving-average order as the lag (1..lag_order) whose sample
# autocorrelation of squared returns is largest in absolute value.
# Index 0 of the ACF is lag 0, hence the slice and the "+ 1".
lag_order = 20
autocorrelation = acf(squared_returns, fft=False, nlags=lag_order)
abs_acf_beyond_lag0 = np.abs(autocorrelation[1:])
q_value = np.argmax(abs_acf_beyond_lag0) + 1

# NOTE: this is a SARIMAX model with order (0, 0, q_value) fitted to squared
# returns, i.e. a pure MA(q) on the squared series. It is used here as a
# stand-in for volatility modelling; it is not a GARCH(1,1) specification.
model = SARIMAX(
    squared_returns,
    order=(0, 0, q_value),
    enforce_invertibility=False,
)
results = model.fit(disp=False)

# Print the fitted model's summary table.
print(results.summary())

# ACF of squared returns (lag 0 hidden) to eyeball the chosen order.
plot_acf(squared_returns, lags=lag_order, zero=False)
plt.title("ACF of Squared Returns")
plt.show()

################################################################################

[*********************100%%**********************] 1 of 1 completed

Skewness: 0.013465137067421093
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:473: Va

1 of 6 30/11/23, 2:02 am
Group_Work_Project_4652_GWP1.ipynb - Colaboratory https://colab.research.google.com/drive/19j96PkHCJMh2HbVI3m763...

/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:473: Va
self._init_dates(dates, freq)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:473: Va
self._init_dates(dates, freq)
SARIMAX Results
==============================================================================
Dep. Variable: Adj Close No. Observations: 250
Model: SARIMAX(0, 0, 9) Log Likelihood 1400.291
Date: Wed, 29 Nov 2023 AIC -2780.583
Time: 19:47:50 BIC -2745.368
Sample: 0 HQIC -2766.410
- 250
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ma.L1 0.1019 0.092 1.106 0.269 -0.079 0.282
ma.L2 -0.0069 0.093 -0.074 0.941 -0.189 0.175
ma.L3 0.0119 0.068 0.175 0.861 -0.122 0.146
ma.L4 0.0745 0.066 1.133 0.257 -0.054 0.204
ma.L5 0.1618 0.077 2.110 0.035 0.012 0.312
ma.L6 0.0528 0.076 0.697 0.486 -0.096 0.201
ma.L7 0.0064 0.108 0.059 0.953 -0.206 0.219
ma.L8 -0.0064 0.104 -0.062 0.951 -0.211 0.198
ma.L9 0.3430 0.043 8.058 0.000 0.260 0.426
sigma2 7.856e-07 3.71e-08 21.191 0.000 7.13e-07 8.58e-07
=================================================================================
Ljung-Box (L1) (Q): 0.34 Jarque-Bera (JB): 2790.
Prob(Q): 0.56 Prob(JB): 0.
Heteroskedasticity (H): 2.85 Skew: 3.
Prob(H) (two-sided): 0.00 Kurtosis: 18.
=================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-st

2 of 6 30/11/23, 2:02 am
Group_Work_Project_4652_GWP1.ipynb - Colaboratory https://colab.research.google.com/drive/19j96PkHCJMh2HbVI3m763...

#KURTOSIS
# To mitigate kurtosis, we consider alternative models such as TARCH, and
# use GARCH-type models to account for heteroscedasticity in time series.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from statsmodels.tsa.stattools import acf
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Pull daily AAPL prices from Yahoo Finance for the chosen window and keep
# only the adjusted closing price column.
symbol, start_date, end_date = "AAPL", "2022-01-01", "2023-01-01"
price_table = yf.download(symbol, start=start_date, end=end_date)
data = price_table['Adj Close']

# Day-over-day percentage change; rows without a valid value (at least the
# first one, which has no predecessor) are dropped.
returns = data.pct_change().dropna()

# Pick the model order as the lag (1..lag_order) whose sample autocorrelation
# of squared returns is largest in absolute value. Index 0 of the ACF is
# lag 0, hence the slice and the "+ 1".
lag_order = 20
squared_returns = returns**2
autocorrelation = acf(squared_returns, fft=False, nlags=lag_order)
abs_acf_beyond_lag0 = np.abs(autocorrelation[1:])
o_value = np.argmax(abs_acf_beyond_lag0) + 1

# Specify and fit the model.
# NOTE(review): despite the "TARCH" naming, the call below builds a SARIMAX
# model with order (0, 0, o_value) on squared returns, i.e. moving-average
# terms only; nothing visible here is a threshold-ARCH specification.
# NOTE(review): the next line is cut off in this export; the end of
# seasonal_order and any further arguments are not visible. Confirm against
# the original notebook before running.


tarch_model = SARIMAX(returns**2, order=(0, 0, o_value), seasonal_order=(0, 0, 0
tarch_results = tarch_model.fit(disp=False)

# Print the fitted model's summary table.


print(tarch_results.summary())

###############################################################################

[*********************100%%**********************] 1 of 1 completed
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:473: Va
self._init_dates(dates, freq)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:473: Va
self._init_dates(dates, freq)
SARIMAX Results
==============================================================================
Dep. Variable: Adj Close No. Observations: 250
Model: SARIMAX(0, 0, 9) Log Likelihood 1419.064
Date: Wed, 29 Nov 2023 AIC -2814.127
Time: 20:12:09 BIC -2771.870
Sample: 0 HQIC -2797.120
- 250
Covariance Type: opg

3 of 6 30/11/23, 2:02 am
Group_Work_Project_4652_GWP1.ipynb - Colaboratory https://colab.research.google.com/drive/19j96PkHCJMh2HbVI3m763...

Covariance Type: opg


==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 0.0005 0.000 4.280 0.000 0.000 0.001
Adj Close 0.0040 0.002 2.406 0.016 0.001 0.007
ma.L1 0.0620 0.094 0.660 0.510 -0.122 0.246
ma.L2 -0.0540 0.104 -0.519 0.604 -0.258 0.150
ma.L3 -0.0469 0.068 -0.686 0.493 -0.181 0.087
ma.L4 0.0351 0.066 0.535 0.593 -0.093 0.164
ma.L5 0.1250 0.067 1.861 0.063 -0.007 0.257
ma.L6 0.0087 0.065 0.133 0.894 -0.119 0.137
ma.L7 -0.0395 0.108 -0.367 0.713 -0.250 0.171
ma.L8 -0.0647 0.100 -0.648 0.517 -0.260 0.131
ma.L9 0.2684 0.044 6.159 0.000 0.183 0.354
sigma2 6.849e-07 5.44e-08 12.599 0.000 5.78e-07 7.91e-07
=================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 2627.
Prob(Q): 1.00 Prob(JB): 0.
Heteroskedasticity (H): 2.99 Skew: 3.
Prob(H) (two-sided): 0.00 Kurtosis: 17.
=================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-st
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: Convergenc
warnings.warn("Maximum Likelihood optimization failed to "

#SENSITIVITY TO OUTLIERS
# To mitigate sensitivity to outliers, HuberRegressor from scikit-learn is used,
# which utilizes the Huber loss function.

import yfinance as yf
import numpy as np
from sklearn.linear_model import HuberRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Fetch historical data for AAPL from Yahoo Finance.
symbol = "AAPL"
start_date = "2020-01-01"
end_date = "2023-01-01"
data = yf.download(symbol, start=start_date, end=end_date)

# Target variable (y): closing prices as a column vector.
# An explicit copy is taken because y is modified below. `.values` can hand
# back a view onto the DataFrame's own buffer, so writing into it would
# either silently alter `data` or, with pandas Copy-on-Write enabled, raise
# "assignment destination is read-only".
y = data['Close'].to_numpy(copy=True).reshape(-1, 1)

# Feature (X): the 0-based day index, one column.
X = np.arange(len(y)).reshape(-1, 1)

# Inject a synthetic outlier: make the first closing price 1.5x the maximum
# observed close.
y[0] = y.max() * 1.5

# Split the data into training and testing sets

4 of 6 30/11/23, 2:02 am
Group_Work_Project_4652_GWP1.ipynb - Colaboratory https://colab.research.google.com/drive/19j96PkHCJMh2HbVI3m763...

# Split the data into training and testing sets


# Hold out 20% of the samples for testing.
# NOTE(review): this line is cut off in the export; the random_state value
# and the closing parenthesis are not visible. Confirm against the notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=

# Create a HuberRegressor model; epsilon is passed explicitly as 1.35.
huber_reg = HuberRegressor(epsilon=1.35)

# Fit the model to the training data.
# The target is flattened to 1-D first: passing a column vector makes
# scikit-learn emit a DataConversionWarning ("y = column_or_1d(y, warn=True)")
# and flatten it internally anyway, so the fitted model is unchanged.
huber_reg.fit(X_train, np.ravel(y_train))

# Make predictions on the test data.
y_pred = huber_reg.predict(X_test)

# Evaluate the model with mean squared error on the held-out samples.
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Plot the true vs predicted values against the day index.
plt.scatter(X_test, y_test, color='black', label='True values')
plt.scatter(X_test, y_pred, color='blue', label='Predicted values')
plt.title(f'Huber Regression with Outlier for {symbol}')
plt.xlabel('Day Index')
plt.ylabel('Closing Price')
plt.legend()
plt.show()

[*********************100%%**********************] 1 of 1 completed
Mean Squared Error: 300.52260367986287
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py:1143: DataCon
y = column_or_1d(y, warn=True)

5 of 6 30/11/23, 2:02 am
Group_Work_Project_4652_GWP1.ipynb - Colaboratory https://colab.research.google.com/drive/19j96PkHCJMh2HbVI3m763...

6 of 6 30/11/23, 2:02 am

You might also like