0% found this document useful (0 votes)
10 views3 pages

Gumbel

Uploaded by

gunjanc080
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
10 views3 pages

Gumbel

Uploaded by

gunjanc080
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 3

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Function to fit and transform data to uniform marginals


def fit_marginals(data):
    """Fit parametric marginals and add their CDF-transformed columns.

    A gamma distribution is fitted to ``data['Rainfall']`` and a normal
    distribution to ``data['Temperature']`` with ``scipy.stats``. Each column
    is then passed through its fitted CDF, producing the columns
    ``Rainfall_U`` and ``Temperature_U`` with values in [0, 1].

    Args:
        data: DataFrame with numeric ``Rainfall`` and ``Temperature`` columns.

    Returns:
        A 3-tuple ``(transformed, gamma_params, norm_params)``:
        ``transformed`` is a copy of ``data`` with the two ``*_U`` columns
        added, ``gamma_params`` is ``(shape, loc, scale)`` and
        ``norm_params`` is ``(mu, std)``.
    """
    # Work on a copy so the caller's frame is not modified as a side effect;
    # callers receive the augmented frame through the return value.
    transformed = data.copy()

    shape, loc, scale = stats.gamma.fit(transformed['Rainfall'])
    transformed['Rainfall_U'] = stats.gamma.cdf(
        transformed['Rainfall'], shape, loc, scale
    )

    mu, std = stats.norm.fit(transformed['Temperature'])
    transformed['Temperature_U'] = stats.norm.cdf(
        transformed['Temperature'], mu, std
    )

    return transformed, (shape, loc, scale), (mu, std)

# Gumbel Copula Implementation


class GumbelCopula:
    """Bivariate Gumbel (Gumbel-Hougaard) copula.

    The copula is Archimedean with generator inverse
    ``psi(t) = exp(-t ** (1 / theta))``. ``theta == 1`` is independence;
    larger ``theta`` gives stronger positive dependence
    (Kendall's tau equals ``1 - 1 / theta``).
    """

    def __init__(self, theta):
        """Store the dependence parameter.

        Args:
            theta: Dependence parameter of the Gumbel copula; must be >= 1.

        Raises:
            ValueError: If ``theta`` is smaller than 1 (or NaN), for which
                the Gumbel copula is not defined.
        """
        # `not theta >= 1` also rejects NaN, which `theta < 1` would let pass.
        if not theta >= 1:
            raise ValueError(f"Gumbel copula requires theta >= 1, got {theta!r}")
        self.theta = theta

    def sample(self, n):
        """Draw ``n`` pairs from the copula using NumPy's global RNG.

        Uses the Marshall-Olkin frailty construction: draw a positive stable
        variable ``S`` whose Laplace transform is ``exp(-t ** alpha)`` with
        ``alpha = 1 / theta``, then set ``U_j = exp(-(E_j / S) ** alpha)``
        for independent unit exponentials ``E_j``.

        Args:
            n: Number of pairs to draw.

        Returns:
            ``numpy.ndarray`` of shape ``(n, 2)`` with uniform marginals.
        """
        alpha = 1.0 / self.theta

        if alpha == 1.0:
            # theta == 1: the stable frailty degenerates to the constant 1,
            # which yields two independent uniforms.
            frailty = np.ones(n)
        else:
            # Kanter's representation of a positive stable variate.
            # `1 - uniform[0, 1)` keeps the angle in (0, pi], avoiding an
            # exact zero that would produce 0 / 0.
            angle = np.pi * (1.0 - np.random.uniform(0.0, 1.0, n))
            waiting = np.random.exponential(1.0, n)
            frailty = (
                np.sin(alpha * angle)
                / np.power(np.sin(angle), 1.0 / alpha)
                * np.power(
                    np.sin((1.0 - alpha) * angle) / waiting,
                    (1.0 - alpha) / alpha,
                )
            )

        # Both coordinates share the same frailty; that shared factor is what
        # induces the dependence between the two columns.
        exponentials = np.random.exponential(1.0, (n, 2))
        return np.exp(-np.power(exponentials / frailty[:, None], alpha))

# Generate synthetic data
# Seed NumPy's global RNG first so every np.random draw below is reproducible.


np.random.seed(42)
# 1000 draws each from gamma(shape=2, scale=15) and normal(mean=20, std=5).
# NOTE(review): the two series are drawn by separate, unrelated RNG calls, so
# the synthetic data itself carries no built-in dependence between columns.
rainfall = np.random.gamma(2, 15, 1000)
temperature = np.random.normal(20, 5, 1000)

data = pd.DataFrame({
'Rainfall': rainfall,
'Temperature': temperature
})

# Drop any rows containing NaN values (in place) before fitting


data.dropna(inplace=True)

# Fit marginals and transform to uniform; keep the fitted parameters so the
# copula samples can later be mapped back with the inverse CDFs


data, gamma_params, norm_params = fit_marginals(data)

# Dependence parameter for the Gumbel copula.
# NOTE(review): the value is hard-coded; no estimation of theta from `data`
# is visible in this file - confirm where 1.5 comes from.


theta = 1.5
copula = GumbelCopula(theta)

# Sample from the Gumbel copula: 1000 pairs, the same count as the synthetic
# rows generated above


samples = copula.sample(1000)

# Transform back to original scale using inverse CDF


def transform_samples(samples, gamma_params, norm_params):
    """Map copula samples back to the rainfall/temperature scales.

    Column 0 of ``samples`` is inverted through the gamma quantile function
    and column 1 through the normal quantile function.

    Args:
        samples: Array-like of shape ``(n, 2)`` with values in [0, 1];
            column 0 is the rainfall uniform, column 1 the temperature
            uniform.
        gamma_params: Positional parameters for ``scipy.stats.gamma.ppf``
            (``shape, loc, scale``).
        norm_params: Positional parameters for ``scipy.stats.norm.ppf``
            (``mu, std``).

    Returns:
        DataFrame with columns ``Rainfall_U``, ``Temperature_U`` (the inputs,
        unchanged) and ``Rainfall``, ``Temperature`` (the back-transformed
        values).
    """
    df = pd.DataFrame(samples, columns=['Rainfall_U', 'Temperature_U'])

    # The quantile functions return +/-inf at exactly 0 or 1. Clip to the
    # open interval for the inversion only; the stored *_U columns keep the
    # raw sample values.
    eps = np.finfo(float).eps
    rainfall_u = df['Rainfall_U'].clip(eps, 1.0 - eps)
    temperature_u = df['Temperature_U'].clip(eps, 1.0 - eps)

    df['Rainfall'] = stats.gamma.ppf(rainfall_u, *gamma_params)
    df['Temperature'] = stats.norm.ppf(temperature_u, *norm_params)
    return df

# Map the copula samples onto the rainfall/temperature scales using the
# marginal parameters fitted earlier.
samples_gumbel_df = transform_samples(samples, gamma_params, norm_params)

# Visualization
# Scatter plot overlaying the original data (blue) and the copula-based
# samples (orange) in the Rainfall/Temperature plane.
plt.figure(figsize=(12, 6))
plt.scatter(data['Rainfall'], data['Temperature'], alpha=0.5, color='blue',
label='Original Data')
plt.scatter(samples_gumbel_df['Rainfall'], samples_gumbel_df['Temperature'],
alpha=0.5, color='orange', label='Gumbel Copula Samples')
plt.title('Original Data vs. Gumbel Copula Samples')
plt.xlabel('Rainfall')
plt.ylabel('Temperature')
plt.grid()
plt.legend()
plt.show()

# Kernel Density Estimates for Rainfall
# Original vs. simulated marginal density drawn on the same axes.


plt.figure(figsize=(12, 5))
sns.kdeplot(data['Rainfall'], label='Original Rainfall', color='blue', fill=True,
alpha=0.5)
sns.kdeplot(samples_gumbel_df['Rainfall'], label='Gumbel Copula', color='orange',
linestyle='--', fill=True, alpha=0.5)
plt.title('Rainfall Distribution Comparison')
plt.xlabel('Rainfall')
plt.ylabel('Density')
plt.legend()
plt.grid()
plt.show()

# Kernel Density Estimates for Temperature
# Same comparison as above, for the Temperature column.


plt.figure(figsize=(12, 5))
sns.kdeplot(data['Temperature'], label='Original Temperature', color='blue',
fill=True, alpha=0.5)
sns.kdeplot(samples_gumbel_df['Temperature'], label='Gumbel Copula',
color='orange', linestyle='--', fill=True, alpha=0.5)
plt.title('Temperature Distribution Comparison')
plt.xlabel('Temperature')
plt.ylabel('Density')
plt.legend()
plt.grid()
plt.show()

# Comparison metrics for rainfall and temperature


def calculate_metrics(original, simulated):
    """Compute MAE, MSE and RMSE between original and simulated columns.

    The ``Rainfall`` and ``Temperature`` columns of the two frames are
    compared row by row with scikit-learn's error metrics.

    NOTE(review): the comparison is positional, so it is presumably only
    meaningful when row ``i`` of ``simulated`` corresponds to row ``i`` of
    ``original`` - confirm for independently drawn simulations.

    Args:
        original: DataFrame with ``Rainfall`` and ``Temperature`` columns.
        simulated: DataFrame with the same columns and the same number of
            rows as ``original``.

    Returns:
        A 6-tuple ``(mae_rainfall, mse_rainfall, rmse_rainfall,
        mae_temperature, mse_temperature, rmse_temperature)``.
    """
    mae_rainfall = mean_absolute_error(original['Rainfall'], simulated['Rainfall'])
    mse_rainfall = mean_squared_error(original['Rainfall'], simulated['Rainfall'])
    rmse_rainfall = np.sqrt(mse_rainfall)

    mae_temperature = mean_absolute_error(
        original['Temperature'], simulated['Temperature']
    )
    mse_temperature = mean_squared_error(
        original['Temperature'], simulated['Temperature']
    )
    rmse_temperature = np.sqrt(mse_temperature)

    # Parenthesised so all six values belong to one return statement.
    return (
        mae_rainfall,
        mse_rainfall,
        rmse_rainfall,
        mae_temperature,
        mse_temperature,
        rmse_temperature,
    )


# Unpack all six metrics in a single assignment.
(
    mae_rainfall,
    mse_rainfall,
    rmse_rainfall,
    mae_temperature,
    mse_temperature,
    rmse_temperature,
) = calculate_metrics(data, samples_gumbel_df)

# Print comparison metrics
print('Comparison Metrics for Gumbel Copula:')
print(f'Mean Absolute Error (MAE) - Rainfall: {mae_rainfall:.2f}')
print(f'Mean Squared Error (MSE) - Rainfall: {mse_rainfall:.2f}')
print(f'Root Mean Squared Error (RMSE) - Rainfall: {rmse_rainfall:.2f}')
print(f'Mean Absolute Error (MAE) - Temperature: {mae_temperature:.2f}')
print(f'Mean Squared Error (MSE) - Temperature: {mse_temperature:.2f}')
# Label now names the variable, matching the five lines above.
print(f'Root Mean Squared Error (RMSE) - Temperature: {rmse_temperature:.2f}')

# AIC and BIC calculations


def calculate_aic_bic(original, simulated, k=2):
    """Return least-squares style AIC and BIC for simulated vs. original values.

    With ``rss`` the sum of squared element-wise differences and ``n`` the
    number of observations::

        aic = n * log(rss / n) + 2 * k
        bic = n * log(rss / n) + k * log(n)

    Values are compared by position. pandas inputs are converted to arrays
    first, so differing indexes cannot silently turn the residuals into NaN
    through label alignment.

    Args:
        original: 1-D array-like of observed values.
        simulated: 1-D array-like of simulated values, same shape as
            ``original``.
        k: Number of model parameters used in the penalty term. Defaults to
            2, the simplified count this script uses.

    Returns:
        Tuple ``(aic, bic)``. If the two inputs are identical, ``rss`` is 0
        and both values are ``-inf``.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    observed = np.asarray(original, dtype=float)
    generated = np.asarray(simulated, dtype=float)
    if observed.shape != generated.shape or observed.size == 0:
        raise ValueError(
            "original and simulated must be non-empty and have the same shape"
        )

    n = len(observed)
    rss = np.sum((observed - generated) ** 2)

    # Both criteria share the same goodness-of-fit term; only the penalty differs.
    fit_term = n * np.log(rss / n)
    aic = fit_term + 2 * k
    bic = fit_term + k * np.log(n)
    return aic, bic

# Compute AIC/BIC separately for each variable, comparing the original column
# with the corresponding column of the copula-based samples.
aic_rainfall, bic_rainfall = calculate_aic_bic(data['Rainfall'],


samples_gumbel_df['Rainfall'])
aic_temperature, bic_temperature = calculate_aic_bic(data['Temperature'],
samples_gumbel_df['Temperature'])

# Print AIC and BIC (two decimals each)


print(f'\nAIC and BIC for Gumbel Copula:')
print(f'AIC - Rainfall: {aic_rainfall:.2f}, BIC: {bic_rainfall:.2f}')
print(f'AIC - Temperature: {aic_temperature:.2f}, BIC: {bic_temperature:.2f}')

# Print parameters of the copula (the single dependence parameter theta)


print(f'\nGumbel Copula Parameters:')
print(f'Theta: {theta:.2f}')

You might also like