0% found this document useful (0 votes)
9 views4 pages

Frank

Uploaded by

gunjanc080
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
9 views4 pages

Frank

Uploaded by

gunjanc080
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 4

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.optimize import minimize

# Function to fit and transform data to uniform marginals


def fit_marginals(data):
    """Fit parametric marginals and map both variables to uniform scores.

    A gamma distribution is fitted to ``data['Rainfall']`` and a normal
    distribution to ``data['Temperature']`` with ``scipy.stats``. Each
    column is then passed through its own fitted CDF, producing the
    ``'Rainfall_U'`` and ``'Temperature_U'`` columns.

    Args:
        data: DataFrame with ``'Rainfall'`` and ``'Temperature'`` columns.

    Returns:
        A 3-tuple ``(frame, gamma_params, norm_params)`` where ``frame`` is
        a copy of ``data`` with the two ``*_U`` columns added,
        ``gamma_params`` is ``(shape, loc, scale)`` and ``norm_params`` is
        ``(mu, std)``.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    data = data.copy()

    gamma_params = tuple(stats.gamma.fit(data['Rainfall']))
    data['Rainfall_U'] = stats.gamma.cdf(data['Rainfall'], *gamma_params)

    norm_params = tuple(stats.norm.fit(data['Temperature']))
    data['Temperature_U'] = stats.norm.cdf(data['Temperature'], *norm_params)

    return data, gamma_params, norm_params

# Frank Copula Implementation


class FrankCopula:
    """Bivariate Frank copula with dependence parameter ``theta``.

    For ``theta != 0`` the copula CDF is::

        C(u, v) = -1/theta * log(1 + (exp(-theta*u) - 1) * (exp(-theta*v) - 1)
                                     / (exp(-theta) - 1))

    ``theta > 0`` gives positive dependence and ``theta < 0`` negative
    dependence. ``theta == 0`` is treated as independence,
    ``C(u, v) = u * v``.
    """

    # Half-width of the theta search interval used by fit(). It keeps
    # exp(|theta|) finite in double precision while still allowing
    # near-perfect dependence.
    _THETA_BOUND = 500.0

    def __init__(self, theta):
        """Store the dependence parameter ``theta``."""
        self.theta = theta

    def sample(self, n):
        """Draw ``n`` pairs from the copula by conditional inversion.

        ``u`` and an auxiliary uniform ``w`` are drawn independently, then
        ``v`` solves ``dC(u, v)/du = w``. In closed form::

            exp(-theta*v) = (w*exp(-theta) + (1-w)*exp(-theta*u))
                            / (w + (1-w)*exp(-theta*u))

        Args:
            n: Number of pairs to draw.

        Returns:
            Array of shape ``(n, 2)``; column 0 is ``u``, column 1 is ``v``.
        """
        u = np.random.uniform(0, 1, n)
        w = np.random.uniform(0, 1, n)
        theta = self.theta

        if theta == 0:
            # Independence: the auxiliary uniform is the second coordinate.
            v = w
        else:
            # Both sums have only positive terms, so evaluating them with
            # logaddexp avoids cancellation and overflow for large |theta|.
            with np.errstate(divide='ignore'):  # log(0) when w == 0
                log_w = np.log(w)
            log_rest = np.log1p(-w) - theta * u
            log_lower_sum = np.logaddexp(log_w, log_rest)
            log_upper_sum = np.logaddexp(log_w - theta, log_rest)
            # Clip guards against rounding just outside the unit interval.
            v = np.clip((log_lower_sum - log_upper_sum) / theta, 0.0, 1.0)

        return np.column_stack((u, v))

    def frank_copula(self, u, v):
        """Return the copula CDF ``C(u, v)`` for this instance's ``theta``.

        Delegates to :meth:`frank_copula_static`, so both entry points
        agree, including ``u * v`` when ``theta == 0``.
        """
        return FrankCopula.frank_copula_static(u, v, self.theta)

    @staticmethod
    def fit(data_u):
        """Estimate ``theta`` by maximum likelihood.

        The objective is the negative sum of the copula log-density over
        the rows of ``data_u``, minimised with ``scipy.optimize.minimize``
        over ``[-_THETA_BOUND, _THETA_BOUND]`` starting from ``theta = 1``.

        Args:
            data_u: Array-like of shape ``(n, 2)`` holding uniform scores.

        Returns:
            The fitted ``theta`` as a float; it may be negative.
        """
        data_u = np.asarray(data_u, dtype=float)
        u, v = data_u[:, 0], data_u[:, 1]

        def negative_log_likelihood(params):
            theta = float(np.ravel(params)[0])
            return -np.sum(FrankCopula._log_density(u, v, theta))

        bound = FrankCopula._THETA_BOUND
        result = minimize(negative_log_likelihood, x0=[1.0],
                          bounds=[(-bound, bound)])
        return float(result.x[0])

    @staticmethod
    def frank_copula_static(u, v, theta):
        """Return the copula CDF ``C(u, v)`` for the given ``theta``.

        Args:
            u: First uniform score(s), scalar or array.
            v: Second uniform score(s), scalar or array.
            theta: Dependence parameter.

        Returns:
            ``u * v`` when ``theta == 0``; otherwise the CDF value with a
            lower clip of ``1e-10`` so callers can take its logarithm.
        """
        if theta == 0:
            return u * v  # No dependency
        # C = (log|g| - log|D|) / theta with g = exp(-theta) - 1 and D the
        # shared denominator term.
        log_g = np.log(np.abs(np.expm1(-theta)))
        log_d = FrankCopula._log_abs_denominator(u, v, theta)
        return np.clip((log_g - log_d) / theta, 1e-10, None)

    @staticmethod
    def _log_density(u, v, theta):
        """Return the log of the copula density ``c(u, v)``.

        ``c = -theta * g * exp(-theta*(u+v)) / D**2`` with
        ``g = exp(-theta) - 1``; ``theta == 0`` gives density 1 (log 0).
        """
        if theta == 0:
            return np.zeros(np.broadcast(u, v).shape)
        log_scale = np.log(abs(theta)) + np.log(np.abs(np.expm1(-theta)))
        log_d = FrankCopula._log_abs_denominator(u, v, theta)
        return log_scale - theta * (u + v) - 2.0 * log_d

    @staticmethod
    def _log_abs_denominator(u, v, theta):
        """Return ``log|D|``, ``D = g + (e^{-theta u} - 1)(e^{-theta v} - 1)``.

        ``D`` is rewritten as
        ``-(e^{-theta u} * (1 - e^{-theta (1-u)}) + e^{-theta v} * (1 - e^{-theta u}))``.
        Both summands share one sign, so their magnitudes can be added in
        log space without cancellation.
        """
        with np.errstate(divide='ignore'):  # log(0) when u is exactly 0 or 1
            first = -theta * u + np.log(np.abs(np.expm1(-theta * (1 - u))))
            second = -theta * v + np.log(np.abs(np.expm1(-theta * u)))
        return np.logaddexp(first, second)

# Synthetic observations; the fixed seed makes every run draw the same values.
np.random.seed(42)
rainfall = np.random.gamma(2, 15, 1000)
temperature = np.random.normal(20, 5, 1000)
data = pd.DataFrame({'Rainfall': rainfall, 'Temperature': temperature})

# Remove any rows that contain NaN before fitting.
data.dropna(inplace=True)

# Fit both marginals; the returned frame carries the uniform-score columns.
data, gamma_params, norm_params = fit_marginals(data)

# Estimate the copula parameter from the two uniform-score columns.
theta = copula_params = FrankCopula.fit(
    data[['Rainfall_U', 'Temperature_U']].values
)

# Build the copula with the fitted parameter and draw 1000 pairs from it.
copula = FrankCopula(theta)
samples = copula.sample(1000)

# Transform back to original scale using inverse CDF with safeguards


def transform_samples(samples, gamma_params, norm_params):
    """Map copula samples on the unit square back to the data scale.

    Args:
        samples: Array-like of shape ``(n, 2)``; column 0 holds rainfall
            uniform scores, column 1 temperature uniform scores.
        gamma_params: ``(shape, loc, scale)`` passed to
            ``scipy.stats.gamma.ppf``.
        norm_params: ``(mu, std)`` passed to ``scipy.stats.norm.ppf``.

    Returns:
        DataFrame with columns ``'Rainfall_U'``, ``'Temperature_U'``,
        ``'Rainfall'`` and ``'Temperature'``.
    """
    # Keep scores strictly inside (0, 1) so the inverse CDFs stay finite.
    lower, upper = 1e-10, 1 - 1e-10

    df = pd.DataFrame(samples, columns=['Rainfall_U', 'Temperature_U'])
    df['Rainfall'] = stats.gamma.ppf(
        np.clip(df['Rainfall_U'], lower, upper), *gamma_params)
    df['Temperature'] = stats.norm.ppf(
        np.clip(df['Temperature_U'], lower, upper), *norm_params)
    return df

# Map the copula samples back to rainfall/temperature units.
samples_frank_df = transform_samples(samples, gamma_params, norm_params)

# Visualization: scatter of the original data overlaid with the copula samples.
# pyplot calls act on the current figure, so each plt.figure() starts a new plot.
plt.figure(figsize=(12, 6))
plt.scatter(data['Rainfall'], data['Temperature'], alpha=0.5, color='blue',
label='Original Data')
plt.scatter(samples_frank_df['Rainfall'], samples_frank_df['Temperature'],
alpha=0.5, color='orange',
label='Frank Copula Samples')
plt.title('Original Data vs. Frank Copula Samples')
plt.xlabel('Rainfall')
plt.ylabel('Temperature')
plt.grid()
plt.legend()
plt.show()

# Kernel Density Estimates for Rainfall: original vs. simulated marginal.


plt.figure(figsize=(12, 5))
sns.kdeplot(data['Rainfall'], label='Original Rainfall', color='blue', fill=True,
alpha=0.5)
sns.kdeplot(samples_frank_df['Rainfall'], label='Frank Copula', color='orange',
linestyle='--', fill=True, alpha=0.5)
plt.title('Rainfall Distribution Comparison')
plt.xlabel('Rainfall')
plt.ylabel('Density')
plt.legend()
plt.grid()
plt.show()

# Kernel Density Estimates for Temperature: original vs. simulated marginal.


plt.figure(figsize=(12, 5))
sns.kdeplot(data['Temperature'], label='Original Temperature', color='blue',
fill=True, alpha=0.5)
sns.kdeplot(samples_frank_df['Temperature'], label='Frank Copula', color='orange',
linestyle='--', fill=True, alpha=0.5)
plt.title('Temperature Distribution Comparison')
plt.xlabel('Temperature')
plt.ylabel('Density')
plt.legend()
plt.grid()
plt.show()

# Comparison metrics for rainfall and temperature


def calculate_metrics(original, simulated):
    """Compute MAE, MSE and RMSE between original and simulated columns.

    The ``'Rainfall'`` and ``'Temperature'`` columns of the two inputs are
    compared with scikit-learn's ``mean_absolute_error`` and
    ``mean_squared_error``; RMSE is the square root of the MSE.

    Args:
        original: Frame with ``'Rainfall'`` and ``'Temperature'`` columns.
        simulated: Frame with the same two columns and the same length.

    Returns:
        A 6-tuple ``(mae_rainfall, mse_rainfall, rmse_rainfall,
        mae_temperature, mse_temperature, rmse_temperature)``.
    """
    metrics = []
    for column in ('Rainfall', 'Temperature'):
        mae = mean_absolute_error(original[column], simulated[column])
        mse = mean_squared_error(original[column], simulated[column])
        metrics.extend((mae, mse, np.sqrt(mse)))
    # All six values go back in one tuple, matching the caller's unpacking.
    return tuple(metrics)

# Unpack all six metrics in one statement. The target list is parenthesised
# so it can span several lines without being split into separate statements.
(mae_rainfall, mse_rainfall, rmse_rainfall,
 mae_temperature, mse_temperature, rmse_temperature) = calculate_metrics(
    data, samples_frank_df)

# Print comparison metrics
print(f'Comparison Metrics for Frank Copula:')
print(f'Mean Absolute Error (MAE) - Rainfall: {mae_rainfall:.2f}')
print(f'Mean Squared Error (MSE) - Rainfall: {mse_rainfall:.2f}')
print(f'Root Mean Squared Error (RMSE) - Rainfall: {rmse_rainfall:.2f}')
print(f'Mean Absolute Error (MAE) - Temperature: {mae_temperature:.2f}')
print(f'Mean Squared Error (MSE) - Temperature: {mse_temperature:.2f}')
print(f'Root Mean Squared Error (RMSE) - Temperature: {rmse_temperature:.2f}')

# AIC and BIC calculations


def calculate_aic_bic(original, simulated):
    """Compute least-squares AIC and BIC between two equal-length series.

    Uses ``n * log(rss / n)`` as the fit term with ``k = 2`` parameters,
    where ``rss`` is the sum of squared element-wise differences.

    Args:
        original: Observed values (array-like or pandas Series).
        simulated: Simulated values of the same shape.

    Returns:
        Tuple ``(aic, bic)``.

    Raises:
        ValueError: If the inputs differ in shape or are empty.
    """
    # Convert to arrays so values are compared by position. Subtracting two
    # pandas Series aligns on index labels instead and can yield all-NaN.
    actual = np.asarray(original, dtype=float)
    predicted = np.asarray(simulated, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            'original and simulated must have the same shape, got '
            f'{actual.shape} and {predicted.shape}')

    n = len(actual)
    if n == 0:
        raise ValueError('original and simulated must not be empty')

    rss = np.sum((actual - predicted) ** 2)
    k = 2  # number of parameters in the model (simplified)
    fit_term = n * np.log(rss / n)
    aic = fit_term + 2 * k
    bic = fit_term + k * np.log(n)
    return aic, bic

# Information criteria for each variable, original vs. simulated values.
aic_rainfall, bic_rainfall = calculate_aic_bic(
    data['Rainfall'], samples_frank_df['Rainfall']
)
aic_temperature, bic_temperature = calculate_aic_bic(
    data['Temperature'], samples_frank_df['Temperature']
)

# Report AIC and BIC.
print(f'\nAIC and BIC for Frank Copula:')
print(f'AIC - Rainfall: {aic_rainfall:.2f}, BIC: {bic_rainfall:.2f}')
print(f'AIC - Temperature: {aic_temperature:.2f}, BIC: {bic_temperature:.2f}')

# Report the fitted copula parameter.
print(f'\nFrank Copula Parameters:')
print(f'Fitted Parameter θ: {theta:.2f}')

You might also like