Ilovepdf Merged
Ilovepdf Merged
ipynb - Colab
1 #q1
2 import numpy as np
3 import matplotlib.pyplot as plt
4 import statsmodels.api as sm
5
# Settings shared by every simulated series
n_years = 100         # length of the synthetic record, in years
n_obs = 12 * n_years  # twelve observations per year
mu = 2.5e10           # mean
sigma = mu * 0.5      # standard deviation, set to half of the mean
11
12 # Function to generate time series data
def generate_data(model_type, phi=None, theta=None):
    """Simulate one AR(1), MA(1) or ARMA(1,1) series of length ``n_obs``.

    The series is ``mu`` plus a zero-mean ARMA sample whose innovations have
    standard deviation ``sigma``. Coefficients are handed to statsmodels as
    lag polynomials ``ar=[1, -phi]`` and ``ma=[1, -theta]``.

    Parameters
    ----------
    model_type : str
        One of ``'AR'``, ``'MA'`` or ``'ARMA'``.
    phi : float, optional
        AR(1) coefficient; required for ``'AR'`` and ``'ARMA'``.
    theta : float, optional
        MA(1) coefficient; required for ``'MA'`` and ``'ARMA'``.

    Returns
    -------
    numpy.ndarray
        Array of ``n_obs`` simulated values.

    Raises
    ------
    ValueError
        If ``model_type`` is not recognised, or a coefficient the chosen
        model needs was not supplied.
    """
    needs_phi = model_type in ('AR', 'ARMA')
    needs_theta = model_type in ('MA', 'ARMA')
    if not (needs_phi or needs_theta):
        raise ValueError("Invalid model type. Choose from 'AR', 'MA', or 'ARMA'.")
    # Fail with a clear message instead of a TypeError from negating None.
    if needs_phi and phi is None:
        raise ValueError(f"Model type '{model_type}' requires phi.")
    if needs_theta and theta is None:
        raise ValueError(f"Model type '{model_type}' requires theta.")

    # Fixed seed so every call is reproducible; generate_sample draws its
    # innovations from NumPy's global generator by default.
    np.random.seed(42)

    ar_poly = [1, -phi] if needs_phi else [1]
    ma_poly = [1, -theta] if needs_theta else [1]
    process = sm.tsa.ArmaProcess(ar=ar_poly, ma=ma_poly)

    # NOTE(review): `scale` sets the innovation standard deviation, so the
    # series' own standard deviation only equals sigma for white noise --
    # confirm which of the two sigma is meant to be.
    fluctuations = process.generate_sample(nsample=n_obs, scale=sigma)

    # The ARMA sample is zero-mean; shift it so the series is centred on mu.
    return mu + fluctuations
30
31 # Function to plot ACF and PACF
def plot_acf_pacf(data, model_type):
    """Draw the ACF and PACF of ``data`` side by side and show the figure.

    Both panels use 20 lags and ``alpha=0.05``; ``model_type`` only appears in
    the panel titles.
    """
    _, (acf_axis, pacf_axis) = plt.subplots(1, 2, figsize=(12, 4))
    panels = (
        (sm.graphics.tsa.plot_acf, acf_axis, 'ACF'),
        (sm.graphics.tsa.plot_pacf, pacf_axis, 'PACF'),
    )
    for draw, axis, name in panels:
        draw(data, lags=20, ax=axis, alpha=0.05)
        # Replace the default title with one that names the model.
        axis.set_title(f'{name} ({model_type})')
    plt.tight_layout()
    plt.show()
40
41 # Generate and plot data for each model
# Display name -> arguments for generate_data, in plotting order:
# four AR(1) cases, four MA(1) cases, then two ARMA(1,1) cases.
models = {
    **{
        f'AR (ϕ={phi})': {'type': 'AR', 'phi': phi}
        for phi in (0.2, 0.8, -0.2, -0.8)
    },
    **{
        f'MA (θ={theta})': {'type': 'MA', 'theta': theta}
        for theta in (0.2, 0.8, -0.2, -0.8)
    },
    'ARMA (ϕ=0.3, θ=0.7)': {'type': 'ARMA', 'phi': 0.3, 'theta': 0.7},
    'ARMA (ϕ=-0.5, θ=-0.5)': {'type': 'ARMA', 'phi': -0.5, 'theta': -0.5},
}
54
# For every configured model: simulate a series, plot it, then plot its ACF/PACF.
for model_name, params in models.items():
    # .get() yields None for a coefficient the model does not define.
    data = generate_data(params['type'], params.get('phi'), params.get('theta'))

    plt.figure(figsize=(10, 4))
    plt.plot(data, label=model_name)
    plt.title(f"Time Series ({model_name})")
    plt.xlabel("Months")
    plt.ylabel("Flow")
    plt.legend()
    plt.show()

    # 'AR (ϕ=0.2)' -> 'AR'. Strip the space left before '(' so the panel
    # titles read "ACF (AR)" rather than "ACF (AR )".
    model_family = model_name.split('(')[0].strip()
    plot_acf_pacf(data, model_family)
66
67
https://colab.research.google.com/drive/1cHCNzZVqgcFVnIfs1U_rmGEyM4N0Jh2a#scrollTo=G7OWbyUzRr-i&printMode=true 1/9
4/13/24, 5:32 PM Untitled38.ipynb - Colab
output
https://colab.research.google.com/drive/1cHCNzZVqgcFVnIfs1U_rmGEyM4N0Jh2a#scrollTo=G7OWbyUzRr-i&printMode=true 2/9
4/13/24, 5:32 PM Untitled38.ipynb - Colab
https://colab.research.google.com/drive/1cHCNzZVqgcFVnIfs1U_rmGEyM4N0Jh2a#scrollTo=G7OWbyUzRr-i&printMode=true 3/9
4/13/24, 5:32 PM Untitled38.ipynb - Colab
https://colab.research.google.com/drive/1cHCNzZVqgcFVnIfs1U_rmGEyM4N0Jh2a#scrollTo=G7OWbyUzRr-i&printMode=true 4/9
4/13/24, 5:32 PM Untitled38.ipynb - Colab
https://colab.research.google.com/drive/1cHCNzZVqgcFVnIfs1U_rmGEyM4N0Jh2a#scrollTo=G7OWbyUzRr-i&printMode=true 5/9
4/13/24, 5:32 PM Untitled38.ipynb - Colab
https://colab.research.google.com/drive/1cHCNzZVqgcFVnIfs1U_rmGEyM4N0Jh2a#scrollTo=G7OWbyUzRr-i&printMode=true 6/9
4/13/24, 5:32 PM Untitled38.ipynb - Colab
1 import pandas as pd
2
# Load the station record from the Excel workbook in the working directory.
df = pd.read_excel(io='IN023320100.xlsx')
# Bare expression: a notebook echoes the frame when this ends the cell.
df
7
1901- KANPUR,
0 IN023320100 26.47 80.35 185 53
01-01 IN
1901- KANPUR,
1 IN023320100 26.47 80.35 185 0
01-02 IN
1901- KANPUR,
2 IN023320100 26.47 80.35 185 0
01-03 IN
1901- KANPUR,
3 IN023320100 26.47 80.35 185 0
01-04 IN
1901- KANPUR,
4 IN023320100 26.47 80.35 185 0
01-05 IN
1968- KANPUR,
23930 IN023320100 26.47 80.35 185 0
12-27 IN
1968- KANPUR,
23931 IN023320100 26.47 80.35 185 0
12-28 IN
1
#2a
# PRCP is stored in tenths of a millimetre; rescale it to millimetres.
df['PRCP'] = df['PRCP'].div(10)

# Smallest daily total (mm/day) that still counts as a wet day.
threshold = 1

# Encode each day as wet (1) when PRCP reaches the threshold, otherwise dry (0).
df['State'] = df['PRCP'].ge(threshold).astype(int)

# Show the frame including the new State column.
print(df)
14
https://colab.research.google.com/drive/1cHCNzZVqgcFVnIfs1U_rmGEyM4N0Jh2a#scrollTo=G7OWbyUzRr-i&printMode=true 7/9
4/13/24, 5:32 PM Untitled38.ipynb - Colab
23930 0.0 ,,I 0
23931 0.0 ,,I 0
23932 0.0 ,,I 0
23933 0.0 ,,I 0
23934 0.0 ,,I 0
1
#2b
# Later cells match on DATE as text, so keep the column as strings.
if df['DATE'].dtype != 'string':
    df['DATE'] = df['DATE'].astype(str)

# Parsed copy of the dates for calendar arithmetic; unparseable entries
# become NaT and are therefore excluded from every mask below.
dates = pd.to_datetime(df['DATE'], errors='coerce')

# Filter data for June and August
is_june_august = dates.dt.month.isin([6, 8])
df_june_august = df[is_june_august]

# Take each day's successor from the FULL record, not from the filtered
# frame: shifting the filtered frame would pair 30 June with 1 August and
# 31 August with the following year's 1 June.
next_state = df['State'].shift(-1)

# Only count a transition when the next row really is the next calendar
# day, so gaps in the record do not create spurious transitions.
has_next_day = (dates.shift(-1) - dates) == pd.Timedelta(days=1)
valid = is_june_august & has_next_day

# Create a transition matrix: rows are today's state, columns tomorrow's,
# and each row is normalised to sum to 1.
transition_matrix = pd.crosstab(
    df.loc[valid, 'State'],
    next_state[valid].astype(int),
    rownames=['State'],
    colnames=['Next state'],
    normalize='index',
)

# Print the transition matrix
print("Transition Probability Matrix:")
print(transition_matrix)
1
2 #2c
3 import numpy as np
4 import matplotlib.pyplot as plt
5
6 # Function to simulate a sequence based on transition matrix
def simulate_sequence(transition_matrix, initial_state, n_days):
    """Generate a chain of states by repeated draws from ``transition_matrix``.

    The chain starts at ``initial_state``. Each further state is drawn with
    ``np.random.choice`` from the matrix's column labels, using the row
    labelled by the current state as the probabilities.

    Returns a list holding ``initial_state`` followed by ``n_days - 1`` drawn
    states.
    """
    candidates = transition_matrix.columns
    path = [initial_state]
    for _ in range(n_days - 1):
        row_probabilities = transition_matrix.loc[path[-1]]
        path.append(np.random.choice(candidates, p=row_probabilities))
    return path
15
# Simulate sequences for June and August
n_days = 5000
# Start both chains from the state of the first row of the filtered record.
initial_state = df_june_august.iloc[0]['State']

# Both chains are drawn from the same transition_matrix, June first and then
# August; they differ only through the random draws.
june_sequence, august_sequence = (
    simulate_sequence(transition_matrix, initial_state, n_days)
    for _ in range(2)
)
22
# Plot empirical cumulative probability distribution functions
def plot_ecdf(data, label):
    """Add the empirical CDF of ``data`` to the current axes as a line.

    The sorted values go on the x-axis and the cumulative fractions
    1/n, 2/n, ..., 1 on the y-axis; ``label`` is the legend entry.
    """
    ordered = np.sort(data)
    count = len(ordered)
    cumulative = np.arange(1, count + 1) / count
    plt.plot(ordered, cumulative, label=label)
28
def _spell_lengths(states, dates=None):
    """Return the length, in days, of each run of identical consecutive states.

    A new spell starts wherever the state changes. When ``dates`` is given, a
    new spell also starts wherever two neighbouring records are not exactly
    one day apart, so a spell never runs across a gap in the record or from
    one year's month into the next year's.
    """
    states = np.asarray(states)
    if states.size == 0:
        return np.array([], dtype=int)

    starts_spell = np.ones(states.size, dtype=bool)
    starts_spell[1:] = states[1:] != states[:-1]
    if dates is not None:
        day_gaps = np.diff(pd.to_datetime(np.asarray(dates)).to_numpy())
        starts_spell[1:] |= day_gaps != np.timedelta64(1, 'D')

    # Distance between successive spell starts, plus the end of the data,
    # is the spell length.
    spell_starts = np.flatnonzero(starts_spell)
    return np.diff(np.append(spell_starts, states.size))


plt.figure(figsize=(10, 6))
# The axis and title describe spell durations, so plot run lengths of the
# state sequences (wet and dry spells pooled), not the raw 0/1 states. Each
# observed curve uses only its own month's records.
for month_label, month_tag, simulated in (
    ('June', '-06-', june_sequence),
    ('August', '-08-', august_sequence),
):
    observed = df_june_august[df_june_august['DATE'].str.contains(month_tag)]
    plot_ecdf(_spell_lengths(simulated), f'{month_label} (Simulated)')
    plot_ecdf(
        _spell_lengths(observed['State'], observed['DATE']),
        f'{month_label} (Observed)',
    )

plt.xlabel('Duration (days)')
plt.ylabel('Empirical Cumulative Probability')
plt.legend()
plt.title('Empirical Cumulative Probability Distribution of Wet and Dry Spells')
plt.show()
40
https://colab.research.google.com/drive/1cHCNzZVqgcFVnIfs1U_rmGEyM4N0Jh2a#scrollTo=G7OWbyUzRr-i&printMode=true 8/9
4/13/24, 5:32 PM Untitled38.ipynb - Colab
https://colab.research.google.com/drive/1cHCNzZVqgcFVnIfs1U_rmGEyM4N0Jh2a#scrollTo=G7OWbyUzRr-i&printMode=true 9/9