Ejecución Paso A Paso

Descargar como pdf o txt
Descargar como pdf o txt
Está en la página 1 de 12

5/9/2020 rn_caudales - Jupyter Notebook

In [4]:

#===========================================================
#DESARROLLADOR : Ing.Ramirez Quispe, Robert Marlindo. #
#DOCENTE : Dr. Walter Obando Licera #
#PROGRAMA : Redes Neuronales Artificiales #
# en la Hidrología. #
#FECHA : 05/09/2020 DD/MM/AA #
#LUGAR : UNI Lima - Perú #
# Maestría en Ingeniería Hidráulica #
#BLOGGER : https://ramirezquispe1.blogspot.com/ #
#===========================================================#

In [5]:

# Importar librerias
import scipy.stats as stats # librería estadística
import numpy as np # librería numérica
import pandas as pd # tablas
import matplotlib.pyplot as plt # gráficos
import seaborn as sns # gráficos más elaborados
import itertools # objetos de iteración

In [6]:

# Read the discharge table from caudales.csv (bare file name, so it is resolved against the working directory)


data_caudales = pd.read_csv("caudales.csv")
# Show the first rows as a quick check of the parsed columns
data_caudales.head()
Out[6]:

Q_simulado Q_observado

0 22.243 25.360

1 22.214 23.394

2 22.263 23.781

3 22.440 23.716

4 22.842 24.367

In [7]:

# (rows, columns) of the loaded table
data_caudales.shape
Out[7]:

(1217, 2)

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 1/12
5/9/2020 rn_caudales - Jupyter Notebook

In [9]:

# 1. Statistical summary
plt.figure(figsize=(10,2)) # figure size
# describe()[1:] drops the first row of the describe() output (the "count" row in pandas'
# default summary); the rest is transposed and rounded to 2 decimals before being drawn.
# NOTE(review): the next line is cut off after "fmt" in this export; its remaining arguments are not visible.
sns.heatmap(round(data_caudales.describe()[1:].transpose(),2), linewidth=2, annot=True, fmt
plt.xticks(fontsize=20)
plt.yticks(fontsize=12)
plt.title("Resumen de variables estadísticas")
plt.show()

In [10]:

# 1. Define the main variable (target variable)
# This column name is what the split cell selects as Y and what model() reads as a global.


target = "Q_observado"

In [12]:

# 2. Define la función objetivo


from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

In [13]:

# Display the first rows of the table again before plotting
data_caudales.head()
Out[13]:

Q_simulado Q_observado

0 22.243 25.360

1 22.214 23.394

2 22.263 23.781

3 22.440 23.716

4 22.842 24.367

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 2/12
5/9/2020 rn_caudales - Jupyter Notebook

In [14]:

# Line plot of the two discharge columns against the row index
# (pyplot is already imported in the imports cell; the import is repeated here)
import matplotlib.pyplot as plt
plt.figure(figsize=(12,6))
# NOTE(review): the two plot calls below are cut off inside their label strings in this export.
plt.plot(data_caudales.Q_simulado, marker='x', linestyle=':', color='b', label = "Q_simulad
plt.plot(data_caudales.Q_observado, marker='*', linestyle='-', color='g', label = "Q_observ
plt.grid(True) # Turns on the plot grid (the original author notes it does not show up)
plt.legend()
Out[14]:

<matplotlib.legend.Legend at 0x1df06dec190>

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 3/12
5/9/2020 rn_caudales - Jupyter Notebook

In [15]:

# 3. Split the data into two subsets.
#
#   X     : model inputs (EVERY column except the main variable)
#   Y     : main (target) variable
#   train : training data (~70%)
#   test  : test data (~30%)

from sklearn.model_selection import train_test_split  # utility that splits the data

# 70% of the rows go to training and the remaining 30% are held out for testing;
# the fixed random_state makes the shuffle repeatable.
train, test = train_test_split(
    data_caudales,
    test_size=0.3,
    random_state=123,
)

# Training portion: the predictors are whatever is left once the target column is removed
train_X = train.drop(columns=[target])
train_Y = train[target]

# Held-out portion, separated the same way
test_X = test.drop(columns=[target])
test_Y = test[target]

In [16]:

# First rows of the training inputs (the index keeps the original, shuffled row labels)
train_X.head()
Out[16]:

Q_simulado

169 29.031

801 22.000

1008 8.744

1066 14.303

274 24.586

In [17]:

# First rows of the training target; the index labels should line up with train_X
train_Y.head()
Out[17]:

169 24.629
801 24.170
1008 9.006
1066 14.008
274 32.606
Name: Q_observado, dtype: float64

In [18]:

# (rows, columns) of the training inputs
train_X.shape
Out[18]:

(851, 1)

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 4/12
5/9/2020 rn_caudales - Jupyter Notebook

In [12]:

# First rows of the held-out inputs
# NOTE(review): this cell's recorded execution count (12) is out of sequence with the
# surrounding cells (18, 20); re-run the notebook top to bottom to confirm the output is current.
test_X.head()
Out[12]:

Q_simulado

542 15.874

569 19.643

891 25.248

920 30.599

883 42.823

In [20]:

# model: fit one regressor, print its hold-out error and cross-validation scores, and plot
# observed vs predicted values for the test set.
#
# Parameters
#   algorithm : estimator object; .fit(X, y) and .predict(X) are called on it here, and it is
#               also handed to cross_val_score.
#   dtrainx   : training inputs passed to fit and to cross_val_score.
#   dtrainy   : training target passed to fit and to cross_val_score.
#   dtestx    : inputs the fitted estimator predicts on.
#   dtesty    : observed target for dtestx; it must support reset_index() and expose a column
#               named by the module-level `target` afterwards.
#   of_type   : not referenced anywhere in the visible body.
#
# There is no return statement in the visible body; results are printed or drawn.
#
# NOTE(review): the body's indentation was lost in this export, and three lines are cut off at
# the right margin; each is flagged where it occurs.
def model(algorithm, dtrainx, dtrainy, dtestx, dtesty, of_type):


# First five characters of the estimator's string form; not used again in the visible body.
alg_name = str(algorithm)
alg_name = alg_name[0:5]

print (algorithm)
print ("===============")
algorithm.fit(dtrainx,dtrainy)
prediction = algorithm.predict(dtestx)
# Root of the mean squared error between the observed test target and the predictions.
print ("RMSE :", np.sqrt(mean_squared_error(dtesty, prediction)) )
# Wrap the predictions in a DataFrame so that column 0 can be plotted through pandas below.
prediction = pd.DataFrame(prediction)
# 20-fold cross-validation on the training data only.
# NOTE(review): this line is cut off inside the scoring string ("neg_mean_squared_e...").
# The recorded outputs of the calling cells show negative cv values, so they are not directly
# comparable with the RMSE printed above -- presumably negated squared errors; confirm.
cross_val = cross_val_score(algorithm,dtrainx,dtrainy,cv=20,scoring="neg_mean_squared_e
cross_val = cross_val.ravel()
print ("Validación cruzada")
print ("===============")
print ("cv-mean :",cross_val.mean())
print ("cv-std :",cross_val.std())
print ("cv-max :",cross_val.max())
print ("cv-min :",cross_val.min())

# New figure, then select the upper cell of a 2-row, 1-column subplot grid.
# The plot calls below pass their own figsize=(12,13) as well.
plt.figure(figsize=(10,35))
plt.subplot(211)

# Depends on the module-level `target` rather than on a parameter: after reset_index() the
# observed values are picked out by that column name, on a fresh positional index.
testy = dtesty.reset_index()[target]

# NOTE(review): cut off after "col" in this export; the remaining arguments are not visible.
ax = testy.plot(label="Caudal Observado",figsize=(12,13),linewidth=2, linestyle='-',col


# NOTE(review): cut off at the same width; the linestyle value and any later arguments may be incomplete.
ax = prediction[0].plot(label = "Caudal RNA",figsize=(12,13),linewidth=2, linestyle='-'

plt.legend(loc="best")
plt.title("Caudal Observado vs Caudal RNA ")
plt.xlabel("Indice")
plt.ylabel("Valores")
plt.grid(True) # Turns on the plot grid (the original author notes it does not show up)

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 5/12
5/9/2020 rn_caudales - Jupyter Notebook

In [22]:

# Linear regression
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
# Fit, score and plot through the shared model() helper; the trailing "coef" is received as of_type
model(lr,train_X,train_Y,test_X,test_Y,"coef")
LinearRegression()
===============
RMSE : 4.546930060654723
Validación cruzada
===============
cv-mean : -20.832059607668857
cv-std : 8.328806583173176
cv-max : -7.685452152252439
cv-min : -39.97126907544244

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 6/12
5/9/2020 rn_caudales - Jupyter Notebook

In [23]:

# Lasso regression
# Ridge is imported here too; the Ridge cell further down relies on this import.
from sklearn.linear_model import Ridge,Lasso
ls = Lasso()
model(ls,train_X,train_Y,test_X,test_Y,"coef")
Lasso()
===============
RMSE : 4.554992299413336
Validación cruzada
===============
cv-mean : -20.83846304999165
cv-std : 8.434246103606126
cv-max : -7.703670180417129
cv-min : -40.40778511526412

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 7/12
5/9/2020 rn_caudales - Jupyter Notebook

In [24]:

# Ridge regression
# Ridge is not imported in this cell; it comes from the import in the Lasso cell, which must run first.
# NOTE(review): the variable name "rigde" looks like a misspelling of "ridge"; both uses below agree.
rigde = Ridge()
model(rigde,train_X,train_Y,test_X,test_Y,"coef")
Ridge()
===============
RMSE : 4.5469383538694155
Validación cruzada
===============
cv-mean : -20.83205750902434
cv-std : 8.328931074527857
cv-max : -7.685468099957004
cv-min : -39.971781602554316

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 8/12
5/9/2020 rn_caudales - Jupyter Notebook

In [25]:

# Decision tree regressor, constructed with no arguments


from sklearn.tree import DecisionTreeRegressor
dtr = DecisionTreeRegressor()
# Same evaluation helper as the linear models; "feat" is received as of_type
model(dtr,train_X,train_Y,test_X,test_Y,"feat")
DecisionTreeRegressor()
===============
RMSE : 5.972632273890786
Validación cruzada
===============
cv-mean : -37.6498808329759
cv-std : 15.085623642571658
cv-max : -10.134598357142854
cv-min : -64.8771887219476

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 9/12
5/9/2020 rn_caudales - Jupyter Notebook

In [26]:

# Gradient boosting regressor, constructed with no arguments


from sklearn.ensemble import GradientBoostingRegressor
gbr = GradientBoostingRegressor()
# Same evaluation helper as the other models; "feat" is received as of_type
model(gbr,train_X,train_Y,test_X,test_Y,"feat")
GradientBoostingRegressor()
===============
RMSE : 4.455173551504094
Validación cruzada
===============
cv-mean : -21.14999130653242
cv-std : 10.37021145188244
cv-max : -6.576186574861182
cv-min : -44.972822465209674

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 10/12
5/9/2020 rn_caudales - Jupyter Notebook

In [27]:

# Random forest regressor


from sklearn.ensemble import RandomForestRegressor
# Each tree is grown on a bootstrap resample of the training rows, so an unseeded forest
# prints different RMSE / cross-validation figures on every run. Fixing random_state (same
# value as the train/test split) makes this cell reproducible under Restart & Run All.
rf = RandomForestRegressor(random_state=123)
# Fit, score and plot through the shared model() helper; "feat" is received as of_type
model(rf,train_X,train_Y,test_X,test_Y,"feat")
RandomForestRegressor()
===============
RMSE : 5.014309748582894
Validación cruzada
===============
cv-mean : -27.808089385925705
cv-std : 11.790529652061377
cv-max : -6.6742833717984364
cv-min : -51.611068145283646

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 11/12
5/9/2020 rn_caudales - Jupyter Notebook

In [28]:

# AdaBoost regressor


from sklearn.ensemble import AdaBoostRegressor
# Every boosting round draws a weighted resample of the training rows, so an unseeded
# ensemble prints different RMSE / cross-validation figures on every run. Fixing random_state
# (same value as the train/test split) makes this cell reproducible under Restart & Run All.
adb = AdaBoostRegressor(random_state=123)
# Fit, score and plot through the shared model() helper; "feat" is received as of_type
model(adb,train_X,train_Y,test_X,test_Y,"feat")
AdaBoostRegressor()
===============
RMSE : 4.515763876587429
Validación cruzada
===============
cv-mean : -22.634972822076865
cv-std : 7.435903127633061
cv-max : -9.728850478085517
cv-min : -40.99896614926056

localhost:8888/notebooks/python_uni_/_pdise_havanzada/rn_caudales.ipynb# 12/12

También podría gustarte