# IMPORTING PACKAGES
import pandas as pd # data processing
import numpy as np # working with arrays
import matplotlib.pyplot as plt # visualization
import seaborn as sb # visualization
from termcolor import colored as cl # text customization
from sklearn.model_selection import train_test_split # data split
from sklearn.linear_model import LinearRegression # OLS algorithm
from sklearn.linear_model import Ridge # Ridge algorithm
from sklearn.linear_model import Lasso # Lasso algorithm
from sklearn.linear_model import BayesianRidge # Bayesian algorithm
from sklearn.linear_model import ElasticNet # ElasticNet algorithm
from sklearn.metrics import explained_variance_score as evs # evaluation metric
from sklearn.metrics import r2_score as r2 # evaluation metric
# Global plotting defaults, applied once for every figure drawn below.
sb.set_style(style='whitegrid')  # seaborn theme
plt.rcParams['figure.figsize'] = 20, 10  # default figure size
# importing_packages.py hosted with ❤ by GitHub (view raw)
# LOADING THE DATA
# Read the CSV and promote its 'Id' column to the row index in one step.
df = pd.read_csv('house.csv').set_index('Id')
# Preview the first five rows. This is a bare expression, so the result is
# only displayed in an interactive session (e.g. a notebook cell).
df.head(n = 5)
# importing_data.py hosted with ❤ by GitHub (view raw)
# Remove, in place, every row that contains at least one missing value.
df.dropna(axis=0, how='any', inplace=True)
# Print the remaining missing-value count per column (in bold) to confirm
# that nothing is left.
print(cl(df.isna().sum(), attrs=['bold']))
# dropna.py hosted with ❤ by GitHub (view raw)
# Summary statistics of the DataFrame. This is a bare expression, so the
# result is only displayed in an interactive session (e.g. a notebook cell);
# when run as a plain script the value is discarded.
df.describe()
# describe.py hosted with ❤ by GitHub (view raw)
# Print the dtype of every column, rendered in bold, to spot columns that
# were not parsed with the expected type.
print(cl(df.dtypes, attrs=['bold']))
# dtypes.py hosted with ❤ by GitHub (view raw)
# Convert 'MasVnrArea' to a 64-bit integer column.
# errors='coerce' replaces every value that cannot be parsed as a number with
# NaN, and NaN cannot be cast to int64 (astype would raise). Rows whose value
# failed to parse are therefore dropped before the cast, matching how missing
# values are handled by the earlier df.dropna() call.
df['MasVnrArea'] = pd.to_numeric(df['MasVnrArea'], errors='coerce')
df.dropna(subset=['MasVnrArea'], inplace=True)
df['MasVnrArea'] = df['MasVnrArea'].astype('int64')
# Print the column dtypes again (in bold) to verify the conversion.
print(cl(df.dtypes, attrs=['bold']))
# dtype_convert.py hosted with ❤ by GitHub (view raw)
# Annotated correlation heatmap of the numeric features.
# Since pandas 2.0, DataFrame.corr() no longer skips non-numeric columns by
# default and raises on them instead, so restrict the frame to numeric/bool
# columns explicitly. For an all-numeric frame the result is unchanged.
sb.heatmap(
    df.select_dtypes(include=['number', 'bool']).corr(),
    annot=True,
    cmap='magma',
)
# Write the figure to disk before show(): once the window has been shown and
# closed, the current figure may no longer hold the plot.
plt.savefig('heatmap.png')
plt.show()
# heatmap.py hosted with ❤ by GitHub (view raw)