Machine Learning - Code - Jupyter
Machine Learning - Code - Jupyter
In [144]:
In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import sklearn
In [3]:
# Location of the training CSV.
# NOTE(review): this is an absolute path into one user's Desktop, so the
# notebook cannot be re-run on another machine without editing it; consider a
# path relative to a configurable data directory.
train_path='/Users/stefanoskarageorgiou/Desktop/train1.csv'
# Read the CSV into the DataFrame used by the rest of the notebook.
train=pd.read_csv(train_path)
In [4]:
train.head()
Out[4]:
s × 81 columns
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 1/14
29/04/2018 PROJECT_B_FINAL
In [5]:
train.describe()
Out[5]:
8 rows × 38 columns
In [6]:
# Count missing values per column and show the 20 columns with the most.
missing_per_column = train.isnull().sum()
missing_per_column.sort_values(ascending=False).head(20)
Out[6]:
PoolQC 1453
MiscFeature 1406
Alley 1369
Fence 1179
FireplaceQu 690
LotFrontage 259
GarageCond 81
GarageType 81
GarageYrBlt 81
GarageFinish 81
GarageQual 81
BsmtExposure 38
BsmtFinType2 38
BsmtFinType1 37
BsmtCond 37
BsmtQual 37
MasVnrArea 8
MasVnrType 8
Electrical 1
Utilities 0
dtype: int64
In [7]:
# Impute missing LotFrontage values with the column mean.
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on `train['LotFrontage']`: the in-place form acts
# on an intermediate Series, emits a warning on pandas 2.x and does not update
# `train` at all once copy-on-write is enabled.
train['LotFrontage'] = train['LotFrontage'].fillna(train['LotFrontage'].mean())
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 2/14
29/04/2018 PROJECT_B_FINAL
In [8]:
# Replace missing Alley entries with the placeholder string "NA".
train["Alley"] = train["Alley"].fillna("NA")
In [9]:
# Impute missing MasVnrArea values with the column mean.
# Assigned back explicitly: fillna(..., inplace=True) on a selected column
# works on an intermediate Series, warns on pandas 2.x and leaves `train`
# unchanged under copy-on-write.
train['MasVnrArea'] = train['MasVnrArea'].fillna(train['MasVnrArea'].mean())
In [10]:
# Replace missing MasVnrType entries with the placeholder string "NA".
train["MasVnrType"] = train["MasVnrType"].fillna("NA")
In [11]:
# Replace missing BsmtQual entries with the placeholder string "NA".
train["BsmtQual"] = train["BsmtQual"].fillna("NA")
In [12]:
# Replace missing BsmtCond entries with the placeholder string "NA".
train["BsmtCond"] = train["BsmtCond"].fillna("NA")
In [13]:
# Replace missing BsmtExposure entries with the placeholder string "NA".
train["BsmtExposure"] = train["BsmtExposure"].fillna("NA")
In [14]:
# Replace missing BsmtFinType1 entries with the placeholder string "NA".
train["BsmtFinType1"] = train["BsmtFinType1"].fillna("NA")
In [15]:
# Replace missing BsmtFinType2 entries with the placeholder string "NA".
train["BsmtFinType2"] = train["BsmtFinType2"].fillna("NA")
In [16]:
# Replace the missing Electrical entry with the placeholder string "NA"
# (the null summary above reports a single missing value for this column).
train["Electrical"] = train["Electrical"].fillna("NA")
In [17]:
# Replace missing FireplaceQu entries with the placeholder string "NA".
train["FireplaceQu"] = train["FireplaceQu"].fillna("NA")
In [18]:
# Replace missing GarageType entries with the placeholder string "NA".
train["GarageType"] = train["GarageType"].fillna("NA")
In [19]:
# Where GarageYrBlt is missing, take the value of YearBuilt from the same row
# (fillna with a Series aligns on the index).
year_built = train["YearBuilt"]
train["GarageYrBlt"] = train["GarageYrBlt"].fillna(year_built)
In [20]:
# Replace missing GarageFinish entries with the placeholder string 'NA'.
train["GarageFinish"] = train["GarageFinish"].fillna('NA')
In [21]:
# Replace missing GarageQual entries with the placeholder string 'NA'.
train["GarageQual"] = train["GarageQual"].fillna('NA')
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 3/14
29/04/2018 PROJECT_B_FINAL
In [22]:
# Replace missing GarageCond entries with the placeholder string 'NA'.
train["GarageCond"] = train["GarageCond"].fillna('NA')
In [23]:
# Replace missing PoolQC entries with the placeholder string 'NA'.
train["PoolQC"] = train["PoolQC"].fillna('NA')
In [24]:
# Replace missing Fence entries with the placeholder string 'NA'.
train["Fence"] = train["Fence"].fillna('NA')
In [25]:
# Replace missing MiscFeature entries with the placeholder string 'NA'.
train["MiscFeature"] = train["MiscFeature"].fillna('NA')
In [26]:
# Re-check missing values per column after the fills; show the top 20 counts.
remaining_missing = train.isnull().sum()
remaining_missing.sort_values(ascending=False).head(20)
Out[26]:
SalePrice 0
Heating 0
RoofStyle 0
RoofMatl 0
Exterior1st 0
Exterior2nd 0
MasVnrType 0
MasVnrArea 0
ExterQual 0
ExterCond 0
Foundation 0
BsmtQual 0
BsmtCond 0
BsmtExposure 0
BsmtFinType1 0
BsmtFinSF1 0
BsmtFinType2 0
BsmtFinSF2 0
BsmtUnfSF 0
YearRemodAdd 0
dtype: int64
In [27]:
# Regression target: the SalePrice column.
y = train["SalePrice"]
In [28]:
# Feature frame: every column of `train` except the target.
X = train.drop(labels=['SalePrice'], axis='columns')
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 4/14
29/04/2018 PROJECT_B_FINAL
In [29]:
In [30]:
# Encode the categorical quality/condition columns of X through the lookup
# objects mapping1..mapping4 (defined outside this cell). Each column is
# mapped exactly once with the same lookup as before; the columns are
# independent, so grouping them by lookup does not change the result.
ordinal_encodings = [
    (mapping1, ['ExterQual', 'ExterCond', 'BsmtQual', 'BsmtCond', 'HeatingQC',
                'PoolQC', 'GarageCond', 'GarageQual', 'FireplaceQu',
                'KitchenQual']),
    (mapping2, ['BsmtExposure']),
    (mapping3, ['BsmtFinType1', 'BsmtFinType2']),
    (mapping4, ['GarageFinish']),
]
for lookup, column_names in ordinal_encodings:
    for column_name in column_names:
        X[column_name] = X[column_name].map(lookup)
In [149]:
# Line plot of SalePrice against the DataFrame index, to eyeball the spread
# of the target. Axis labels and a title are set so that the figure can be
# read on its own.
plt.plot(train['SalePrice'])
plt.xlabel('Row index')
plt.ylabel('SalePrice')
plt.title('SalePrice by row')
plt.show()
In [31]:
X.PoolQC.unique()
Out[31]:
array([0, 5, 2, 4])
In [32]:
X.shape
Out[32]:
(1460, 80)
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 5/14
29/04/2018 PROJECT_B_FINAL
In [33]:
# One-hot encode the remaining non-numeric columns of X
# (the shapes shown around this cell go from 80 to 246 columns).
X = pd.get_dummies(data=X)
In [34]:
X.shape
Out[34]:
(1460, 246)
In [35]:
In [41]:
# Fit a linear regression on the complete feature frame and target.
# LinearRegression must already be in scope (its import is not in this cell).
# `lr` is rebound by a later cell that fits on the training split.
lr = LinearRegression()
lr.fit(X=X, y=y)
Out[41]:
In [45]:
import math
In [46]:
In [47]:
Out[47]:
In [48]:
# Re-create the model and fit it on the training split only
# (X_tr / y_tr are produced outside this cell).
lr = LinearRegression()
lr.fit(X=X_tr, y=y_tr)
Out[48]:
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 6/14
29/04/2018 PROJECT_B_FINAL
In [49]:
# Predict on the held-out features and score against the held-out target
# with rmsle (defined outside this cell).
y_pred = lr.predict(X_ts)
# `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
# removed in 1.0; both return the underlying NumPy array.
# Assumes y_ts is a pandas Series -- confirm against the split cell.
y_true = y_ts.values
res = rmsle(y_true, y_pred)
In [50]:
res
Out[50]:
0.23831635704788817
In [134]:
# Draw the correlation matrix `corrmat` (computed outside this cell) as a
# square-celled heatmap on a 12x9 figure, with the colour scale capped at 0.8.
fig, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmat, vmax=.8, square=True, ax=ax);
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 7/14
29/04/2018 PROJECT_B_FINAL
In [141]:
Out[141]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21a7b890>
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 8/14
29/04/2018 PROJECT_B_FINAL
In [145]:
# Histogram of OverallQual with a fitted normal curve, followed by a
# probability plot of SalePrice on a new figure.
# NOTE(review): the two plots look at different columns (OverallQual vs
# SalePrice) -- confirm that this is intended.
overall_quality = train['OverallQual']
sns.distplot(overall_quality, fit=stats.norm);
plt.figure()
stats.probplot(train['SalePrice'], plot=plt)
Out[145]:
In [51]:
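# Improvement 1 (presumably: drop the features that are only weakly correlated with SalePrice, then refit -- confirm)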
In [ ]:
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 9/14
29/04/2018 PROJECT_B_FINAL
In [119]:
# Pairwise correlations between the numeric columns of `train`.
# `train` still contains string columns (filled with "NA" earlier), and
# DataFrame.corr() raises on non-numeric data from pandas 2.0 onwards. Older
# versions dropped those columns silently; selecting numeric/bool columns
# explicitly gives the same matrix on every pandas version.
corr = train.select_dtypes(include=['number', 'bool']).corr()
(SalePrice 1.000000
OverallQual 0.790982
GrLivArea 0.708624
GarageCars 0.640409
GarageArea 0.623431
TotalBsmtSF 0.613581
1stFlrSF 0.605852
FullBath 0.560664
TotRmsAbvGrd 0.533723
YearBuilt 0.522897
GarageYrBlt 0.508043
YearRemodAdd 0.507101
MasVnrArea 0.475241
Fireplaces 0.466929
BsmtFinSF1 0.386420
LotFrontage 0.334901
WoodDeckSF 0.324413
2ndFlrSF 0.319334
OpenPorchSF 0.315856
HalfBath 0.284108
LotArea 0.263843
BsmtFullBath 0.227122
BsmtUnfSF 0.214479
BedroomAbvGr 0.168213
ScreenPorch 0.111447
Name: SalePrice, dtype: float64, '\n')
Fireplaces 0.466929
BsmtFinSF1 0.386420
LotFrontage 0.334901
WoodDeckSF 0.324413
2ndFlrSF 0.319334
OpenPorchSF 0.315856
HalfBath 0.284108
LotArea 0.263843
BsmtFullBath 0.227122
BsmtUnfSF 0.214479
BedroomAbvGr 0.168213
ScreenPorch 0.111447
PoolArea 0.092404
MoSold 0.046432
3SsnPorch 0.044584
BsmtFinSF2 -0.011378
BsmtHalfBath -0.016844
MiscVal -0.021190
Id -0.021917
LowQualFinSF -0.025606
YrSold -0.028923
OverallCond -0.077856
MSSubClass -0.084284
EnclosedPorch -0.128578
KitchenAbvGr -0.135907
Name: SalePrice, dtype: float64
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 10/14
29/04/2018 PROJECT_B_FINAL
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [120]:
X_1=train.drop(['SalePrice','MiscVal','BsmtFinSF2','BsmtHalfBath','Id','LowQualFinSF
In [121]:
In [122]:
Out[122]:
In [123]:
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 11/14
29/04/2018 PROJECT_B_FINAL
In [124]:
# Encode the categorical quality/condition columns of X_1 through the lookup
# objects mapping1..mapping4 (defined outside this cell). Each column is
# mapped exactly once with the same lookup as before; the columns are
# independent, so grouping them by lookup does not change the result.
ordinal_encodings = [
    (mapping1, ['ExterQual', 'ExterCond', 'BsmtQual', 'BsmtCond', 'HeatingQC',
                'PoolQC', 'GarageCond', 'GarageQual', 'FireplaceQu',
                'KitchenQual']),
    (mapping2, ['BsmtExposure']),
    (mapping3, ['BsmtFinType1', 'BsmtFinType2']),
    (mapping4, ['GarageFinish']),
]
for lookup, column_names in ordinal_encodings:
    for column_name in column_names:
        X_1[column_name] = X_1[column_name].map(lookup)
In [125]:
In [126]:
# Fit a fresh linear regression on the training split of the reduced feature
# set (X_1_tr / y_tr are produced outside this cell).
lr = LinearRegression()
lr.fit(X=X_1_tr, y=y_tr)
Out[126]:
In [127]:
# Predict on the held-out reduced feature set and score with rmsle
# (defined outside this cell).
y_pred = lr.predict(X_1_ts)
# `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
# removed in 1.0; both return the underlying NumPy array.
# Assumes y_ts is a pandas Series -- confirm against the split cell.
y_true = y_ts.values
res = rmsle(y_true, y_pred)
In [128]:
res
Out[128]:
0.20235051475055066
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 12/14
29/04/2018 PROJECT_B_FINAL
In [150]:
# Scatter the predictions (x) against the held-out prices (y).
# NOTE(review): `y_pred` is whatever the most recently executed prediction
# cell produced. This cell's execution count (150) is later than the
# `lass.predict` cell (132), so confirm that the plotted values really come
# from the model named in the title.
actual_values = y_ts
ax = plt.gca()
ax.scatter(y_pred, actual_values, color='b', alpha=0.75)
ax.set_xlabel('Predicted Price')
ax.set_ylabel('Actual Price')
ax.set_title('Linear Regression Model')
plt.show()
In [87]:
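# Improvement 2 (presumably: a regularised linear model fitted as `lass`, per the coordinate-descent warning below -- confirm)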
In [129]:
In [131]:
/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/coordinate
_descent.py:491: ConvergenceWarning: Objective did not converge. You m
ight want to increase the number of iterations. Fitting data with very
small alpha may cause precision problems.
ConvergenceWarning)
Out[131]:
In [132]:
# Predict with the `lass` model (fitted outside this cell) on the held-out
# reduced feature set and score with rmsle (defined outside this cell).
y_pred = lass.predict(X_1_ts)
# `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
# removed in 1.0; both return the underlying NumPy array.
# Assumes y_ts is a pandas Series -- confirm against the split cell.
y_true = y_ts.values
res = rmsle(y_true, y_pred)
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 13/14
29/04/2018 PROJECT_B_FINAL
In [133]:
res
Out[133]:
0.18142288750842797
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
http://localhost:8888/notebooks/PROJECT_B_FINAL.ipynb# 14/14