178 - Regulinear - Ipynb - Colab
178 - Regulinear - Ipynb - Colab
ipynb - Colab
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
# Load the Melbourne housing dataset from a CSV file (relative path) into a
# DataFrame, then preview its first rows.
df = pd.read_csv('Melbourne_housing_FULL.csv')
df.head()
  Suburb     Address            Rooms Type Price     Method SellerG Date      Distance Postcode ... Bathroom Car Landsize Building
0 Abbotsford 68 Studley St      2     h    NaN       SS     Jellis  3/09/2016 2.5      3067.0   ... 1.0      1.0 126.0
1 Abbotsford 85 Turner St       2     h    1480000.0 S      Biggin  3/12/2016 2.5      3067.0   ... 1.0      1.0 202.0
2 Abbotsford 25 Bloomburg St    2     h    1035000.0 S      Biggin  4/02/2016 2.5      3067.0   ... 1.0      0.0 156.0
3 Abbotsford 18/659 Victoria St 3     u    NaN       VB     Rounds  4/02/2016 2.5      3067.0   ... 2.0      1.0 0.0
4 Abbotsford 5 Charles St       3     h    1465000.0 SP     Biggin  4/03/2017 2.5      3067.0   ... 2.0      0.0 134.0    1
5 rows × 21 columns
# Inspect the frame's dimensions, then count the distinct values in each column
# (the per-column counts are listed in the output that follows).
df.shape
df.nunique()
Suburb 351
Address 34009
Rooms 12
Type 3
Price 2871
Method 9
SellerG 388
Date 78
Distance 215
Postcode 211
Bedroom2 15
Bathroom 11
Car 15
Landsize 1684
BuildingArea 740
YearBuilt 160
CouncilArea 33
Lattitude 13402
Longtitude 14524
Regionname 8
Propertycount 342
dtype: int64
# Columns kept for modelling; 'Price' is the regression target.
# The tail of this list was cut off in the exported notebook (the line ended
# inside the 'Bathroom' string literal). The remaining names are restored from
# the column order shown by the data.isna().sum() output printed further down.
cols = [
    'Suburb', 'Rooms', 'Type', 'Method', 'SellerG', 'Regionname',
    'Propertycount', 'Distance', 'CouncilArea', 'Bedroom2', 'Bathroom',
    'Car', 'Landsize', 'BuildingArea', 'Price',
]
# Restrict the frame to the selected columns and preview the first rows.
data = df[cols]
data.head()
https://colab.research.google.com/drive/1Yf66McVrD7X9X-dy_Zl98knJxFYjDRNJ#scrollTo=8xyMNFki0r-i&printMode=true 1/3
9/23/24, 5:52 PM 178_Regulinear.ipynb - Colab
Suburb Rooms Type Method SellerG Regionname Propertycount Distance CouncilArea Bedroom2 Bathroom Car Landsize Buil
Suburb 0
Rooms 0
Type 0
Method 0
SellerG 0
Regionname 3
Propertycount 3
Distance 1
CouncilArea 3
Bedroom2 8217
Bathroom 8226
Car 8728
Landsize 0
BuildingArea 0
Price 7610
dtype: int64
# Drop every row that still contains a missing value, then re-count the NaNs
# per column to confirm none are left.
data = data.dropna()
data.isna().sum()
https://colab.research.google.com/drive/1Yf66McVrD7X9X-dy_Zl98knJxFYjDRNJ#scrollTo=8xyMNFki0r-i&printMode=true 2/3
9/23/24, 5:52 PM 178_Regulinear.ipynb - Colab
# Output of the preceding data.isna().sum() call (no missing values remain).
# In the exported page this listing was interleaved with the cells below; it is
# reassembled here.
# Suburb           0
# Rooms            0
# Type             0
# Method           0
# SellerG          0
# Regionname       0
# Propertycount    0
# Distance         0
# CouncilArea      0
# Bedroom2         0
# Bathroom         0
# Car              0
# Landsize         0
# BuildingArea     0
# Price            0
# dtype: int64

# One-hot encode the categorical columns. drop_first=True removes the first
# level of each category so k levels become k-1 indicator columns.
data = pd.get_dummies(data, drop_first=True)
data.head()
#    Rooms  Propertycount  Distance  Bedroom2  Bathroom  Car  Landsize  BuildingArea      Price  Suburb_Aberfeldie  ...  CouncilArea_Mo[...] Shire (name truncated in export)
# 1      2         4019.0       2.5       2.0       1.0  1.0     202.0      160.2564  1480000.0              False  ...
# 2      2         4019.0       2.5       2.0       1.0  0.0     156.0       79.0000  1035000.0              False  ...
# 4      3         4019.0       2.5       3.0       2.0  0.0     134.0      150.0000  1465000.0              False  ...
# 5      3         4019.0       2.5       3.0       2.0  1.0      94.0      160.2564   850000.0              False  ...
# 6      4         4019.0       2.5       3.0       1.0  2.0     120.0      142.0000  1600000.0              False  ...
# 5 rows × 712 columns

# Separate the features from the target and hold out 30% of the rows for
# testing; random_state pins the shuffle so the split is reproducible.
X = data.drop('Price', axis=1)
Y = data['Price']
train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size=0.3, random_state=2)

# Plain least-squares baseline. score() returns R^2; the recorded test score is
# hugely negative while the training score is about 0.70, i.e. the model does
# not generalise to the held-out rows.
reg = LinearRegression().fit(train_x, train_y)
reg.score(test_x, test_y)
# -89343812250.58357
reg.score(train_x, train_y)
# 0.7007327631278367

# NOTE(review): the cell that created `lasso` is missing from this export, so
# the original line below it raised NameError when run on its own. It is
# reconstructed here as a Lasso (L1-regularised) fit through the
# already-imported `linear_model`. The original hyperparameters are not visible;
# library defaults are used as a placeholder, so the recorded score underneath
# may not be reproduced exactly -- TODO confirm against the original notebook.
lasso = linear_model.Lasso().fit(train_x, train_y)
lasso.score(test_x, test_y)
# 0.6815721613465073
https://colab.research.google.com/drive/1Yf66McVrD7X9X-dy_Zl98knJxFYjDRNJ#scrollTo=8xyMNFki0r-i&printMode=true 3/3