0% found this document useful (0 votes)

63 views7 pages

New Opendocument Text

Uploaded by

api-635142331

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as ODT, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

63 views7 pages

New Opendocument Text

Uploaded by

api-635142331

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as ODT, PDF, TXT or read online on Scribd

You are on page 1/ 7

# -*- coding: utf-8 -*-

"""383project.ipynb

Automatically generated by Colaboratory.

Original file is located at

https://colab.research.google.com/drive/1Qu-fQnX2V4vOJyVDnFJcBTT8aokepIfW

# Home Price Predictions

Authors: Mike Divine, Kenneth Ao, Russell Frost
---

Our goal is to build a system to predict home prices in Placer County based on the home sales data in
the same County in 2023.
"""

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.ticker as mticker
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeClassifier, export_graphviz, DecisionTreeRegressor
import graphviz

#dataset

# The CSV lives on Google Drive. The file id is the second-to-last path
# component of the share link; it is appended to the direct-download endpoint
# so pandas can read the file straight from the URL.
url = 'https://drive.google.com/file/d/1Y8OEKy6qnbGYbldcojWVLo1r4WIZHqx7/view'
path = 'https://drive.google.com/uc?export=download&id=' + url.split('/')[-2]
df = pd.read_csv(path)

#predict 'cprice' == close price

#predictors that may have correlation: 'bedrooms', 'fbathrooms', 'pbathrooms', 'lsacres', 'lssqft', 'sqft',
# 'ybd', 'efeatures', 'fireplaces', 'ffireplace', 'garage', 'parking', 'levels', 'pool', 'fpool', 'patio'

# Column being predicted and the feature columns used by the first model.
target = 'cprice'
predictors = ['bedrooms', 'sqft', 'fbathrooms', 'pbathrooms', 'ybd']
closing_price = df[target]

# Turn each bedroom entry into an integer: stringify the value and keep only
# its first character.
# NOTE(review): a value of 10 or more would be truncated to its leading digit
# - confirm the data never contains one.
df['bedrooms'] = df['bedrooms'].astype(str).map(lambda text: int(text[0]))

# 'ybd' contains a few non-numeric placeholders. Substitute '2023' for each of
# them, then cast the whole column to float.
df['ybd'] = (
    df['ybd']
    .replace(['Under Construction', 'New', 'Unknown'], '2023')
    .astype(float)
)

# function to format prices

def price_formatter(x, pos):
    """Format a tick value as a whole-dollar string, e.g. 1234567 -> '$1,234,567'.

    Takes the ``(value, position)`` argument pair that this script passes
    through ``mticker.FuncFormatter``; ``pos`` is accepted but not used.
    """
    return '${:,.0f}'.format(x)

# Baseline model: linear regression on the standardized predictors.
X = df[predictors].values
y = df[target].values

# Hold out 20% of the rows for evaluation; the seed is fixed so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and reuse those statistics for the
# test rows. Refitting on the test set would standardize it with a different
# mean/variance than the data the model was trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

regr = LinearRegression()
regr.fit(X_train, y_train)

predictions = regr.predict(X_test)

# Quick side-by-side look at the first ten predicted and actual prices.
print(np.around(predictions[:10]))
print(np.around(y_test[:10]))

mse = mean_squared_error(y_test, predictions)

rmse = np.sqrt(mse)
print("Root Mean Squared Error:", round(rmse,2))

# Scatter of predicted (x) against actual (y) closing prices for the test rows.
plt.scatter(predictions, y_test, color='blue', alpha=0.5)

ax = plt.gca()
ax.set_xlabel('Predicted Closing Price')
ax.set_ylabel('Actual Closing Price')
ax.set_title('Actual vs Predicted Closing Price')

# Dollar-formatted tick labels on both axes; x labels are tilted for legibility.
formatter = mticker.FuncFormatter(price_formatter)
ax.yaxis.set_major_formatter(formatter)
ax.xaxis.set_major_formatter(formatter)
ax.tick_params(axis='x', labelrotation=45)

# One major tick every 200,000 on each axis.
ax.xaxis.set_major_locator(mticker.MultipleLocator(200000))
ax.yaxis.set_major_locator(mticker.MultipleLocator(200000))

# Show only the 0 to 2,000,000 window on both axes.
ax.set_xlim(0, 2000000)
ax.set_ylim(0, 2000000)
plt.show()

"""# Exploring The Data With Visualizations

---

"""

# Listing price (y) against closing price (x), with a fitted straight-line trend.
closing_price = df['cprice']
listed_price = df['lprice']

# The single feature is reshaped to one column because the model is fitted on
# a 2-D array.
model = LinearRegression()
model.fit(closing_price.values.reshape(-1,1), listed_price)
predictions_cprice_vs_lprice = model.predict(closing_price.values.reshape(-1,1))

plt.scatter(closing_price, listed_price, color='blue', alpha = 0.5)

plt.title('Closing Price vs Listed Price')
plt.xlabel('Closing Price')
plt.ylabel('Listing Price')
plt.plot(closing_price, predictions_cprice_vs_lprice, color='red', label='Trend')

# Both axes carry prices, so both get dollar-formatted tick labels.
formatter = mticker.FuncFormatter(price_formatter)
plt.gca().yaxis.set_major_formatter(formatter)
plt.gca().xaxis.set_major_formatter(formatter)
plt.tick_params(axis='x', labelrotation=45)

plt.legend()

# Finish this figure here. Without an explicit show, running the file as a
# plain script keeps drawing the following plots onto these same axes.
plt.show()

# Closing price (y) against bedroom count (x), with a straight-line trend.
closing_price = df['cprice']
number_bedrooms = df['bedrooms']

# The model predicts bedroom count from closing price; the predicted counts
# are then used as the x-coordinates of the trend line below.
model = LinearRegression()
model.fit(closing_price.values.reshape(-1,1), number_bedrooms)
predictions_cprice_vs_bed = model.predict(closing_price.values.reshape(-1,1))

plt.scatter(number_bedrooms, closing_price, color='blue', alpha = 0.5)

plt.title('Closing Price vs Number of Bedrooms')
plt.ylabel('Closing Price')
plt.xlabel('Bedrooms')
plt.xticks(range(10))

# Only the y axis carries prices here.
formatter = mticker.FuncFormatter(price_formatter)
plt.gca().yaxis.set_major_formatter(formatter)

plt.plot(predictions_cprice_vs_bed, closing_price, color='red', label='Trend')

plt.legend()

# Finish this figure here. Without an explicit show, running the file as a
# plain script keeps drawing the following plots onto these same axes.
plt.show()

# Lot size in acres (y) against closing price (x), with a straight-line trend.
closing_price = df['cprice']
lot_size_acer = df['lsacres']

# Fit lot size as a linear function of closing price.
model = LinearRegression()
model.fit(closing_price.values.reshape(-1,1), lot_size_acer)
predictions_cprice_vs_acer = model.predict(closing_price.values.reshape(-1,1))

plt.scatter(closing_price, lot_size_acer, color='blue', alpha = 0.5)

plt.title('Closing Price vs Lot Size(acer)')
plt.xlabel('Closing Price')
plt.ylabel('Lot Size(acer)')
plt.plot(closing_price, predictions_cprice_vs_acer, color='red', label='Trend')
# y ticks every 10 acres from 0 to 200.
tick_marks = np.arange(0,200+1,step=10)
plt.yticks(tick_marks)

# Only the x axis carries prices here.
formatter = mticker.FuncFormatter(price_formatter)
plt.gca().xaxis.set_major_formatter(formatter)
plt.tick_params(axis='x', labelrotation=45)

plt.legend()

# Finish this figure here. Without an explicit show, running the file as a
# plain script keeps drawing the following plots onto these same axes.
plt.show()

# Same data with the axes swapped: closing price (y) against lot size (x),
# limited to lots of 0-5 acres and prices up to 2,000,000.
closing_price = df['cprice']
lot_size_acer = df['lsacres']

# The model predicts lot size from closing price; the predicted lot sizes are
# then used as the x-coordinates of the trend line below.
model = LinearRegression()
model.fit(closing_price.values.reshape(-1,1), lot_size_acer)
predictions_cprice_vs_acer = model.predict(closing_price.values.reshape(-1,1))

plt.scatter(lot_size_acer, closing_price, color='blue', alpha = 0.25)

plt.title('Closing Price vs Lot Size(acer)')
plt.ylabel('Closing Price')
plt.xlabel('Lot Size(acer)')
plt.plot(predictions_cprice_vs_acer, closing_price, color='red', label='Trend')
plt.xlim(0, 5)
plt.ylim(0, 2000000)
# x ticks at every whole acre from 0 to 5.
tick_marks = np.arange(0,5+1,step=1)
plt.xticks(tick_marks)

# Only the y axis carries prices here.
formatter = mticker.FuncFormatter(price_formatter)
plt.gca().yaxis.set_major_formatter(formatter)

plt.legend()

# Finish this figure here. Without an explicit show, running the file as a
# plain script keeps drawing the following plots onto these same axes.
plt.show()

"""# Testing Different Models

DecisionTreeRegressor
"""

# Depth-3 regression tree trained on the three room-count columns.
predictors = ['bedrooms', 'fbathrooms', 'pbathrooms']

X = df[predictors].values
y = df['cprice'].values

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

reg = DecisionTreeRegressor(max_depth=3, random_state=42)
reg.fit(X_train, y_train)

# Score the tree on the held-out rows and report the RMSE.
prediction = reg.predict(X_test)
mse = mean_squared_error(prediction, y_test)
rmse = np.sqrt(mse)
print(rmse)

# Export the fitted tree as Graphviz source and wrap it in a Source object.
dot_data = export_graphviz(
    reg,
    feature_names=predictors,
    precision=2,
    proportion=True,
    filled=True,
    rounded=True,
)
graph = graphviz.Source(dot_data)
graph

"""KNN Regression"""

# k-nearest-neighbours regression on the three room-count columns.
predictors = ['bedrooms', 'fbathrooms', 'pbathrooms']

X = df[predictors].values
y = df['cprice'].values

X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Reuses the module-level `scaler` created earlier in the script. It is refit
# on this training split and the same statistics are applied to the test split.
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Try every odd k from 1 to 29 and report the test RMSE for each.
ks = np.arange(1, 30, 2)
for k in ks:
    knn = KNeighborsRegressor(n_neighbors=k)
    knn.fit(X_train, y_train)
    predictions = knn.predict(X_test)

    root_mse = np.sqrt(mean_squared_error(predictions, y_test))

    print('Test RMSE when k = {}: {:.2f}'.format(k, root_mse))

"""PolynomialFeatures

"""

# Expand the current feature matrix X with all polynomial terms up to degree 3
# (no constant column), then compare linear regression against KNN on the
# expanded features.
pf = PolynomialFeatures(degree=3, include_bias=False)
pf.fit(X)
X_poly = pf.transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)

polyLR = LinearRegression()
polyKNNR = KNeighborsRegressor()

polyLR.fit(X_train, y_train)
polyKNNR.fit(X_train, y_train)

polyLR_predictions = polyLR.predict(X_test)
polyKNNR_predictions = polyKNNR.predict(X_test)

polyLR_rmse = np.sqrt(mean_squared_error(polyLR_predictions, y_test))

polyKNNR_rmse = np.sqrt(mean_squared_error(polyKNNR_predictions, y_test))

print('Test polyLR RMSE: {:.2f}'.format(polyLR_rmse))

print('Test polyKNNR RMSE: {:.2f}'.format(polyKNNR_rmse))

# Actual (x) vs predicted (y) for the linear model. The dashed diagonal marks
# where a perfect prediction would fall.
sns.scatterplot(x=y_test, y=polyLR_predictions)
min_value = min(np.min(y_test), np.min(polyLR_predictions))
max_value = max(np.max(y_test), np.max(polyLR_predictions))
plt.plot([min_value, max_value], [min_value, max_value], linewidth=2, linestyle='dashed',
         color='black')
plt.show()

# Same view for the KNN model.
sns.scatterplot(x=y_test, y=polyKNNR_predictions)
min_value = min(np.min(y_test), np.min(polyKNNR_predictions))
max_value = max(np.max(y_test), np.max(polyKNNR_predictions))
plt.plot([min_value, max_value], [min_value, max_value], linewidth=2, linestyle='dashed',
         color='black')
# Show this figure too, matching the first plot of this cell, so a script run
# does not leave it pending on the current axes.
plt.show()

"""Forward Selection"""

#using only float columns, remove lprice dom oprice because they are not property attributes
drop_columns = [
    'cprice', 'lnumber', 'ldate', 'omd', 'pdate', 'cdate',
    'street', 'city', 'zip', 'cooling', 'heating', 'efeatures', 'ffireplace',
    'parking', 'levels', 'pool', 'fpool', 'patio', 'style', 'subdivision',
    'premarks', 'lprice', 'dom', 'oprice',
]
predictors = df.drop(columns=drop_columns).columns

X = df[predictors]
y = df['cprice'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and reuse those statistics for the
# test rows, so both splits are standardized with the same mean/variance.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Greedy forward selection. `remaining` and `selected` hold column indices
# into the scaled arrays; each round moves the best candidate across.
remaining = list(range(X_train.shape[1]))
selected = []
curr_features = []
rmse_min = -1
i_min = -1

#n == the number of features to look for

n = 5
# Stop early if the candidates run out before n features have been chosen.
while len(selected) < n and remaining:
    # find the single feature that works best in conjunction
    # with the already selected features
    rmse_min = np.inf
    i_min = -1
    for i in remaining:
        regr = LinearRegression()
        curr_features = selected + [i]

        #get train and test using only current column + selected columns
        X_curr_train = X_train[:, curr_features]
        X_curr_test = X_test[:, curr_features]

        regr.fit(X_curr_train, y_train)
        X_curr_prediction = regr.predict(X_curr_test)

        # NOTE(review): candidates are scored on the test split, so the
        # reported RMSE is not an unbiased estimate - consider a separate
        # validation split.
        rmse = np.sqrt(mean_squared_error(y_test, X_curr_prediction))

        if rmse < rmse_min:
            rmse_min = rmse
            i_min = i

    # No candidate produced a comparable RMSE; nothing sensible to add.
    if i_min < 0:
        break

    remaining.remove(i_min)
    selected.append(i_min)
    print('best feature {}: {}'.format(len(selected), X.columns[i_min]))
    print('num features: {}; rmse: {:.2f}'.format(len(selected), rmse_min))

Operating Manual - AGM-13H
No ratings yet
Operating Manual - AGM-13H
181 pages
PGIM Setup Admin
No ratings yet
PGIM Setup Admin
324 pages
Part 3 of Negida Handbook of Clinical Research
No ratings yet
Part 3 of Negida Handbook of Clinical Research
147 pages
Case-Study-Dos - 19070123
No ratings yet
Case-Study-Dos - 19070123
13 pages
Fire Alarm System - Notifier PDF
No ratings yet
Fire Alarm System - Notifier PDF
19 pages
Turbo HD DVR V3.4.70 Build160708 Release Notes - External
No ratings yet
Turbo HD DVR V3.4.70 Build160708 Release Notes - External
7 pages
ML Manual
No ratings yet
ML Manual
30 pages
ML Project Part A 1
No ratings yet
ML Project Part A 1
6 pages
Big Data Analytics - Applications, Challenges & Future Directions
No ratings yet
Big Data Analytics - Applications, Challenges & Future Directions
6 pages
MIS - Deliberate Software Attack
No ratings yet
MIS - Deliberate Software Attack
5 pages
Classical IPC Problems Reader's and Writer Problem
No ratings yet
Classical IPC Problems Reader's and Writer Problem
79 pages
More About Spreadsheet Errors and Fixes
100% (1)
More About Spreadsheet Errors and Fixes
3 pages
1 - Lab Manual (ML)
No ratings yet
1 - Lab Manual (ML)
42 pages
Coding Question
No ratings yet
Coding Question
6 pages
Real Estate Price Prediction Model
No ratings yet
Real Estate Price Prediction Model
3 pages
Circuits and Systems For Efficient Portable-to-Portable Wireless Charging
No ratings yet
Circuits and Systems For Efficient Portable-to-Portable Wireless Charging
125 pages
ML Practical 04
No ratings yet
ML Practical 04
19 pages
USA Real Estate Price Prediction Using Decision Tree Regressor, and AdaBoost Regressor
No ratings yet
USA Real Estate Price Prediction Using Decision Tree Regressor, and AdaBoost Regressor
14 pages
Scala Unit 1
No ratings yet
Scala Unit 1
60 pages
Paper HM
No ratings yet
Paper HM
50 pages
Lunch Box Switch - Seven Segment Display (CC and CA) : Lab Activity - 7
No ratings yet
Lunch Box Switch - Seven Segment Display (CC and CA) : Lab Activity - 7
7 pages
Report
No ratings yet
Report
40 pages
ML Record
No ratings yet
ML Record
21 pages
Set 2
No ratings yet
Set 2
19 pages
ML Record
No ratings yet
ML Record
19 pages
Document From Jahnavi
No ratings yet
Document From Jahnavi
20 pages
MC Co Implementation Guide
No ratings yet
MC Co Implementation Guide
53 pages
Numeric
No ratings yet
Numeric
20 pages
Deber
No ratings yet
Deber
23 pages
House Pricing
No ratings yet
House Pricing
15 pages
Ash Regression
No ratings yet
Ash Regression
11 pages
Project
No ratings yet
Project
10 pages
IoT Task4 21BEC0384
No ratings yet
IoT Task4 21BEC0384
9 pages
2024 Instorescreen Portfolio Brochure WW en
No ratings yet
2024 Instorescreen Portfolio Brochure WW en
35 pages
Machine Learning Lab
No ratings yet
Machine Learning Lab
20 pages
Machine Learning Project: TITLE: Predicting The Sale Price of A House Using Linear Regression
No ratings yet
Machine Learning Project: TITLE: Predicting The Sale Price of A House Using Linear Regression
20 pages
T2 Summary VHA
No ratings yet
T2 Summary VHA
14 pages
Machine Learning Presentaion
No ratings yet
Machine Learning Presentaion
15 pages
RDBMS - Muj
No ratings yet
RDBMS - Muj
34 pages
ML
No ratings yet
ML
17 pages
DL Assignment 1ms24rai03
No ratings yet
DL Assignment 1ms24rai03
10 pages
Product Data Sheet Rosemount 708 Wireless Acoustic Transmitter en 88228
No ratings yet
Product Data Sheet Rosemount 708 Wireless Acoustic Transmitter en 88228
12 pages
Data Sheet Acronis SCS Cyber Backup 12.5 Hardened Edition EN US 230627
No ratings yet
Data Sheet Acronis SCS Cyber Backup 12.5 Hardened Edition EN US 230627
2 pages
SiddharthShah 1032221195 DivC 50 DL LabAssignment2
No ratings yet
SiddharthShah 1032221195 DivC 50 DL LabAssignment2
7 pages
Predicting House Prices
No ratings yet
Predicting House Prices
9 pages
Back To Normal? or Will 5G Push The Envelope?: Industry Analysis #3 2021
No ratings yet
Back To Normal? or Will 5G Push The Envelope?: Industry Analysis #3 2021
26 pages
Regression Algorithm
No ratings yet
Regression Algorithm
9 pages
Housepriceprediction ML 221104055342 Fb5109ae
No ratings yet
Housepriceprediction ML 221104055342 Fb5109ae
17 pages
ML Regression
No ratings yet
ML Regression
9 pages
ML Manual
No ratings yet
ML Manual
9 pages
Shub Neet DT
No ratings yet
Shub Neet DT
12 pages
Explain Me Every Code Written in It With Deep Know
No ratings yet
Explain Me Every Code Written in It With Deep Know
7 pages
Python
No ratings yet
Python
4 pages
House Price Prediction Using Machine Learning in Python
No ratings yet
House Price Prediction Using Machine Learning in Python
13 pages
Python File
No ratings yet
Python File
5 pages
Architecture Outline - Self Billing and Reclassification in AIM Telenor
No ratings yet
Architecture Outline - Self Billing and Reclassification in AIM Telenor
16 pages
AIML
No ratings yet
AIML
5 pages
Ads Lab8
No ratings yet
Ads Lab8
5 pages
Learning Activity Sheet Empowerment Technologies-Senior High School
No ratings yet
Learning Activity Sheet Empowerment Technologies-Senior High School
6 pages
Price Prediction
No ratings yet
Price Prediction
4 pages
Expt 7
No ratings yet
Expt 7
3 pages
Unit 3 5
No ratings yet
Unit 3 5
4 pages
DA Lab2
No ratings yet
DA Lab2
5 pages
PWC Communication Tools and B.U.D.S. (Spark Series) - Shop Manual Supplement smr2016-108
No ratings yet
PWC Communication Tools and B.U.D.S. (Spark Series) - Shop Manual Supplement smr2016-108
6 pages
Docu 4
No ratings yet
Docu 4
3 pages
2 - Linear - Regression - Multivariate - Ipynb - Colaboratory
No ratings yet
2 - Linear - Regression - Multivariate - Ipynb - Colaboratory
4 pages
2a DL
No ratings yet
2a DL
4 pages
Phase 5
No ratings yet
Phase 5
5 pages
Wa0009.
No ratings yet
Wa0009.
4 pages
Data Mining Final Assignment
No ratings yet
Data Mining Final Assignment
4 pages
PTD Lab Manual
No ratings yet
PTD Lab Manual
16 pages
Project 4 - House Price Prediction - Ipynb - Colab
No ratings yet
Project 4 - House Price Prediction - Ipynb - Colab
5 pages
Regression Dataset
No ratings yet
Regression Dataset
3 pages
Exp4 (Linear Regression)
No ratings yet
Exp4 (Linear Regression)
2 pages
Anomaly-Based IDS To Detect Attack Using Various...
No ratings yet
Anomaly-Based IDS To Detect Attack Using Various...
5 pages
DL Lab Prog 2
No ratings yet
DL Lab Prog 2
2 pages
Lesson Agenda 24, October 8th, 2020
No ratings yet
Lesson Agenda 24, October 8th, 2020
3 pages
7 A
No ratings yet
7 A
2 pages
2 Linear Regression Multivariate
No ratings yet
2 Linear Regression Multivariate
2 pages
1.3 Python As A Calculator
100% (1)
1.3 Python As A Calculator
2 pages
A
No ratings yet
A
2 pages
DSBDA Prac4 2
No ratings yet
DSBDA Prac4 2
1 page
Linear Regression - Py
No ratings yet
Linear Regression - Py
2 pages
House - Price - Ipynb - Colab
No ratings yet
House - Price - Ipynb - Colab
2 pages
Linear Regression - Jupyter Notebook
No ratings yet
Linear Regression - Jupyter Notebook
2 pages
ML Four To Eight
No ratings yet
ML Four To Eight
3 pages
Ict 11-3RD Quarter Exam
No ratings yet
Ict 11-3RD Quarter Exam
3 pages
5G in Military Usage
No ratings yet
5G in Military Usage
1 page
Entry Level Java Developer Resume Example
No ratings yet
Entry Level Java Developer Resume Example
1 page
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet