Chapter 5
Chapter 5
// use client
// @ts-nocheck
import Head from "next/head";
import Image from "next/image";
import Link from "next/link";
import { useState } from "react";
return (
<>
<Head>
<title>Estate AI</title>
<meta
name="description"
content="Looking for the best rooms to rent? Let the AI help
you with your customized preferences!"
/>
<meta name="viewport" content="width=device-width, initial-
scale=1" />
<link rel="icon" href="/favicon.ico" />
</Head>
<main>
{/* Header */}
<header className="flex justify-between bg-gradient-to-b
from-black to-transparent px-10 py-4 absolute top-0 left-0 right-0 z-
10">
<div className="header_left flex md:w-[50%] w-full justify-
center md:justify-start items-center gap-4">
<Image src="/logo.png" alt="Estate AI" width={40}
height={40} />
<p
className="text-white font-bold text-2xl
"
>
Estate AI
</p>
</div>
<div className="header_right md:flex hidden items-center
justify-end gap-10 w-[50%]">
<Link
className="text-white text-lg hover:text-blue-500
hover:cursor-pointer"
href="/"
>
<p>Home</p>
</Link>
<Link
className="text-white text-lg hover:text-blue-500
hover:cursor-pointer"
href="/about"
>
<p>About</p>
</Link>
<Link
className="text-white text-lg hover:text-blue-500
hover:cursor-pointer"
href="/about"
>
<p>GitHub</p>
</Link>
</div>
</header>
CSS:
/* Tailwind layers: base reset, component classes, utility classes. */
@tailwind base;
@tailwind components;
@tailwind utilities;

/* Animate in-page anchor jumps instead of snapping to the target. */
html {
  scroll-behavior: smooth;
}

/* Custom scrollbar, styled through the -webkit- scrollbar pseudo-elements. */
::-webkit-scrollbar {
  width: 20px;
}

::-webkit-scrollbar-track {
  background-color: black;
}

/* The transparent 6px border combined with background-clip: content-box
   paints the thumb colour only inside the border, so the visible thumb is
   narrower than the 20px scrollbar. */
::-webkit-scrollbar-thumb {
  background-color: #d6dee1;
  border-radius: 20px;
  border: 6px solid transparent;
  background-clip: content-box;
}

/* Darker thumb while hovered. */
::-webkit-scrollbar-thumb:hover {
  background-color: #a8bbbf;
}
Backend:
App.py:
from flask import Flask, request, jsonify
from pydantic import BaseModel
import pandas as pd
import joblib
from flask_cors import CORS, cross_origin
# Flask application object; the route decorator below registers handlers on it.
app = Flask(__name__)
# Wrap the app with flask-cors so responses carry CORS headers.
cors = CORS(app)
# flask-cors configuration key; presumably restricts the allowed request
# headers to Content-Type — confirm against the flask-cors documentation.
app.config['CORS_HEADERS'] = 'Content-Type'
class RequestFeatures(BaseModel):
    """Request body accepted by the prediction endpoint.

    The field order matters: the handler turns the validated fields into a
    plain list of values and passes it to the model, so the order here is the
    order of the feature vector. It matches the training columns listed in
    the notebook once 'price' and 'locality' are dropped.
    """
    # Numeric / encoded listing attributes.
    bedroom: float
    # Encoded layout; schema.json maps BHK -> 1, RK -> 0.
    layout_type: int
    area: float
    # Encoded furnishing; schema.json maps Furnished -> 0,
    # Semi-Furnished -> 1, Unfurnished -> 2.
    furnish_type: int
    bathroom: float
    # Encoded city; the mapping is not shown in this file.
    city: int
    # One-hot flags derived from seller_type (bool columns in the training
    # data; presumably sent as 0/1 — confirm with the client).
    agent: int
    builder: int
    owner: int
    # One-hot flags derived from property_type (same 0/1 assumption).
    apartment: int
    independent_floor: int
    independent_house: int
    penthouse: int
    studio_apartment: int
    villa: int
@app.route('/', methods=['POST'])
@cross_origin()
def predict():
    """Predict a price for the listing described in the JSON request body.

    The body is validated against RequestFeatures; the validated values are
    passed to the model as a single row, in field-definition order.

    Returns:
        200 with {"prediction": <float>} on success.
        400 with {"error": <message>} when the body is not a JSON object or
        fails validation.
        500 with {"error": <message>} when the model call fails.

    NOTE(review): `model` is expected to be a module-level estimator loaded
    elsewhere in this file (joblib is imported, but no load call is visible
    here) — confirm that it is defined before the first request.
    """
    # Local import keeps this handler self-contained; pydantic is already a
    # dependency of this module.
    from pydantic import ValidationError

    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so the client gets a JSON error rather than an HTML page.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    try:
        req = RequestFeatures(**payload)
    except ValidationError as e:
        return jsonify({"error": str(e)}), 400

    # model_dump() is the pydantic v2 replacement for the deprecated .dict();
    # it keeps the field-definition order, which is the feature order.
    features = req.model_dump()
    row = [list(features.values())]
    try:
        print("Values in payload", features, row)
        res = model.predict(row)
        # Convert the numpy scalar to a built-in float before serializing.
        return jsonify({"prediction": float(res[0])})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
details.ipynb:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
#load the data
# Read data.csv (relative to the working directory) into a DataFrame.
init_df = pd.read_csv('data.csv')
# Print the column names, non-null counts and dtypes of the loaded frame.
init_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193011 entries, 0 to 193010
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 seller_type 193011 non-null object
1 bedroom 193011 non-null float64
2 layout_type 193011 non-null object
3 property_type 193011 non-null object
4 locality 193011 non-null object
5 price 193011 non-null float64
6 area 193011 non-null float64
7 furnish_type 193011 non-null object
8 bathroom 193011 non-null float64
9 city 193011 non-null object
dtypes: float64(4), object(6)
memory usage: 14.7+ MB
init_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193011 entries, 0 to 193010
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 seller_type 193011 non-null object
1 bedroom 193011 non-null float64
2 layout_type 193011 non-null object
3 property_type 193011 non-null object
4 locality 193011 non-null object
5 price 193011 non-null float64
6 area 193011 non-null float64
7 furnish_type 193011 non-null object
8 bathroom 193011 non-null float64
9 city 193011 non-null object
dtypes: float64(4), object(6)
memory usage: 14.7+ MB
# I'll be doing visualization later, as I need to learn more about it!
# Scatter plot with price on the x-axis and city on the y-axis.
plt.scatter(init_df.price,init_df.city)
<matplotlib.collections.PathCollection at 0x18925fe22a0>
#! Encoding Values
# Okay, so there are 6 string fields which we need to encode.
#! For values like seller_type and city, whose categories are not related to
#! each other, we'll use get_dummies.
#* For values like furnish_type and layout_type, whose categories can be
#* arranged by rank (e.g. BHK > RK), we will use a label encoder.
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
# One-hot encode the two nominal columns. Both indicator frames are built
# first (they stay available under their original names), then each source
# column is dropped and its indicator columns are appended on the right, in
# the same order as before: seller_type first, property_type second.
seller_type = pd.get_dummies(init_df['seller_type'])
e_property = pd.get_dummies(init_df['property_type'])
for source_col, indicators in (
    ('seller_type', seller_type),
    ('property_type', e_property),
):
    remaining = init_df.drop(columns=source_col)
    init_df = pd.concat([remaining, indicators], axis='columns')
# Inspect the last rows of the frame after one-hot encoding.
init_df.tail()
# NOTE(review): this encoder is only instantiated here; no fit/transform call
# on it appears in the visible code, and 'locality' is dropped from the
# feature matrix before training — confirm whether it is still needed.
locality_encoder = LabelEncoder()
# Seed NumPy's global random number generator.
np.random.seed(42)
# Inspect the first rows of the frame.
init_df.head()
bedroom layout_type locality price area furnish_type
bathroom city AGENT BUILDER OWNER Apartment
Independent Floor Independent House Penthouse Studio
Apartment Villa
0 2.0 1.0 Bodakdev 20000.0 1450.0 2.0 2.0 1.0
False False True True False False False False False
1 1.0 0.0 CG Road 7350.0 210.0 1.0 1.0 1.0
False False True False False False False True False
2 3.0 1.0 Jodhpur 22000.0 1900.0 0.0 3.0 1.0
False False True True False False False False False
3 2.0 1.0 Sanand 13000.0 1285.0 1.0 2.0 1.0
False False True False False True False False False
4 2.0 1.0 Navrangpura 18000.0 1600.0 2.0 2.0
1.0 False False True False False True False False False
# * Now we need to repeat this; for that we had better write a function.
# TODO - Solve this error when using functions!
# ! cannot access local variable 'init_df' where it is not associated
# ! with a value
# (Likely cause: the function assigns to init_df without receiving it as a
# parameter or declaring it global, so Python treats it as an unset local.)
#! Note: I'm not removing the last dummy column; let's see how it impacts
#! the model, i.e. multicollinearity (the "dummy variable trap", as it's called).
# ? Find a way to export all the features, with their original values,
# ? encoded values and their data type.
# One way to do this is to make a set of unique values, then use that
# set to create a dictionary of keys with values obtained by passing
# each key to the encoder.
# schema = {}
# encoded_values = {
# "layout_type": layout_encoder,
# "furnish_type": furnish_encoder,
# "city": city_encoder,
# "locality": locality_encoder
#}
# schema_df.to_csv('schema.csv', index=False)
# Rename the one-hot columns to snake_case so they match the field names of
# the API's RequestFeatures model. Every key is written on a single line: a
# key split across lines is an unterminated string and could never match a
# real column label such as "Independent Floor".
init_df.rename(
    columns={
        "AGENT": "agent",
        "OWNER": "owner",
        "BUILDER": "builder",
        "Apartment": "apartment",
        "Independent Floor": "independent_floor",
        "Independent House": "independent_house",
        "Penthouse": "penthouse",
        "Studio Apartment": "studio_apartment",
        "Villa": "villa",
    },
    inplace=True,
)
from sklearn.model_selection import train_test_split
init_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193011 entries, 0 to 193010
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 bedroom 193011 non-null float64
1 layout_type 193011 non-null float64
2 locality 193011 non-null object
3 price 193011 non-null float64
4 area 193011 non-null float64
5 furnish_type 193011 non-null float64
6 bathroom 193011 non-null float64
7 city 193011 non-null float64
8 agent 193011 non-null bool
9 builder 193011 non-null bool
10 owner 193011 non-null bool
11 apartment 193011 non-null bool
12 independent_floor 193011 non-null bool
13 independent_house 193011 non-null bool
14 penthouse 193011 non-null bool
15 studio_apartment 193011 non-null bool
16 villa 193011 non-null bool
dtypes: bool(9), float64(7), object(1)
memory usage: 13.4+ MB
# Target: the 'price' column. Features: every other column except
# 'locality', the one column that is still of object dtype.
Y = init_df.loc[:, 'price']
X = init_df.drop(columns=['price', 'locality'])
# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
# Hyperparameter tuning
#! Accuracy Before tuning 77.4%
# Imported here because neither name is imported in the cells above.
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Candidate values for each random-forest hyperparameter.
n_estimators = [40, 80, 120]
max_features = [0.3, 0.6, 1.0]
max_depth = [2, 6, 8]
bootstrap = [True, False]
param_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'bootstrap': bootstrap
}
model = RandomForestRegressor()
# Exhaustive search over the 3 * 3 * 3 * 2 = 54 combinations with 5-fold
# cross-validation (270 fits), matching the recorded cell output. rf_grid was
# previously read without ever being created or fitted.
rf_grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, verbose=1)
rf_grid.fit(X_train, y_train)
# Estimator refitted on the whole training split with the best combination.
best_model = rf_grid.best_estimator_
Fitting 5 folds for each of 54 candidates, totalling 270 fits
import joblib
annotated-types==0.6.0
asttokens==2.4.1
blinker==1.7.0
click==8.1.7
colorama==0.4.6
comm==0.2.2
contourpy==1.2.0
cycler==0.12.1
debugpy==1.8.1
decorator==5.1.1
executing==2.0.1
Flask==3.0.2
Flask-Cors==4.0.0
fonttools==4.49.0
ipykernel==6.29.3
ipython==8.22.2
itsdangerous==2.1.2
jedi==0.19.1
Jinja2==3.1.3
joblib==1.3.2
jupyter_client==8.6.1
jupyter_core==5.7.2
kiwisolver==1.4.5
MarkupSafe==2.1.5
matplotlib==3.8.3
matplotlib-inline==0.1.6
nest-asyncio==1.6.0
numpy==1.26.4
packaging==24.0
pandas==2.2.1
parso==0.8.3
pillow==10.2.0
platformdirs==4.2.0
prompt-toolkit==3.0.43
psutil==5.9.8
pure-eval==0.2.2
pydantic==2.6.4
pydantic_core==2.16.3
Pygments==2.17.2
pyparsing==3.1.2
python-dateutil==2.9.0.post0
pytz==2024.1
pywin32==306
pyzmq==25.1.2
scikit-learn==1.4.1.post1
scipy==1.12.0
six==1.16.0
stack-data==0.6.3
threadpoolctl==3.3.0
tornado==6.4
traitlets==5.14.2
typing_extensions==4.10.0
tzdata==2024.1
wcwidth==0.2.13
Werkzeug==3.0.1
schema.json:
{
"layout_type": {
"data-type": "integer",
"Encoding": {
"BHK": "1",
"RK": "0"
}
},
"furnish_type": {
"data-type": "integer",
"Encoding": {
"Furnished": "0",
"Semi-Furnished": "1",
"Unfurnished": "2"
}
}
}