PRJ Car Price Prediction For Data Science
PRJ Car Price Prediction For Data Science
Import Library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
Manual EDA
Number of records
# Number of records in the dataset. `df` is loaded outside this excerpt
# (presumably a pandas DataFrame, given the later .iloc use — TODO confirm).
len(df)
Create X
# Feature matrix: every column position from 0 to 8 except 2, which is
# reserved for the target Y.
X = df.iloc[:, [0, 1, 3, 4, 5, 6, 7, 8]].to_numpy()
display(X)
Create Y
# Target: column position 2. Selecting with a list keeps the result 2-D
# (a single column) rather than a flat vector.
Y = df.iloc[:, [2]].to_numpy()
display(Y)
Label Encoding
from sklearn.preprocessing import LabelEncoder

# Integer-encode two feature columns of X in place: the first column and the
# fourth-from-last one (presumably the categorical ones — TODO confirm).
# Each column gets its own encoder so its fitted classes stay available
# through le1 / le2.
le1, le2 = LabelEncoder(), LabelEncoder()
X[:, 0] = le1.fit_transform(X[:, 0])
X[:, -4] = le2.fit_transform(X[:, -4])
display(X)
Display – X
# Render the encoded feature matrix as a DataFrame for a tabular view.
display (pd.DataFrame(X))
# Print predictions next to the actual targets as a two-column array:
# column 0 = y_pred, column 1 = Y_test. Both are reshaped to (n, 1) first so
# they can be concatenated along axis 1.
# The statement appears twice in the source (presumably once per model cell —
# TODO confirm); both occurrences are kept, with the identifier `Y_test`
# rejoined where the export had split it across lines.
print(
    np.concatenate(
        (y_pred.reshape(len(y_pred), 1), Y_test.reshape(len(Y_test), 1)),
        1,
    )
)
print(
    np.concatenate(
        (y_pred.reshape(len(y_pred), 1), Y_test.reshape(len(Y_test), 1)),
        1,
    )
)
# Attach the predictions to the original frame as an extra column.
# NOTE(review): concat(axis=1) aligns rows on the index. If y_pred only covers
# the test split, its rows will not line up with df — confirm where y_pred
# comes from before relying on this table.
result = pd.concat([df,pd.DataFrame(y_pred)],axis=1)
display( result)
RandomizedSearchCV
# Hyperparameter Tuning and RandomizedSearchCV - Model used – RandomForestRegressor

# Search space for the random forest. The candidate values on the right-hand
# side are defined outside this excerpt.
rand_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
}

rf = RandomForestRegressor()

# Randomized search: 3 sampled parameter settings, each evaluated with 3-fold
# cross-validation and scored by negative mean squared error. random_state
# fixes which settings are sampled; n_jobs=1 runs the search sequentially.
rCV = RandomizedSearchCV(
    estimator=rf,
    param_distributions=rand_grid,
    scoring='neg_mean_squared_error',
    n_iter=3,
    cv=3,
    random_state=42,
    n_jobs=1,
)
Fit Model
import warnings
# Install a process-wide "ignore" filter for all warnings.
# NOTE(review): this is not limited to the fit below — warnings raised by any
# later cell are hidden as well.
warnings.filterwarnings('ignore')
# Run the randomized hyperparameter search on the training split.
rCV.fit(X_train,Y_train)
Prediction
# Predict on the held-out features through the fitted search object.
rf_pred=rCV.predict(X_test)
display (rf_pred)
Display R² Score
# Score the random-forest predictions against the test targets with r2_score.
# r2_score is imported outside this excerpt (presumably sklearn.metrics, i.e.
# the coefficient of determination rather than a classification accuracy —
# TODO confirm).
display (r2_score(Y_test,rf_pred))
Model CatBoostRegressor
from catboost import CatBoostRegressor
# CatBoost regressor with default constructor arguments, fitted on the same
# X_train / Y_train used for the random-forest search.
cat=CatBoostRegressor()
cat.fit(X_train,Y_train)