E-Commerce Product Delivery Prediction
E-Commerce Product Delivery Prediction
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
Loading Dataset
# Raw GitHub URL of the e-commerce shipping dataset (CSV).
# The URL is written as adjacent string literals only to keep lines short;
# Python joins them into one string with no whitespace in between.
file_name = (
    "https://raw.githubusercontent.com/SUKHMAN-SINGH-1612/"
    "Data-Science-Projects/refs/heads/main/"
    "E-Commerce%20Product%20Delivery%20Prediction/E_Commerce.csv"
)
# Fetch and parse the CSV into a DataFrame.
df = pd.read_csv(file_name)
# Preview the first rows of the frame.
df.head()
Analysis of Data
# Print the frame's schema: row count, column names, non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10999 entries, 0 to 10998
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 10999 non-null int64
1 Warehouse_block 10999 non-null object
2 Mode_of_Shipment 10999 non-null object
3 Customer_care_calls 10999 non-null int64
4 Customer_rating 10999 non-null int64
5 Cost_of_the_Product 10999 non-null int64
6 Prior_purchases 10999 non-null int64
7 Product_importance 10999 non-null object
8 Gender 10999 non-null object
9 Discount_offered 10999 non-null int64
10 Weight_in_gms 10999 non-null int64
11 Reached.on.Time_Y.N 10999 non-null int64
dtypes: int64(8), object(4)
memory usage: 1.0+ MB
# Count missing values per column.
df.isnull().sum()
ID 0
Warehouse_block 0
Mode_of_Shipment 0
Customer_care_calls 0
Customer_rating 0
Cost_of_the_Product 0
Prior_purchases 0
Product_importance 0
Gender 0
Discount_offered 0
Weight_in_gms 0
Reached.on.Time_Y.N 0
dtype: int64
# Count rows that exactly repeat an earlier row.
df.duplicated().sum()
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()
                ID  Customer_care_calls  Customer_rating  Cost_of_the_Product  \
count  10999.00000         10999.000000     10999.000000         10999.000000
mean    5500.00000             4.054459         2.990545           210.196836
std     3175.28214             1.141490         1.413603            48.063272
min        1.00000             2.000000         1.000000            96.000000
25%     2750.50000             3.000000         2.000000           169.000000
50%     5500.00000             4.000000         3.000000           214.000000
75%     8249.50000             5.000000         4.000000           251.000000
max    10999.00000             7.000000         5.000000           310.000000
EDA
# Accumulator for one accuracy score per model; later cells append to it.
final_res = list()
# Target label: the on-time flag column.
y = df['Reached.on.Time_Y.N']
# Feature matrix: every remaining column of the frame.
X = df.drop(columns=['Reached.on.Time_Y.N'])
Modelling
# Predict on x_test with LR, score against y_test, and record the accuracy.
# NOTE(review): LR is presumably the fitted logistic-regression model; its fit,
# x_test, y_test and accuracy_score are not defined in the visible part of
# this file — confirm they are set up earlier.
y_pred = LR.predict(x_test)
# The results table labels 'Logistic Regression' as its first row, so this
# score must be the first value appended to final_res; without it the table
# would have seven labels for six scores.
acc_LR = accuracy_score(y_test,y_pred)
acc_LR
final_res.append(acc_LR)
RandomForestClassifier()
# Predict on x_test with RDC, score against y_test, and append the accuracy
# to final_res.
# NOTE(review): RDC is presumably the fitted random forest; its fit, x_test,
# y_test and accuracy_score are not visible in this part of the file.
y_pred1 = RDC.predict(x_test)
acc_RDC = accuracy_score(y_test,y_pred1)
# Bare expression: presumably there to display the score in a notebook cell.
acc_RDC
final_res.append(acc_RDC)
DecisionTreeClassifier()
# Predict on x_test with DTC, score against y_test, and append the accuracy
# to final_res.
# NOTE(review): DTC is presumably the fitted decision tree; its fit, x_test,
# y_test and accuracy_score are not visible in this part of the file.
y_pred2 = DTC.predict(x_test)
acc_DTC = accuracy_score(y_test,y_pred2)
# Bare expression: presumably there to display the score in a notebook cell.
acc_DTC
final_res.append(acc_DTC)
KNeighborsClassifier()
# Predict on x_test with KNN, score against y_test, and append the accuracy
# to final_res.
# NOTE(review): KNN is presumably the fitted k-nearest-neighbours model; its
# fit, x_test, y_test and accuracy_score are not visible in this part of the file.
y_pred3 = KNN.predict(x_test)
acc_KNN = accuracy_score(y_test,y_pred3)
# Bare expression: presumably there to display the score in a notebook cell.
acc_KNN
final_res.append(acc_KNN)
SVC()
# Predict on x_test with SV, score against y_test, and append the accuracy
# to final_res.
# NOTE(review): SV is presumably the fitted support-vector classifier; its
# fit, x_test, y_test and accuracy_score are not visible in this part of the file.
y_pred4 = SV.predict(x_test)
acc_SV = accuracy_score(y_test,y_pred4)
# Bare expression: presumably there to display the score in a notebook cell.
acc_SV
final_res.append(acc_SV)
GradientBoostingClassifier()
# Predict on x_test with GBC, score against y_test, and append the accuracy
# to final_res.
# NOTE(review): GBC is presumably the fitted gradient-boosting model; its
# fit, x_test, y_test and accuracy_score are not visible in this part of the file.
y_pred5 = GBC.predict(x_test)
acc_GBC = accuracy_score(y_test,y_pred5)
# Bare expression: presumably there to display the score in a notebook cell.
acc_GBC
final_res.append(acc_GBC)
# Predict on x_test with XG, score against y_test, and append the accuracy
# to final_res.
# NOTE(review): XG is presumably the fitted XGBoost model; its creation, fit,
# x_test, y_test and accuracy_score are not visible in this part of the file.
y_pred6 = XG.predict(x_test)
acc_XG = accuracy_score(y_test,y_pred6)
# Bare expression: presumably there to display the score in a notebook cell.
acc_XG
final_res.append(acc_XG)
Conclusion
# Assemble the collected accuracies into a one-column summary table.
final = np.array(final_res)
# Reshape to a column vector: one row per model, a single score column.
result = final.reshape(-1,1)
column = ['Accuracy_score']
# Row labels; they must follow the order in which the accuracies were
# appended to final_res, and there must be exactly one label per score.
Index = [
    'Logistic Regression',
    'Random Forest',
    'Decision Tree',
    'KNN',
    'Support Vector Machine',
    'Gradient Boosting',
    'XG Boost',
]
final_result = pd.DataFrame(result,columns = column,index = Index)
# Display the table.
final_result
Accuracy_score
Logistic Regression 0.659091
Random Forest 0.670455
Decision Tree 0.643636
KNN 0.655455
Support Vector Machine 0.690455
Gradient Boosting 0.683636
XG Boost 0.664091