MLPROJECT - Ipynb - Colaboratory

import random
import pandas as pd
RANDOM_SEED = 97
#INPUT_DATASET = "/content/onlinedeliverydata (5).csv"

INPUT_DATASET = "/content/onlinedeliverydata (5) (1).csv"
OUTPUT_DATASET = "output_dataset.csv"
BALANCE_COL = "Output"
VALUES = [1,0]
# Seed the stdlib RNG from RANDOM_SEED so the sampled rows are reproducible
# and the constant above is the single place to change the seed.

random.seed(RANDOM_SEED)
# load the dataset

dataset = pd.read_csv(INPUT_DATASET)
# Count the rows carrying each value in BALANCE_COL; the smallest count is
# the number of rows kept for every value.

value_counts = [int((dataset[BALANCE_COL] == value).sum()) for value in VALUES]
min_num_rows = min(value_counts)
for value, count in zip(VALUES, value_counts):
    print(f"There were {count} {value}s in the dataset - the kept amount is {min_num_rows}.")
# randomly select the minimum number of rows for each of the values

chosen_ids = []
for label in VALUES:
    ids = dataset[dataset[BALANCE_COL] == label].index
    chosen_ids.extend(random.sample(list(ids), min_num_rows))
# Keep only the chosen rows, matching on index labels (chosen_ids holds
# labels, not positions) so the selection does not depend on the frame
# having a default 0..n-1 index. Original row order is preserved.
# NOTE: if no row matches any entry of VALUES, min_num_rows is 0 and the
# written file contains only the header.

dataset = dataset[dataset.index.isin(chosen_ids)]
dataset.to_csv(OUTPUT_DATASET, index=False)
output There were 0 1s in the dataset - the kept amount is 0.

There were 0 0s in the dataset - the kept amount is 0.
# Mount Google Drive at /content/drive inside the Colab runtime.
from google.colab import drive

drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 
import numpy as np
from sklearn import datasets

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Load the survey CSV; `data` and `df` both refer to the raw, uncleaned table.
data=pd.read_csv("/content/onlinedeliverydata (5) (1).csv")

df=pd.DataFrame(data)
VALUES = [1, 0]
#df.drop(['Pin code'],axis=1,inplace=True)
# Drop rows with missing values and take an explicit copy, so the in-place
# replacements below act on an independent frame instead of raising
# SettingWithCopyWarning.
df1=df.dropna().copy()
# Encode the yes/no answers as 1/0; 'NO' is an alternate spelling of 'No'
# that the original handled in a second, overlapping replace call.
df1.replace(('Yes','No','NO'),(1,0,0),inplace=True)
df1.replace(('Uneducated','Ph.D','School','Post Graduate','Graduate','Strongly Agree','Bakery items (snacks)','Male','Female','Sligh
# Notebook inspection: the cleaned frame, the raw frame's shape, the
# per-column maxima of the cleaned frame, and finally the raw frame.
df1
print(df.shape)
print(df1.max())
df
(388, 46)
Age 33
Gender 76
Marital Status 81
Occupation 40
Monthly Income 80
Educational Qualifications 90
Family size 6
Pin code 560109
More restaurant choices 60
Easy Payment option 83
More Offers and Discount 83
Good Food quality 83
Good Tracking system 83
Self Cooking 83
Health Concern 83
Late Delivery 83
Poor Hygiene 83
Bad past experience 83
Unavailability 83
Unaffordable 83
Long delivery time 83
Delay of delivery person getting assigned 83
Delay of delivery person picking up food 83
Wrong order delivered 83
Missing item 83
Order placed by mistake 83
Influence of time 83
Order Time 83
Residence in busy location 83
Google Maps Accuracy 32
Good Road Condition 61
Low quantity low time 82
Delivery person ability 82
Influence of rating 82
Less Delivery time 82
High Quality of package 82
Number of calls 32
Politeness 93
Freshness 93
Temperature 93
Good Taste 93
Good Quantity 93
Output 93
Unnamed: 43 93
Unnamed: 44 93
Unnamed: 45 1
dtype: int64
<ipython-input-7-16e138d1bd11>:16: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

df1.replace(('Yes','No'),(1,0),inplace=True)

df1.replace(('Yes','NO'),(1,0),inplace=True)

df1.replace(('Uneducated','Ph.D','School','Post Graduate','Graduate','Strongly Agree','Bakery items (snacks)','Male','Female',
More Easy Number
Marital Monthly Educational Family Pin
Age Gender Occupation restaurant Payment ... of Politeness Fr
Status Income Qualifications size code
choices option calls
Non Veg
No foods Moderately M
0 20 Female Single Student Post Graduate 4 560001 Neutral ... Yes
Income (Lunch / Important
Dinner)
Non Veg
Below foods Strongly Very
1 24 Female Single Student Graduate 3 560009 ... Yes
Rs.10000 (Lunch / agree Important
Dinner)
Non Veg
Below foods Strongly
2 22 Male Single Student Post Graduate 3 560017 ... Yes Important
Rs.10000 (Lunch / agree
Dinner)
Veg foods
No (Breakfast / Very
3 22 Female Single Student Graduate 6 560019 Agree ... Yes
Income Lunch / Important
Dinner)
Non Veg
Below foods
Below foods
4 22 Male Single Student Post Graduate 4 560010 Agree ... Yes Important
Rs.10000 (Lunch /
Dinner)
... ... ... ... ... ... ... ... ... ... ... ... ... ...
Non Veg
No foods
383 23 Female Single Student Post Graduate 2 560001 Agree ... Maybe Important
Income (Lunch /
Dinner)
Non Veg
No foods Moderately
384 23 Female Single Student Post Graduate 4 560048 Neutral ... Yes
Dinner)
Non Veg
No foods
385 22 Female Single Student Post Graduate 5 560010 Agree ... Yes Important
Income (Lunch /
Dinner)
Non Veg
Below foods Strongly
386 23 Male Single Student Post Graduate 2 560009 ... Yes Important
Rs.10000 (Lunch / agree
Dinner)
Non Veg
No foods Slightly
387 23 Male Single Student Post Graduate 5 560078 Agree ... Maybe Un
Dinner)
388 rows × 46 columns
# Features are every column of df1 except the last; the target is the last
# column.
# NOTE(review): the recorded df1.max() output lists 'Output' followed by
# three 'Unnamed' columns, so the last column may not be the intended label
# and 'Output' would then sit among the features - confirm the column choice.
x=df1.iloc[ : , :-1].values
y=df1.iloc[ : ,-1].values
print(x)
# Fixed random_state on the split and on the tree makes the reported scores
# repeatable, matching the random_state=0 used by the later model cells.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)
clf=DecisionTreeClassifier(random_state=0)
clf.fit(x_train,y_train)
y_pred=clf.predict(x_test)
print(y_pred)
# Report accuracy, the confusion matrix and the per-class report on the
# held-out 20% split.
print("accuracy_score = ",accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred), '\n')
[[20 76 70 ... 11 11 11]

[24 76 70 ... 13 13 13]
[22 1 70 ... 92 13 11]
...
[22 76 70 ... 13 13 13]
[23 1 70 ... 92 13 13]
[23 1 70 ... 11 11 93]]
[0 0 1 1 0 0 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 1 0 1 0 1 1 0 1
1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1
1 1 1 1]
accuracy_score = 0.9358974358974359
[[16 1]
[ 4 57]]
precision recall f1-score support
0 0.80 0.94 0.86 17

1 0.98 0.93 0.96 61
accuracy 0.94 78
macro avg 0.89 0.94 0.91 78
weighted avg 0.94 0.94 0.94 78
Confusion Matrix:
[[16 1]
[ 4 57]]
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import chi2
from itertools import product
# Pearson correlation heatmap of the raw frame. numeric_only=True states
# explicitly that non-numeric columns are excluded, instead of relying on
# the deprecated implicit default that raises a FutureWarning.
sns.heatmap(df.corr(method='pearson', numeric_only=True))
plt.tight_layout()
<ipython-input-9-7f6129b9eb16>:15: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future

sns.heatmap(df.corr('pearson'))
# Feature matrix: every column of df1 except the last. The target `y` is
# reused from the earlier cell.
X=df1.iloc[ : , :-1].values
# Splitting the dataset into training and test set.

x_train, x_test, y_train, y_test= train_test_split(X, y, test_size= 0.25, random_state=0)
#feature Scaling
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
# Fit the scaler on the training split only, then apply the same transform
# to the test split.
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
from sklearn.ensemble import RandomForestClassifier

# random_state=0 makes the forest repeatable, consistent with the seeded
# split above and the seeded logistic regression model.
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy", random_state=0)
classifier.fit(x_train, y_train)
y_pred= classifier.predict(x_test)
print("accuracy_score\n",accuracy_score(y_test,y_pred))
accuracy_score
0.9381443298969072
# Feature matrix: every column of df1 except the last. The target `y` is
# reused from the earlier cell.
X=df1.iloc[ : , :-1].values
# Hold out 25% of the rows for testing, with a fixed seed.
x_train, x_test, y_train, y_test= train_test_split(X, y, test_size= 0.25, random_state=0)
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
# Fit the scaler on the training split only, then apply the same transform
# to the test split.
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
from sklearn.linear_model import LogisticRegression
# Train a logistic regression model and print its accuracy on the test split.
classifier= LogisticRegression(random_state=0)
classifier.fit(x_train, y_train)
y_pred= classifier.predict(x_test)
print("accuracy_score\n",accuracy_score(y_test,y_pred))
accuracy_score
0.9484536082474226
import seaborn as sns

import matplotlib.pyplot as plt
# Correlation heatmap of the cleaned frame on a 9x8 inch figure.
plt.figure(figsize=(9,8))
sns.heatmap(df1.corr())
plt.show()
# Grouped bar chart comparing four metrics across four algorithms.
# NOTE(review): these percentages are hard-coded rather than computed from
# the models trained above - confirm where they come from.
acc=[99,88,98,100]
prec=[99,96,94,100]
re=[99,89,94,100]
f1=[99,92,94,100]
x=np.arange(4)
width=0.2
# One bar per metric, placed side by side around each algorithm position.
plt.bar(x-0.2, acc, width, color='cyan')
plt.bar(x, prec, width, color='orange')
plt.bar(x+0.2, re, width, color='green')
plt.bar(x+0.4,f1, width, color='blue')
# The four bars of a group span x-0.3 .. x+0.5, so the group centre is
# x + width/2; put the tick label there rather than under the second bar.
plt.xticks(x + width / 2, ['Decision tree', 'logistic ', 'XG boost', 'Random forest'])

plt.xlabel("Algorithms")
plt.ylabel("metrics")
# Legend entries follow the order in which the bars were drawn.
plt.legend(["accuracy", "precision", "Recall","f1_score"],loc="lower left")
plt.show()
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU, PReLU, ELU
# keras.layers exports no `DropoutWrapper`, so that import raised
# ImportError and stopped the cell. `Dropout` is the Keras dropout layer.
# NOTE(review): presumably Dropout is what was intended - confirm.
from keras.layers import Dropout
# init ann
clf = Sequential()
# First hidden layer: 6 ReLU units fed by 10 input features.
# NOTE(review): input_dim must equal the number of feature columns in the
# matrix later passed to fit - confirm 10 is correct for this dataset.
clf.add(Dense(units = 6, kernel_initializer = 'he_uniform', activation = 'relu',input_dim = 10))
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-15-4018cb1ab8db> in <cell line: 5>()
3 from keras.layers import Dense
4 from keras.layers import LeakyReLU, PReLU, ELU
----> 5 from keras.layers import DropoutWrapper
6 # init ann
7 clf = Sequential()
ImportError: cannot import name 'DropoutWrapper' from 'keras.layers' (/usr/local/lib/python3.10/dist-

packages/keras/layers/__init__.py)
---------------------------------------------------------------------------
NOTE: If your import is failing due to a missing package, you can
manually install dependencies using either !pip or !apt.
To view examples of installing some common dependencies, click the

"Open Examples" button below.
---------------------------------------------------------------------------
OPEN EXAMPLES SEARCH STACK OVERFLOW
# Second hidden layer (6 ReLU units), then a single sigmoid output unit.
clf.add(Dense(units = 6, kernel_initializer = 'he_uniform', activation = 'relu'))
clf.add(Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))

# Print the layer-by-layer summary of the model.
clf.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_3 (Dense) (None, 6) 66
=================================================================
Total params: 115
Trainable params: 115
Non-trainable params: 0
_________________________________________________________________
# Compile with the Adamax optimizer and binary cross-entropy loss, tracking
# accuracy; then train for 10 epochs in batches of 10 with validation_split=0.33.
# NOTE(review): Xtrain / ytrain are not defined in the visible part of this
# file (earlier cells name the splits x_train / y_train) - confirm which
# arrays are intended.
clf.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])
model_history = clf.fit(Xtrain, ytrain,validation_split = 0.33,batch_size = 10,epochs = 10)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-2-8164250083a5> in <cell line: 1>()
----> 1 clf.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])
2
3 model_history = clf.fit(Xtrain, ytrain,validation_split = 0.33,batch_size = 10,epochs = 10)
NameError: name 'clf' is not defined
SEARCH STACK OVERFLOW
# List the metric names recorded in the training history.
print(model_history.history.keys())
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
# Plot per-epoch accuracy on the training data and on the validation split.
plt.plot(model_history.history['accuracy'])
plt.plot(model_history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# The second curve is 'val_accuracy', i.e. the validation split carved out
# by fit(), not the held-out test set - label it accordingly.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# Predict on Xtest, then threshold the outputs at 0.5 to get boolean labels.
# NOTE(review): Xtest is not defined in the visible part of this file - confirm.
ypred = clf.predict(Xtest)
ypred = (ypred > 0.5)
3/3 [==============================] - 0s 4ms/step
# Reconstructed import: the dot, underscores and comma were lost when the
# notebook was exported.
from sklearn.metrics import confusion_matrix, accuracy_score

MLPROJECT - Ipynb - Colaboratory

Uploaded by

Copyright:

Available Formats

MLPROJECT - Ipynb - Colaboratory

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

MLPROJECT - Ipynb - Colaboratory

Uploaded by

Copyright:

Available Formats

import random

#INPUT_DATASET = "/content/onlinedeliverydata (5).csv"

# set the random seed for reproducibility

# load the dataset

# figure out the minimum number of the values

# randomly select the minimum number of rows for each of the values

# remove the non-chosen ids from the dataset

output There were 0 1s in the dataset - the kept amount is 0.

from google.colab import drive

from sklearn import datasets

data=pd.read_csv("/content/onlinedeliverydata (5) (1).csv")

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ver

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ver

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ver

388 rows × 46 columns

[[20 76 70 ... 11 11 11]

0 0.80 0.94 0.86 17

from sklearn.ensemble import RandomForestClassifier

from itertools import product

<ipython-input-9-7f6129b9eb16>:15: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future

# Splitting the dataset into training and test set.

from sklearn.ensemble import RandomForestClassifier

import seaborn as sns

plt.xticks(x, ['Decision tree', 'logistic ', 'XG boost', 'Random forest'])

ImportError: cannot import name 'DropoutWrapper' from 'keras.layers' (/usr/local/lib/python3.10/dist-

To view examples of installing some common dependencies, click the

OPEN EXAMPLES SEARCH STACK OVERFLOW

clf.add(Dense(units = 6, kernel_initializer = 'he_uniform', activation = 'relu'))

clf.add(Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))

dense_4 (Dense) (None, 6) 42

dense_5 (Dense) (None, 1) 7

clf.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])

model_history = clf.fit(Xtrain, ytrain,validation_split = 0.33,batch_size = 10,epochs = 10)

NameError: name 'clf' is not defined

SEARCH STACK OVERFLOW

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

3/3 [==============================] - 0s 4ms/step

from sklearn metrics import confusion matrix accuracy score

You might also like