MLPROJECT - Ipynb - Colaboratory

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 7

import random

import pandas as pd

# Down-sample the dataset so that every value listed in VALUES appears the same
# number of times in BALANCE_COL, then write the balanced rows to OUTPUT_DATASET.

RANDOM_SEED = 97

#INPUT_DATASET = "/content/onlinedeliverydata (5).csv"
INPUT_DATASET = "/content/onlinedeliverydata (5) (1).csv"
OUTPUT_DATASET = "output_dataset.csv"

BALANCE_COL = "Output"
# NOTE(review): the recorded run of this cell reported 0 rows for both values,
# so the column apparently does not hold the integers 1/0 at this point
# (possibly still text labels) - confirm the values before relying on this cell.
VALUES = [1,0]

# set the random seed for reproducibility (single source of truth: RANDOM_SEED)
random.seed(RANDOM_SEED)

# load the dataset
dataset = pd.read_csv(INPUT_DATASET)

# figure out the minimum number of rows available for any of the values
value_counts = [int((dataset[BALANCE_COL] == value).sum()) for value in VALUES]
min_num_rows = min(value_counts)
for value, count in zip(VALUES, value_counts):
    print(f"There were {count} {value}s in the dataset - the kept amount is {min_num_rows}.")

# Fail loudly instead of silently writing an empty CSV when a value is absent.
if min_num_rows == 0:
    raise ValueError(
        f"At least one of {VALUES} does not occur in column {BALANCE_COL!r}; "
        "balancing would discard every row."
    )

# randomly select the minimum number of rows for each of the values
chosen_ids = []
for label in VALUES:
    ids = dataset[dataset[BALANCE_COL] == label].index
    chosen_ids.extend(random.sample(list(ids), min_num_rows))

# keep only the chosen rows; selecting by index label (rather than by position)
# stays correct even if the frame does not carry a default 0..n-1 index, and the
# original row order is preserved
dataset = dataset[dataset.index.isin(chosen_ids)]
dataset.to_csv(OUTPUT_DATASET, index=False)

output There were 0 1s in the dataset - the kept amount is 0.


There were 0 0s in the dataset - the kept amount is 0.

from google.colab import drive


# Mount Google Drive at /content/drive using the google.colab helper imported above.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

 

import numpy as np

from sklearn import datasets


from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the delivery dataset CSV and start encoding text answers as numbers.
data=pd.read_csv("/content/onlinedeliverydata (5) (1).csv")
df=pd.DataFrame(data)

VALUES = [1, 0]
#df.drop(['Pin code'],axis=1,inplace=True)

# Take an explicit copy of the NaN-free rows: the in-place replace calls that
# follow then operate on an independent frame instead of triggering
# SettingWithCopyWarning.
df1=df.dropna().copy()

# Map the yes/no answers to 1/0 in one pass ('NO' covers the upper-case spelling
# that the original second replace call handled).
df1.replace({'Yes': 1, 'No': 0, 'NO': 0}, inplace=True)
df1.replace(('Uneducated','Ph.D','School','Post Graduate','Graduate','Strongly Agree','Bakery items (snacks)','Male','Female','Sligh
df1
print(df.shape)

print(df1.max())
df
(388, 46)
Age 33
Gender 76
Marital Status 81
Occupation 40
Monthly Income 80
Educational Qualifications 90
Family size 6
Pin code 560109
More restaurant choices 60
Easy Payment option 83
More Offers and Discount 83
Good Food quality 83
Good Tracking system 83
Self Cooking 83
Health Concern 83
Late Delivery 83
Poor Hygiene 83
Bad past experience 83
Unavailability 83
Unaffordable 83
Long delivery time 83
Delay of delivery person getting assigned 83
Delay of delivery person picking up food 83
Wrong order delivered 83
Missing item 83
Order placed by mistake 83
Influence of time 83
Order Time 83
Residence in busy location 83
Google Maps Accuracy 32
Good Road Condition 61
Low quantity low time 82
Delivery person ability 82
Influence of rating 82
Less Delivery time 82
High Quality of package 82
Number of calls 32
Politeness 93
Freshness 93
Temperature 93
Good Taste 93
Good Quantity 93
Output 93
Unnamed: 43 93
Unnamed: 44 93
Unnamed: 45 1
dtype: int64
<ipython-input-7-16e138d1bd11>:16: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


df1.replace(('Yes','No'),(1,0),inplace=True)
<ipython-input-7-16e138d1bd11>:17: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


df1.replace(('Yes','NO'),(1,0),inplace=True)
<ipython-input-7-16e138d1bd11>:18: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


df1.replace(('Uneducated','Ph.D','School','Post Graduate','Graduate','Strongly Agree','Bakery items (snacks)','Male','Female',
More Easy Number
Marital Monthly Educational Family Pin
Age Gender Occupation restaurant Payment ... of Politeness Fr
Status Income Qualifications size code
choices option calls

Non Veg
No foods Moderately M
0 20 Female Single Student Post Graduate 4 560001 Neutral ... Yes
Income (Lunch / Important
Dinner)

Non Veg
Below foods Strongly Very
1 24 Female Single Student Graduate 3 560009 ... Yes
Rs.10000 (Lunch / agree Important
Dinner)

Non Veg
Below foods Strongly
2 22 Male Single Student Post Graduate 3 560017 ... Yes Important
Rs.10000 (Lunch / agree
Dinner)

Veg foods
No (Breakfast / Very
3 22 Female Single Student Graduate 6 560019 Agree ... Yes
Income Lunch / Important
Dinner)

Non Veg
Below foods
Below foods
4 22 Male Single Student Post Graduate 4 560010 Agree ... Yes Important
Rs.10000 (Lunch /
Dinner)

... ... ... ... ... ... ... ... ... ... ... ... ... ...

Non Veg
No foods
383 23 Female Single Student Post Graduate 2 560001 Agree ... Maybe Important
Income (Lunch /
Dinner)

Non Veg
No foods Moderately
384 23 Female Single Student Post Graduate 4 560048 Neutral ... Yes
Income (Lunch / Important
Dinner)

Non Veg
No foods
385 22 Female Single Student Post Graduate 5 560010 Agree ... Yes Important
Income (Lunch /
Dinner)

Non Veg
Below foods Strongly
386 23 Male Single Student Post Graduate 2 560009 ... Yes Important
Rs.10000 (Lunch / agree
Dinner)

Non Veg
No foods Slightly
387 23 Male Single Student Post Graduate 5 560078 Agree ... Maybe Un
Income (Lunch / Important
Dinner)

388 rows × 46 columns

# Train and evaluate a decision tree on the encoded frame.
# Features are every column except the last; the last column is the target.
# NOTE(review): the df1.max() listing shows "Unnamed: 45" as the last column and
# "Output" among the feature columns - confirm this is the intended target and
# that "Output" is not leaking the label into the features.
x=df1.iloc[ : , :-1].values
y=df1.iloc[ : ,-1].values
print(x)

# Fixed seeds make the split and the fitted tree reproducible between runs
# (random_state=0 matches the other model cells in this notebook).
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)
clf=DecisionTreeClassifier(random_state=0)

clf.fit(x_train,y_train)
y_pred=clf.predict(x_test)
print(y_pred)
print("accuracy_score = ",accuracy_score(y_test,y_pred))

# Compute the confusion matrix once and reuse it for both printouts.
cm=confusion_matrix(y_test,y_pred)
print(cm)

print(classification_report(y_test,y_pred))
print('Confusion Matrix:\n', cm, '\n')

[[20 76 70 ... 11 11 11]


[24 76 70 ... 13 13 13]
[22 1 70 ... 92 13 11]
...
[22 76 70 ... 13 13 13]
[23 1 70 ... 92 13 13]
[23 1 70 ... 11 11 93]]
[0 0 1 1 0 0 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 1 0 1 0 1 1 0 1
1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1
1 1 1 1]
accuracy_score = 0.9358974358974359
[[16 1]
[ 4 57]]
precision recall f1-score support

0 0.80 0.94 0.86 17


1 0.98 0.93 0.96 61

accuracy 0.94 78
macro avg 0.89 0.94 0.91 78
weighted avg 0.94 0.94 0.94 78

Confusion Matrix:
[[16 1]
[ 4 57]]
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier


from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import chi2

from itertools import product

# Pearson correlation heatmap of the raw frame. numeric_only is passed
# explicitly: df still contains text columns, and relying on the implicit
# default emits a FutureWarning (and is rejected by newer pandas releases).
sns.heatmap(df.corr('pearson', numeric_only=True))
plt.tight_layout()

<ipython-input-9-7f6129b9eb16>:15: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future


sns.heatmap(df.corr('pearson'))

# Train and evaluate a random forest on the encoded frame.
# Features are every column except the last; the last column is the target.
X=df1.iloc[ : , :-1].values
y=df1.iloc[ : ,-1].values

# Splitting the dataset into training and test set.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test= train_test_split(X, y, test_size= 0.25, random_state=0)

#feature Scaling: fit the scaler on the training rows only, then apply it to the test rows
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)

from sklearn.ensemble import RandomForestClassifier

# random_state pins the bootstrap/feature sampling so the score is reproducible,
# consistent with the seeded split above.
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy", random_state=0)
classifier.fit(x_train, y_train)
y_pred= classifier.predict(x_test)

print("accuracy_score\n",accuracy_score(y_test,y_pred))

accuracy_score
0.9381443298969072
# Logistic-regression baseline on the encoded frame.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Every column but the last is a feature; the last column is the target.
X = df1.iloc[:, :-1].values
y = df1.iloc[:, -1].values

# 75/25 split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Standardise using statistics learned from the training rows only.
st_x = StandardScaler()
x_train = st_x.fit_transform(x_train)
x_test = st_x.transform(x_test)

# Fit, predict on the held-out rows, and report accuracy.
classifier = LogisticRegression(random_state=0)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
print("accuracy_score\n", accuracy_score(y_test, y_pred))

accuracy_score
0.9484536082474226

import seaborn as sns


import matplotlib.pyplot as plt
# Correlation heatmap of df1.
plt.figure(figsize=(9,8))
sns.heatmap(df1.corr())
plt.show()

# Grouped bar chart comparing four metrics across four algorithms.
# NOTE(review): these scores are typed in by hand rather than computed, and they
# differ from the accuracies printed by the model cells in this notebook -
# confirm where they come from.
acc=[99,88,98,100]
prec=[99,96,94,100]
re=[99,89,94,100]
f1=[99,92,94,100]
x=np.arange(4)
width=0.2

# Offset the four bars symmetrically around each tick (-1.5w, -0.5w, +0.5w,
# +1.5w) so the algorithm label sits under the middle of its group.
metric_series=[(acc,'cyan'),(prec,'orange'),(re,'green'),(f1,'blue')]
for position,(scores,color) in enumerate(metric_series):
    plt.bar(x+(position-1.5)*width, scores, width, color=color)

plt.xticks(x, ['Decision tree', 'logistic ', 'XG boost', 'Random forest'])

plt.xlabel("Algorithms")
plt.ylabel("metrics")
plt.legend(["accuracy", "precision", "Recall","f1_score"],loc="lower left")
plt.show()

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU, PReLU, ELU
# keras.layers has no DropoutWrapper (importing it raises ImportError); the
# Keras dropout layer is named Dropout.
from keras.layers import Dropout
# init ann
clf = Sequential()
# First hidden layer: 6 ReLU units with He-uniform initialisation.
# NOTE(review): input_dim = 10 must equal the number of feature columns in the
# training matrix passed to fit() - confirm against that data.
clf.add(Dense(units = 6, kernel_initializer = 'he_uniform', activation = 'relu',input_dim = 10))

---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-15-4018cb1ab8db> in <cell line: 5>()
3 from keras.layers import Dense
4 from keras.layers import LeakyReLU, PReLU, ELU
----> 5 from keras.layers import DropoutWrapper
6 # init ann
7 clf = Sequential()

ImportError: cannot import name 'DropoutWrapper' from 'keras.layers' (/usr/local/lib/python3.10/dist-


packages/keras/layers/__init__.py)

---------------------------------------------------------------------------
NOTE: If your import is failing due to a missing package, you can
manually install dependencies using either !pip or !apt.

To view examples of installing some common dependencies, click the


"Open Examples" button below.
---------------------------------------------------------------------------

OPEN EXAMPLES SEARCH STACK OVERFLOW

# Second hidden layer: 6 ReLU units, He-uniform initialisation.
clf.add(Dense(units = 6, kernel_initializer = 'he_uniform', activation = 'relu'))

# Output layer: a single sigmoid unit, Glorot-uniform initialisation.
clf.add(Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))


# Print the layer/parameter summary of the assembled model.
clf.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_3 (Dense) (None, 6) 66

dense_4 (Dense) (None, 6) 42

dense_5 (Dense) (None, 1) 7

=================================================================
Total params: 115
Trainable params: 115
Non-trainable params: 0
_________________________________________________________________

# Configure training: Adamax optimizer, binary cross-entropy loss, accuracy metric.
clf.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train for 10 epochs in batches of 10, holding out 33% of the data for validation.
# NOTE(review): Xtrain / ytrain are not defined in the visible cells (earlier
# cells use x_train / y_train) - confirm where they come from.
model_history = clf.fit(Xtrain, ytrain,validation_split = 0.33,batch_size = 10,epochs = 10)

---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-2-8164250083a5> in <cell line: 1>()
----> 1 clf.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])
2
3 model_history = clf.fit(Xtrain, ytrain,validation_split = 0.33,batch_size = 10,epochs = 10)

NameError: name 'clf' is not defined

SEARCH STACK OVERFLOW

print(model_history.history.keys())

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

# Plot per-epoch training accuracy against validation accuracy.
plt.plot(model_history.history['accuracy'])
plt.plot(model_history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# The second curve comes from fit()'s validation_split hold-out, not from a
# separate test set, so it is labelled accordingly.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

# Run the model on Xtest, then turn its outputs into booleans with a 0.5 cut-off.
ypred = clf.predict(Xtest)
ypred = (ypred > 0.5)

3/3 [==============================] - 0s 4ms/step

# Restored import syntax (module dot, underscores and comma were lost in the export).
from sklearn.metrics import confusion_matrix, accuracy_score

You might also like