Solution
Solution
December 3, 2024
1 BINARY CLASSIFICATION
[62]: # Importing Libraries
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
np.random.seed(37)
df = pd.read_csv('attrition_data.csv')
1
print('Dataframe shape: ', df.shape)
[5 rows x 27 columns]
df.isnull().sum()
[159]: EMP_ID 0
ANNUAL_RATE 0
HRLY_RATE 0
JOBCODE 0
ETHNICITY 0
2
SEX 0
MARITAL_STATUS 0
JOB_SATISFACTION 0
AGE 0
NUMBER_OF_TEAM_CHANGED 0
REFERRAL_SOURCE 445
HIRE_MONTH 0
REHIRE 0
TERMINATION_YEAR 5394
IS_FIRST_JOB 0
TRAVELLED_REQUIRED 0
PERFORMANCE_RATING 0
DISABLED_EMP 0
DISABLED_VET 0
EDUCATION_LEVEL 0
STATUS 0
JOB_GROUP 0
PREVYR_1 0
PREVYR_2 0
PREVYR_3 0
PREVYR_4 0
PREVYR_5 0
dtype: int64
[162]: df.head()
3
IS_FIRST_JOB TRAVELLED_REQUIRED PERFORMANCE_RATING DISABLED_EMP \
0 N N 4 N
1 Y N 3 N
2 Y N 3 N
3 N Y 2 N
4 N N 4 N
1.2 VISUALIZATIONS
[211]: sns.set(style="darkgrid")
# Bar chart of how many rows fall into each STATUS class, saved to disk and then shown.
status_palette = sns.xkcd_palette(["azure", "light red"])
ax = sns.countplot(x="STATUS", data=df, palette=status_palette)
ax.set_xlabel('Status')
ax.set_ylabel('Count')
plt.savefig('./plots/status_count.png')
plt.show()
4
[166]: fig=plt.figure(figsize=(8,4))
# Overlay one AGE density curve per STATUS value on the current figure
# ('T' / 'A' are presumably terminated / active employees - confirm against the data dictionary).
for status in ['T', 'A']:
    ages = df.loc[df['STATUS'] == status, 'AGE']
    ages.plot(kind='kde')
5
[208]: sns.countplot(x='JOB_SATISFACTION', data=df, hue='STATUS', palette=sns.
↪xkcd_palette(["aqua", "periwinkle"]))
6
[207]: sns.boxplot(x='JOB_SATISFACTION',data=df,hue='STATUS',y='AGE', palette=sns.
↪xkcd_palette(["pastel purple", "pastel yellow"]))
7
1.3 FEATURE ENGINEERING
[11]: # Label Encoding categorical features
# A single encoder is refit column by column. STATUS is deliberately last so
# that `le` is left fitted on the target labels, as in the original cell.
le = LabelEncoder()
label_encoded_cols = [
    'NUMBER_OF_TEAM_CHANGED', 'REHIRE', 'IS_FIRST_JOB', 'TRAVELLED_REQUIRED',
    'DISABLED_EMP', 'DISABLED_VET', 'EDUCATION_LEVEL', 'STATUS',
]
for col in label_encoded_cols:
    df[col] = le.fit_transform(df[col])

# Correlation heatmap over numeric/bool columns only. Columns such as SEX or
# ETHNICITY are one-hot encoded in a later cell, so any that are still text at
# this point make a bare df.corr() raise on pandas >= 2.0. Older pandas
# dropped them silently, so selecting the numeric columns explicitly yields
# the same matrix on every version.
fig, ax = plt.subplots(figsize=(15,10))
numeric_df = df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_df.corr(), annot=True, ax=ax)
plt.savefig('./plots/correlation_heatmap.png')
8
We see that HRLY_RATE and ANNUAL_RATE are perfectly correlated (correlation of 1), so we keep ANNUAL_RATE and discard HRLY_RATE.
[13]: df.drop(['HRLY_RATE'], axis=1, inplace=True)
# Nominal columns, listed in the order their dummy columns are appended.
nominal_cols = ['HIRE_MONTH', 'JOB_GROUP', 'REFERRAL_SOURCE',
                'SEX', 'MARITAL_STATUS', 'ETHNICITY']

# Mark each nominal column as categorical, then expand it into indicator columns.
for col in nominal_cols:
    df[col] = df[col].astype('category')
df = pd.get_dummies(df, columns=nominal_cols)

# Target is STATUS; every remaining column is a feature.
y = df['STATUS']
X = df.drop(columns=['STATUS'])
9
[16]: # Normalizing all the features
# Standardize every feature column; X is rebound to the transformed output.
# NOTE(review): the scaler is fit on the full feature matrix, before the
# train/test split announced in the next cell, so test-row statistics feed into
# the scaling. Consider fitting on the training rows only - confirm against the split cell.
scaler = StandardScaler()
X = scaler.fit_transform(X)
[17]: # Splitting dataset into training and testing split with 70-30% ratio
1.4 MODELLING
1.4.1 Logistic Regression
[19]: # Building our model with K-fold validation and GridSearch to find the best␣
↪parameters
# Building model
logreg = LogisticRegression(solver='liblinear')
10
error_score=nan,
estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
fit_intercept=True,
intercept_scaling=1, l1_ratio=None,
max_iter=100, multi_class='auto',
n_jobs=None, penalty='l2',
random_state=None, solver='liblinear',
tol=0.0001, verbose=0,
warm_start=False),
iid='deprecated', n_jobs=-1,
param_grid={'C': [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3,
4, 5, 6, 7, 8, 9, 10],
'penalty': ['l1', 'l2']},
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring='accuracy', verbose=1)
[21]: # Using the best parameters from the grid-search and predicting on test feature␣
↪dataset(X_test)
logreg_grid = grid.best_estimator_
y_pred = logreg_grid.predict(X_test)
11
print('Model Accuracy:', logreg_grid_score)
print('Classification Report:\n', classification_report(y_test, y_pred))
[24]: # Building our model with K-fold validation and GridSearch to find the best␣
↪parameters
# Building model
knn = KNeighborsClassifier()
12
metric_params=None, n_jobs=None,
n_neighbors=5, p=2,
weights='uniform'),
iid='deprecated', n_jobs=-1,
param_grid={'n_neighbors': [3, 5, 11, 19],
'weights': ['uniform', 'distance']},
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring='accuracy', verbose=1)
[26]: # Using the best parameters from the grid-search and predicting on test feature␣
↪dataset(X_test)
knn_grid= grid.best_estimator_
y_pred = knn_grid.predict(X_test)
13
accuracy 0.65 2884
macro avg 0.64 0.64 0.64 2884
weighted avg 0.65 0.65 0.64 2884
[29]: # Building our model with K-fold validation and GridSearch to find the best␣
↪parameters
# Building model
gb = GaussianNB()
[31]: # Using the best parameters from the grid-search and predicting on test feature␣
↪dataset(X_test)
gb_grid= grid.best_estimator_
14
y_pred = gb_grid.predict(X_test)
# Building model
svc = SVC(kernel='rbf', probability=True) ## 'rbf' stands for gaussian kernel
15
# Fitting the model
grid.fit(X_train, y_train)
[36]: # Using the best parameters from the grid-search and predicting on test feature␣
↪dataset(X_test)
svm_grid= grid.best_estimator_
y_pred = svm_grid.predict(X_test)
16
[37]: Predicted A Predicted T
Actual A 1309 316
Actual T 609 650
# Building model
dtc = DecisionTreeClassifier()
17
[Parallel(n_jobs=-1)]: Done 280 tasks | elapsed: 2.3s
[Parallel(n_jobs=-1)]: Done 540 out of 540 | elapsed: 4.1s finished
[41]: # Using the best parameters from the grid-search and predicting on test feature␣
↪dataset(X_test)
dtc_grid= grid.best_estimator_
y_pred = dtc_grid.predict(X_test)
18
[42]: # Confusion matrix
# Building model
rfc = RandomForestClassifier()
19
grid = GridSearchCV(rfc, param_grid=params, scoring='accuracy', n_jobs =-1,␣
↪cv=cv, verbose=1)
20
'min_samples_leaf': 3, 'min_samples_split': 3, 'n_estimators': 300}
Best Estimator: RandomForestClassifier(bootstrap=False, ccp_alpha=0.0,
class_weight=None,
criterion='gini', max_depth=None, max_features=10,
max_leaf_nodes=None, max_samples=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=3, min_samples_split=3,
min_weight_fraction_leaf=0.0, n_estimators=300,
n_jobs=None, oob_score=False, random_state=None,
verbose=0, warm_start=False)
[46]: # Using the best parameters from the grid-search and predicting on test feature␣
↪dataset(X_test)
rfc_grid= grid.best_estimator_
y_pred = rfc_grid.predict(X_test)
21
1.4.7 Artificial Neural Networks
[53]: # Defining our neural network model
def create_model(optimizer='adam'):
    """Build and compile the feed-forward binary classifier.

    The network is a stack of ReLU Dense layers (64, 32, 16, 8, 4 units)
    followed by a single sigmoid output unit. The input width is read from
    the notebook-level feature matrix ``X``.

    Args:
        optimizer: Optimizer passed straight through to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    model = Sequential()
    # First layer fixes the input dimension to the number of feature columns.
    model.add(Dense(64, input_dim=X.shape[1], activation='relu'))
    # Remaining hidden layers halve in width each step.
    for width in (32, 16, 8, 4):
        model.add(Dense(width, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return model
[54]: # Building our model with K-fold validation and GridSearch to find the best␣
↪parameters
# Building model
nn = KerasClassifier(build_fn=create_model)
22
6728/6728 [==============================] - 3s 492us/sample - loss: 0.5849 -
accuracy: 0.6794
Epoch 3/200
6728/6728 [==============================] - 3s 415us/sample - loss: 0.5567 -
accuracy: 0.7060
Epoch 4/200
6728/6728 [==============================] - 3s 393us/sample - loss: 0.5382 -
accuracy: 0.7265 - loss: 0.542
Epoch 5/200
6728/6728 [==============================] - 3s 403us/sample - loss: 0.5150 -
accuracy: 0.7438
Epoch 6/200
6728/6728 [==============================] - 3s 403us/sample - loss: 0.4840 -
accuracy: 0.7661
Epoch 7/200
6728/6728 [==============================] - 3s 425us/sample - loss: 0.4540 -
accuracy: 0.7814
Epoch 8/200
6728/6728 [==============================] - 3s 458us/sample - loss: 0.4238 -
accuracy: 0.7995
Epoch 9/200
6728/6728 [==============================] - 3s 445us/sample - loss: 0.3966 -
accuracy: 0.8194
Epoch 10/200
6728/6728 [==============================] - 3s 403us/sample - loss: 0.3569 -
accuracy: 0.8396
Epoch 11/200
6728/6728 [==============================] - 3s 394us/sample - loss: 0.3430 -
accuracy: 0.8465
Epoch 12/200
6728/6728 [==============================] - 3s 402us/sample - loss: 0.3190 -
accuracy: 0.8558
Epoch 13/200
6728/6728 [==============================] - 3s 407us/sample - loss: 0.2964 -
accuracy: 0.8680 - loss: 0.2926 - ac
Epoch 14/200
6728/6728 [==============================] - 3s 405us/sample - loss: 0.2785 -
accuracy: 0.8757
Epoch 15/200
6728/6728 [==============================] - 3s 395us/sample - loss: 0.2645 -
accuracy: 0.8841
Epoch 16/200
6728/6728 [==============================] - 3s 462us/sample - loss: 0.2387 -
accuracy: 0.8957
Epoch 17/200
6728/6728 [==============================] - 6s 850us/sample - loss: 0.2291 -
accuracy: 0.8994
Epoch 18/200
23
6728/6728 [==============================] - 4s 600us/sample - loss: 0.2272 -
accuracy: 0.8971
Epoch 19/200
6728/6728 [==============================] - 3s 470us/sample - loss: 0.2058 -
accuracy: 0.9098 - ETA: 0s - loss: 0.2040 - accu
Epoch 20/200
6728/6728 [==============================] - 3s 491us/sample - loss: 0.1973 -
accuracy: 0.9154
Epoch 21/200
6728/6728 [==============================] - 3s 477us/sample - loss: 0.1977 -
accuracy: 0.9147 - loss: 0.151 - ETA: 1s
Epoch 22/200
6728/6728 [==============================] - 4s 640us/sample - loss: 0.1887 -
accuracy: 0.9165
Epoch 23/200
6728/6728 [==============================] - 3s 404us/sample - loss: 0.1813 -
accuracy: 0.9220
Epoch 24/200
6728/6728 [==============================] - 3s 465us/sample - loss: 0.1764 -
accuracy: 0.9239 - loss: 0.1689
Epoch 25/200
6728/6728 [==============================] - 5s 810us/sample - loss: 0.1714 -
accuracy: 0.9232
Epoch 26/200
6728/6728 [==============================] - 4s 543us/sample - loss: 0.1579 -
accuracy: 0.9304
Epoch 27/200
6728/6728 [==============================] - 4s 554us/sample - loss: 0.1702 -
accuracy: 0.9255
Epoch 28/200
6728/6728 [==============================] - 7s 1ms/sample - loss: 0.1614 -
accuracy: 0.9288
Epoch 29/200
6728/6728 [==============================] - 4s 590us/sample - loss: 0.1584 -
accuracy: 0.9287 - loss: 0.1573 - accuracy
Epoch 30/200
6728/6728 [==============================] - 3s 483us/sample - loss: 0.1485 -
accuracy: 0.9349
Epoch 31/200
6728/6728 [==============================] - 6s 850us/sample - loss: 0.1302 -
accuracy: 0.9383
Epoch 32/200
6728/6728 [==============================] - 4s 604us/sample - loss: 0.1398 -
accuracy: 0.9364
Epoch 33/200
6728/6728 [==============================] - 4s 624us/sample - loss: 0.1445 -
accuracy: 0.9322
Epoch 34/200
24
6728/6728 [==============================] - 4s 540us/sample - loss: 0.1472 -
accuracy: 0.9342
Epoch 35/200
6728/6728 [==============================] - 3s 448us/sample - loss: 0.1289 -
accuracy: 0.9410
Epoch 36/200
6728/6728 [==============================] - 5s 723us/sample - loss: 0.1243 -
accuracy: 0.9435
Epoch 37/200
6728/6728 [==============================] - 4s 537us/sample - loss: 0.1274 -
accuracy: 0.9394
Epoch 38/200
6728/6728 [==============================] - 3s 491us/sample - loss: 0.1331 -
accuracy: 0.9404
Epoch 39/200
6728/6728 [==============================] - 3s 403us/sample - loss: 0.1216 -
accuracy: 0.9431 - loss:
Epoch 40/200
6728/6728 [==============================] - 4s 531us/sample - loss: 0.1072 -
accuracy: 0.9471
Epoch 41/200
6728/6728 [==============================] - 4s 594us/sample - loss: 0.1289 -
accuracy: 0.9460 -
Epoch 42/200
6728/6728 [==============================] - 3s 461us/sample - loss: 0.1174 -
accuracy: 0.9556 - loss: 0.1113 - accura
Epoch 43/200
6728/6728 [==============================] - 3s 429us/sample - loss: 0.1092 -
accuracy: 0.9615
Epoch 44/200
6728/6728 [==============================] - 3s 424us/sample - loss: 0.0896 -
accuracy: 0.9669
Epoch 45/200
6728/6728 [==============================] - 3s 417us/sample - loss: 0.1343 -
accuracy: 0.9550
Epoch 46/200
6728/6728 [==============================] - 3s 426us/sample - loss: 0.0983 -
accuracy: 0.9646
Epoch 47/200
6728/6728 [==============================] - 3s 418us/sample - loss: 0.0852 -
accuracy: 0.9718
Epoch 48/200
6728/6728 [==============================] - 3s 409us/sample - loss: 0.0904 -
accuracy: 0.9694
Epoch 49/200
6728/6728 [==============================] - 4s 593us/sample - loss: 0.0851 -
accuracy: 0.9695 - loss: 0
Epoch 50/200
25
6728/6728 [==============================] - 4s 626us/sample - loss: 0.1118 -
accuracy: 0.9600
Epoch 51/200
6728/6728 [==============================] - 3s 428us/sample - loss: 0.0906 -
accuracy: 0.9691
Epoch 52/200
6728/6728 [==============================] - 3s 410us/sample - loss: 0.0775 -
accuracy: 0.9740
Epoch 53/200
6728/6728 [==============================] - 3s 407us/sample - loss: 0.0877 -
accuracy: 0.9697 - loss: 0.0782
Epoch 54/200
6728/6728 [==============================] - 3s 420us/sample - loss: 0.0911 -
accuracy: 0.9667 - loss: 0.0698 - accuracy - ETA: 2s - loss: 0 - ETA: 1s - loss:
0.081 - E - ETA: 0s - loss: 0.0927 - accuracy: 0.
Epoch 55/200
6728/6728 [==============================] - 3s 449us/sample - loss: 0.0729 -
accuracy: 0.9749
Epoch 56/200
6728/6728 [==============================] - 4s 554us/sample - loss: 0.0752 -
accuracy: 0.9749 - loss: 0.055 - ETA: 2s - los - ETA: 1s - loss: 0.0 - ETA:
Epoch 57/200
6728/6728 [==============================] - 3s 438us/sample - loss: 0.0780 -
accuracy: 0.9728 - ETA: 1s - loss: 0.0691
Epoch 58/200
6728/6728 [==============================] - 3s 401us/sample - loss: 0.0929 -
accuracy: 0.9688
Epoch 59/200
6728/6728 [==============================] - 3s 410us/sample - loss: 0.0818 -
accuracy: 0.9718
Epoch 60/200
6728/6728 [==============================] - 3s 400us/sample - loss: 0.0631 -
accuracy: 0.9802
Epoch 61/200
6728/6728 [==============================] - 3s 405us/sample - loss: 0.0811 -
accuracy: 0.9727 - loss: 0.0812 - accuracy: 0.97
Epoch 62/200
6728/6728 [==============================] - 3s 423us/sample - loss: 0.0661 -
accuracy: 0.9782
Epoch 63/200
6728/6728 [==============================] - 3s 399us/sample - loss: 0.0590 -
accuracy: 0.9790 - l
Epoch 64/200
6728/6728 [==============================] - 3s 399us/sample - loss: 0.0756 -
accuracy: 0.9731 - loss: 0.0
Epoch 65/200
6728/6728 [==============================] - 3s 410us/sample - loss: 0.0725 -
accuracy: 0.9768 - loss: 0.0749 - accuracy: 0. - ETA:
26
Epoch 66/200
6728/6728 [==============================] - 3s 445us/sample - loss: 0.0720 -
accuracy: 0.9746
Epoch 67/200
6728/6728 [==============================] - 3s 408us/sample - loss: 0.0666 -
accuracy: 0.9787 - loss: 0.0620 - accu
Epoch 68/200
6728/6728 [==============================] - 3s 431us/sample - loss: 0.0582 -
accuracy: 0.9804
Epoch 69/200
6728/6728 [==============================] - 3s 458us/sample - loss: 0.0618 -
accuracy: 0.9811
Epoch 70/200
6728/6728 [==============================] - 3s 422us/sample - loss: 0.0572 -
accuracy: 0.9819 - loss: 0.0404 - ETA: -
Epoch 71/200
6728/6728 [==============================] - 3s 447us/sample - loss: 0.0642 -
accuracy: 0.9798
Epoch 72/200
6728/6728 [==============================] - 3s 489us/sample - loss: 0.0562 -
accuracy: 0.9802
Epoch 73/200
6728/6728 [==============================] - 3s 490us/sample - loss: 0.0744 -
accuracy: 0.9774
Epoch 74/200
6728/6728 [==============================] - 3s 411us/sample - loss: 0.0603 -
accuracy: 0.9807
Epoch 75/200
6728/6728 [==============================] - 3s 407us/sample - loss: 0.0545 -
accuracy: 0.9816 - loss: 0.0476 - accuracy: 0.98 - ETA
Epoch 76/200
6728/6728 [==============================] - 3s 403us/sample - loss: 0.0528 -
accuracy: 0.9854TA: 0s - loss: 0.0526 - accuracy
Epoch 77/200
6728/6728 [==============================] - 3s 409us/sample - loss: 0.0536 -
accuracy: 0.9807 - ETA: 0s - loss: 0.0508
Epoch 78/200
6728/6728 [==============================] - 3s 441us/sample - loss: 0.0680 -
accuracy: 0.9773
Epoch 79/200
6728/6728 [==============================] - 3s 422us/sample - loss: 0.0566 -
accuracy: 0.9811
Epoch 80/200
6728/6728 [==============================] - 3s 414us/sample - loss: 0.0464 -
accuracy: 0.9847
Epoch 81/200
6728/6728 [==============================] - 3s 403us/sample - loss: 0.0534 -
accuracy: 0.9835
27
Epoch 82/200
6728/6728 [==============================] - 3s 413us/sample - loss: 0.0485 -
accuracy: 0.9839
Epoch 83/200
6728/6728 [==============================] - 3s 401us/sample - loss: 0.0610 -
accuracy: 0.9796 - loss: 0.0614 - accuracy: 0.
Epoch 84/200
6728/6728 [==============================] - 3s 394us/sample - loss: 0.0471 -
accuracy: 0.9845
Epoch 85/200
6728/6728 [==============================] - 3s 388us/sample - loss: 0.0650 -
accuracy: 0.9795
Epoch 86/200
6728/6728 [==============================] - 3s 391us/sample - loss: 0.0589 -
accuracy: 0.9804
Epoch 87/200
6728/6728 [==============================] - 3s 386us/sample - loss: 0.0483 -
accuracy: 0.9851 - loss: 0.0481 - accuracy:
Epoch 88/200
6728/6728 [==============================] - 3s 398us/sample - loss: 0.0560 -
accuracy: 0.9819 - los - ETA: 0s - loss: 0.0560 - accuracy:
Epoch 89/200
6728/6728 [==============================] - 3s 438us/sample - loss: 0.0460 -
accuracy: 0.9874
Epoch 90/200
6728/6728 [==============================] - 3s 398us/sample - loss: 0.0511 -
accuracy: 0.9859
Epoch 91/200
6728/6728 [==============================] - 3s 469us/sample - loss: 0.0611 -
accuracy: 0.9816
Epoch 92/200
6728/6728 [==============================] - 5s 679us/sample - loss: 0.0345 -
accuracy: 0.9897
Epoch 93/200
6728/6728 [==============================] - 3s 508us/sample - loss: 0.0365 -
accuracy: 0.9877
Epoch 94/200
6728/6728 [==============================] - 3s 513us/sample - loss: 0.0543 -
accuracy: 0.9823 - loss: 0.0510
Epoch 95/200
6728/6728 [==============================] - 3s 402us/sample - loss: 0.0538 -
accuracy: 0.9838 - loss: 0.0536 - accura
Epoch 96/200
6728/6728 [==============================] - 3s 417us/sample - loss: 0.0376 -
accuracy: 0.9878 -
Epoch 97/200
6728/6728 [==============================] - 3s 401us/sample - loss: 0.0515 -
accuracy: 0.9841
28
Epoch 98/200
6728/6728 [==============================] - 3s 393us/sample - loss: 0.0460 -
accuracy: 0.9863 ETA: 0s - loss: 0.0455 - accuracy: 0.98
Epoch 99/200
6728/6728 [==============================] - 3s 401us/sample - loss: 0.0396 -
accuracy: 0.9872 - loss: 0.0391 - accuracy: 0.
Epoch 100/200
6728/6728 [==============================] - 3s 392us/sample - loss: 0.0472 -
accuracy: 0.9866 - loss: 0
Epoch 101/200
6728/6728 [==============================] - 3s 388us/sample - loss: 0.0507 -
accuracy: 0.9845
Epoch 102/200
6728/6728 [==============================] - 3s 389us/sample - loss: 0.0393 -
accuracy: 0.9877 - loss: 0.0334
Epoch 103/200
6728/6728 [==============================] - 3s 404us/sample - loss: 0.0324 -
accuracy: 0.9911
Epoch 104/200
6728/6728 [==============================] - 3s 390us/sample - loss: 0.0532 -
accuracy: 0.9828
Epoch 105/200
6728/6728 [==============================] - 3s 393us/sample - loss: 0.0432 -
accuracy: 0.9865 - loss: 0.0410 - accu
Epoch 106/200
6728/6728 [==============================] - 3s 379us/sample - loss: 0.0435 -
accuracy: 0.9868 - loss: 0
Epoch 107/200
6728/6728 [==============================] - 3s 400us/sample - loss: 0.0386 -
accuracy: 0.9871
Epoch 108/200
6728/6728 [==============================] - 3s 396us/sample - loss: 0.0452 -
accuracy: 0.9866
Epoch 109/200
6728/6728 [==============================] - 3s 397us/sample - loss: 0.0392 -
accuracy: 0.9869 - loss: 0.0250 -
Epoch 110/200
6728/6728 [==============================] - 3s 398us/sample - loss: 0.0470 -
accuracy: 0.9845 - loss: - ETA: 0s - loss: 0.0524 - accuracy - ETA: 0s - l
Epoch 111/200
6728/6728 [==============================] - 3s 427us/sample - loss: 0.0421 -
accuracy: 0.9874
Epoch 112/200
6728/6728 [==============================] - 3s 413us/sample - loss: 0.0350 -
accuracy: 0.9877 - - ETA: 1s - loss: 0.0 -
Epoch 113/200
6728/6728 [==============================] - 3s 444us/sample - loss: 0.0289 -
accuracy: 0.9909
29
Epoch 114/200
6728/6728 [==============================] - 6s 843us/sample - loss: 0.0405 -
accuracy: 0.9871
Epoch 115/200
6728/6728 [==============================] - 4s 529us/sample - loss: 0.0599 -
accuracy: 0.9817
Epoch 116/200
6728/6728 [==============================] - 3s 410us/sample - loss: 0.0231 -
accuracy: 0.9933
Epoch 117/200
6728/6728 [==============================] - 3s 390us/sample - loss: 0.0359 -
accuracy: 0.9884
Epoch 118/200
6728/6728 [==============================] - 3s 393us/sample - loss: 0.0589 -
accuracy: 0.9813
Epoch 119/200
6728/6728 [==============================] - 3s 403us/sample - loss: 0.0389 -
accuracy: 0.9884 - loss: - ETA: 0s -
Epoch 120/200
6728/6728 [==============================] - 4s 555us/sample - loss: 0.0322 -
accuracy: 0.9893 - l
Epoch 121/200
6728/6728 [==============================] - 3s 503us/sample - loss: 0.0380 -
accuracy: 0.9887TA: 1s - loss: 0.0323 - accu
Epoch 122/200
6728/6728 [==============================] - 3s 449us/sample - loss: 0.0343 -
accuracy: 0.9886
Epoch 123/200
6728/6728 [==============================] - 3s 430us/sample - loss: 0.0319 -
accuracy: 0.9903 - loss: 0.0240 - ac - E - ETA - ETA: 0s - loss: 0.0291 - accu
Epoch 124/200
6728/6728 [==============================] - 6s 957us/sample - loss: 0.0414 -
accuracy: 0.9862
Epoch 125/200
6728/6728 [==============================] - 3s 444us/sample - loss: 0.0346 -
accuracy: 0.9877
Epoch 126/200
6728/6728 [==============================] - 3s 458us/sample - loss: 0.0335 -
accuracy: 0.9883
Epoch 127/200
6728/6728 [==============================] - 3s 445us/sample - loss: 0.0304 -
accuracy: 0.9903: 0s - loss: 0.0278 - - ETA: 0s - loss: 0.0280 - ac
Epoch 128/200
6728/6728 [==============================] - 3s 486us/sample - loss: 0.0340 -
accuracy: 0.9897
Epoch 129/200
6728/6728 [==============================] - 3s 463us/sample - loss: 0.0468 -
accuracy: 0.9859
30
Epoch 130/200
6728/6728 [==============================] - 3s 414us/sample - loss: 0.0419 -
accuracy: 0.9872
Epoch 131/200
6728/6728 [==============================] - 4s 525us/sample - loss: 0.0316 -
accuracy: 0.9903
Epoch 132/200
6728/6728 [==============================] - 4s 621us/sample - loss: 0.0296 -
accuracy: 0.9914
Epoch 133/200
6728/6728 [==============================] - 5s 750us/sample - loss: 0.0264 -
accuracy: 0.9921
Epoch 134/200
6728/6728 [==============================] - 5s 775us/sample - loss: 0.0369 -
accuracy: 0.9897
Epoch 135/200
6728/6728 [==============================] - 5s 790us/sample - loss: 0.0464 -
accuracy: 0.9865 - loss: 0.0
Epoch 136/200
6728/6728 [==============================] - 5s 696us/sample - loss: 0.0298 -
accuracy: 0.9914
Epoch 137/200
6728/6728 [==============================] - 5s 788us/sample - loss: 0.0273 -
accuracy: 0.9905
Epoch 138/200
6728/6728 [==============================] - 4s 616us/sample - loss: 0.0337 -
accuracy: 0.9893 - loss: 0.0333 - accu
Epoch 139/200
6728/6728 [==============================] - 5s 757us/sample - loss: 0.0407 -
accuracy: 0.9878 - loss: 0.0424 - ac
Epoch 140/200
6728/6728 [==============================] - 3s 404us/sample - loss: 0.0387 -
accuracy: 0.9889:
Epoch 141/200
6728/6728 [==============================] - 3s 390us/sample - loss: 0.0297 -
accuracy: 0.9923
Epoch 142/200
6728/6728 [==============================] - 5s 723us/sample - loss: 0.0249 -
accuracy: 0.9918
Epoch 143/200
6728/6728 [==============================] - 5s 783us/sample - loss: 0.0184 -
accuracy: 0.9948
Epoch 144/200
6728/6728 [==============================] - 5s 754us/sample - loss: 0.0433 -
accuracy: 0.9856
Epoch 145/200
6728/6728 [==============================] - 5s 730us/sample - loss: 0.0369 -
accuracy: 0.9877
31
Epoch 146/200
6728/6728 [==============================] - 5s 734us/sample - loss: 0.0209 -
accuracy: 0.9941
Epoch 147/200
6728/6728 [==============================] - 4s 634us/sample - loss: 0.0335 -
accuracy: 0.9894
Epoch 148/200
6728/6728 [==============================] - 2s 357us/sample - loss: 0.0444 -
accuracy: 0.9860
Epoch 149/200
6728/6728 [==============================] - 5s 696us/sample - loss: 0.0321 -
accuracy: 0.9924
Epoch 150/200
6728/6728 [==============================] - 3s 429us/sample - loss: 0.0264 -
accuracy: 0.9917 - - ETA: - ETA: 0s - loss:
Epoch 151/200
6728/6728 [==============================] - 3s 472us/sample - loss: 0.0329 -
accuracy: 0.9893 - loss: 0.0302 - accura
Epoch 152/200
6728/6728 [==============================] - 6s 923us/sample - loss: 0.0391 -
accuracy: 0.9891
Epoch 153/200
6728/6728 [==============================] - 6s 842us/sample - loss: 0.0222 -
accuracy: 0.9941
Epoch 154/200
6728/6728 [==============================] - 5s 777us/sample - loss: 0.0229 -
accuracy: 0.9929
Epoch 155/200
6728/6728 [==============================] - 3s 393us/sample - loss: 0.0405 -
accuracy: 0.9896
Epoch 156/200
6728/6728 [==============================] - 5s 694us/sample - loss: 0.0355 -
accuracy: 0.9890
Epoch 157/200
6728/6728 [==============================] - 5s 692us/sample - loss: 0.0293 -
accuracy: 0.9918
Epoch 158/200
6728/6728 [==============================] - 8s 1ms/sample - loss: 0.0207 -
accuracy: 0.99420s - loss: 0.021
Epoch 159/200
6728/6728 [==============================] - 3s 490us/sample - loss: 0.0352 -
accuracy: 0.9890
Epoch 160/200
6728/6728 [==============================] - 3s 407us/sample - loss: 0.0393 -
accuracy: 0.9889 - l
Epoch 161/200
6728/6728 [==============================] - 3s 377us/sample - loss: 0.0141 -
accuracy: 0.9958 - loss: 0.0
32
Epoch 162/200
6728/6728 [==============================] - 3s 421us/sample - loss: 0.0296 -
accuracy: 0.9902
Epoch 163/200
6728/6728 [==============================] - 2s 354us/sample - loss: 0.0278 -
accuracy: 0.9915 - loss: 0.0200 - accuracy: 0. - ETA: 0s - loss: 0.0249 - ac
Epoch 164/200
6728/6728 [==============================] - 6s 917us/sample - loss: 0.0437 -
accuracy: 0.9880
Epoch 165/200
6728/6728 [==============================] - 3s 509us/sample - loss: 0.0273 -
accuracy: 0.9914
Epoch 166/200
6728/6728 [==============================] - 3s 487us/sample - loss: 0.0166 -
accuracy: 0.9951
Epoch 167/200
6728/6728 [==============================] - 3s 480us/sample - loss: 0.0273 -
accuracy: 0.9911
Epoch 168/200
6728/6728 [==============================] - 3s 468us/sample - loss: 0.0303 -
accuracy: 0.9902
Epoch 169/200
6728/6728 [==============================] - 3s 497us/sample - loss: 0.0243 -
accuracy: 0.9923
Epoch 170/200
6728/6728 [==============================] - 3s 446us/sample - loss: 0.0326 -
accuracy: 0.9899
Epoch 171/200
6728/6728 [==============================] - 3s 510us/sample - loss: 0.0355 -
accuracy: 0.9902
Epoch 172/200
6728/6728 [==============================] - 4s 589us/sample - loss: 0.0270 -
accuracy: 0.9917
Epoch 173/200
6728/6728 [==============================] - 4s 522us/sample - loss: 0.0231 -
accuracy: 0.9932
Epoch 174/200
6728/6728 [==============================] - 4s 629us/sample - loss: 0.0221 -
accuracy: 0.9939
Epoch 175/200
6728/6728 [==============================] - 3s 470us/sample - loss: 0.0263 -
accuracy: 0.9924
Epoch 176/200
6728/6728 [==============================] - 3s 499us/sample - loss: 0.0223 -
accuracy: 0.9938
Epoch 177/200
6728/6728 [==============================] - 5s 672us/sample - loss: 0.0689 -
accuracy: 0.9884
33
Epoch 178/200
6728/6728 [==============================] - 3s 431us/sample - loss: 0.0357 -
accuracy: 0.9893
Epoch 179/200
6728/6728 [==============================] - 3s 398us/sample - loss: 0.0160 -
accuracy: 0.9975 - loss: 0.0167 - accura
Epoch 180/200
6728/6728 [==============================] - 3s 395us/sample - loss: 0.0066 -
accuracy: 0.9990
Epoch 181/200
6728/6728 [==============================] - 3s 407us/sample - loss: 0.0363 -
accuracy: 0.9899
Epoch 182/200
6728/6728 [==============================] - 3s 392us/sample - loss: 0.0403 -
accuracy: 0.9871
Epoch 183/200
6728/6728 [==============================] - 3s 398us/sample - loss: 0.0154 -
accuracy: 0.9957
Epoch 184/200
6728/6728 [==============================] - 3s 398us/sample - loss: 0.0225 -
accuracy: 0.9933
Epoch 185/200
6728/6728 [==============================] - 3s 403us/sample - loss: 0.0240 -
accuracy: 0.9920
Epoch 186/200
6728/6728 [==============================] - 3s 398us/sample - loss: 0.0260 -
accuracy: 0.9927
Epoch 187/200
6728/6728 [==============================] - 3s 400us/sample - loss: 0.0404 -
accuracy: 0.9896
Epoch 188/200
6728/6728 [==============================] - 3s 399us/sample - loss: 0.0164 -
accuracy: 0.9957
Epoch 189/200
6728/6728 [==============================] - 3s 407us/sample - loss: 0.0233 -
accuracy: 0.9930
Epoch 190/200
6728/6728 [==============================] - 3s 401us/sample - loss: 0.0288 -
accuracy: 0.9906
Epoch 191/200
6728/6728 [==============================] - 3s 401us/sample - loss: 0.0374 -
accuracy: 0.9902
Epoch 192/200
6728/6728 [==============================] - 3s 394us/sample - loss: 0.0162 -
accuracy: 0.9948
Epoch 193/200
6728/6728 [==============================] - 3s 395us/sample - loss: 0.0153 -
accuracy: 0.9957
34
Epoch 194/200
6728/6728 [==============================] - 3s 403us/sample - loss: 0.0129 -
accuracy: 0.9961
Epoch 195/200
6728/6728 [==============================] - 3s 401us/sample - loss: 0.0460 -
accuracy: 0.9857
Epoch 196/200
6728/6728 [==============================] - 3s 440us/sample - loss: 0.0276 -
accuracy: 0.9927
Epoch 197/200
6728/6728 [==============================] - 3s 389us/sample - loss: 0.0212 -
accuracy: 0.9944
Epoch 198/200
6728/6728 [==============================] - 3s 410us/sample - loss: 0.0215 -
accuracy: 0.9933
Epoch 199/200
6728/6728 [==============================] - 3s 399us/sample - loss: 0.0295 -
accuracy: 0.9912
Epoch 200/200
6728/6728 [==============================] - 3s 400us/sample - loss: 0.0173 -
accuracy: 0.9958
[56]: # Using the best parameters from the grid-search and predicting on test feature␣
↪dataset(X_test)
nn_grid= grid.best_estimator_
35
y_pred = nn_grid.predict(X_test)
# Building model
xgb = XGBClassifier(objective='binary:logistic')
36
# Fitting the model
grid.fit(X_train, y_train)
37
[65]: # Using the best parameters from the grid-search and predicting on test feature␣
↪dataset(X_test)
xgb_grid= grid.best_estimator_
y_pred = xgb_grid.predict(X_test)
1.5 RESULTS
[73]: score_df = pd.DataFrame(
[
['Logistic Regression', logreg_grid_score, 0.6857355126300149],
['K-Nearest Neighbors', knn_grid_score, 0.6381376919266962],
['Gaussian Naïve Bayes', gb_grid_score, 0.5548291233283804],
['Support Vector Machines', svm_grid_score, 0.6701337295690937],
['Decision Tree Classifier', dtc_grid_score, 0.6505695889053987],
['Random Forest Tree Classifier', rfc_grid_score, 0.7094105993065873],
['Artificial Neural Networks', nn_grid_score, 0.6248637939574045],
['GBM - XGBoost', xgb_grid_score, 0.7132738979692917],
],
columns= ['Model', 'Test Score', 'Validation Score']
38
)
# Express both score columns as percentages.
for score_col in ('Test Score', 'Validation Score'):
    score_df[score_col] = score_df[score_col] * 100
[74]: score_df
plt.ylim(20, 90)
plt.xticks(rotation=45, horizontalalignment="right")
# Remove the spines before writing the file; previously despine ran after
# savefig, so it only affected the inline display and not the saved PNG.
# NOTE(review): assumes `fig` is the current figure being saved - confirm.
sns.despine(fig)
plt.savefig('./plots/result.png')
39
[128]: time_df = pd.DataFrame(
[
['Logistic Regression', 1.2],
['K-Nearest Neighbors', 1.0],
['Gaussian Naïve Bayes', 0.0034],
['Support Vector Machines', 51.7],
['Decision Tree Classifier', 0.068],
['Random Forest Tree Classifier', 15.1],
['Artificial Neural Networks', 454.2],
['GBM - XGBoost', 40.8],
],
columns= ['Model', 'Training Time']
)
plt.xticks(rotation=45, horizontalalignment="right")
plt.ylabel('Training Time(in mins)')
# Remove the spines before writing the file; previously despine ran after
# savefig, so it only affected the inline display and not the saved PNG.
# NOTE(review): assumes `fig` is the current figure being saved - confirm.
sns.despine(fig)
plt.savefig('./plots/training_time.png')
40
41