Let's get familiar with using SVMs, Random Forests and
KNN Classifiers in Python with sklearn
import numpy as np
import pandas as pd
# Read the heights/weights CSV into a DataFrame
file_name = "/content/heights_weights.csv"
df = pd.read_csv(filepath_or_buffer=file_name)
# Preview the first five rows
df.head(5)
Height Weight Male
0 73.847017 241.893563 1
1 68.781904 162.310473 1
2 74.110105 212.740856 1
3 71.730978 220.042470 1
4 69.881796 206.349801 1
# Scatter plot of Weight against Height, coloured by the Male label
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme before drawing
sns.set()
ax = sns.scatterplot(
    data=df,
    x="Height",
    y="Weight",
    hue="Male",
)
Get our data ready and prepared for training in sklearn
# Extract the columns we'll use for our data:
# the first two columns (Height, Weight) are the features,
# the third column (Male) is the label we want to predict
x = df.iloc[:, 0:2].values
y = df.iloc[:, 2].values
# Split data into our test and training datasets (30% held out for testing);
# random_state is fixed so the same split is produced on every run
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)
# NOTE: no prediction is made here; no model has been fitted yet. Each
# classifier below calls predict(X_test) itself after it has been trained.
# Import our model and performance assessment classes from sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
Logistic Regression Performance
# Fit (train) the Logistic Regression classifier
logreg_clf = LogisticRegression()
logreg_model = logreg_clf.fit(X_train, Y_train)
# Predict the labels of the held-out test set
logreg_prediction = logreg_clf.predict(X_test)
# sklearn metrics expect (y_true, y_pred) in that order. Accuracy is the same
# either way, but swapping the arguments transposes the confusion matrix and
# exchanges precision with recall in the classification report.
print("Accuracy {0:.2f}%".format(100*accuracy_score(Y_test, logreg_prediction)))
# Display the Confusion Matrix and Classification Report
# (confusion matrix rows = true labels, columns = predicted labels)
print(confusion_matrix(Y_test, logreg_prediction))
print(classification_report(Y_test, logreg_prediction))
Accuracy 91.87%
[[1385 140]
[ 104 1371]]
precision recall f1-score support
0 0.93 0.91 0.92 1525
1 0.91 0.93 0.92 1475
accuracy 0.92 3000
macro avg 0.92 0.92 0.92 3000
weighted avg 0.92 0.92 0.92 3000
Random Forest Performance
# Fit (train) the Random Forest classifier
ranfor_clf = RandomForestClassifier()
ranfor_model = ranfor_clf.fit(X_train, Y_train)
# Predict the labels of the held-out test set
ranfor_prediction = ranfor_clf.predict(X_test)
# sklearn metrics expect (y_true, y_pred) in that order. Accuracy is the same
# either way, but swapping the arguments transposes the confusion matrix and
# exchanges precision with recall in the classification report.
print("Accuracy {0:.2f}%".format(100*accuracy_score(Y_test, ranfor_prediction)))
# Display the Confusion Matrix and Classification Report
# (confusion matrix rows = true labels, columns = predicted labels)
print(confusion_matrix(Y_test, ranfor_prediction))
print(classification_report(Y_test, ranfor_prediction))
Accuracy 90.23%
[[1373 177]
[ 116 1334]]
precision recall f1-score support
0 0.92 0.89 0.90 1550
1 0.88 0.92 0.90 1450
accuracy 0.90 3000
macro avg 0.90 0.90 0.90 3000
weighted avg 0.90 0.90 0.90 3000
Support Vector Machine Performance
# Fit (train) the Support Vector Machine classifier
svm_clf = SVC()
svm_model = svm_clf.fit(X_train, Y_train)
# Predict the labels of the held-out test set
svm_prediction = svm_clf.predict(X_test)
# sklearn metrics expect (y_true, y_pred) in that order. Accuracy is the same
# either way, but swapping the arguments transposes the confusion matrix and
# exchanges precision with recall in the classification report.
print("Accuracy {0:.2f}%".format(100*accuracy_score(Y_test, svm_prediction)))
# Display the Confusion Matrix and Classification Report
# (confusion matrix rows = true labels, columns = predicted labels)
print(confusion_matrix(Y_test, svm_prediction))
print(classification_report(Y_test, svm_prediction))
Accuracy 91.40%
[[1382 151]
[ 107 1360]]
precision recall f1-score support
0 0.93 0.90 0.91 1533
1 0.90 0.93 0.91 1467
accuracy 0.91 3000
macro avg 0.91 0.91 0.91 3000
weighted avg 0.91 0.91 0.91 3000
KNN Classifier Performance
# Fit (train) the KNN classifier
knn_clf = KNeighborsClassifier()
knn_model = knn_clf.fit(X_train, Y_train)
# Predict the labels of the held-out test set
knn_prediction = knn_clf.predict(X_test)
# sklearn metrics expect (y_true, y_pred) in that order. Accuracy is the same
# either way, but swapping the arguments transposes the confusion matrix and
# exchanges precision with recall in the classification report.
print("Accuracy {0:.2f}%".format(100*accuracy_score(Y_test, knn_prediction)))
# Display the Confusion Matrix and Classification Report
# (confusion matrix rows = true labels, columns = predicted labels)
print(confusion_matrix(Y_test, knn_prediction))
print(classification_report(Y_test, knn_prediction))
Accuracy 90.27%
[[1367 170]
[ 122 1341]]
precision recall f1-score support
0 0.92 0.89 0.90 1537
1 0.89 0.92 0.90 1463
accuracy 0.90 3000
macro avg 0.90 0.90 0.90 3000
weighted avg 0.90 0.90 0.90 3000