Loading the Dataset
In [1]: import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn import preprocessing
# Read emails.csv into the DataFrame `df` used by every later cell.
In [2]: df = pd.read_csv('emails.csv')
# Print the frame summary (row count, column count, dtypes, memory usage).
In [3]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5172 entries, 0 to 5171
Columns: 3002 entries, Email No. to Prediction
dtypes: int64(3001), object(1)
memory usage: 118.5+ MB
In [4]: df.head()
Out[4]:
Email No. the to ect and for of a you hou ... connevey jay valued lay infrastructure military
0 Email 1 0 0 1 0 0 0 2 0 0 ... 0 0 0 0 0
1 Email 2 8 13 24 6 6 2 102 1 27 ... 0 0 0 0 0
2 Email 3 0 0 1 0 0 0 8 0 0 ... 0 0 0 0 0
3 Email 4 0 5 22 0 5 1 51 2 10 ... 0 0 0 0 0
4 Email 5 7 6 17 1 5 2 57 0 9 ... 0 0 0 0 0
Cleaning
# Remove the 'Email No.' column from `df` in place; the head() output shows it
# holds labels such as "Email 1" (presumably a row identifier, not a feature).
In [6]: df.drop(columns=['Email No.'], inplace=True)
# Count missing values per column.
In [7]: df.isna().sum()
Out[7]: the 0
to 0
ect 0
and 0
for 0
..
military 0
allowing 0
ff 0
dry 0
Prediction 0
Length: 3001, dtype: int64
In [8]: df.describe()
Out[8]:
the to ect and for of a
1. K-Nearest Neighbors
2. Linear SVM
3. Polynomial SVM
4. RBF SVM
5. Sigmoid SVM
In [11]: models = {
    # Maps a display name to an unfitted scikit-learn classifier, one entry
    # per model in the numbered list above (K-NN plus four SVM variants).
    # random_state=8 is passed to every SVM so repeated runs are reproducible.
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=2),
    # max_iter is raised far above the library default, presumably to avoid
    # convergence warnings on this data -- confirm.
    "Linear SVM": LinearSVC(random_state=8, max_iter=900000),
    # Key spelling corrected (was "Polynomical SVM") to match the model list.
    "Polynomial SVM": SVC(kernel="poly", degree=2, random_state=8),
    "RBF SVM": SVC(kernel="rbf", random_state=8),
    "Sigmoid SVM": SVC(kernel="sigmoid", random_state=8),
}