0% found this document useful (0 votes)
13 views9 pages

Siddhesh Asati: #Group: B (ML) #Assignment: 7

code2

Uploaded by

Siddhesh Asati
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
13 views9 pages

Siddhesh Asati: #Group: B (ML) #Assignment: 7

code2

Uploaded by

Siddhesh Asati
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 9

Siddhesh Asati

[1]: #GROUP: B(ML)


#ASSIGNMENT: 7

[2]: import warnings


warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score


# Rejoined from a PDF line wrap: the wrap markers had split this import
# across two lines and made it invalid Python.
from sklearn.metrics import (
    precision_score, recall_score, f1_score, roc_auc_score,
)

from sklearn.metrics import confusion_matrix

[3]: data = pd.read_csv("emails.csv")


data

[3]: Email No. the to ect and for of a you hou ... connevey \
0 Email 1 0 0 1 0 0 0 2 0 0 ... 0
1 Email 2 8 13 24 6 6 2 102 1 27 ... 0
2 Email 3 0 0 1 0 0 0 8 0 0 ... 0
3 Email 4 0 5 22 0 5 1 51 2 10 ... 0
4 Email 5 7 6 17 1 5 2 57 0 9 ... 0
... ... ... .. ... ... ... .. ... ... ... ... ...
5167 Email 5168 2 2 2 3 0 0 32 0 0 ... 0
5168 Email 5169 35 27 11 2 6 5 151 4 3 ... 0
5169 Email 5170 0 0 1 1 0 0 11 0 0 ... 0
5170 Email 5171 2 7 1 0 2 1 28 2 0 ... 0
5171 Email 5172 22 24 5 1 6 5 148 8 2 ... 0

jay valued lay infrastructure military allowing ff dry \


0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 1 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 1 0

1
... ... ... ... ... ... ... .. ...
5167 0 0 0 0 0 0 0 0
5168 0 0 0 0 0 0 1 0
5169 0 0 0 0 0 0 0 0
5170 0 0 0 0 0 0 1 0
5171 0 0 0 0 0 0 0 0

Prediction
0 0
1 0
2 0
3 0
4 0
... ...
5167 0
5168 0
5169 1
5170 1
5171 0

[5172 rows x 3002 columns]

[4]: data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5172 entries, 0 to 5171
Columns: 3002 entries, Email No. to Prediction
dtypes: int64(3001), object(1)
memory usage: 118.5+ MB
[5]: data.head()

[5]: Email No. the to ect and for of a you hou ... connevey jay \
0 Email 1 0 0 1 0 0 0 2 0 0 ... 0 0
1 Email 2 8 13 24 6 6 2 102 1 27 ... 0 0
2 Email 3 0 0 1 0 0 0 8 0 0 ... 0 0
3 Email 4 0 5 22 0 5 1 51 2 10 ... 0 0
4 Email 5 7 6 17 1 5 2 57 0 9 ... 0 0

valued lay infrastructure military allowing ff dry Prediction


0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 1 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 1 0 0

[5 rows x 3002 columns]

2
[6]: data.tail()

[6]: Email No. the to ect and for of a you hou ... connevey \
5167 Email 5168 2 2 2 3 0 0 32 0 0 ... 0
5168 Email 5169 35 27 11 2 6 5 151 4 3 ... 0
5169 Email 5170 0 0 1 1 0 0 11 0 0 ... 0
5170 Email 5171 2 7 1 0 2 1 28 2 0 ... 0
5171 Email 5172 22 24 5 1 6 5 148 8 2 ... 0

jay valued lay infrastructure military allowing ff dry \


5167 0 0 0 0 0 0 0 0
5168 0 0 0 0 0 0 1 0
5169 0 0 0 0 0 0 0 0
5170 0 0 0 0 0 0 1 0
5171 0 0 0 0 0 0 0 0

Prediction
5167 0
5168 0
5169 1
5170 1
5171 0

[5 rows x 3002 columns]

[7]: data.shape

[7]: (5172, 3002)

[8]: data.describe()

[8]: the to ect and for \


count 5172.000000 5172.000000 5172.000000 5172.000000 5172.000000
mean 6.640565 6.188128 5.143852 3.075599 3.124710
std 11.745009 9.534576 14.101142 6.045970 4.680522
min 0.000000 0.000000 1.000000 0.000000 0.000000
25% 0.000000 1.000000 1.000000 0.000000 1.000000
50% 3.000000 3.000000 1.000000 1.000000 2.000000
75% 8.000000 7.000000 4.000000 3.000000 4.000000
max 210.000000 132.000000 344.000000 89.000000 47.000000

of a you hou in ... \


count 5172.000000 5172.000000 5172.000000 5172.000000 5172.000000 ...
mean 2.627030 55.517401 2.466551 2.024362 10.600155 ...
std 6.229845 87.574172 4.314444 6.967878 19.281892 ...
min 0.000000 0.000000 0.000000 0.000000 0.000000 ...
25% 0.000000 12.000000 0.000000 0.000000 1.000000 ...

3
50% 1.000000 28.000000 1.000000 0.000000 5.000000 ...
75% 2.000000 62.250000 3.000000 1.000000 12.000000 ...
max 77.000000 1898.000000 70.000000 167.000000 223.000000 ...

connevey jay valued lay infrastructure \


count 5172.000000 5172.000000 5172.000000 5172.000000 5172.000000
mean 0.005027 0.012568 0.010634 0.098028 0.004254
std 0.105788 0.199682 0.116693 0.569532 0.096252
min 0.000000 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000 0.000000 0.000000
max 4.000000 7.000000 2.000000 12.000000 3.000000

military allowing ff dry Prediction


count 5172.000000 5172.000000 5172.000000 5172.000000 5172.000000
mean 0.006574 0.004060 0.914733 0.006961 0.290023
std 0.138908 0.072145 2.780203 0.098086 0.453817
min 0.000000 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000 0.000000
75% 0.000000 0.000000 1.000000 0.000000 1.000000
max 4.000000 3.000000 114.000000 4.000000 1.000000

[8 rows x 3001 columns]

[9]: data.dtypes

[9]: Email No. object


the int64
to int64
ect int64
and int64
...
military int64
allowing int64
ff int64
dry int64
Prediction int64
Length: 3002, dtype: object

[10]: data.isnull().sum()

[10]: Email No. 0


the 0
to 0
ect 0

4
and 0
..
military 0
allowing 0
ff 0
dry 0
Prediction 0
Length: 3002, dtype: int64

[11]: #For null column


# Per-column count of missing values; keep only the columns that have any.
null_counts = data.isnull().sum()
columns_with_nulls = null_counts[null_counts > 0]
columns_with_nulls

[11]: Series([], dtype: int64)

[12]: # Remove Email no. column


# Feature matrix: every column except the first (the "Email No." identifier)
# and the last (the "Prediction" label). Slicing with 1:-1 instead of a
# hard-coded upper bound keeps this correct if the number of word columns
# changes, and mirrors the label selection data.iloc[:, -1].
x = data.iloc[:, 1:-1]
x.head()

[12]: the to ect and for of a you hou in ... enhancements connevey \
0 0 0 1 0 0 0 2 0 0 0 ... 0 0
1 8 13 24 6 6 2 102 1 27 18 ... 0 0
2 0 0 1 0 0 0 8 0 0 4 ... 0 0
3 0 5 22 0 5 1 51 2 10 1 ... 0 0
4 7 6 17 1 5 2 57 0 9 3 ... 0 0

jay valued lay infrastructure military allowing ff dry


0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 1 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 1 0

[5 rows x 3000 columns]

[13]: # Output Class


y = data.iloc[:,-1]
y.head()

[13]: 0 0
1 0
2 0
3 0
4 0
Name: Prediction, dtype: int64

5
[14]: from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible. (Rejoined from a PDF line wrap that had broken the
# call across two lines.)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=12
)

x_test

[14]: the to ect and for of a you hou in ... enhancements \


4075 2 1 3 0 1 2 13 1 0 3 ... 0
4835 10 7 3 8 3 5 99 0 0 14 ... 0
4439 6 5 4 2 0 9 57 3 1 0 ... 0
3910 4 5 5 1 3 2 44 0 2 5 ... 0
2398 0 1 1 0 4 0 14 0 0 6 ... 0
... ... .. ... ... ... .. ... ... ... .. ... ...
4367 18 11 16 7 5 3 126 1 14 29 ... 0
2513 0 4 1 2 0 0 27 0 1 6 ... 0
1662 2 4 2 3 1 1 104 0 0 18 ... 0
3810 7 2 1 2 1 1 26 0 1 3 ... 0
570 9 31 7 14 4 3 519 1 2 69 ... 0

connevey jay valued lay infrastructure military allowing ff dry


4075 0 0 0 0 0 0 0 0 0
4835 0 0 0 1 0 0 0 6 0
4439 0 0 0 0 0 0 0 0 0
3910 0 0 0 0 0 0 0 0 0
2398 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... .. ...
4367 0 0 0 0 0 0 0 0 0
2513 0 0 0 0 0 0 0 0 0
1662 0 0 0 1 0 0 0 3 0
3810 0 0 0 0 0 0 0 0 0
570 0 0 0 1 0 0 0 8 1

[1035 rows x 3000 columns]

[15]: from sklearn.preprocessing import StandardScaler


# Standardise the features. The scaler is fitted on the training split only
# and the same mean/variance is then applied to the test split. Calling
# fit_transform on x_test would rescale the test data with its own statistics,
# putting train and test on different scales and leaking test-set information
# into preprocessing.
# NOTE: the metrics recorded further down were produced with the previous
# per-split fit and need to be regenerated after this change.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

[16]: #K-NN classifier


from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with k=5; the Minkowski metric with p=2 is
# the Euclidean distance.
knn = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
knn.fit(x_train, y_train)

[16]: KNeighborsClassifier()

[17]: y_pred_knn = knn.predict(x_test)


y_pred_knn

6
[17]: array([1, 0, 0, ..., 1, 1, 1], dtype=int64)

[18]: cm = confusion_matrix(y_test, y_pred_knn)


cm

[18]: array([[586, 113],


[ 24, 312]], dtype=int64)

[19]: #Confusion matrix


# Plot the confusion matrix. The cells are integer counts, so annotate them
# with an integer format ('d') rather than two-decimal floats.
sns.heatmap(cm, annot=True, fmt='d', cmap='Reds')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

[20]: #Accuracy Score


accuracy_score(y_test, y_pred_knn)

[20]: 0.8676328502415459

7
[21]: #Support Vector Machine
from sklearn.svm import SVC

# Linear-kernel support vector classifier, trained on the scaled training
# split; random_state is fixed for reproducibility.
svc = SVC(kernel='linear', random_state=0)
svc.fit(x_train, y_train)

[21]: SVC(kernel='linear', random_state=0)

[22]: y_pred_svc = svc.predict(x_test)


y_pred_svc

[22]: array([0, 0, 0, ..., 1, 0, 1], dtype=int64)

[23]: cm = confusion_matrix(y_test, y_pred_svc)


cm

[23]: array([[685, 14],


[ 44, 292]], dtype=int64)

[24]: # Confusion matrix


# Plot the confusion matrix. The cells are integer counts, so annotate them
# with an integer format ('d') rather than two-decimal floats.
sns.heatmap(cm, annot=True, fmt='d', cmap='Reds')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

8
[25]: #Accuracy score
accuracy_score(y_test, y_pred_svc)

[25]: 0.9439613526570049

[ ]:

You might also like