Practical 6

The document presents a practical implementation of the Gaussian Naive Bayes algorithm on the Iris dataset. It walks through data loading, preprocessing, model training, prediction, and evaluation, reporting accuracy for both the training and test sets together with a confusion matrix and classification report for the test set, and it ends with visualizations of the classifier's decision boundaries.
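Before the listing, it helps to recall the rule the classifier applies: each class c is scored by its log-prior plus the sum of per-feature Gaussian log-likelihoods, and the highest-scoring class wins. The sketch below is an illustrative from-scratch version of that decision rule (the function name and arguments are hypothetical, not part of the practical); for a fitted scikit-learn GaussianNB, the corresponding statistics are exposed as classifier.theta_, classifier.var_, and classifier.class_prior_.

import numpy as np

def gnb_predict(X, means, variances, priors):
    """Illustrative Gaussian Naive Bayes decision rule:
    argmax_c [ log P(c) + sum_i log N(x_i; mean_{c,i}, var_{c,i}) ]."""
    scores = []
    for mu, var, prior in zip(means, variances, priors):
        # Per-feature Gaussian log-density, summed over features
        # (the sum is the "naive" conditional-independence assumption)
        log_lik = -0.5 * np.sum(np.log(2 * np.pi * var) + (X - mu) ** 2 / var, axis=1)
        scores.append(np.log(prior) + log_lik)
    return np.argmax(np.column_stack(scores), axis=1)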


Ans 1: INPUT:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report
from matplotlib.colors import ListedColormap

# Load the Iris dataset into a DataFrame (four features plus the target column)
iris = load_iris()
dataset = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
                       columns=iris['feature_names'] + ['target'])

x = dataset.iloc[:, [0, 1]].values  # Select the first two features (sepal length/width) for simplicity
y = dataset.iloc[:, 4].values

# Split the dataset into the Training set and Test set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

# Feature scaling: standardise to zero mean and unit variance,
# fitting the scaler on the training set only
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Fit Gaussian Naive Bayes to the Training set
classifier = GaussianNB()
classifier.fit(x_train, y_train)

# Predict the Test set results
y_pred = classifier.predict(x_test)

# Accuracy on the test set, then on the training set
print("Gaussian Naive Bayes model accuracy(in %):", metrics.accuracy_score(y_test, y_pred)*100)
y_pred_train = classifier.predict(x_train)
print("Gaussian Naive Bayes model accuracy(in %):", metrics.accuracy_score(y_train, y_pred_train)*100)

# Confusion matrix (rows = true labels, columns = predictions)
cm = confusion_matrix(y_test, y_pred)
print('Confusion matrix\n\n', cm)

# Note: TP/TN/FP/FN labels are only meaningful for a binary (2x2) matrix;
# with class 0 treated as the positive class, FP = cm[1,0] and FN = cm[0,1]
print('\nTrue Positives(TP) = ', cm[0,0])
print('\nTrue Negatives(TN) = ', cm[1,1])
print('\nFalse Positives(FP) = ', cm[1,0])
print('\nFalse Negatives(FN) = ', cm[0,1])

# Classification report: per-class precision, recall, and F1-score
print(classification_report(y_test, y_pred))

# Plot the decision regions for the test and training sets side by side
fig, ax = plt.subplots(1, 2, figsize=(12, 6))

# Test set
x_set, y_set = x_test, y_test
X1, X2 = np.meshgrid(np.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
ax[0].contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
               alpha=0.75, cmap=ListedColormap(('purple', 'green', 'red')))
ax[0].set_xlim(X1.min(), X1.max())
ax[0].set_ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    ax[0].scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                  color=ListedColormap(('purple', 'green', 'red'))(i), label=j)
ax[0].set_title('Naive Bayes (Test set)')
ax[0].set_xlabel('Sepal Length (cm)')
ax[0].set_ylabel('Sepal Width (cm)')
ax[0].legend()

# Training set
x_set, y_set = x_train, y_train
X1, X2 = np.meshgrid(np.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
ax[1].contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
               alpha=0.75, cmap=ListedColormap(('purple', 'green', 'red')))
ax[1].set_xlim(X1.min(), X1.max())
ax[1].set_ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    ax[1].scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                  color=ListedColormap(('purple', 'green', 'red'))(i), label=j)
ax[1].set_title('Naive Bayes (Training set)')
ax[1].set_xlabel('Sepal Length (cm)')
ax[1].set_ylabel('Sepal Width (cm)')
ax[1].legend()

plt.show()

OUTPUT:
Gaussian Naive Bayes model accuracy(in %): 90.0

Gaussian Naive Bayes model accuracy(in %): 88.33333333333333

Confusion matrix

 [[65  3]
 [ 7 25]]

True Positives(TP) =  65
True Negatives(TN) =  25
False Positives(FP) =  7
False Negatives(FN) =  3

              precision    recall  f1-score   support

           0       0.90      0.96      0.93        68
           1       0.89      0.78      0.83        32

    accuracy                           0.90       100
   macro avg       0.90      0.87      0.88       100
weighted avg       0.90      0.90      0.90       100
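As a sanity check, the figures in the report can be recovered from the confusion-matrix counts above. A minimal sketch, using the printed binary counts with class 0 taken as the positive class:

# Recompute class-0 metrics from the binary confusion-matrix counts above
TP, TN, FP, FN = 65, 25, 7, 3

precision = TP / (TP + FP)                                  # 65/72 ≈ 0.90
recall    = TP / (TP + FN)                                  # 65/68 ≈ 0.96
f1        = 2 * precision * recall / (precision + recall)   # ≈ 0.93
accuracy  = (TP + TN) / (TP + TN + FP + FN)                 # 90/100 = 0.90
print(precision, recall, f1, accuracy)

These match the class-0 row and the accuracy line of the report.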

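On newer scikit-learn versions (1.1 and later, an assumption about the environment), the mesh-grid plotting above can be written more compactly with DecisionBoundaryDisplay, which builds the same filled-contour plot directly from the fitted classifier. A sketch, reusing classifier, x_train, and y_train from the listing above:

from sklearn.inspection import DecisionBoundaryDisplay

fig, ax = plt.subplots(figsize=(6, 6))
# Filled contour of predicted classes over the scaled feature space
DecisionBoundaryDisplay.from_estimator(classifier, x_train, response_method='predict',
                                       ax=ax, alpha=0.75,
                                       cmap=ListedColormap(('purple', 'green', 'red')))
# Overlay the training points, coloured by true class
for i, j in enumerate(np.unique(y_train)):
    ax.scatter(x_train[y_train == j, 0], x_train[y_train == j, 1],
               color=ListedColormap(('purple', 'green', 'red'))(i), label=j)
ax.set_title('Naive Bayes (Training set)')
ax.legend()
plt.show()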

Practical 7

Ans 1: INPUT:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report
from matplotlib.colors import ListedColormap

# Load the Iris dataset into a DataFrame (four features plus the target column)
iris = load_iris()
dataset = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
                       columns=iris['feature_names'] + ['target'])

x = dataset.iloc[:, [0, 1]].values  # Select the first two features (sepal length/width) for simplicity
y = dataset.iloc[:, 4].values

# Split the dataset into the Training set and Test set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

# Feature scaling: standardise to zero mean and unit variance,
# fitting the scaler on the training set only
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Fit Gaussian Naive Bayes to the Training set
classifier = GaussianNB()
classifier.fit(x_train, y_train)

# Predict the Test set results
y_pred = classifier.predict(x_test)

# Accuracy on the test set, then on the training set
print("Gaussian Naive Bayes model accuracy(in %):", metrics.accuracy_score(y_test, y_pred)*100)
y_pred_train = classifier.predict(x_train)
print("Gaussian Naive Bayes model accuracy(in %):", metrics.accuracy_score(y_train, y_pred_train)*100)

# Confusion matrix (rows = true labels, columns = predictions)
cm = confusion_matrix(y_test, y_pred)
print('Confusion matrix\n\n', cm)

# Note: TP/TN/FP/FN labels are only meaningful for a binary (2x2) matrix;
# with class 0 treated as the positive class, FP = cm[1,0] and FN = cm[0,1]
print('\nTrue Positives(TP) = ', cm[0,0])
print('\nTrue Negatives(TN) = ', cm[1,1])
print('\nFalse Positives(FP) = ', cm[1,0])
print('\nFalse Negatives(FN) = ', cm[0,1])

# Classification report: per-class precision, recall, and F1-score
print(classification_report(y_test, y_pred))

# Plot the decision regions for the test and training sets side by side
fig, ax = plt.subplots(1, 2, figsize=(12, 6))

# Test set
x_set, y_set = x_test, y_test
X1, X2 = np.meshgrid(np.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
ax[0].contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
               alpha=0.75, cmap=ListedColormap(('purple', 'green', 'red')))
ax[0].set_xlim(X1.min(), X1.max())
ax[0].set_ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    ax[0].scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                  color=ListedColormap(('purple', 'green', 'red'))(i), label=j)
ax[0].set_title('Naive Bayes (Test set)')
ax[0].set_xlabel('Sepal Length (cm)')
ax[0].set_ylabel('Sepal Width (cm)')
ax[0].legend()

# Training set
x_set, y_set = x_train, y_train
X1, X2 = np.meshgrid(np.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
ax[1].contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
               alpha=0.75, cmap=ListedColormap(('purple', 'green', 'red')))
ax[1].set_xlim(X1.min(), X1.max())
ax[1].set_ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    ax[1].scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                  color=ListedColormap(('purple', 'green', 'red'))(i), label=j)
ax[1].set_title('Naive Bayes (Training set)')
ax[1].set_xlabel('Sepal Length (cm)')
ax[1].set_ylabel('Sepal Width (cm)')
ax[1].legend()

plt.show()

OUTPUT:
Gaussian Naive Bayes model accuracy(in %): 76.31578947368422

Gaussian Naive Bayes model accuracy(in %): 81.25

Confusion matrix

 [[13  0  0]
 [ 0 12  4]
 [ 0  5  4]]

True Positives(TP) =  13
True Negatives(TN) =  12
False Positives(FP) =  0
False Negatives(FN) =  0

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        13
         1.0       0.71      0.75      0.73        16
         2.0       0.50      0.44      0.47         9

    accuracy                           0.76        38
   macro avg       0.74      0.73      0.73        38
weighted avg       0.76      0.76      0.76        38
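To connect the numbers above to the model itself, the fitted GaussianNB exposes the per-class statistics it estimated; a short sketch (attribute names as documented in scikit-learn 1.0+, where sigma_ was renamed var_):

# Per-class priors, feature means, and feature variances learned during fit
print('Classes:      ', classifier.classes_)
print('Class priors: ', classifier.class_prior_)
print('Feature means (theta_), one row per class:\n', classifier.theta_)
print('Feature variances (var_), one row per class:\n', classifier.var_)

A test point is assigned to whichever class gives it the highest prior-weighted Gaussian likelihood under these means and variances.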
