
1. Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based on a given set of training data samples. Read the training data from a .CSV file.

import csv

a = []
with open('/home/cit/Downloads/enjoysport.csv', 'r') as csvfile:
    for row in csv.reader(csvfile):
        a.append(row)
print(a)

print("\n The total number of training instances are : ", len(a))

num_attribute = len(a[0]) - 1

print("\n The initial hypothesis is : ")
hypothesis = ['0'] * num_attribute
print(hypothesis)

# Generalise the hypothesis over each positive training instance
for i in range(0, len(a)):
    if a[i][num_attribute] == 'yes':
        for j in range(0, num_attribute):
            if hypothesis[j] == '0' or hypothesis[j] == a[i][j]:
                hypothesis[j] = a[i][j]
            else:
                hypothesis[j] = '?'
    print("\n The hypothesis for the training instance {} is : \n".format(i + 1), hypothesis)

print("\n The Maximally specific hypothesis for the training instance is ")
print(hypothesis)

output

[['sky', 'airtemp', 'humidity', 'wind', 'water', 'forcast', 'enjoysport'], ['sunny', 'warm', 'normal', 'strong', 'warm', 'same',
'yes'], ['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes'], ['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'no'],
['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes']]

The total number of training instances are : 5

The initial hypothesis is :


['0', '0', '0', '0', '0', '0']

The hypothesis for the training instance 1 is :


['0', '0', '0', '0', '0', '0']

The hypothesis for the training instance 2 is :


['sunny', 'warm', 'normal', 'strong', 'warm', 'same']

The hypothesis for the training instance 3 is :


['sunny', 'warm', '?', 'strong', 'warm', 'same']

The hypothesis for the training instance 4 is :


['sunny', 'warm', '?', 'strong', 'warm', 'same']

The hypothesis for the training instance 5 is :


['sunny', 'warm', '?', 'strong', '?', '?']

The Maximally specific hypothesis for the training instance is


['sunny', 'warm', '?', 'strong', '?', '?']

enjoysport.csv

sky airtemp humidity wind water forcast enjoysport
sunny warm normal strong warm same yes
sunny warm high strong warm same yes
rainy cold high strong warm change no
sunny warm high strong cool change yes
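If the CSV file is not already available, it can be generated from the rows listed above (a sketch; the output path is assumed to match the one the program opens):

import csv

rows = [
    ['sky', 'airtemp', 'humidity', 'wind', 'water', 'forcast', 'enjoysport'],
    ['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes'],
    ['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes'],
    ['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'no'],
    ['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes'],
]
with open('enjoysport.csv', 'w', newline='') as f:
    csv.writer(f).writerows(rows)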

2. For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.

import csv

with open("trainingexamples.csv") as f:
    csv_file = csv.reader(f)
    data = list(csv_file)

# Initialise S to the first positive example and G to the most general hypotheses
specific = data[1][:-1]
general = [['?' for i in range(len(specific))] for j in range(len(specific))]

for i in data:
    if i[-1] == "Yes":
        # Positive example: generalise S and relax the matching members of G
        for j in range(len(specific)):
            if i[j] != specific[j]:
                specific[j] = "?"
                general[j][j] = "?"
    elif i[-1] == "No":
        # Negative example: specialise G against S
        for j in range(len(specific)):
            if i[j] != specific[j]:
                general[j][j] = specific[j]
            else:
                general[j][j] = "?"

    print("\nStep " + str(data.index(i) + 1) + " of Candidate Elimination Algorithm")
    print(specific)
    print(general)

# Keep only the rows of G that were actually specialised
gh = []  # gh = general hypothesis
for i in general:
    for j in i:
        if j != '?':
            gh.append(i)
            break
print("\nFinal Specific hypothesis:\n", specific)
print("\nFinal General hypothesis:\n", gh)

output

Step 1 of Candidate Elimination Algorithm


['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]

Step 2 of Candidate Elimination Algorithm


['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]

Step 3 of Candidate Elimination Algorithm


['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]

Step 4 of Candidate Elimination Algorithm


['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', 'Same']]

Step 5 of Candidate Elimination Algorithm


['Sunny', 'Warm', '?', 'Strong', '?', '?']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?']]

Final Specific hypothesis:


['Sunny', 'Warm', '?', 'Strong', '?', '?']

Final General hypothesis:


[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]

trainingexamples.csv

sky airtemp humidity wind water forcast enjoysport
Sunny Warm Normal Strong Warm Same Yes
Sunny Warm High Strong Warm Same Yes
Rainy Cold High Strong Warm Change No
Sunny Warm High Strong Cool Change Yes
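The final version space is bounded below by the specific hypothesis and above by the general hypotheses. Whether a particular hypothesis covers an instance can be checked with a small helper (a sketch; consistent is a hypothetical name, not part of the listing above):

def consistent(hypothesis, instance):
    # A hypothesis covers an instance when every attribute matches or is '?'
    return all(h == '?' or h == v for h, v in zip(hypothesis, instance))

print(consistent(['Sunny', 'Warm', '?', 'Strong', '?', '?'],
                 ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']))  # True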
3. Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use
an appropriate data set for building the decision tree and apply this knowledge to classify a new
sample.

import math
import csv

def load_csv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def subtables(data, col, delete):
    # Partition the data into sub-tables, one per value of the given column
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))

    counts = [0] * len(attr)
    r = len(data)
    c = len(data[0])
    for x in range(len(attr)):
        for y in range(r):
            if data[y][col] == attr[x]:
                counts[x] += 1

    for x in range(len(attr)):
        dic[attr[x]] = [[0 for i in range(c)] for j in range(counts[x])]
        pos = 0
        for y in range(r):
            if data[y][col] == attr[x]:
                if delete:
                    del data[y][col]
                dic[attr[x]][pos] = data[y]
                pos += 1
    return attr, dic

def entropy(S):
    # Shannon entropy of a list of class labels (assumes a binary class attribute)
    attr = list(set(S))
    if len(attr) == 1:  # pure subset
        return 0

    counts = [0, 0]
    for i in range(2):
        counts[i] = sum([1 for x in S if attr[i] == x]) / (len(S) * 1.0)

    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums

def compute_gain(data, col):
    attr, dic = subtables(data, col, delete=False)
    total_size = len(data)
    entropies = [0] * len(attr)
    ratio = [0] * len(attr)

    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attr)):
        ratio[x] = len(dic[attr[x]]) / (total_size * 1.0)
        entropies[x] = entropy([row[-1] for row in dic[attr[x]]])
        total_entropy -= ratio[x] * entropies[x]
    return total_entropy

def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if (len(set(lastcol))) == 1:
        # All examples share one label: return a leaf node
        node = Node("")
        node.answer = lastcol[0]
        return node

    # Split on the attribute with the highest information gain
    n = len(data[0]) - 1
    gains = [0] * n
    for col in range(n):
        gains[col] = compute_gain(data, col)
    split = gains.index(max(gains))
    node = Node(features[split])
    fea = features[:split] + features[split + 1:]

    attr, dic = subtables(data, split, delete=True)

    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node

def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return

    print(" " * level, node.attribute)
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)

def classify(node, x_test, features):
    if node.answer != "":
        print(node.answer)
        return
    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos] == value:
            classify(n, x_test, features)

'''Main program'''
dataset, features = load_csv("id3.csv")
node1 = build_tree(dataset, features)

print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1, 0)
testdata, features = load_csv("id3.csv")

for xtest in testdata:
    print("The test instance:", xtest)
    print("The label for test instance:", end=" ")
    classify(node1, xtest, features)

output
The decision tree for the dataset using ID3 algorithm is
Outlook
overcast
yes
sunny
Humidity
high
no
normal
yes
rain
Wind
strong
no
weak
yes
The test instance: ['sunny', 'hot', 'high', 'weak', 'no']
The label for test instance: no
The test instance: ['sunny', 'hot', 'high', 'strong', 'no']
The label for test instance: no
The test instance: ['overcast', 'hot', 'high', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'mild', 'high', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'cool', 'normal', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'cool', 'normal', 'strong', 'no']
The label for test instance: no
The test instance: ['overcast', 'cool', 'normal', 'strong', 'yes']
The label for test instance: yes
The test instance: ['sunny', 'mild', 'high', 'weak', 'no']
The label for test instance: no
The test instance: ['sunny', 'cool', 'normal', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'mild', 'normal', 'weak', 'yes']
The label for test instance: yes
The test instance: ['sunny', 'mild', 'normal', 'strong', 'yes']
The label for test instance: yes
The test instance: ['overcast', 'mild', 'high', 'strong', 'yes']
The label for test instance: yes
The test instance: ['overcast', 'hot', 'normal', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'mild', 'high', 'strong', 'no']
The label for test instance: no

id3.csv

Outlook Temperature Humidity Wind Answer
sunny hot high weak no
sunny hot high strong no
overcast hot high weak yes
rain mild high weak yes
rain cool normal weak yes
rain cool normal strong no
overcast cool normal strong yes
sunny mild high weak no
sunny cool normal weak yes
rain mild normal weak yes
sunny mild normal strong yes
overcast mild high strong yes
overcast hot normal weak yes
rain mild high strong no
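As a sanity check on the tree above, the root entropy and the information gain of Outlook can be recomputed by hand with the same formula entropy() uses (a sketch; H is a hypothetical helper):

import math

def H(p, n):
    # Entropy of a set with p positive and n negative examples
    total = p + n
    return sum(-c / total * math.log(c / total, 2) for c in (p, n) if c)

root = H(9, 5)  # 9 yes / 5 no -> about 0.940
gain_outlook = root - (5/14) * H(2, 3) - (4/14) * H(4, 0) - (5/14) * H(3, 2)
print(round(root, 3), round(gain_outlook, 3))  # about 0.940 and 0.247, so Outlook is chosen first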
4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets.

import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalise each feature column by its maximum
y = y / 100

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid; assumes x is already a sigmoid output
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000             # number of training iterations
lr = 0.1                 # learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of hidden layer neurons
output_neurons = 1       # number of neurons in the output layer

# Weight and bias initialization: uniform random values of the given shape
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)  # how much the hidden layer weights contributed to the error

    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # Weight updates: dot product of next-layer error and current-layer output
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)

output

Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.89571283]
[0.88239245]
[0.89153673]]
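Note that the training loop updates only the weight matrices; the bias terms bh and bout are initialised but never adjusted. If bias updates are wanted, two extra lines inside the loop, after the weight updates, would be a reasonable sketch (an assumed extension, not part of the original listing):

bout += np.sum(d_output, axis=0, keepdims=True) * lr
bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr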

5. Write a program to implement the Naive Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier using a few test data sets.
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load data from CSV
data = pd.read_csv('tennisdata.csv')
print("The first 5 Values of data is :\n", data.head())

# Obtain the train data and train output
X = data.iloc[:, :-1]
print("\nThe First 5 values of the train data is\n", X.head())
y = data.iloc[:, -1]
print("\nThe First 5 values of train output is\n", y.head())

# Convert the categorical attributes into numbers
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)

le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)

le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)

le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)

print("\nNow the Train data is\n", X.head())

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train output is\n", y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

classifier = GaussianNB()
classifier.fit(X_train, y_train)

print("Accuracy is:", accuracy_score(classifier.predict(X_test), y_test))

output

The first 5 Values of data is :


Outlook Temperature Humidity Windy PlayTennis
0 Sunny Hot High Weak No
1 Sunny Hot High Strong No
2 Overcast Hot High Weak Yes
3 Rain Mild High Weak Yes
4 Rain Cool Normal Weak Yes

The First 5 values of the train data is


Outlook Temperature Humidity Windy
0 Sunny Hot High Weak
1 Sunny Hot High Strong
2 Overcast Hot High Weak
3 Rain Mild High Weak
4 Rain Cool Normal Weak

The First 5 values of train output is


0 No
1 No
2 Yes
3 Yes
4 Yes
Name: PlayTennis, dtype: object

Now the Train data is


Outlook Temperature Humidity Windy
0 2 1 0 1
1 2 1 0 0
2 0 1 0 1
3 1 2 0 1
4 1 0 1 1

Now the Train output is


[0 0 1 1 1 0 1 0 1 1 1 1 1 0]
Accuracy is: 0.3333333333333333
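The reported accuracy varies from run to run: train_test_split shuffles the data with no fixed seed, and the test set here holds only three rows. For a reproducible split, a seed can be passed explicitly (a minimal tweak, not part of the original listing):

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)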

tennisdata.csv

Outlook Temperature Humidity Windy PlayTennis
Sunny Hot High Weak No
Sunny Hot High Strong No
Overcast Hot High Weak Yes
Rain Mild High Weak Yes
Rain Cool Normal Weak Yes
Rain Cool Normal Strong No
Overcast Cool Normal Strong Yes
Sunny Mild High Weak No
Sunny Cool Normal Weak Yes
Rain Mild Normal Weak Yes
Sunny Mild Normal Strong Yes
Overcast Mild High Strong Yes
Overcast Hot Normal Weak Yes
Rain Mild High Strong No
6. Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set. You can use Python ML library classes/API.
import numpy as np
import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

heartDisease = pd.read_csv('heart.csv')
heartDisease = heartDisease.replace('?', np.nan)

print('Sample instances from the dataset are given below')
print(heartDisease.head())

print('\n Attributes and datatypes')
print(heartDisease.dtypes)

# Network structure: age, sex, exang and cp influence heartdisease,
# which in turn influences restecg and chol
model = BayesianModel([('age', 'heartdisease'), ('sex', 'heartdisease'),
                       ('exang', 'heartdisease'), ('cp', 'heartdisease'),
                       ('heartdisease', 'restecg'), ('heartdisease', 'chol')])
print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

print('\n Inferencing with Bayesian Network:')
HeartDiseasetest_infer = VariableElimination(model)

print('\n 1. Probability of HeartDisease given evidence= restecg')
q1 = HeartDiseasetest_infer.query(variables=['heartdisease'], evidence={'restecg': 1})
print(q1)

print('\n 2. Probability of HeartDisease given evidence= cp ')
q2 = HeartDiseasetest_infer.query(variables=['heartdisease'], evidence={'cp': 2})
print(q2)
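Depending on the installed pgmpy version, BayesianModel may raise a deprecation warning or be unavailable; recent releases expose the same functionality as BayesianNetwork (an assumption about the installed library version; the rest of the program is unchanged):

from pgmpy.models import BayesianNetwork

model = BayesianNetwork([('age', 'heartdisease'), ('sex', 'heartdisease'),
                         ('exang', 'heartdisease'), ('cp', 'heartdisease'),
                         ('heartdisease', 'restecg'), ('heartdisease', 'chol')])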

output
Sample instances from the dataset are given below
age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \
0 63 1 1 145 233 1 2 150 0 2.3 3
1 67 1 4 160 286 0 2 108 1 1.5 2
2 67 1 4 120 229 0 2 129 1 2.6 2
3 37 1 3 130 250 0 0 187 0 3.5 3
4 41 0 2 130 204 0 2 172 0 1.4 1

ca thal heartdisease
0 0 6 0
1 3 3 2
2 2 7 1
3 0 3 0
4 0 3 0

Attributes and datatypes


age int64
sex int64
cp int64
trestbps int64
chol int64
fbs int64
restecg int64
thalach int64
exang int64
oldpeak float64
slope int64
ca object
thal object
heartdisease int64
dtype: object

Learning CPD using Maximum likelihood estimators

Inferencing with Bayesian Network:

1. Probability of HeartDisease given evidence= restecg


+-----------------+---------------------+
| heartdisease | phi(heartdisease) |
+=================+=====================+
| heartdisease(0) | 0.1016 |
+-----------------+---------------------+
| heartdisease(1) | 0.0000 |
+-----------------+---------------------+
| heartdisease(2) | 0.2361 |
+-----------------+---------------------+
| heartdisease(3) | 0.2017 |
+-----------------+---------------------+
| heartdisease(4) | 0.4605 |
+-----------------+---------------------+

2. Probability of HeartDisease given evidence= cp


+-----------------+---------------------+
| heartdisease | phi(heartdisease) |
+=================+=====================+
| heartdisease(0) | 0.3742 |
+-----------------+---------------------+
| heartdisease(1) | 0.2018 |
+-----------------+---------------------+
| heartdisease(2) | 0.1375 |
+-----------------+---------------------+
| heartdisease(3) | 0.1541 |
+-----------------+---------------------+
| heartdisease(4) | 0.1323 |
+-----------------+---------------------+

7. Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using the k-Means algorithm. Compare the results of these two algorithms and comment on the quality of clustering. You can add Python ML library classes/API in the program.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import sklearn.metrics as sm
import pandas as pd
import numpy as np

iris = datasets.load_iris()

X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']

y = pd.DataFrame(iris.target)
y.columns = ['Targets']

model = KMeans(n_clusters=3)
model.fit(X)

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# Plot the original classifications
plt.subplot(1, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# Plot the model's classifications
plt.subplot(1, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K Mean Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('The accuracy score of K-Mean: ', sm.accuracy_score(y, model.labels_))
print('The Confusion matrix of K-Mean: ', sm.confusion_matrix(y, model.labels_))

# Standardise the features before fitting the Gaussian mixture
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)

from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_gmm = gmm.predict(xs)

plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_gmm], s=40)
plt.title('GMM Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

print('The accuracy score of EM: ', sm.accuracy_score(y, y_gmm))
print('The Confusion matrix of EM: ', sm.confusion_matrix(y, y_gmm))

output

The accuracy score of K-Mean: 0.24


The Confusion matrix of K-Mean: [[ 0 50 0]
[48 0 2]
[14 0 36]]
The accuracy score of EM: 0.3333333333333333
The Confusion matrix of EM: [[ 0 50 0]
[45 0 5]
[ 0 0 50]]
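Because cluster labels are arbitrary permutations of the class labels, accuracy_score can understate the quality of a clustering. A permutation-invariant measure such as the adjusted Rand index gives a fairer comparison (a sketch reusing the fitted models above; the printed values depend on the run):

from sklearn.metrics import adjusted_rand_score

print('ARI of K-Means:', adjusted_rand_score(iris.target, model.labels_))
print('ARI of EM:', adjusted_rand_score(iris.target, y_gmm))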
8. Write a program to implement the k-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions. Java/Python ML library classes can be used for this problem.

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

iris_dataset = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris_dataset["data"], iris_dataset["target"],
                                                    random_state=0)
kn = KNeighborsClassifier()
kn.fit(X_train, y_train)
prediction = kn.predict(X_test)
print("ACCURACY: " + str(kn.score(X_test, y_test)))
target_names = iris_dataset.target_names
for pred, actual in zip(prediction, y_test):
    print("prediction is " + str(target_names[pred]) + ", actual is " + str(target_names[actual]))

output
ACCURACY: 0.9736842105263158
prediction is virginica, actual is virginica
prediction is versicolor, actual is versicolor
prediction is setosa, actual is setosa
prediction is virginica, actual is virginica
prediction is setosa, actual is setosa
prediction is virginica, actual is virginica

.....................................
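KNeighborsClassifier uses n_neighbors=5 by default. The neighbourhood size can be passed explicitly and tuned on the same split (a sketch reusing the variables above; accuracies will vary with k):

for k in (1, 3, 5, 7):
    kn_k = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    print("k =", k, "accuracy =", kn_k.score(X_test, y_test))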

10. Implement and demonstrate the working of the SVM algorithm for classification.

# Step 1: Import necessary libraries
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Step 2: Prepare the data
# Generate a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train the SVM model
svm_model = SVC(kernel='linear', random_state=42)  # linear SVM
svm_model.fit(X_train, y_train)

# Step 4: Evaluate the model
# Make predictions on the test set
y_pred = svm_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

output
Accuracy: 0.87
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.91      0.87        93
           1       0.92      0.83      0.87       107

    accuracy                           0.87       200
   macro avg       0.87      0.87      0.87       200
weighted avg       0.87      0.87      0.87       200
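The model above uses a linear kernel. For data that is not linearly separable, switching to a non-linear kernel such as RBF is a one-line change (a sketch; the resulting accuracy will differ from the figures above):

rbf_model = SVC(kernel='rbf', gamma='scale', random_state=42)
rbf_model.fit(X_train, y_train)
print("RBF accuracy:", accuracy_score(y_test, rbf_model.predict(X_test)))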
9. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select an appropriate data set for your experiment and draw graphs.

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def kernel(point, xmat, k):
    # Gaussian kernel: weight each training point by its distance from the query point
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k**2))
    return weights

def localWeight(point, xmat, ymat, k):
    # Weighted least-squares solution at the query point
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

data = pd.read_csv('dataset-09.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)

mbill = np.mat(bill)
mtip = np.mat(tip)

m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))  # add a bias column of ones

ypred = localWeightRegression(X, mtip, 0.5)
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()

dataset-09.csv

total_bill tip
50 12
30 7.5
60 13
40 8.5
65 15
20 6
80 18
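The bandwidth parameter k controls how local the fit is: small values let the curve chase individual points, while large values approach an ordinary least-squares line. The effect can be compared by re-running the regression with a few bandwidths (a sketch reusing the functions and variables above):

plt.figure()
plt.scatter(bill, tip, color='green')
for k in (0.2, 0.5, 2.0):
    ypred_k = localWeightRegression(X, mtip, k)
    plt.plot(xsort[:, 1], ypred_k[SortIndex], linewidth=2, label='k = ' + str(k))
plt.legend()
plt.show()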
