0% found this document useful (0 votes)

89 views28 pages

ML Lab Manual

The program demonstrates the ID3 decision tree algorithm. It loads a training dataset from a CSV file, builds a decision tree on the dataset, and prints the tree. It then loads a test dataset, classifies each test sample using the decision tree, and prints the predicted label.

Uploaded by

Sharan Patil

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

89 views28 pages

ML Lab Manual

Uploaded by

Sharan Patil

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 28

EXPERIMENT-1

Write a Program to Implement Water-Jug problem using Python.

PROGRAM:

def apply_rule(x, y, rule, m=4, n=3):
    """Return the jug state reached by applying ``rule`` to state ``(x, y)``.

    ``x`` is the amount in the jug of capacity ``m`` and ``y`` the amount in
    the jug of capacity ``n``.  Rules:

    1. fill jug x            2. fill jug y
    3. empty jug x           4. empty jug y
    5. pour x into y until y is full
    6. pour y into x until x is full
    7. pour all of x into y
    8. pour all of y into x

    A pour rule whose precondition does not hold (it would leave a negative
    amount or overfill a jug), or an unknown rule number, leaves the state
    unchanged.
    """
    if rule == 1:
        return m, y
    if rule == 2:
        return x, n
    if rule == 3:
        return 0, y
    if rule == 4:
        return x, 0
    if rule == 5 and x + y >= n:
        # Only valid when x holds enough water to top y up.
        return x - (n - y), n
    if rule == 6 and x + y >= m:
        return m, y - (m - x)
    if rule == 7 and x + y <= n:
        # Only valid when everything in x fits into y.
        return 0, x + y
    if rule == 8 and x + y <= m:
        return x + y, 0
    return x, y


x = 0
y = 0
m = 4
n = 3

if __name__ == "__main__":
    print("Initial state = (0,0)")
    print("Capacities = (4,3)")
    print("Goal state = (2,y)")
    # Keep asking for rules until the 4-litre jug holds exactly 2 litres.
    while x != 2:
        r = int(input("Enter rule"))
        x, y = apply_rule(x, y, r, m, n)
        print(x, y)
EXPERIMENT-2

Write a Program to Implement AO* Algorithm using Python.

PROGRAM:

def _members(group):
    """Return the child nodes of a successor group.

    An AND group is stored as a tuple of node names; an OR successor is the
    bare node name.
    """
    return group if isinstance(group, tuple) else (group,)


def recAOStar(n):
    """Recursively run AO* from node ``n`` and return its revised cost.

    Reads the successor table ``allNodes``, updates the heuristic table
    through ``change_heuristic`` and records the chosen successor group of
    every expanded node in ``optimal_child_group``.
    """
    print("Expanding Node:", n)
    successors = allNodes.get(n, {})
    and_nodes = successors.get('AND', [])
    or_nodes = successors.get('OR', [])
    if not and_nodes and not or_nodes:
        # Leaf node: nothing to expand, its heuristic is final.
        return heuristic(n)

    solvable = False
    marked = {}

    while not solvable:
        if len(marked) == len(and_nodes) + len(or_nodes):
            # Every group has been tried: settle on the overall cheapest one.
            min_cost_least, min_cost_group_least = least_cost_group(and_nodes, or_nodes, {})
            solvable = True
            change_heuristic(n, min_cost_least)
            optimal_child_group[n] = min_cost_group_least
            continue
        min_cost, min_cost_group = least_cost_group(and_nodes, or_nodes, marked)
        is_expanded = False
        for child in _members(min_cost_group):
            if child in allNodes:
                is_expanded = True
                recAOStar(child)
        if is_expanded:
            # Expansion may have raised child costs; accept the group only if
            # it is still the cheapest after re-evaluation.
            min_cost_verify, min_cost_group_verify = least_cost_group(and_nodes, or_nodes, {})
            if min_cost_group == min_cost_group_verify:
                solvable = True
                change_heuristic(n, min_cost_verify)
                optimal_child_group[n] = min_cost_group
        else:
            solvable = True
            change_heuristic(n, min_cost)
            optimal_child_group[n] = min_cost_group
        marked[min_cost_group] = 1
    return heuristic(n)


def least_cost_group(and_nodes, or_nodes, marked):
    """Return ``[cost, group]`` for the cheapest successor group not in ``marked``.

    AND groups are keyed by a tuple of their node names and cost the sum of
    the members' heuristics plus one edge per member; OR successors are keyed
    by the node name and cost heuristic + 1.  Returns ``[999999, None]`` when
    every group is marked.
    """
    node_wise_cost = {}
    for node_group in and_nodes:
        group = tuple(node_group)
        if group not in marked:
            node_wise_cost[group] = sum(heuristic(node) for node in group) + len(group)
    for node in or_nodes:
        if node not in marked:
            node_wise_cost[node] = heuristic(node) + 1
    min_cost = 999999
    min_cost_group = None
    for cost_key, cost in node_wise_cost.items():
        if cost < min_cost:
            min_cost = cost
            min_cost_group = cost_key
    return [min_cost, min_cost_group]


def heuristic(n):
    """Return the current heuristic estimate of node ``n``."""
    return H_dist[n]


def change_heuristic(n, cost):
    """Overwrite the heuristic estimate of node ``n`` with ``cost``."""
    H_dist[n] = cost
    return


def print_path(node):
    """Print the chosen successor groups from ``node`` downwards, joined by '->'."""
    group = optimal_child_group[node]
    print("".join(str(child) for child in _members(group)), end="")
    for child in _members(group):
        if child in optimal_child_group:
            print("->", end="")
            print_path(child)


# Initial heuristic estimate of every node.
H_dist = {
    'A': -1,
    'B': 4,
    'C': 2,
    'D': 3,
    'E': 6,
    'F': 8,
    'G': 2,
    'H': 0,
    'I': 0,
    'J': 0
}
# AND/OR successors of every non-leaf node.
allNodes = {
    'A': {'AND': [('C', 'D')], 'OR': ['B']},
    'B': {'OR': ['E', 'F']},
    'C': {'OR': ['G'], 'AND': [('H', 'I')]},
    'D': {'OR': ['J']}
}
optimal_child_group = {}
optimal_cost = recAOStar('A')
print('Nodes which gives optimal cost are')
print_path('A')
print('\nOptimal Cost is :: ', optimal_cost)
EXPERIMENT-3

Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data from
a .CSV file.

PROGRAM:

import csv
def find_s(examples, num_attributes):
    """Return the maximally specific hypothesis for ``examples`` (FIND-S).

    Each example is a sequence whose first ``num_attributes`` entries are
    attribute values and whose entry at index ``num_attributes`` is the label.
    Only examples labelled 'yes' (case-insensitive) change the hypothesis.
    The hypothesis starts as all '0' (most specific), is seeded from the
    first positive example, and every attribute that later disagrees becomes
    '?'.  The hypothesis is printed after each training instance.
    """
    hypothesis = ['0'] * num_attributes
    seeded = False
    for i, example in enumerate(examples):
        if example[num_attributes].strip().lower() == 'yes':
            if not seeded:
                # Seed from the first POSITIVE example, not blindly from row 0.
                hypothesis = list(example[:num_attributes])
                seeded = True
            else:
                for j in range(num_attributes):
                    if example[j] != hypothesis[j]:
                        hypothesis[j] = '?'
        print(" For Training instance No:{0} the hypothesis is ".format(i), hypothesis)
    return hypothesis


num_attributes = 6

if __name__ == "__main__":
    a = []
    print("\n The Given Training Data Set \n")
    with open('enjoysport.csv', 'r') as csvfile:
        for row in csv.reader(csvfile):
            if row:  # ignore blank lines in the file
                a.append(row)
                print(row)
    print("\n The initial value of hypothesis: ")
    print(['0'] * num_attributes)

    print("\n Find S: Finding a Maximally Specific Hypothesis\n")
    hypothesis = find_s(a, num_attributes)
    print("\n The Maximally Specific Hypothesis for a given Training Examples :\n")
    print(hypothesis)

Data Set:
EXPERIMENT-4

For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set of
all hypotheses consistent with the training examples.

PROGRAM:
import csv

def candidate_elimination(data):
    """Run the manual's simplified Candidate-Elimination pass over ``data``.

    ``data`` is a list of rows: row 0 is the header, every row ends with the
    label ("Yes"/"No"), and row 1 seeds the specific hypothesis.  Rows with
    any other label (e.g. the header) are ignored.  The specific and general
    boundaries are printed after every row.

    Returns ``(s, gh)``: the final specific hypothesis and the list of
    general hypotheses that still constrain at least one attribute.
    """
    if len(data) < 2:
        raise ValueError("need a header row and at least one training example")

    s = data[1][:-1]
    g = [['?' for _ in range(len(s))] for _ in range(len(s))]

    # enumerate() gives the true row number; data.index(row) returned the
    # first matching row and so mis-numbered duplicate examples.
    for step, row in enumerate(data, start=1):
        if row[-1] == "Yes":
            for j in range(len(s)):
                if row[j] != s[j]:
                    s[j] = '?'
                    g[j][j] = '?'
        elif row[-1] == "No":
            for j in range(len(s)):
                if row[j] != s[j]:
                    g[j][j] = s[j]
                else:
                    g[j][j] = "?"
        print("\nSteps of Candidate Elimination Algorithm", step)
        print(s)
        print(g)

    # Keep only general hypotheses that still fix at least one attribute.
    gh = [hypothesis for hypothesis in g if any(value != '?' for value in hypothesis)]
    return s, gh


if __name__ == "__main__":
    with open("trainingdata.csv") as f:
        data = list(csv.reader(f))

    s, gh = candidate_elimination(data)
    print("\nFinal specific hypothesis:\n", s)

    print("\nFinal general hypothesis:\n", gh)

DATASET:
EXPERIMENT-5

Write a program to demonstrate the working of the decision tree based ID3 algorithm.
Use an appropriate data set for building the decision tree and apply this knowledge to
classify a new sample.

PROGRAM:

import math
import csv
def load_csv(filename):
    """Read a CSV file and return ``(rows, headers)``.

    The first row of the file is removed from the data and returned as the
    header list; all values stay strings.
    """
    # The context manager closes the file, which the original never did.
    with open(filename, "r", newline="") as csv_file:
        dataset = list(csv.reader(csv_file))
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    """A decision-tree node.

    An internal node names the attribute it tests and lists
    ``(value, child)`` pairs in ``children``; a leaf has a non-empty
    ``answer`` holding the predicted label.
    """

    def __init__(self, attribute):
        self.attribute = attribute  # attribute tested here ("" for a leaf)
        self.children = []          # list of (attribute value, child Node)
        self.answer = ""            # predicted label; "" for internal nodes

def subtables(data, col, delete):
    """Partition ``data`` by the value found in column ``col``.

    Returns ``(attr, dic)`` where ``attr`` lists the distinct values of the
    column in first-seen order and ``dic`` maps each value to its rows.
    When ``delete`` is true the rows in ``dic`` are copies with column
    ``col`` removed; the rows of ``data`` are never modified.
    """
    # dict.fromkeys keeps first-seen order, so the result is deterministic
    # (a plain set() made the order of tree branches vary between runs).
    attr = list(dict.fromkeys(row[col] for row in data))
    dic = {value: [] for value in attr}
    for row in data:
        if delete:
            trimmed = list(row)
            del trimmed[col]
            dic[row[col]].append(trimmed)
        else:
            dic[row[col]].append(row)
    return attr, dic

def entropy(S):
    """Return the Shannon entropy (base 2) of the labels in ``S``.

    Works for any number of distinct labels; an empty or single-class
    collection has entropy 0.
    """
    total = len(S)
    counts = {}
    for label in S:
        counts[label] = counts.get(label, 0) + 1
    if len(counts) <= 1:
        return 0
    sums = 0
    for cnt in counts.values():
        fraction = cnt / (total * 1.0)
        sums += -1 * fraction * math.log(fraction, 2)
    return sums

def compute_gain(data, col):
    """Return the information gain obtained by splitting ``data`` on ``col``.

    The gain is the entropy of the label column (last column) minus the
    size-weighted entropy of the labels in each sub-table produced by
    ``subtables``.
    """
    attr, dic = subtables(data, col, delete=False)

    total_size = len(data)
    gain = entropy([row[-1] for row in data])

    for value in attr:
        subset = dic[value]
        ratio = len(subset) / (total_size * 1.0)
        gain -= ratio * entropy([row[-1] for row in subset])
    return gain

def build_tree(data, features):
    """Build an ID3 decision tree for ``data`` and return its root ``Node``.

    ``data`` rows hold attribute values followed by the label in the last
    column; ``features`` names the columns in the same order.  Recursion
    stops when all rows share a label, or when no attribute is left, in
    which case the most frequent label is used.
    """
    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:
        node = Node("")
        node.answer = lastcol[0]
        return node

    n = len(data[0]) - 1
    if n == 0:
        # Attributes exhausted but labels still mixed: fall back to the
        # majority label instead of crashing on max([]).
        node = Node("")
        node.answer = max(sorted(set(lastcol)), key=lastcol.count)
        return node

    gains = [compute_gain(data, col) for col in range(n)]
    split = gains.index(max(gains))
    node = Node(features[split])
    fea = features[:split] + features[split + 1:]

    attr, dic = subtables(data, split, delete=True)

    for value in attr:
        child = build_tree(dic[value], fea)
        node.children.append((value, child))
    return node

def print_tree(node, level):
    """Print the tree rooted at ``node``, indenting by ``level`` spaces.

    A leaf prints its answer; an internal node prints its attribute, then
    each branch value one level deeper followed by that branch's subtree.
    """
    if node.answer != "":
        print(" " * level, node.answer)
        return

    print(" " * level, node.attribute)
    for value, child in node.children:
        print(" " * (level + 1), value)
        print_tree(child, level + 2)

def classify(node, x_test, features):
    """Print and return the label the tree rooted at ``node`` gives ``x_test``.

    ``features`` names the columns of ``x_test``.  If the sample carries an
    attribute value that has no branch in the tree, "Unknown" is printed and
    ``None`` is returned, so the caller's output line is always completed.
    """
    if node.answer != "":
        print(node.answer)
        return node.answer
    pos = features.index(node.attribute)
    for value, child in node.children:
        if x_test[pos] == value:
            return classify(child, x_test, features)
    print("Unknown")
    return None

# Main program: learn a tree from id3.csv, print it, then label every row of
# id3_test.csv.
dataset, features = load_csv("id3.csv")
node1 = build_tree(dataset, features)

print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1, 0)
# `features` is re-read from the test file, so classify() looks attributes up
# by the test file's own column order.
testdata, features = load_csv("id3_test.csv")

for xtest in testdata:
    print("The test instance:", xtest)
    print("The label for test instance:", end=" ")
    classify(node1, xtest, features)

Training Dataset:

Day Outlook Temperature Humidity Wind PlayTennis

D1 Sunny Hot High Weak No
D2 Sunny Hot High Strong No
D3 Overcast Hot High Weak Yes
D4 Rain Mild High Weak Yes
D5 Rain Cool Normal Weak Yes
D6 Rain Cool Normal Strong No
D7 Overcast Cool Normal Strong Yes
D8 Sunny Mild High Weak No
D9 Sunny Cool Normal Weak Yes
D10 Rain Mild Normal Weak Yes
D11 Sunny Mild Normal Strong Yes
D12 Overcast Mild High Strong Yes
D13 Overcast Hot Normal Weak Yes
D14 Rain Mild High Strong No

Test Dataset:

Day Outlook Temperature Humidity Wind

T1 Rain Cool Normal Strong
T2 Sunny Mild Normal Strong
EXPERIMENT-6

Build an Artificial Neural Network by implementing the Backpropagation algorithm

and test the same using appropriate data sets.

PROGRAM:

import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # two inputs [sleep, study]
y = np.array(([92], [86], [89]), dtype=float)  # one output [expected % in exams]
X = X / np.amax(X, axis=0)  # scale every input column by its maximum
y = y / 100  # scale the targets into the sigmoid's (0, 1) range


def sigmoid(x):
    """Return the logistic sigmoid of ``x`` (element-wise for arrays)."""
    return 1 / (1 + np.exp(-x))


def derivatives_sigmoid(x):
    """Return the sigmoid derivative given the sigmoid OUTPUT ``x``."""
    return x * (1 - x)


# Variable initialization
epoch = 5000  # number of training iterations
lr = 0.1  # learning rate
inputlayer_neurons = 2  # number of features in the data set
hiddenlayer_neurons = 3  # number of hidden-layer neurons
output_neurons = 1  # number of neurons in the output layer

# Weight and bias initialization: uniform random values in [0, 1).
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))  # input -> hidden weights
bh = np.random.uniform(size=(1, hiddenlayer_neurons))  # hidden-layer biases
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))  # hidden -> output weights
bout = np.random.uniform(size=(1, output_neurons))  # output-layer bias

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)

    # How much the hidden-layer activations contributed to the error
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # Gradient step: dot product of next-layer error and current-layer output.
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    # The biases must be trained too; the original left them at their
    # random initial values.
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input: \n" + str(X))

print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)

Training Examples:

Example Sleep Study Expected % in Exams

1 2 9 92

2 1 5 86

3 3 6 89

Normalize the input:

Example Sleep Study Expected % in Exams

1 2/3 = 0.66666667 9/9 = 1 0.92

2 1/3 = 0.33333333 5/9 = 0.55555556 0.86

3 3/3 = 1 6/9 = 0.66666667 0.89

EXPERIMENT-7
Write a program to implement the naïve Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering few test
data sets.

PROGRAM:
import csv
import random
import math

def loadcsv(filename):
    """Read a numeric CSV file and return its rows as lists of floats.

    Blank lines are skipped.  Every remaining field must parse as a float
    (so the file must not contain a header row); otherwise ``ValueError``
    is raised by ``float``.
    """
    # The context manager closes the file, which the original never did.
    with open(filename, "r", newline="") as csv_file:
        # Convert the strings into numbers for processing.
        return [[float(x) for x in row] for row in csv.reader(csv_file) if row]

def splitdataset(dataset, splitratio):
    """Randomly split ``dataset`` into ``[trainset, testset]``.

    ``int(len(dataset) * splitratio)`` rows are drawn at random (without
    replacement) for training; the rest form the test set.  ``dataset``
    itself is left untouched.
    """
    trainsize = int(len(dataset) * splitratio)
    trainset = []
    remaining = list(dataset)  # work on a copy so the caller's list survives
    while len(trainset) < trainsize:
        # Pick a random remaining row and move it into the training set.
        index = random.randrange(len(remaining))
        trainset.append(remaining.pop(index))
    return [trainset, remaining]

def separatebyclass(dataset):
    """Group the rows of ``dataset`` by their class value (last column).

    Returns a dictionary mapping each class value to the list of rows that
    carry it, in their original order.
    """
    separated = {}
    for vector in dataset:
        separated.setdefault(vector[-1], []).append(vector)
    return separated

def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Raises ``ZeroDivisionError`` for an empty sequence.
    """
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    """Return the sample standard deviation (n - 1 denominator) of ``numbers``.

    Fewer than two values carry no spread information, so 0.0 is returned
    instead of dividing by zero.
    """
    if len(numbers) < 2:
        return 0.0
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    """Return ``[(mean, stdev), ...]`` for every attribute column of ``dataset``.

    The last column holds the class label and is excluded.  An empty
    dataset yields an empty list.
    """
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    # Drop the summary of the label column; slicing also copes with [].
    return summaries[:-1]

def summarizebyclass(dataset):
    """Return per-class attribute summaries for ``dataset``.

    The result maps each class value to the list of ``(mean, stdev)`` tuples
    produced by ``summarize`` for the rows of that class.
    """
    separated = separatebyclass(dataset)
    summaries = {}
    for classvalue, instances in separated.items():
        # summarize() computes the mean and standard deviation per attribute.
        summaries[classvalue] = summarize(instances)
    return summaries

def calculateprobability(x, mean, stdev):
    """Return the Gaussian density of ``x`` for the given ``mean`` and ``stdev``.

    A zero ``stdev`` describes a constant attribute; the density is then
    treated as 1.0 when ``x`` equals the mean and 0.0 otherwise, instead of
    dividing by zero.
    """
    if stdev == 0:
        return 1.0 if x == mean else 0.0
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateclassprobabilities(summaries, inputvector):
    """Return the likelihood of ``inputvector`` under every class.

    ``summaries`` maps each class value to its list of ``(mean, stdev)``
    tuples, one per attribute.  For every class the Gaussian densities of
    the attributes are multiplied together.
    NOTE(review): no class prior is applied, so this is a likelihood rather
    than a posterior — confirm that is intended.
    """
    probabilities = {}
    for classvalue, classsummaries in summaries.items():
        probabilities[classvalue] = 1
        # `mu` / `sigma` avoid shadowing the module's mean() and stdev().
        for i, (mu, sigma) in enumerate(classsummaries):
            x = inputvector[i]  # the i-th attribute of the test vector
            probabilities[classvalue] *= calculateprobability(x, mu, sigma)
    return probabilities

def predict(summaries, inputvector):
    """Return the class whose summaries give ``inputvector`` the highest probability.

    Ties keep the class encountered first; ``None`` is returned when
    ``summaries`` is empty.
    """
    probabilities = calculateclassprobabilities(summaries, inputvector)
    bestLabel, bestProb = None, -1
    for classvalue, probability in probabilities.items():
        # Keep the class with the highest probability seen so far.
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classvalue
    return bestLabel

def getpredictions(summaries, testset):
    """Return the predicted class for every row of ``testset``, in order."""
    return [predict(summaries, row) for row in testset]

def getaccuracy(testset, predictions):
    """Return the percentage of rows whose label (last column) was predicted.

    ``predictions[i]`` is compared with ``testset[i][-1]``.  An empty test
    set yields 0.0 instead of a division by zero.
    """
    if not testset:
        return 0.0
    correct = 0
    for i in range(len(testset)):
        if testset[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testset))) * 100.0

def main():
    """Train the Gaussian naive Bayes model on naivedata.csv and print its accuracy."""
    filename = 'naivedata.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)

    trainingset, testset = splitdataset(dataset, splitratio)

    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingset), len(testset)))
    # Prepare the model: per-class mean and standard deviation of each attribute.
    summaries = summarizebyclass(trainingset)
    # Test the model: predict every held-out row from the training summaries.
    predictions = getpredictions(summaries, testset)
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))


if __name__ == "__main__":
    main()

Data Set:

Examples Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabeticPedigreeFunction Age Outcome

1 6 148 72 35 0 33.6 0.627 50 1

2 1 85 66 29 0 26.6 0.351 31 0

3 8 183 64 0 0 23.3 0.672 32 1

4 1 89 66 23 94 28.1 0.167 21 0

5 0 137 40 35 168 43.1 2.288 33 1

6 5 116 74 0 0 25.6 0.201 30 0

7 3 78 50 32 88 31 0.248 26 1

8 10 115 0 0 0 35.3 0.134 29 0

9 2 197 70 45 543 30.5 0.158 53 1

10 8 125 96 0 0 0 0.232 54 1

EXPERIMENT-8
Assuming a set of documents that need to be classified, use the naïve Bayesian
Classifier model to perform this task. Built-in Java classes/API can be used to write the
program. Calculate the accuracy, precision, and recall for your data set.

PROGRAM:

import pandas as pd

from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Load the labelled documents; the file has no header row.
msg = pd.read_csv('naivetext.csv', names=['message', 'label'])

print('The dimensions of the dataset', msg.shape)

# Encode the text labels as numbers: pos -> 1, neg -> 0.
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
print(X)
print(y)

# Splitting the dataset into train and test data
xtrain, xtest, ytrain, ytest = train_test_split(X, y)

print('\n the total number of Training Data :', ytrain.shape)

print('\n the total number of Test Data :', ytest.shape)

# Output of the words or tokens in the text documents.  The vocabulary is
# learned from the training documents only and reused for the test documents.
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
# get_feature_names() was removed from scikit-learn; get_feature_names_out()
# (scikit-learn >= 1.0) is its replacement.  list() keeps the printed format.
feature_names = list(count_vect.get_feature_names_out())
print('\n The words or Tokens in the text documents \n')
print(feature_names)

df = pd.DataFrame(xtrain_dtm.toarray(), columns=feature_names)

# Training Naive Bayes (NB) classifier on training data.
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

# Printing accuracy, confusion matrix, precision and recall
print('\n Accuracy of the classifier is', metrics.accuracy_score(ytest, predicted))

print('\n Confusion matrix')

print(metrics.confusion_matrix(ytest, predicted))

print('\n The value of Precision', metrics.precision_score(ytest, predicted))

print('\n The value of Recall', metrics.recall_score(ytest, predicted))

Data Set:

Text Documents Label

1 I love this sandwich pos
2 This is an amazing place pos
3 I feel very good about these beers pos
4 This is my best work pos
5 What an awesome view pos
6 I do not like this restaurant neg
7 I am tired of this stuff neg
8 I can't deal with this neg
9 He is my sworn enemy neg
10 My boss is horrible neg
11 This is an awesome place pos
12 I do not like the taste of this juice neg
13 I love to dance pos
14 I am sick and tired of this place neg
15 What a great holiday pos
16 That is a bad locality to stay neg
17 We will have good fun tomorrow pos
18 I went to my enemy's house today neg

EXPERIMENT-9
Write a program to construct a Bayesian network considering medical data. Use this
model to demonstrate the diagnosis of heart patients using standard Heart Disease
Data Set. You can use Java/Python ML library classes/API

PROGRAM:

import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# Load the heart-disease data and turn the '?' placeholders into NaN.
heartDisease = pd.read_csv('heart.csv').replace('?', np.nan)

print('Sample instances from the dataset are given below')
print(heartDisease.head())

print('\n Attributes and datatypes')
print(heartDisease.dtypes)

# Directed edges (parent, child) of the network: four factors point at
# 'heartdisease', which in turn points at two observed measurements.
network_edges = [
    ('age', 'heartdisease'),
    ('sex', 'heartdisease'),
    ('exang', 'heartdisease'),
    ('cp', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'chol'),
]
model = BayesianModel(network_edges)

print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

print('\n Inferencing with Bayesian Network:')
HeartDiseasetest_infer = VariableElimination(model)

# Query 1: distribution of heartdisease given restecg = 2.
print('\n 1. Probability of HeartDisease given evidence= restecg')
q1 = HeartDiseasetest_infer.query(
    variables=['heartdisease'],
    evidence={'restecg': 2},
)
print(q1)

# Query 2: distribution of heartdisease given cp = 2.
print('\n 2. Probability of HeartDisease given evidence= cp ')
q2 = HeartDiseasetest_infer.query(
    variables=['heartdisease'],
    evidence={'cp': 2},
)
print(q2)

Data Set:

age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal Heartdisease
63 1 1 145 233 1 2 150 0 2.3 3 0 6 0
67 1 4 160 286 0 2 108 1 1.5 2 3 3 2
67 1 4 120 229 0 2 129 1 2.6 2 2 7 1
41 0 2 130 204 0 2 172 0 1.4 1 0 3 0
62 0 4 140 268 0 2 160 0 3.6 3 2 3 3
60 1 4 130 206 0 2 132 1 2.4 2 2 7 4
EXPERIMENT-10

Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set
for clustering using k-Means algorithm. Compare the results of these two algorithms
and comment on the quality of clustering. You can add Java/Python ML library
classes/API in the program.

PROGRAM:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import sklearn.metrics as sm
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.mixture import GaussianMixture


def align_cluster_labels(true_labels, cluster_labels):
    """Relabel clusters with the majority true class of their members.

    Cluster ids produced by KMeans / GaussianMixture are arbitrary, so they
    must be mapped onto class ids before accuracy or a confusion matrix
    against the true classes means anything.
    """
    true_labels = np.asarray(true_labels)
    cluster_labels = np.asarray(cluster_labels)
    aligned = np.empty_like(true_labels)
    for cluster in np.unique(cluster_labels):
        members = cluster_labels == cluster
        aligned[members] = np.bincount(true_labels[members]).argmax()
    return aligned


iris = datasets.load_iris()
# NOTE(review): `dataset` is read but never used below; the clustering runs
# on scikit-learn's bundled iris data. Confirm whether iris.csv is needed.
dataset = pd.read_csv("iris.csv")
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']

y = pd.DataFrame(iris.target)
y.columns = ['Targets']

# --- k-Means ---
model = KMeans(n_clusters=3)
model.fit(X)
kmeans_labels = align_cluster_labels(y.Targets.values, model.labels_)

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# All three plots share one 1x3 grid; the original mixed a 1x2 and a 2x2
# grid, which made the third plot overlap the first two.

# Plot the Original Classifications
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# Plot the Models Classifications
plt.subplot(1, 3, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[kmeans_labels], s=40)
plt.title('K Mean Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('The accuracy score of K-Mean: ', sm.accuracy_score(y, kmeans_labels))
print('The Confusion matrixof K-Mean: ', sm.confusion_matrix(y, kmeans_labels))

# --- EM (Gaussian mixture) on standardised features ---
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)

gmm = GaussianMixture(n_components=3)
gmm.fit(xs)

y_gmm = align_cluster_labels(y.Targets.values, gmm.predict(xs))

plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_gmm], s=40)
plt.title('GMM Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

plt.show()
print('The accuracy score of EM: ', sm.accuracy_score(y, y_gmm))
print('The Confusion matrix of EM: ', sm.confusion_matrix(y, y_gmm))
EXPERIMENT-11

Write a program to implement k-Nearest Neighbour algorithm to classify the iris data
set. Print both correct and wrong predictions. Java/Python ML library classes can be
used for this problem.

PROGRAM:

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import datasets
""" Iris Plants Dataset, dataset contains 150 (50 in each of three
classes)Number of Attributes: 4 numeric, predictive attributes and
the Class"""
iris = datasets.load_iris()
# x holds the four attribute columns of the data set, y the class labels.
x = iris.data
y = iris.target
print('sepal-length', 'sepal-width', 'petal-length', 'petal-width')
print(x)
print('class: 0-Iris-Setosa, 1- Iris-Versicolour, 2- Iris-Virginica')
print(y)
# Split the data set into 70% training data and 30% test data: of the 150
# records, 105 go to the training set and 45 to the test set.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
# Train the model with K=5 nearest neighbours.
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)
# Make predictions on the test data.
y_pred = classifier.predict(x_test)
# The experiment asks for both correct and wrong predictions to be printed.
print('Correct and wrong predictions')
for sample, actual, predicted in zip(x_test, y_test, y_pred):
    status = 'Correct' if actual == predicted else 'Wrong'
    print(sample, 'Actual:', iris.target_names[actual],
          'Predicted:', iris.target_names[predicted], '->', status)
# Confusion matrix, precision, recall and f1 score are the most commonly
# used metrics for evaluating a classifier.
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred))
print('Accuracy Metrics')
print(classification_report(y_test, y_pred))

Data Set:

Iris Plants Dataset: Dataset contains 150 instances (50 in each of three classes) Number
of Attributes: 4 numeric, predictive attributes and the Class
EXPERIMENT-12

Implement the non-parametric Locally Weighted Regression algorithm in order to fit

data points. Select appropriate data set for your experiment and draw graphs.

PROGRAM:

import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot
from bokeh.io import push_notebook
def local_regression(x0, X, Y, tau):
    """Return the locally weighted linear-regression prediction at ``x0``.

    A bias term is prepended to ``x0`` and to every row of ``X``; the
    training points are weighted by ``radial_kernel`` with bandwidth
    ``tau`` and the weighted normal equations are solved with a
    pseudo-inverse.
    """
    # Add the bias term.
    x0 = np.r_[1, x0]
    X = np.c_[np.ones(len(X)), X]
    # Fit the model: normal equations with kernel weights (X^T * W).
    xw = X.T * radial_kernel(x0, X, tau)
    beta = np.linalg.pinv(xw @ X) @ xw @ Y
    # Predict the value at x0.
    return x0 @ beta
def radial_kernel(x0, X, tau):
    """Return the radial (Gaussian) kernel weight of every row of ``X`` w.r.t. ``x0``.

    The weight of a row is ``exp(-||row - x0||^2 / (2 * tau^2))``.
    """
    return np.exp(np.sum((X - x0) ** 2, axis=1) / (-2 * tau * tau))
n = 1000
# Generate the data set.  The previews below use [:10] so that they really
# show the first ten samples ([1:10] showed nine and skipped the first).
X = np.linspace(-3, 3, num=n)
print("The Data Set ( 10 Samples) X :\n", X[:10])
Y = np.log(np.abs(X ** 2 - 1) + .5)
print("The Fitting Curve Data Set (10 Samples) Y:\n", Y[:10])
# Jitter X with Gaussian noise after Y has been computed.
X += np.random.normal(scale=.1, size=n)
print("Normalised (10 Samples) X :\n", X[:10])
# Query points at which the regression is evaluated.
domain = np.linspace(-3, 3, num=300)
print(" Xo Domain Space(10 Samples) :\n", domain[:10])
def plot_lwr(tau):
    """Return a Bokeh figure of the data and the LWR fit for bandwidth ``tau``."""
    # Prediction through regression at every point of the domain.
    prediction = [local_regression(x0, X, Y, tau) for x0 in domain]
    # `width`/`height` replace `plot_width`/`plot_height`, which were
    # removed in Bokeh 3.
    plot = figure(width=400, height=400)
    plot.title.text = 'tau=%g' % tau
    plot.scatter(X, Y, alpha=.3)
    plot.line(domain, prediction, line_width=2, color='red')
    return plot


show(gridplot([[plot_lwr(10.), plot_lwr(1.)], [plot_lwr(0.1), plot_lwr(0.01)]]))

RapitLog Manual Español
100% (1)
RapitLog Manual Español
45 pages
Rubric Ee Music
No ratings yet
Rubric Ee Music
5 pages
AIML Final Programs
No ratings yet
AIML Final Programs
8 pages
Ailmml
No ratings yet
Ailmml
1 page
AIML Lab Programs
No ratings yet
AIML Lab Programs
13 pages
AI and ML LAB
No ratings yet
AI and ML LAB
9 pages
Lab PRGM
No ratings yet
Lab PRGM
16 pages
ML Lab Prog1-5 (5) College PDF
No ratings yet
ML Lab Prog1-5 (5) College PDF
12 pages
Lab
No ratings yet
Lab
25 pages
AIML Lab Manual
No ratings yet
AIML Lab Manual
9 pages
15CSL76 Students
No ratings yet
15CSL76 Students
18 pages
Wa0027.
No ratings yet
Wa0027.
34 pages
ML Lab Record
No ratings yet
ML Lab Record
33 pages
Machine Learning Through Python Lab Mannual
No ratings yet
Machine Learning Through Python Lab Mannual
33 pages
ML Lab File Batch 1
No ratings yet
ML Lab File Batch 1
20 pages
Code MLT
No ratings yet
Code MLT
9 pages
11
No ratings yet
11
10 pages
Program 1
No ratings yet
Program 1
25 pages
Machine Learning Laboratory Manual
No ratings yet
Machine Learning Laboratory Manual
11 pages
ML Lab Manual
No ratings yet
ML Lab Manual
90 pages
Py Lab Programs
No ratings yet
Py Lab Programs
14 pages
ML Lab Programs 1-10-Converted NAM COLLEGE PDF
No ratings yet
ML Lab Programs 1-10-Converted NAM COLLEGE PDF
33 pages
AIML Lab Program
No ratings yet
AIML Lab Program
11 pages
AIR Codes BECOA113
No ratings yet
AIR Codes BECOA113
20 pages
Ex 2
No ratings yet
Ex 2
6 pages
Aiml Lab
No ratings yet
Aiml Lab
10 pages
MLAll Practical
No ratings yet
MLAll Practical
27 pages
All in One
No ratings yet
All in One
9 pages
AIML
No ratings yet
AIML
12 pages
Lecture 04 - Install Python Dan Searching in Python
No ratings yet
Lecture 04 - Install Python Dan Searching in Python
10 pages
0 Aimlfinal
No ratings yet
0 Aimlfinal
24 pages
Quiz Pertemuan 3 Dan Pertemuan 4
No ratings yet
Quiz Pertemuan 3 Dan Pertemuan 4
10 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
26 pages
Ai ML Programs
No ratings yet
Ai ML Programs
34 pages
ML Lab Manual
No ratings yet
ML Lab Manual
14 pages
Shashidhar-18csl76 Final
No ratings yet
Shashidhar-18csl76 Final
19 pages
ML1 3 Merged
No ratings yet
ML1 3 Merged
19 pages
ML 5
No ratings yet
ML 5
23 pages
Aiml Lab
No ratings yet
Aiml Lab
14 pages
Ailml Removed
No ratings yet
Ailml Removed
45 pages
EXP-5 Rimendra RA2011033010064
No ratings yet
EXP-5 Rimendra RA2011033010064
7 pages
Lab Manual
No ratings yet
Lab Manual
25 pages
Machine Learning - Lab Manual
No ratings yet
Machine Learning - Lab Manual
35 pages
Ai Lab
No ratings yet
Ai Lab
14 pages
Array
No ratings yet
Array
12 pages
Test (Final)
No ratings yet
Test (Final)
15 pages
1 - All Python Codes + Neo4j Samples
No ratings yet
1 - All Python Codes + Neo4j Samples
16 pages
Aiml Lab
No ratings yet
Aiml Lab
19 pages
AI Lab Program SEM
No ratings yet
AI Lab Program SEM
29 pages
ARTIFICIAL INTELLIGENCE AND MACHINE LEARNING - BhavyaSharan - 059
No ratings yet
ARTIFICIAL INTELLIGENCE AND MACHINE LEARNING - BhavyaSharan - 059
47 pages
Assignment
No ratings yet
Assignment
11 pages
DAA Record
No ratings yet
DAA Record
15 pages
Design and Analysis of Algorithm Lab Manual - Answers
No ratings yet
Design and Analysis of Algorithm Lab Manual - Answers
13 pages
5fdacf78 Da18 40a2 9b4c df4
No ratings yet
5fdacf78 Da18 40a2 9b4c df4
12 pages
Code:: To Find Frequent Itemsets and Association Between Different Itemsets Using Apriori Algorithm
No ratings yet
Code:: To Find Frequent Itemsets and Association Between Different Itemsets Using Apriori Algorithm
28 pages
Machine Learning Lab (17CSL76)
No ratings yet
Machine Learning Lab (17CSL76)
48 pages
ML File
No ratings yet
ML File
13 pages
AIES LAB PROGRAM (1) (2) .Docx 20240926 204425 0000
No ratings yet
AIES LAB PROGRAM (1) (2) .Docx 20240926 204425 0000
66 pages
Ai Final Lab Mannual
No ratings yet
Ai Final Lab Mannual
20 pages
ID3 Program4
No ratings yet
ID3 Program4
3 pages
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet
Practice Problems (Calculus)
100% (1)
Practice Problems (Calculus)
15 pages
SCR3443 Tutorial 4: Modern Cryptography
No ratings yet
SCR3443 Tutorial 4: Modern Cryptography
2 pages
1 Corinthians 11
No ratings yet
1 Corinthians 11
32 pages
PBD May 2022 Y6 BI
No ratings yet
PBD May 2022 Y6 BI
15 pages
Oral Proficiency Interview 2024 - Exam Format, Scoring Guide, and Test Modes Explained
No ratings yet
Oral Proficiency Interview 2024 - Exam Format, Scoring Guide, and Test Modes Explained
2 pages
Beginner's Somali Grammar
100% (2)
Beginner's Somali Grammar
169 pages
Soal Tes Masuk SMA Mata Pelajaran Bahasa Inggris
100% (1)
Soal Tes Masuk SMA Mata Pelajaran Bahasa Inggris
4 pages
Chapter-3 1
No ratings yet
Chapter-3 1
21 pages
Domains of Development Observation1
No ratings yet
Domains of Development Observation1
8 pages
(PDF Download) Modern Statistics With R Måns Thulin Fulll Chapter
100% (3)
(PDF Download) Modern Statistics With R Måns Thulin Fulll Chapter
64 pages
Thí nghiệm-IT1016-Tin học đại cương - 2022
No ratings yet
Thí nghiệm-IT1016-Tin học đại cương - 2022
17 pages
A Fundamental, Practical Theology of Children, Mothers', and Fathers in Modern Societies
No ratings yet
A Fundamental, Practical Theology of Children, Mothers', and Fathers in Modern Societies
436 pages
Baguio City National High School Ict-Technical Drafting 10 Module 5: Autocad Working Environment
No ratings yet
Baguio City National High School Ict-Technical Drafting 10 Module 5: Autocad Working Environment
5 pages
COE - GEC105 - Course Guide
No ratings yet
COE - GEC105 - Course Guide
9 pages
The Magic of The Pen: Select Miniatures From The Khamsa of Nizami Ganjavi
No ratings yet
The Magic of The Pen: Select Miniatures From The Khamsa of Nizami Ganjavi
276 pages
Kn25sa (It) Agility 125
No ratings yet
Kn25sa (It) Agility 125
91 pages
Types of Constraints in DBMS
No ratings yet
Types of Constraints in DBMS
15 pages
Introduction To Computer Objectives
No ratings yet
Introduction To Computer Objectives
24 pages
The Elements and Principles in Visual Arts
No ratings yet
The Elements and Principles in Visual Arts
5 pages
128 Programmer Is Suide (1985)
No ratings yet
128 Programmer Is Suide (1985)
468 pages
Hades and The Underworld
No ratings yet
Hades and The Underworld
5 pages
Inf and Gerund
No ratings yet
Inf and Gerund
8 pages
Stabilization of Nonlinear Time-Varying Systems: A Control Lyapunov Function Approach
No ratings yet
Stabilization of Nonlinear Time-Varying Systems: A Control Lyapunov Function Approach
14 pages
Prog Numerically
No ratings yet
Prog Numerically
57 pages
The Rust Reference
No ratings yet
The Rust Reference
9 pages
Disability in Fairy Tales 2
No ratings yet
Disability in Fairy Tales 2
19 pages
RoseIso LP1
No ratings yet
RoseIso LP1
8 pages
Module 3.0 PPT - Social Political Background of Jesus Birth
100% (1)
Module 3.0 PPT - Social Political Background of Jesus Birth
39 pages

ML Lab Manual

Uploaded by

ML Lab Manual

Uploaded by

EXPERIMENT-1

Write a Program to Implement Water-Jug problem using Python.

Write a Program to Implement AO* Algorithm using Python.

def least_cost_group(and_nodes, or_nodes, marked):

def change_heuristic(n, cost):

print("\n Find S: Finding a Maximally Specific Hypothesis\n")

print("\nFinal general hypothesis:\n",gh)

total_entropy=entropy([row[-1] for row in data])

for xtest in testdata:

Day Outlook Temperature Humidity Wind PlayTennis

Day Outlook Temperature Humidity Wind

Build an Artificial Neural Network by implementing the Backpropagation algorithm

#Derivative of Sigmoid Function

#weight and bias initialization

#draws a random range of numbers uniformly of dim x*y

#how much hidden layer weights contributed to error

# dotproduct of nextlayererror and currentlayerop

print("Input: \n" + str(X))

Example Sleep Study Expected % in Exams

Normalize the input:

Example Sleep Study Expected % in Exams

1 2/3 = 0.66666667 9/9 = 1 0.92

2 1/3 = 0.33333333 5/9 = 0.55555556 0.86

3 3/3 = 1 6/9 = 0.66666667 0.89

def splitdataset(dataset, splitratio):

def summarize(dataset): #creates a dictionary of classes

def calculateprobability(x, mean, stdev):

def calculateclassprobabilities(summaries, inputvector):

def predict(summaries, inputvector): #training and test data is passed

def getpredictions(summaries, testset):

def getaccuracy(testset, predictions):

trainingset, testset = splitdataset(dataset, splitratio)

1 6 148 72 35 0 33.6 0.627 50 1

3 8 183 64 0 0 23.3 0.672 32 1

5 0 137 40 35 168 43.1 2.288 33 1

6 5 116 74 0 0 25.6 0.201 30 0

8 10 115 0 0 0 35.3 0.134 29 0

9 2 197 70 45 543 30.5 0.158 53 1

print('The dimensions of the dataset',msg.shape)

#splitting the dataset into train and test data

print ('\n the total number of Training Data :',ytrain.shape)

#output of the words or Tokens in the text documents

# Training Naive Bayes (NB) classifier on training data.

#printing accuracy, Confusion matrix, Precision and Recall

print('\n Confusion matrix')

print('\n The value of Precision', metrics.precision_score(ytest,predicted))

print('\n The value of Recall', metrics.recall_score(ytest,predicted))

Text Documents Label

print('Sample instances from the dataset are given below')

print('\n Attributes and datatypes')

print('\n Inferencing with Bayesian Network:')

print('\n 1. Probability of HeartDisease given evidence= restecg')

print('\n 2. Probability of HeartDisease given evidence= cp ')

# Plot the Original Classifications

from sklearn import preprocessing

from sklearn.mixture import GaussianMixture

from sklearn.model_selection import train_test_split

Implement the non-parametric Locally Weighted Regression algorithm in order to fit

You might also like