ML Lab Manual
EXPERIMENT-1
Write a program to solve the Water Jug problem: with a 4-litre and a 3-litre jug, reach a state in which the 4-litre jug holds exactly 2 litres.
PROGRAM:
x = 0  # amount of water in the 4-litre jug
y = 0  # amount of water in the 3-litre jug
m = 4  # capacity of jug x
n = 3  # capacity of jug y
print("Initial state = (0,0)")
print("Capacities = (4,3)")
print("Goal state = (2,y)")
while x != 2:
    r = int(input("Enter rule: "))
    if r == 1:      # fill the 4-litre jug
        x = m
    elif r == 2:    # fill the 3-litre jug
        y = n
    elif r == 3:    # empty the 4-litre jug
        x = 0
    elif r == 4:    # empty the 3-litre jug
        y = 0
    elif r == 5:    # pour from x into y until y is full
        t = n - y
        y = n
        x -= t
    elif r == 6:    # pour from y into x until x is full
        t = m - x
        x = m
        y -= t
    elif r == 7:    # pour all of x into y
        y += x
        x = 0
    elif r == 8:    # pour all of y into x
        x += y
        y = 0
    print(x, y)
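A sample interaction, assuming the rule numbering above: entering the rules 1, 5, 4, 7, 1, 5 in sequence moves through the states (4,0), (1,3), (1,0), (0,1), (4,1) and finally (2,3), at which point the 4-litre jug holds the goal amount of 2 litres and the loop exits.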
EXPERIMENT-2
Write a program to implement the AO* search algorithm on an AND-OR graph.
PROGRAM:
def recAOStar(n):
    print("Expanding Node:", n)
    and_nodes = []
    or_nodes = []
    if n in allNodes:
        if 'AND' in allNodes[n]:
            and_nodes = allNodes[n]['AND']
        if 'OR' in allNodes[n]:
            or_nodes = allNodes[n]['OR']
    if len(and_nodes) == 0 and len(or_nodes) == 0:
        return  # leaf node: nothing to expand

    solvable = False
    marked = {}
    while not solvable:
        # every child group has been tried: settle for the cheapest one
        if len(marked) == len(and_nodes) + len(or_nodes):
            min_cost_least, min_cost_group_least = least_cost_group(and_nodes, or_nodes, {})
            solvable = True
            change_heuristic(n, min_cost_least)
            optimal_child_group[n] = min_cost_group_least
            continue
        # pick the cheapest unmarked child group and expand it
        min_cost, min_cost_group = least_cost_group(and_nodes, or_nodes, marked)
        is_expanded = False
        if len(min_cost_group) > 1:  # AND pair, e.g. 'CD'
            if min_cost_group[0] in allNodes:
                is_expanded = True
                recAOStar(min_cost_group[0])
            if min_cost_group[1] in allNodes:
                is_expanded = True
                recAOStar(min_cost_group[1])
        else:  # single OR child
            if min_cost_group in allNodes:
                is_expanded = True
                recAOStar(min_cost_group)
        if is_expanded:
            # re-evaluate after expansion; accept the group if it is still the cheapest
            min_cost_verify, min_cost_group_verify = least_cost_group(and_nodes, or_nodes, {})
            if min_cost_group == min_cost_group_verify:
                solvable = True
                change_heuristic(n, min_cost_verify)
                optimal_child_group[n] = min_cost_group
        else:
            # unexpandable (leaf) group: accept it directly
            solvable = True
            change_heuristic(n, min_cost)
            optimal_child_group[n] = min_cost_group
        marked[min_cost_group] = 1
    return heuristic(n)

def heuristic(n):
    return H_dist[n]
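The listing calls least_cost_group and change_heuristic but does not define them. A minimal sketch consistent with the call sites, assuming an AND pair is keyed by the concatenated node names and each arc costs 1:

def least_cost_group(and_nodes, or_nodes, marked):
    # cost of each unmarked child group: child heuristics plus one edge per child
    node_wise_cost = {}
    for node_pair in and_nodes:
        if node_pair[0] + node_pair[1] not in marked:
            node_wise_cost[node_pair[0] + node_pair[1]] = heuristic(node_pair[0]) + heuristic(node_pair[1]) + 2
    for node in or_nodes:
        if node not in marked:
            node_wise_cost[node] = heuristic(node) + 1
    # return the cheapest group and its cost
    min_cost = float('inf')
    min_cost_group = None
    for group, cost in node_wise_cost.items():
        if cost < min_cost:
            min_cost = cost
            min_cost_group = group
    return min_cost, min_cost_group

def change_heuristic(n, cost):
    # revise the heuristic of n to the cost of its best child group
    H_dist[n] = cost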
def print_path(node):
    print(optimal_child_group[node], end="")
    node = optimal_child_group[node]
    if len(node) > 1:  # AND pair: follow both children
        if node[0] in optimal_child_group:
            print("->", end="")
            print_path(node[0])
        if node[1] in optimal_child_group:
            print("->", end="")
            print_path(node[1])
    else:  # single OR child
        if node in optimal_child_group:
            print("->", end="")
            print_path(node)
H_dist = {
'A': -1,
'B': 4,
'C': 2,
'D': 3,
'E': 6,
'F': 8,
'G': 2,
'H': 0,
'I': 0,
'J': 0
}
allNodes = {
'A': {'AND': [('C', 'D')], 'OR': ['B']},
'B': {'OR': ['E', 'F']},
'C': {'OR': ['G'], 'AND': [('H', 'I')]},
'D': {'OR': ['J']}
}
optimal_child_group = {}
optimal_cost = recAOStar('A')
print('Nodes which give the optimal cost are')
print_path('A')
print('\nOptimal Cost is :: ', optimal_cost)
EXPERIMENT-3
Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data from
a .CSV file.
PROGRAM:
import csv

num_attributes = 6
a = []
print("\n The Given Training Data Set \n")
with open('enjoysport.csv', 'r') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        a.append(row)
        print(row)

print("\n The initial value of hypothesis: ")
hypothesis = ['0'] * num_attributes
print(hypothesis)
# start with the first training instance as the most specific hypothesis
for j in range(0, num_attributes):
    hypothesis[j] = a[0][j]
# generalise the hypothesis over every positive example
# (assumes the last column holds the class label, 'yes' for positive)
for i in range(len(a)):
    if a[i][num_attributes].lower() == 'yes':
        for j in range(num_attributes):
            if a[i][j] != hypothesis[j]:
                hypothesis[j] = '?'
    print(" For Training instance No:{0} the hypothesis is ".format(i), hypothesis)
print("\n The Maximally Specific Hypothesis for the given training examples:\n")
print(hypothesis)
Data Set:
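The program expects six attribute columns followed by the target label; a minimal enjoysport.csv in the style of Mitchell's classic EnjoySport examples (the exact rows are illustrative):

sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes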
EXPERIMENT-4
For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set of
all hypotheses consistent with the training examples.
PROGRAM:
import csv

with open("trainingdata.csv") as f:
    csv_file = csv.reader(f)
    data = list(csv_file)

# S starts at the first training example (row 0 is assumed to be the header);
# G starts at the most general boundary
s = data[1][:-1]
g = [['?' for i in range(len(s))] for j in range(len(s))]

for i in data:
    if i[-1] == "Yes":  # positive example: generalise S
        for j in range(len(s)):
            if i[j] != s[j]:
                s[j] = '?'
                g[j][j] = '?'
    elif i[-1] == "No":  # negative example: specialise G
        for j in range(len(s)):
            if i[j] != s[j]:
                g[j][j] = s[j]
            else:
                g[j][j] = "?"
    print("\nSteps of Candidate Elimination Algorithm", data.index(i) + 1)
    print(s)
    print(g)

# keep only the rows of G that still constrain some attribute
gh = []
for i in g:
    for j in i:
        if j != '?':
            gh.append(i)
            break

print("\nFinal specific hypothesis:\n", s)
print("\nFinal general hypothesis:\n", gh)
EXPERIMENT-5
Write a program to demonstrate the working of the decision tree based ID3 algorithm.
Use an appropriate data set for building the decision tree and apply this knowledge to
classify a new sample.
PROGRAM:
import math
import csv

def load_csv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def subtables(data, col, delete):
    # partition the rows by the values of the given column
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))
    counts = [0] * len(attr)
    r = len(data)
    c = len(data[0])
    for x in range(len(attr)):
        for y in range(r):
            if data[y][col] == attr[x]:
                counts[x] += 1
    for x in range(len(attr)):
        dic[attr[x]] = [[0 for i in range(c)] for j in range(counts[x])]
        pos = 0
        for y in range(r):
            if data[y][col] == attr[x]:
                if delete:
                    del data[y][col]
                dic[attr[x]][pos] = data[y]
                pos += 1
    return attr, dic

def entropy(S):
    # Shannon entropy of a list of class labels (assumes a binary class column)
    attr = list(set(S))
    if len(attr) == 1:
        return 0
    counts = [0, 0]
    for i in range(2):
        counts[i] = sum([1 for x in S if attr[i] == x]) / (len(S) * 1.0)
    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums

def compute_gain(data, col):
    # information gain of splitting on the given column
    attr, dic = subtables(data, col, delete=False)
    total_size = len(data)
    entropies = [0] * len(attr)
    ratio = [0] * len(attr)
    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attr)):
        ratio[x] = len(dic[attr[x]]) / (total_size * 1.0)
        entropies[x] = entropy([row[-1] for row in dic[attr[x]]])
        total_entropy -= ratio[x] * entropies[x]
    return total_entropy

def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:  # pure node: make it a leaf
        node = Node("")
        node.answer = lastcol[0]
        return node
    n = len(data[0]) - 1
    gains = [0] * n
    for col in range(n):
        gains[col] = compute_gain(data, col)
    split = gains.index(max(gains))  # attribute with the highest gain
    node = Node(features[split])
    fea = features[:split] + features[split + 1:]
    attr, dic = subtables(data, split, delete=True)
    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node

def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return
    print(" " * level, node.attribute)
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)

def classify(node, x_test, features):
    if node.answer != "":
        print(node.answer)
        return
    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos] == value:
            classify(n, x_test, features)

'''Main program'''
dataset, features = load_csv("id3.csv")
node1 = build_tree(dataset, features)
print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1, 0)
testdata, features = load_csv("id3_test.csv")
for xtest in testdata:
    print("The test instance:", xtest)
    print("The label for test instance:", end=" ")
    classify(node1, xtest, features)
Training Dataset:
Test Dataset:
EXPERIMENT-6
Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using an appropriate data set.
PROGRAM:
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # two inputs [sleep, study]
y = np.array(([92], [86], [89]), dtype=float)        # one output [Expected % in Exams]
X = X / np.amax(X, axis=0)  # normalise each input feature by its column maximum
y = y / 100

# Sigmoid function and its derivative (in terms of the sigmoid output)
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000             # number of training iterations
lr = 0.1                 # learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of hidden layer neurons
output_neurons = 1       # number of neurons at the output layer

# random weight and bias initialisation (required before forward propagation)
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)
    # Backpropagation
    EO = y - output                        # error at the output layer
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)              # error propagated to the hidden layer
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad
    # weight updates
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
Training Examples:
Example   Sleep   Study   Expected % in Exams
1         2       9       92
2         1       5       86
3         3       6       89
EXPERIMENT-7
Write a program to implement the naïve Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test
data sets.
PROGRAM:
import csv
import random
import math

def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # convert strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def separatebyclass(dataset):
    # creates a dictionary of classes 1 and 0 where the values are
    # the instances belonging to each class
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarizebyclass(dataset):
    separated = separatebyclass(dataset)
    # summaries is a dict of (mean, stdev) tuples for each class value
    summaries = {}
    for classvalue, instances in separated.items():
        summaries[classvalue] = summarize(instances)
    return summaries
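The listing calls summarize above, and main below calls the usual split/predict/score helpers, without defining any of them. A minimal sketch consistent with the call sites, assuming Gaussian naïve Bayes with the class label in the last column:

def splitdataset(dataset, splitratio):
    # randomly split the rows into a training set and a test set
    trainsize = int(len(dataset) * splitratio)
    trainset = []
    copy = list(dataset)
    while len(trainset) < trainsize:
        index = random.randrange(len(copy))
        trainset.append(copy.pop(index))
    return [trainset, copy]

def summarize(instances):
    # (mean, stdev) of every attribute column, excluding the class column
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*instances)]
    del summaries[-1]
    return summaries

def calculateprobability(x, mu, sigma):
    # Gaussian probability density of x under N(mu, sigma^2)
    exponent = math.exp(-(pow(x - mu, 2) / (2 * pow(sigma, 2))))
    return (1 / (math.sqrt(2 * math.pi) * sigma)) * exponent

def calculateclassprobabilities(summaries, inputvector):
    # multiply the per-attribute densities for each class
    probabilities = {}
    for classvalue, classsummaries in summaries.items():
        probabilities[classvalue] = 1
        for i in range(len(classsummaries)):
            mu, sigma = classsummaries[i]
            probabilities[classvalue] *= calculateprobability(inputvector[i], mu, sigma)
    return probabilities

def predict(summaries, inputvector):
    # choose the class with the highest probability
    probabilities = calculateclassprobabilities(summaries, inputvector)
    bestlabel, bestprob = None, -1
    for classvalue, probability in probabilities.items():
        if bestlabel is None or probability > bestprob:
            bestprob = probability
            bestlabel = classvalue
    return bestlabel

def getpredictions(summaries, testset):
    return [predict(summaries, testset[i]) for i in range(len(testset))]

def getaccuracy(testset, predictions):
    correct = sum(1 for i in range(len(testset)) if testset[i][-1] == predictions[i])
    return (correct / float(len(testset))) * 100.0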
def main():
    filename = 'naivedata.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)
    trainingset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingset), len(testset)))
    summaries = summarizebyclass(trainingset)         # prepare the model
    predictions = getpredictions(summaries, testset)  # test the model
    print('Accuracy of the classifier is: {0}%'.format(getaccuracy(testset, predictions)))

main()
Data Set:
Examples  Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin  BMI   DiabeticPedigreeFunction  Age  Outcome
2         1            85       66             29             0        26.6  0.351                     31   0
4         1            89       66             23             94       28.1  0.167                     21   0
7         3            78       50             32             88       31    0.248                     26   1
10        8            125      96             0              0        0     0.232                     54   1
EXPERIMENT-8
Assuming a set of documents that need to be classified, use the naïve Bayesian
Classifier model to perform this task. Built-in Java classes/API can be used to write the
program. Calculate the accuracy, precision, and recall for your data set.
PROGRAM:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

msg = pd.read_csv('naivetext.csv', names=['message', 'label'])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})  # map text labels to numbers
X = msg.message
y = msg.labelnum
print(X)
print(y)

# split the data and build the document-term matrix
xtrain, xtest, ytrain, ytest = train_test_split(X, y)
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
# (use get_feature_names_out() on newer scikit-learn)
df = pd.DataFrame(xtrain_dtm.toarray(), columns=count_vect.get_feature_names())
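The listing stops at the document-term matrix, while the task statement asks for accuracy, precision, and recall. A minimal sketch of the remaining steps with scikit-learn's MultinomialNB:

from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# train the multinomial naive Bayes classifier and evaluate on the test split
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)
print('Accuracy:', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix:\n', metrics.confusion_matrix(ytest, predicted))
print('Precision:', metrics.precision_score(ytest, predicted))
print('Recall:', metrics.recall_score(ytest, predicted))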
EXPERIMENT-9
Write a program to construct a Bayesian network considering medical data. Use this
model to demonstrate the diagnosis of heart patients using standard Heart Disease
Data Set. You can use Java/Python ML library classes/API.
PROGRAM:
import numpy as np
import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel  # renamed BayesianNetwork in newer pgmpy
from pgmpy.inference import VariableElimination

heartDisease = pd.read_csv('heart.csv')
heartDisease = heartDisease.replace('?', np.nan)  # treat '?' entries as missing values

# network structure: risk factors point to heartdisease, which points to test results
model = BayesianModel([('age', 'heartdisease'), ('sex', 'heartdisease'),
                       ('exang', 'heartdisease'), ('cp', 'heartdisease'),
                       ('heartdisease', 'restecg'), ('heartdisease', 'chol')])

print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
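VariableElimination is imported but never used, which suggests the final diagnosis query was omitted. A minimal sketch of querying the fitted model; the evidence variable and value are illustrative and must match states actually present in heart.csv:

# infer the probability of heart disease given observed evidence
HeartDisease_infer = VariableElimination(model)
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'restecg': 1})
print(q)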
Data Set:
age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  ca  thal  heartdisease
63   1    1   145       233   1    2        150      0      2.3      3      0   6     0
67   1    4   160       286   0    2        108      1      1.5      2      3   3     2
67   1    4   120       229   0    2        129      1      2.6      2      2   7     1
41   0    2   130       204   0    2        172      0      1.4      1      0   3     0
62   0    4   140       268   0    2        160      0      3.6      3      2   3     3
60   1    4   130       206   0    2        132      1      2.4      2      2   7     4
EXPERIMENT-10
Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set
for clustering using k-Means algorithm. Compare the results of these two algorithms
and comment on the quality of clustering. You can use Java/Python ML library
classes/API in the program.
PROGRAM:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn import preprocessing
import sklearn.metrics as sm
import pandas as pd
import numpy as np

iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

# K-Means clustering
model = KMeans(n_clusters=3)
model.fit(X)

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

plt.subplot(2, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')

plt.subplot(2, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means Classification')

# EM clustering with a Gaussian mixture model on standardised data
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xs = pd.DataFrame(scaler.transform(X), columns=X.columns)
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_gmm = gmm.predict(xs)

plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_gmm], s=40)
plt.title('GMM Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()

# note: cluster labels are arbitrary, so these scores depend on how the
# cluster ids happen to align with the true classes
print('The accuracy score of K-Means: ', sm.accuracy_score(y, model.labels_))
print('The Confusion matrix of K-Means: ', sm.confusion_matrix(y, model.labels_))
print('The accuracy score of EM: ', sm.accuracy_score(y, y_gmm))
print('The Confusion matrix of EM: ', sm.confusion_matrix(y, y_gmm))
EXPERIMENT-11
Write a program to implement k-Nearest Neighbour algorithm to classify the iris data
set. Print both correct and wrong predictions. Java/Python ML library classes can be
used for this problem.
PROGRAM:
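A minimal sketch using scikit-learn's KNeighborsClassifier, consistent with the task statement; the 80/20 split and k=5 are assumptions:

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# load the iris data and hold out a test set (assumed 80/20 split)
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2)

# fit a k-nearest-neighbour classifier (assumed k=5)
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)
predictions = classifier.predict(x_test)

# print both correct and wrong predictions, as the task statement asks
for sample, actual, predicted in zip(x_test, y_test, predictions):
    verdict = "Correct" if actual == predicted else "Wrong"
    print(verdict, "- sample:", sample,
          "actual:", iris.target_names[actual],
          "predicted:", iris.target_names[predicted])
print("Accuracy:", classifier.score(x_test, y_test))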
Data Set:
Iris Plants Dataset: the dataset contains 150 instances (50 in each of three classes).
Number of attributes: 4 numeric, predictive attributes and the class.
EXPERIMENT-12
Implement the non-parametric Locally Weighted Regression algorithm in order to fit
data points. Select an appropriate data set for your experiment and draw graphs.
PROGRAM:
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot

def radial_kernel(x0, X, tau):
    # Gaussian weighting: points near the query x0 get weight close to 1
    return np.exp(np.sum((X - x0) ** 2, axis=1) / (-2 * tau * tau))

def local_regression(x0, X, Y, tau):
    # add bias term
    x0 = np.r_[1, x0]
    X = np.c_[np.ones(len(X)), X]
    # fit model: weighted normal equations, beta = (X^T W X)^-1 X^T W Y
    xw = X.T * radial_kernel(x0, X, tau)  # X transpose * W
    beta = np.linalg.pinv(xw @ X) @ xw @ Y  # @ is matrix multiplication
    # predict the value at the query point
    return x0 @ beta

n = 1000
# generate dataset
X = np.linspace(-3, 3, num=n)
print("The Data Set (10 Samples) X:\n", X[1:10])
Y = np.log(np.abs(X ** 2 - 1) + .5)
print("The Fitting Curve Data Set (10 Samples) Y:\n", Y[1:10])
# jitter X with Gaussian noise
X += np.random.normal(scale=.1, size=n)
print("Jittered (10 Samples) X:\n", X[1:10])

domain = np.linspace(-3, 3, num=300)
print("X0 Domain Space (10 Samples):\n", domain[1:10])

def plot_lwr(tau):
    # prediction through regression at every point of the domain
    prediction = [local_regression(x0, X, Y, tau) for x0 in domain]
    plot = figure(plot_width=400, plot_height=400)  # use width/height on Bokeh 3+
    plot.title.text = 'tau=%g' % tau
    plot.scatter(X, Y, alpha=.3)
    plot.line(domain, prediction, line_width=2, color='red')
    return plot

# smaller tau -> more local (wiggly) fit; larger tau -> smoother, near-global fit
show(gridplot([[plot_lwr(10.), plot_lwr(1.)], [plot_lwr(0.1), plot_lwr(0.01)]]))