0% found this document useful (0 votes)
40 views21 pages

ML Lab Programs

This summarizes a series of machine learning programs from different labs: 1. The first lab program uses a Find-S algorithm to generate the most specific hypothesis from a CSV file of data. 2. The second lab program uses a Candidate Elimination algorithm to learn hypotheses from a CSV file. 3. The third lab program implements the ID3 decision tree algorithm to classify data from a CSV file. 4. The fourth lab program demonstrates backpropagation for neural network training on sample data. 5. The fifth lab program uses the Gaussian Naive Bayes classifier from scikit-learn to classify Iris data.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
40 views21 pages

ML Lab Programs

This summarizes a series of machine learning programs from different labs: 1. The first lab program uses a Find-S algorithm to generate the most specific hypothesis from a CSV file of data. 2. The second lab program uses a Candidate Elimination algorithm to learn hypotheses from a CSV file. 3. The third lab program implements the ID3 decision tree algorithm to classify data from a CSV file. 4. The fourth lab program demonstrates backpropagation for neural network training on sample data. 5. The fifth lab program uses the Gaussian Naive Bayes classifier from scikit-learn to classify Iris data.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd

ML LAB PROGRAMS

LAB1 [FIND-S ALGORITHM] :

PROGRAM:

import csv
# NOTE(review): the training-data file name was lost when this listing was
# extracted ('[Link]' is a placeholder) -- point this at the real CSV file.
DATA_FILE = '[Link]'


def find_s(examples, positive_label="TRUE"):
    """Return the most specific hypothesis that covers the positive examples.

    Args:
        examples: sequence of rows; each row holds the attribute values
            followed by the class label in the last position.
        positive_label: label value that marks a positive example.

    Returns:
        A list with one entry per attribute: '0' while no positive example
        has been seen, the attribute value when every positive example
        agrees on it, and '?' when they disagree.  An empty list is returned
        when ``examples`` contains no rows.
    """
    rows = [row for row in examples if row]
    if not rows:
        return []
    # Size the hypothesis from the data instead of hard-coding six attributes.
    hypothesis = ['0'] * (len(rows[0]) - 1)
    for row in rows:
        if row[-1] != positive_label:
            continue
        # Slice the label off rather than comparing each value with the
        # label text, so an attribute value equal to the label is not skipped.
        for j, value in enumerate(row[:-1]):
            if hypothesis[j] == '0':
                hypothesis[j] = value
            elif hypothesis[j] != value:
                hypothesis[j] = '?'
    return hypothesis


if __name__ == "__main__":
    with open(DATA_FILE, 'r', newline='') as f:
        your_list = list(csv.reader(f))
    for i in your_list:
        print(i)
    print("Most Specific Hypothesis")
    # Wrapped in an outer list to keep the listing's documented output shape.
    print([find_s(your_list)])
INPUT[EXCEL]:[Link]
sunny,warm,normal,strong,warm,same,TRUE
sunny,warm,high,strong,warm,same,TRUE
rainy,cold,high,strong,warm,change,FALSE
sunny,warm,high,strong,cold,change,TRUE
OUTPUT:
['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'TRUE']
['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'TRUE']
['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'FALSE']
['sunny', 'warm', 'high', 'strong', 'cold', 'change', 'TRUE']
Most Specific Hypothesis
[['sunny', 'warm', '?', 'strong', '?', '?']]

LAB2 [CANDIDATE ELIMINATION ALGORITHM]:


PROGRAM:
# NOTE(review): the training-data file name was lost when this listing was
# extracted ('[Link]' is a placeholder) -- point this at the real CSV file.
DATA_FILE = '[Link]'


def candidate_elimination(dataarr):
    """Run the listing's simplified Candidate Elimination over ``dataarr``.

    Args:
        dataarr: list of rows; each row holds the attribute values followed
            by the class label ("1" positive, "0" negative) in the last
            position.  Row 0 is not used as a training example (the loop
            starts at index 1, as in the original listing).
            NOTE(review): presumably row 0 is a header row -- the sample
            input duplicates its first example; confirm against the data.

    Returns:
        Tuple ``(shypo, ghypo)``: the specific hypothesis (list of values)
        and the general boundary (list of hypotheses).

    Raises:
        ValueError: if ``dataarr`` is empty.
    """
    if not dataarr:
        raise ValueError("no training rows supplied")
    rows = len(dataarr)
    cols = len(dataarr[0])
    shypo = ['0'] * (cols - 1)
    ghypo = [['?'] * (cols - 1)]
    print("initial specific hypothesis is: ", shypo)
    print("initial general hypothesis is: ", ghypo)

    for x in range(1, rows):
        lst = dataarr[x]

        if lst[cols - 1] == "1":
            for i in range(cols - 1):
                if shypo[i] == lst[i]:
                    continue
                shypo[i] = '?' if shypo[i] != '0' else lst[i]
                if shypo[i] == '?':
                    # Rebuild the list instead of calling remove() while
                    # iterating over it, which silently skips elements.
                    ghypo[:] = [g for g in ghypo if g[i] == '?']

        elif lst[cols - 1] == "0":
            for i in range(cols - 1):
                if lst[i] != shypo[i] and shypo[i] != '?':
                    temp_list = ['?'] * i + [shypo[i]] + ['?'] * (cols - 2 - i)
                    if temp_list not in ghypo:
                        ghypo.append(temp_list)
        print("S", [x], "=", shypo)
        print("G", [x], "=", ghypo)

    # The listing hard-coded "S[4]"/"G[4]" for its four-row sample file; the
    # index is the row count, so derive it from the data.
    print("S[%d]=" % rows, shypo)
    print("G[%d]=" % rows, ghypo)
    print("\nFinal hypothesis is: ", shypo)
    return shypo, ghypo


if __name__ == "__main__":
    with open(DATA_FILE) as f:
        # Blank lines would otherwise become one-element rows.
        dataarr = [line.strip().split(',') for line in f if line.strip()]
    candidate_elimination(dataarr)
INPUT[EXCEL]:[Link]
sunny,warm,normal,strong,warm,same,1
sunny,warm,normal,strong,warm,same,1
rainy,cold,high,strong,warm,change,0
sunny,warm,high,strong,cool,change,1
OUTPUT:
initial specific hypothesis is: ['0', '0', '0', '0', '0', '0']
initial general hypothesis is: [['?', '?', '?', '?', '?', '?']]
S [1] = ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']
G [1] = [['?', '?', '?', '?', '?', '?']]
S [2] = ['sunny', 'warm', '?', 'strong', '?', '?']
G [2] = [['?', '?', '?', '?', '?', '?']]
S [3] = ['sunny', 'warm', '?', 'strong', '?', '?']
G [3] = [['?', '?', '?', '?', '?', '?'], ['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
S[4]= ['sunny', 'warm', '?', 'strong', '?', '?']
G[4]= [['?', '?', '?', '?', '?', '?'], ['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]

Final hypothesis is: ['sunny', 'warm', '?', 'strong', '?', '?']


LAB3 [ID3 ALGORITHM]:
PROGRAM:
[Link]
import numpy as np
import math
from data_loader import read_data
class Node:
    """A node of the decision tree built by ``create_node``.

    NOTE(review): the attribute names were lost in extraction ("[Link]");
    ``children`` and ``answer`` are reconstructed from how the rest of the
    listing uses them -- confirm against the original source.
    """

    def __init__(self, attribute):
        # Name of the attribute this node splits on ("" for a leaf).
        self.attribute = attribute
        # (attribute value, child Node) pairs, filled in by the tree builder.
        self.children = []
        # Class label for a leaf; stays "" for an internal node.
        self.answer = ""
def subtables(data, col, delete):
    """Partition ``data`` by the distinct values found in column ``col``.

    Args:
        data: 2-D numpy array of examples (one row per example).
        col: index of the column to partition on.
        delete: when true, column ``col`` is removed from every sub-table.

    Returns:
        Tuple ``(items, tables)`` where ``items`` is the sorted array of
        distinct values in the column and ``tables`` maps each value to the
        array of rows that carry it.
    """
    items = np.unique(data[:, col])
    tables = {}
    for item in items:
        # Boolean-mask selection keeps the input dtype; the old code copied
        # rows into a fixed "S32" bytes buffer, which truncates long values
        # and turns text into bytes on Python 3.
        subset = data[data[:, col] == item]
        if delete:
            subset = np.delete(subset, col, 1)
        tables[item] = subset
    return items, tables
def entropy(S):
    """Return the Shannon entropy (base 2) of the values in array ``S``.

    Args:
        S: numpy array of class labels.

    Returns:
        The entropy as a float; 0.0 when ``S`` is empty or holds a single
        distinct value.
    """
    items, counts = np.unique(S, return_counts=True)
    if items.size <= 1:
        return 0.0
    probabilities = counts / float(S.size)
    # Return a plain float; the loop version accumulated into a one-element
    # array, which numpy no longer converts to a scalar implicitly.
    return float(-np.sum(probabilities * np.log2(probabilities)))
def gain_ratio(data, col):
    """Return the gain ratio obtained by splitting ``data`` on column ``col``.

    The gain ratio is the information gain of the split divided by the
    split's intrinsic value (the entropy of the partition sizes).

    Args:
        data: 2-D numpy array whose last column holds the class label.
        col: index of the candidate attribute column.

    Returns:
        The gain ratio, or 0.0 when the column holds a single distinct value
        (intrinsic value of zero), so such a column is never preferred.
    """
    items, tables = subtables(data, col, delete=False)
    total_size = data.shape[0] * 1.0
    remainder = 0.0
    iv = 0.0
    for item in items:
        ratio = tables[item].shape[0] / total_size
        remainder += ratio * entropy(tables[item][:, -1])
        iv -= ratio * math.log(ratio, 2)
    if iv == 0:
        # A one-valued column used to yield 0/0 -> NaN, which np.argmax then
        # picks as the "best" split.
        return 0.0
    return (entropy(data[:, -1]) - remainder) / iv
def create_node(data, metadata):
    """Recursively build the decision tree for ``data``.

    Args:
        data: 2-D numpy array of examples; the last column is the class label.
        metadata: attribute names, one per non-label column of ``data``.

    Returns:
        The root ``Node`` of the (sub)tree.  A leaf has ``answer`` set to a
        class label; an internal node has ``attribute`` set and one
        ``(value, child)`` pair in ``children`` per distinct attribute value.
    """
    labels, counts = np.unique(data[:, -1], return_counts=True)
    if labels.shape[0] == 1:
        node = Node("")
        node.answer = labels[0]
        return node
    if data.shape[1] == 1:
        # Every attribute is used up but the labels still disagree
        # (inconsistent examples): fall back to the majority label instead of
        # failing on an argmax over an empty gains array.
        node = Node("")
        node.answer = labels[np.argmax(counts)]
        return node

    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)

    node = Node(metadata[split])
    # The chosen column is removed from the sub-tables below, so drop its
    # name as well to keep names and columns aligned.
    metadata = np.delete(metadata, split, 0)
    items, tables = subtables(data, split, delete=True)
    for item in items:
        child = create_node(tables[item], metadata)
        node.children.append((item, child))
    return node
def empty(size):
    """Return the indentation prefix for tree level ``size``.

    NOTE(review): the indent unit is a single space as it appears in this
    listing; the extraction collapses whitespace, so the original unit may
    have been wider -- confirm against the source.
    """
    return " " * size
def print_tree(node, level):
    """Print the tree rooted at ``node``, indented by ``level``.

    A leaf prints its answer.  An internal node prints its attribute name,
    then each branch value one level deeper and that branch's subtree two
    levels deeper.
    """
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)
# Driver: load the training table, build the tree and print it.
# NOTE(review): the data file name was lost in extraction ("[Link]" is a
# placeholder) -- substitute the real CSV path.
metadata, traindata = read_data("[Link]")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
data_loader.py
import csv
def read_data(filename):
    """Read a comma-separated file with a header row.

    Args:
        filename: path of the CSV file.

    Returns:
        Tuple ``(metadata, traindata)``: the list of column names from the
        first row and the list of remaining rows (each a list of strings).
        Both are empty for an empty file; blank rows are skipped.
    """
    # newline='' is what the csv module asks for when opening files.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        # Default to no headers so an empty file does not raise StopIteration.
        metadata = list(next(datareader, []))
        traindata = [row for row in datareader if row]
    return (metadata, traindata)
INPUT[EXCEL]:[Link]
OUTLOOK,AIR_TEMP,HUMIDITY,WIND,ENJOYSPORT
SUNNY,HOT,HIGH,WEAK,NO
SUNNY,HOT,HIGH,STRONG,NO
OVERCAST,HOT,HIGH,WEAK,YES
RAIN,MILD,HIGH,WEAK,YES
RAIN,COOL,NORMAL,WEAK,YES
RAIN,COOL,NORMAL,STRONG,NO
OVERCAST,COOL,NORMAL,STRONG,YES
SUNNY,MILD,HIGH,WEAK,NO
SUNNY,COOL,NORMAL,WEAK,YES
RAIN,MILD,NORMAL,WEAK,YES
SUNNY,MILD,NORMAL,STRONG,YES
OVERCAST,MILD,HIGH,STRONG,YES
OVERCAST,HOT,NORMAL,WEAK,YES
RAIN,MILD,HIGH,STRONG,NO
OUTPUT:
('', 'OUTLOOK')
(' ', 'OVERCAST')
(' ', 'YES')
(' ', 'RAIN')
(' ', 'WIND')
(' ', 'STRONG')
(' ', 'NO')
(' ', 'WEAK')
(' ', 'YES')
(' ', 'SUNNY')
(' ', 'HUMIDITY')
(' ', 'HIGH')
(' ', 'NO')
(' ', 'NORMAL')
(' ', 'YES')
LAB4 [BACK PROPAGATION ALGORITHM]:
PROGRAM:
import numpy as np
def sigmoid(x):
    """Logistic activation 1 / (1 + e^-x)."""
    return 1 / (1 + np.exp(-x))


def derivatives_sigmoid(x):
    """Derivative of the sigmoid, written in terms of the sigmoid's output ``x``."""
    return x * (1 - x)


def train_network(x, y, epoch=7000, lr=0.1, hiddenlayer_neurons=3):
    """Train a one-hidden-layer sigmoid network with batch back-propagation.

    Args:
        x: 2-D array of inputs, one row per example.
        y: 2-D array of targets, one row per example.
        epoch: number of full-batch training iterations.
        lr: learning rate.
        hiddenlayer_neurons: number of hidden units.

    Returns:
        The network's prediction for ``x`` after training.
    """
    inputlayer_neurons = x.shape[1]
    output_neurons = y.shape[1]
    wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
    bh = np.random.uniform(size=(1, hiddenlayer_neurons))
    wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
    bout = np.random.uniform(size=(1, output_neurons))

    for _ in range(epoch):
        # Forward pass.
        hlayer_act = sigmoid(np.dot(x, wh) + bh)
        output = sigmoid(np.dot(hlayer_act, wout) + bout)

        # Backward pass.  The listing stopped after the forward pass, so the
        # weights were never updated and lr / derivatives_sigmoid were unused.
        d_output = (y - output) * derivatives_sigmoid(output)
        d_hiddenlayer = d_output.dot(wout.T) * derivatives_sigmoid(hlayer_act)

        wout += hlayer_act.T.dot(d_output) * lr
        bout += np.sum(d_output, axis=0, keepdims=True) * lr
        wh += x.T.dot(d_hiddenlayer) * lr
        bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

    hlayer_act = sigmoid(np.dot(x, wh) + bh)
    return sigmoid(np.dot(hlayer_act, wout) + bout)


if __name__ == "__main__":
    x = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
    y = np.array(([92], [86], [89]), dtype=float)
    # Scale each input column by its maximum and the targets to [0, 1].
    x = x / np.amax(x, axis=0)
    y = y / 100
    output = train_network(x, y, epoch=7000, lr=0.1, hiddenlayer_neurons=3)
    print("Input:\n" + str(x))
    print("Actual output:\n" + str(y))
    print("Predicted output:\n", output)
OUTPUT:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual output:
[[0.92]
[0.86]
[0.89]]
('Predicted output:\n', array([[0.87550341],
[0.86351179],
[0.87336642]]))
LAB 5 [NAÏVE BAYESIAN CLASSIFIER]:
PROGRAM:
# Gaussian Naive Bayes on the Iris data set: hold out 40% of the samples,
# fit on the rest and report accuracy on the held-out part.
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

iris = load_iris()
x = iris.data
y = iris.target

# random_state is fixed so the split is reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.4, random_state=2)
print("training data", xtrain)
print("training data", ytrain)
print("testing data", xtest)
print("testing data", ytest)

gnb = GaussianNB()
gnb.fit(xtrain, ytrain)
y_pred = gnb.predict(xtest)
print("accuracy is", metrics.accuracy_score(ytest, y_pred) * 100)

OUTPUT:
('accuracy is', 93.33333333333333)

LAB 6 [BAYESIAN TEXT CLASSIFIER MODEL]:


PROGRAM:
import pandas as pd
# Multinomial Naive Bayes text classifier over "message,label" rows.
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# NOTE(review): the data file name was lost in extraction ('[Link]' is a
# placeholder) -- substitute the real CSV path.
msg = pd.read_csv('[Link]', names=['message', 'label'])
print("dimensions of dataset", msg.shape)
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
x = msg.message
y = msg.labelnum
print(x)
print(y)

xtrain, xtest, ytrain, ytest = train_test_split(x, y)
# NOTE(review): the listing printed four shapes but the variable names were
# lost in extraction; this order is an assumption.
print(xtrain.shape)
print(xtest.shape)
print(ytrain.shape)
print(ytest.shape)

count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
# get_feature_names() was removed from newer scikit-learn releases in favour
# of get_feature_names_out(); support both.
if hasattr(count_vect, "get_feature_names_out"):
    feature_names = count_vect.get_feature_names_out()
else:
    feature_names = count_vect.get_feature_names()
print(feature_names)
df = pd.DataFrame(xtrain_dtm.toarray(), columns=feature_names)
print(df)
print(xtrain_dtm)

# Separate name for the classifier; the listing reused "df" for it.
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

print('accuracy metrics')
print('accuracy of the classifier is', metrics.accuracy_score(ytest, predicted))
print('confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))
print('recall and precision')
# The heading promises both values; the listing printed precision only.
print(metrics.recall_score(ytest, predicted))
print(metrics.precision_score(ytest, predicted))
INPUT[EXCEL]:[Link]
I love this sandwich,pos
This is an amazing place,pos
I feel very good about these beers,pos
This is my best work,pos
what an awesome view,pos
I do not like this restaurant,neg
I am tired of this stuff,neg
I can't deal with this,neg
He is my sworn enemy,neg
My boss is horrible,neg
This is an awesome place,pos
I do not like the taste of this juice,neg
I love to dance,pos
I am sick and tired of this place,neg
What a great holiday,pos
That is a bad locality to stay,neg
We will have good fun tomorrow,pos
I went to my enemy's house today,neg
OUTPUT:
[13 rows x 45 columns]
(0, 25) 1
(0, 1) 1
(0, 2) 1
(0, 17) 1
(0, 34) 1
(1, 4) 1
(1, 25) 1
(1, 2) 1
(1, 17) 1
.
.
.
(11, 34) 1
(12, 26) 1
(12, 19) 1
(12, 23) 1
(12, 8) 1
(12, 34) 1
accuracy metrics
accuracy of the classifier is 0.6
confusion matrix
[[2 0]
[2 1]]
recall and precision
1.0

LAB 7 [BAYESIAN NETWORK]:


PROGRAM:
# "Asia" Bayesian network built with the pomegranate 0.x API
# (DiscreteDistribution / ConditionalProbabilityTable / State).
# NOTE(review): pomegranate 1.0 replaced this API -- confirm the installed
# version before running.
from pomegranate import (
    BayesianNetwork,
    ConditionalProbabilityTable,
    DiscreteDistribution,
    State,
)

Asia = DiscreteDistribution({'True': 0.5, 'False': 0.5})

# Rows are [parent value, own value, probability].
# NOTE(review): the two rows for Asia == 'False' sum to 1.08, not 1.0; the
# intended values cannot be recovered from this listing -- fix at the source.
Tuberculosis = ConditionalProbabilityTable(
    [['True', 'True', 0.2],
     ['True', 'False', 0.8],
     ['False', 'True', 0.1],
     ['False', 'False', 0.98]], [Asia])

smoking = DiscreteDistribution({'True': 0.5, 'False': 0.5})

Lung = ConditionalProbabilityTable(
    [['True', 'True', 0.75],
     ['True', 'False', 0.25],
     ['False', 'True', 0.02],
     ['False', 'False', 0.98]], [smoking])

# NOTE(review): the two rows for smoking == 'False' sum to 1.01, not 1.0.
Bronchitis = ConditionalProbabilityTable(
    [['True', 'True', 0.92],
     ['True', 'False', 0.08],
     ['False', 'True', 0.03],
     ['False', 'False', 0.98]], [smoking])

# Deterministic OR of its two parents.  The listing gave probability 1.0 to
# 'True' even when both parents are 'False', which would make the node
# constant; those two rows are corrected here.
Tuberculosis_or_cancer = ConditionalProbabilityTable(
    [['True', 'True', 'True', 1.0],
     ['True', 'True', 'False', 0.0],
     ['True', 'False', 'True', 1.0],
     ['True', 'False', 'False', 0.0],
     ['False', 'True', 'True', 1.0],
     ['False', 'True', 'False', 0.0],
     ['False', 'False', 'True', 0.0],
     ['False', 'False', 'False', 1.0]], [Tuberculosis, Lung])

Xray = ConditionalProbabilityTable(
    [['True', 'True', 0.885],
     ['True', 'False', 0.115],
     ['False', 'True', 0.04],
     ['False', 'False', 0.96]], [Tuberculosis_or_cancer])

dyspnea = ConditionalProbabilityTable(
    [['True', 'True', 'True', 0.96],
     ['True', 'True', 'False', 0.04],
     ['True', 'False', 'True', 0.89],
     ['True', 'False', 'False', 0.11],
     ['False', 'True', 'True', 0.96],
     ['False', 'True', 'False', 0.04],
     ['False', 'False', 'True', 0.89],
     ['False', 'False', 'False', 0.11]], [Tuberculosis_or_cancer, Bronchitis])

s0 = State(Asia, name="Asia")
s1 = State(Tuberculosis, name="Tuberculosis")
s2 = State(smoking, name="smoker")
s3 = State(Lung, name="Lung")
s4 = State(Bronchitis, name="Bronchitis")
s5 = State(Tuberculosis_or_cancer, name="Tuberculosis_or_cancer")
s6 = State(Xray, name="Xray")
s7 = State(dyspnea, name="dyspnea")

network = BayesianNetwork("asia")
network.add_nodes(s0, s1, s2, s3, s4, s5, s6, s7)
# One edge per (parent, child) pair declared in the tables above.  The
# listing added only Asia -> Tuberculosis plus Tuberculosis -> smoker, which
# contradicts "smoking" being defined without parents, and left the other
# tables out of the network.
network.add_edge(s0, s1)
network.add_edge(s2, s3)
network.add_edge(s2, s4)
network.add_edge(s1, s5)
network.add_edge(s3, s5)
network.add_edge(s5, s6)
network.add_edge(s5, s7)
network.add_edge(s4, s7)
network.bake()
# The evidence key must match the State name ("Tuberculosis").
print(network.predict_proba({'Tuberculosis': 'True'}))
OUTPUT :

LAB 8 [EM-KMEANS]:
PROGRAM:
# Cluster the Iris data with EM (Gaussian mixture) and with K-Means, and plot
# both next to the true classes.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as sm
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture


def align_clusters(true_labels, cluster_ids):
    """Relabel each cluster with the most common true class among its members.

    Cluster ids are arbitrary, so comparing them with class labels directly
    gives a meaningless accuracy.
    """
    true_labels = np.asarray(true_labels)
    cluster_ids = np.asarray(cluster_ids)
    aligned = np.empty_like(true_labels)
    for cluster in np.unique(cluster_ids):
        members = cluster_ids == cluster
        aligned[members] = np.bincount(true_labels[members]).argmax()
    return aligned


iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

model = KMeans(n_clusters=3)
model.fit(X)

# The listing titled a plot "EM clustering" but coloured it with the true
# targets and never ran EM; fit a Gaussian mixture so the plot shows what its
# title says.
em_labels = GaussianMixture(n_components=3).fit(X).predict(X)

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real classification')

plt.subplot(1, 3, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[em_labels], s=40)
plt.title('EM clustering')

plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means clustering')

acc = sm.accuracy_score(y.Targets, align_clusters(y.Targets, model.labels_))
print(acc * 100)
plt.show()
OUTPUT:

LAB 9 [K-NEAREST NEIGHBOR ALGORITHM]:


PROGRAM:
# k-nearest-neighbour classification of the Iris data: hold out 30% of the
# samples, fit k=5 on the rest and report the confusion matrix and the
# per-class metrics.
from sklearn import datasets
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
iris_data = iris.data
iris_labels = iris.target
print(iris_data)
print(iris_labels)

# No random_state is given, so the split (and the reported numbers) change
# from run to run.
x_train, x_test, y_train, y_test = train_test_split(iris_data, iris_labels, test_size=0.30)
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

print('confusion matrix is as follows')
print(confusion_matrix(y_test, y_pred))
print('Accuracy metrices')
print(classification_report(y_test, y_pred))
OUTPUT:
confusion matrix is as follows
[[17 0 0]
[ 0 14 1]
[ 0 1 12]]
Accuracy metrices
precision recall f1-score support

0 1.00 1.00 1.00 17


1 0.93 0.93 0.93 15
2 0.92 0.92 0.92 13

avg / total 0.96 0.96 0.96 45

LAB 10 [LOCALLY WEIGHTED REGRESSION ALGORITHM]:


PROGRAM:
import matplotlib.pyplot as plt
import pandas as pd
#import [Link] as np
import numpy as np1
#from [Link] import pearsonr

def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix of ``xmat`` around ``point``.

    Args:
        point: the query row (same width as a row of ``xmat``).
        xmat: 2-D array/matrix of training rows.
        k: bandwidth; a larger ``k`` gives distant rows more weight.

    Returns:
        An (m, m) matrix whose j-th diagonal entry is
        exp(-||point - xmat[j]||^2 / (2 * k^2)); all other entries are zero.
    """
    m = np1.shape(xmat)[0]
    # np1.mat was removed in NumPy 2.0; asmatrix is the equivalent call.
    weights = np1.asmatrix(np1.eye(m))
    for j in range(m):
        # Use the xmat argument; the listing read the module-level "x" here.
        diff = np1.asarray(point - xmat[j]).ravel()
        weights[j, j] = np1.exp(np1.dot(diff, diff) / (-2.0 * k ** 2))
    return weights
def localweight(point, xmat, ymat, k):
    """Solve the weighted least-squares fit local to ``point``.

    Args:
        point: the query row.
        xmat: (m, n) matrix of training inputs.
        ymat: (1, m) matrix of training targets.
        k: kernel bandwidth passed on to ``kernel``.

    Returns:
        The (n, 1) coefficient matrix (X^T W X)^-1 (X^T W y^T).
    """
    wei = kernel(point, xmat, k)
    # Use the xmat argument throughout; the listing read the module-level "x",
    # which only worked because the caller happened to pass that same object.
    w = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return w
def localweightregression(xmat, ymat, k):
    """Predict every training row with its own locally weighted fit.

    Args:
        xmat: (m, n) matrix of training inputs.
        ymat: (1, m) matrix of training targets.
        k: kernel bandwidth.

    Returns:
        1-D array of ``m`` predictions, one per row of ``xmat``.
    """
    m = np1.shape(xmat)[0]
    ypred = np1.zeros(m)
    for i in range(m):
        # The product is a 1x1 matrix; take the scalar out explicitly instead
        # of relying on implicit array-to-scalar conversion.
        ypred[i] = (xmat[i] * localweight(xmat[i], xmat, ymat, k))[0, 0]
    return ypred
# Driver: fit tip against total_bill with locally weighted regression and
# plot the fitted curve over the raw points.
# NOTE(review): the data file name was lost in extraction ('[Link]' is a
# placeholder) -- substitute the real CSV path.
data = pd.read_csv('[Link]')
bill = np1.array(data.total_bill)
# NOTE(review): the column name was lost in extraction; "tip" is assumed from
# the variable name and the y-axis label below.
tip = np1.array(data.tip)

# np1.mat was removed in NumPy 2.0; asmatrix is the equivalent call.
mbill = np1.asmatrix(bill)
mtip = np1.asmatrix(tip)
m = np1.shape(mbill)[1]
one = np1.asmatrix(np1.ones(m))
# Design matrix: a column of ones (intercept) next to the bill column.
x = np1.hstack((one.T, mbill.T))

ypred = localweightregression(x, mtip, 2)

# Sort by bill so the fitted curve is drawn left to right.
sortindex = np1.asarray(x[:, 1]).ravel().argsort()
xsort = np1.asarray(x[sortindex, 1]).ravel()

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort, ypred[sortindex], color='red', linewidth=3)
plt.xlabel('total bill')
plt.ylabel('tip')
# Without show() nothing is displayed when this runs as a plain script.
plt.show()
OUTPUT:

You might also like