ML LAB PROGRAMS
LAB1 [FIND-S ALGORITHM] :
PROGRAM:
import csv
# NOTE(review): the real CSV file name was lost when this document was
# extracted (it appears as "[Link]"); point this at the training CSV shown
# under INPUT below.
DATA_FILE = '[Link]'


def find_s(rows, positive_label="TRUE"):
    """Return the most specific hypothesis consistent with the positive rows.

    Args:
        rows: iterable of lists; every row holds the attribute values followed
            by the class label in the last position.
        positive_label: label value that marks a positive example.

    Returns:
        A list with one entry per attribute: the shared value, '?' where the
        positive examples disagree, or '0' for every attribute when no
        positive example was seen.
    """
    rows = [row for row in rows if row]  # csv.reader yields [] for blank lines
    if not rows:
        return []

    hypothesis = None
    for row in rows:
        if row[-1] != positive_label:
            continue  # Find-S ignores negative examples
        attributes = row[:-1]  # the label is identified by position, not value
        if hypothesis is None:
            # The first positive example is the initial (most specific) guess.
            hypothesis = list(attributes)
            continue
        for idx, value in enumerate(attributes):
            if hypothesis[idx] != value:
                hypothesis[idx] = '?'

    if hypothesis is None:
        return ['0'] * (len(rows[0]) - 1)
    return hypothesis


if __name__ == "__main__":
    with open(DATA_FILE, 'r', newline='') as f:
        your_list = list(csv.reader(f))
    for i in your_list:
        print(i)
    print("Most Specific Hypothesis")
    # Wrapped in a list to keep the original [[...]] output shape.
    print([find_s(your_list)])
INPUT[EXCEL]:[Link]
sunny,warm,normal,strong,warm,same,TRUE
sunny,warm,high,strong,warm,same,TRUE
rainy,cold,high,strong,warm,change,FALSE
sunny,warm,high,strong,cold,change,TRUE
OUTPUT:
['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'TRUE']
['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'TRUE']
['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'FALSE']
['sunny', 'warm', 'high', 'strong', 'cold', 'change', 'TRUE']
Most Specific Hypothesis
[['sunny', 'warm', '?', 'strong', '?', '?']]
LAB2 [CANDIDATE ELIMINATION ALGORITHM]:
PROGRAM:
# NOTE(review): the real CSV file name was lost when this document was
# extracted (it appears as "[Link]"); point this at the training CSV shown
# under INPUT below.
DATA_FILE = '[Link]'


def _covers(hypothesis, example):
    """Return True when *hypothesis* matches every attribute of *example*."""
    return all(h == '?' or h == v for h, v in zip(hypothesis, example))


def candidate_elimination(dataarr, verbose=False):
    """Run candidate elimination over *dataarr* and return ``(S, G)``.

    Args:
        dataarr: list of rows; each row is the attribute values followed by
            the label ("1" positive, "0" negative). Rows with any other label
            (for example a header line) are ignored.
        verbose: when true, print S and G after every row.

    Returns:
        ``(shypo, ghypo)``: the specific boundary as a list, and the general
        boundary as a list of hypotheses.
    """
    if not dataarr:
        return [], []
    cols = len(dataarr[0])
    shypo = ['0'] * (cols - 1)
    ghypo = [['?'] * (cols - 1)]
    if verbose:
        print("initial specific hypothesis is: ", shypo)
        print("initial general hypothesis is: ", ghypo)

    # Every row is processed; the original started at index 1 and silently
    # dropped the first training example.
    for x, lst in enumerate(dataarr, start=1):
        attrs, label = lst[:cols - 1], lst[cols - 1]
        if label == "1":
            # Minimally generalise S so that it covers the positive example.
            for i, value in enumerate(attrs):
                if shypo[i] == '0':
                    shypo[i] = value
                elif shypo[i] != value:
                    shypo[i] = '?'
            # Drop general hypotheses that reject it. A new list is built
            # instead of removing from the list being iterated.
            ghypo = [g for g in ghypo if _covers(g, attrs)]
        elif label == "0":
            seen_positive = any(v != '0' for v in shypo)
            specialised = []
            for g in ghypo:
                if not seen_positive or not _covers(g, attrs):
                    # Already consistent, or S carries no information yet to
                    # specialise with: keep the hypothesis unchanged.
                    candidates = [g]
                else:
                    # Replace g by its minimal specialisations that exclude
                    # the negative example while staying more general than S.
                    candidates = [
                        g[:i] + [shypo[i]] + g[i + 1:]
                        for i in range(cols - 1)
                        if g[i] == '?'
                        and shypo[i] not in ('?', '0')
                        and shypo[i] != attrs[i]
                    ]
                for candidate in candidates:
                    if candidate not in specialised:
                        specialised.append(candidate)
            ghypo = specialised
        if verbose:
            print("S", [x], "=", shypo)
            print("G", [x], "=", ghypo)
    return shypo, ghypo


if __name__ == "__main__":
    dataarr = []
    with open(DATA_FILE) as f:
        for line in f:
            if line.strip():  # skip blank lines
                dataarr.append(line.strip().split(','))
    shypo, ghypo = candidate_elimination(dataarr, verbose=True)
    print("\nFinal hypothesis is: ", shypo)
INPUT[EXCEL]:[Link]
sunny,warm,normal,strong,warm,same,1
sunny,warm,normal,strong,warm,same,1
rainy,cold,high,strong,warm,change,0
sunny,warm,high,strong,cool,change,1
OUTPUT:
initial specific hypothesis is: ['0', '0', '0', '0', '0', '0']
initial general hypothesis is: [['?', '?', '?', '?', '?', '?']]
S [1] = ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']
G [1] = ['?', '?', '?', '?', '?', '?']
S [2] = ['sunny', 'warm', '?', 'strong', '?', '?']
G [2] = ['?', '?', '?', '?', '?', '?']
S [3] = ['sunny', 'warm', '?', 'strong', '?', '?']
G [3] = [['?', '?', '?', '?', '?', '?'], ['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
S[4]= ['sunny', 'warm', '?', 'strong', '?', '?']
G[4]= [['?', '?', '?', '?', '?', '?'], ['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
Final hypothesis is: ['sunny', 'warm', '?', 'strong', '?', '?']
LAB3 [ID3 ALGORITHM]:
PROGRAM:
[Link]
import numpy as np
import math
from data_loader import read_data
class Node:
    """One node of the decision tree.

    An internal node names the attribute it splits on and lists its children
    as ``(attribute_value, Node)`` pairs. A leaf has an empty attribute and
    stores the predicted class in ``answer``.
    """

    def __init__(self, attribute):
        self.attribute = attribute  # attribute tested here ("" for a leaf)
        self.children = []          # (value, child Node) pairs
        self.answer = ""            # class label; stays "" on internal nodes
def subtables(data, col, delete):
    """Partition *data* by the distinct values found in column *col*.

    Args:
        data: 2-D array of examples (rows) by attributes (columns).
        col: index of the column to split on.
        delete: when true, column *col* is removed from every sub-table.

    Returns:
        ``(items, tables)``: ``items`` is the sorted array of distinct values
        in the column, and ``tables`` maps each value to the rows holding it.
    """
    data = np.asarray(data)
    items = np.unique(data[:, col])
    tables = {}
    for value in items:
        # A boolean mask keeps the input dtype. The earlier fixed "S32"
        # buffer turned every value into a 32-byte byte string on Python 3.
        rows = data[data[:, col] == value]
        if delete:
            rows = np.delete(rows, col, 1)
        tables[value] = rows
    return items, tables
def entropy(S):
    """Return the Shannon entropy (base 2) of the labels in *S*.

    Args:
        S: array-like of class labels.

    Returns:
        The entropy as a plain float; 0.0 when *S* is empty or holds a single
        distinct label.
    """
    S = np.asarray(S)
    if S.size == 0:
        return 0.0
    _, counts = np.unique(S, return_counts=True)
    if counts.size == 1:
        return 0.0
    probabilities = counts / float(S.size)
    # Every probability is > 0 here, so log2 is always defined.
    return float(-np.sum(probabilities * np.log2(probabilities)))
def gain_ratio(data, col):
    """Return the gain ratio obtained by splitting *data* on column *col*.

    The gain ratio is the information gain of the split divided by the
    split's intrinsic value. The class label is read from the last column.

    Args:
        data: 2-D array of examples whose last column is the class label.
        col: index of the attribute column to evaluate.

    Returns:
        The gain ratio as a float, or 0.0 when the column holds a single
        value (the intrinsic value is then 0 and the ratio is undefined).
    """
    items, tables = subtables(data, col, delete=False)
    total_size = data.shape[0]
    remainder = 0.0   # weighted entropy left after the split
    intrinsic = 0.0   # split information
    for value in items:
        subset = tables[value]
        ratio = subset.shape[0] / float(total_size)
        remainder += ratio * entropy(subset[:, -1])
        intrinsic -= ratio * math.log(ratio, 2)
    if intrinsic == 0:
        # A constant column cannot separate anything; returning 0 avoids the
        # 0/0 = NaN that np.argmax would otherwise pick as the best split.
        return 0.0
    gain = entropy(data[:, -1]) - remainder
    return float(gain / intrinsic)
def create_node(data, metadata):
    """Recursively build the decision tree for *data*.

    Args:
        data: 2-D array of examples whose last column is the class label.
        metadata: attribute names, aligned with the columns of *data*.

    Returns:
        The root ``Node`` of the (sub)tree.
    """
    labels, label_counts = np.unique(data[:, -1], return_counts=True)
    if labels.shape[0] == 1:
        # Pure subset: emit a leaf carrying the single class.
        node = Node("")
        node.answer = labels[0]
        return node

    n_attributes = data.shape[1] - 1
    if n_attributes == 0:
        # Attributes are exhausted but the labels still disagree: fall back
        # to the majority class rather than calling argmax on nothing.
        node = Node("")
        node.answer = labels[np.argmax(label_counts)]
        return node

    gains = [gain_ratio(data, col) for col in range(n_attributes)]
    split = int(np.argmax(gains))

    node = Node(metadata[split])
    # The chosen attribute is removed from both the names and the data so
    # that deeper levels never split on it again.
    metadata = np.delete(metadata, split, 0)
    items, tables = subtables(data, split, delete=True)
    for value in items:
        child = create_node(tables[value], metadata)
        node.children.append((value, child))
    return node
def empty(size):
    """Return a string of *size* spaces, used to indent the printed tree."""
    return " " * size
def print_tree(node, level):
    """Print the tree rooted at *node*, indented by *level* spaces.

    A leaf prints its answer. An internal node prints its attribute, then
    each branch value one level deeper and that branch's subtree two levels
    deeper.

    Args:
        node: object with ``attribute``, ``children`` and ``answer``.
        level: current indentation depth.
    """
    indent = " " * level  # same rule as empty(): one space per level
    if node.answer != "":
        print(indent, node.answer)
        return
    print(indent, node.attribute)
    for value, child in node.children:
        print(" " * (level + 1), value)
        print_tree(child, level + 2)
# NOTE(review): the real CSV file name was lost when this document was
# extracted (it appears as "[Link]"); point this at the training CSV shown
# under INPUT below.
DATA_FILE = "[Link]"

if __name__ == "__main__":
    # Load the header and rows, build the tree, then print it from the root.
    metadata, traindata = read_data(DATA_FILE)
    data = np.array(traindata)
    node = create_node(data, metadata)
    print_tree(node, 0)
data_loader.py
import csv
def read_data(filename):
    """Read a comma-separated training file.

    Args:
        filename: path of a CSV file whose first row holds the column names.

    Returns:
        ``(metadata, traindata)``: the list of column names and the list of
        data rows (each a list of strings). Blank lines are skipped, and an
        empty file yields ``([], [])``.
    """
    # newline='' is what the csv module expects for files it parses itself.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader, [])
        metadata = list(headers)
        # csv.reader yields [] for blank lines; keeping them would produce a
        # ragged table once the rows are turned into an array.
        traindata = [row for row in datareader if row]
    return (metadata, traindata)
INPUT[EXCEL]:[Link]
OUTLOOK,AIR_TEMP,HUMIDITY,WIND,ENJOYSPORT
SUNNY,HOT,HIGH,WEAK,NO
SUNNY,HOT,HIGH,STRONG,NO
OVERCAST,HOT,HIGH,WEAK,YES
RAIN,MILD,HIGH,WEAK,YES
RAIN,COOL,NORMAL,WEAK,YES
RAIN,COOL,NORMAL,STRONG,NO
OVERCAST,COOL,NORMAL,STRONG,YES
SUNNY,MILD,HIGH,WEAK,NO
SUNNY,COOL,NORMAL,WEAK,YES
RAIN,MILD,NORMAL,WEAK,YES
SUNNY,MILD,NORMAL,STRONG,YES
OVERCAST,MILD,HIGH,STRONG,YES
OVERCAST,HOT,NORMAL,WEAK,YES
RAIN,MILD,HIGH,STRONG,NO
OUTPUT:
('', 'OUTLOOK')
(' ', 'OVERCAST')
(' ', 'YES')
(' ', 'RAIN')
(' ', 'WIND')
(' ', 'STRONG')
(' ', 'NO')
(' ', 'WEAK')
(' ', 'YES')
(' ', 'SUNNY')
(' ', 'HUMIDITY')
(' ', 'HIGH')
(' ', 'NO')
(' ', 'NORMAL')
(' ', 'YES')
LAB4 [BACK PROPAGATION ALGORITHM]:
PROGRAM:
import numpy as np
def sigmoid(x):
    """Logistic activation function."""
    return 1 / (1 + np.exp(-x))


def derivatives_sigmoid(x):
    """Derivative of the sigmoid, expressed through its output value *x*."""
    return x * (1 - x)


def train_network(x, y, epoch=7000, lr=0.1, hiddenlayer_neurons=3, rng=None):
    """Train a one-hidden-layer network with back-propagation.

    Args:
        x: (samples, inputs) array of scaled inputs.
        y: (samples, outputs) array of scaled targets.
        epoch: number of full-batch gradient steps.
        lr: learning rate.
        hiddenlayer_neurons: width of the hidden layer.
        rng: object exposing ``uniform(size=...)`` (for example a
            ``numpy.random.RandomState``); defaults to the global
            ``numpy.random`` state.

    Returns:
        The network output for *x* after training, shape (samples, outputs).
    """
    rng = np.random if rng is None else rng
    inputlayer_neurons = x.shape[1]
    output_neurons = y.shape[1]

    # Weights and biases start as uniform random values in [0, 1).
    wh = rng.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
    bh = rng.uniform(size=(1, hiddenlayer_neurons))
    wout = rng.uniform(size=(hiddenlayer_neurons, output_neurons))
    bout = rng.uniform(size=(1, output_neurons))

    for _ in range(epoch):
        # Forward pass.
        hlayer_act = sigmoid(np.dot(x, wh) + bh)
        output = sigmoid(np.dot(hlayer_act, wout) + bout)

        # Backward pass. Earlier the loop stopped after the forward pass, so
        # the weights were never updated and nothing was learned.
        d_output = (y - output) * derivatives_sigmoid(output)
        d_hiddenlayer = d_output.dot(wout.T) * derivatives_sigmoid(hlayer_act)

        wout += hlayer_act.T.dot(d_output) * lr
        bout += np.sum(d_output, axis=0, keepdims=True) * lr
        wh += x.T.dot(d_hiddenlayer) * lr
        bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

    hlayer_act = sigmoid(np.dot(x, wh) + bh)
    return sigmoid(np.dot(hlayer_act, wout) + bout)


if __name__ == "__main__":
    x = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
    y = np.array(([92], [86], [89]), dtype=float)
    x = x / np.amax(x, axis=0)  # scale each input column to [0, 1]
    y = y / 100                 # scale the targets to [0, 1]
    output = train_network(x, y, epoch=7000, lr=0.1, hiddenlayer_neurons=3)
    print("Input:\n" + str(x))
    print("Actual output:\n" + str(y))
    print("Predicted output:\n", output)
OUTPUT:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual output:
[[0.92]
[0.86]
[0.89]]
('Predicted output:\n', array([[0.87550341],
[0.86351179],
[0.87336642]]))
LAB 5 [NAÏVE BAYESIAN CLASSIFIER]:
PROGRAM:
# Gaussian Naive Bayes on the Iris data set: 60/40 train/test split, then
# report the accuracy on the held-out part.
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

iris = load_iris()
x = iris.data
y = iris.target

# random_state is fixed so the split (and the reported accuracy) repeats.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.4, random_state=2)
print("training data", xtrain)
print("training data", ytrain)
print("testing data", xtest)
print("testing data", ytest)

gnb = GaussianNB()
gnb.fit(xtrain, ytrain)
y_pred = gnb.predict(xtest)

print("accuracy is", metrics.accuracy_score(ytest, y_pred) * 100)
OUTPUT:
('accuracy is', 93.33333333333333)
LAB 6 [BAYESIAN TEXT CLASSIFIER MODEL]:
PROGRAM:
# Multinomial Naive Bayes text classifier over a bag-of-words representation.
import pandas as pd
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# NOTE(review): the real CSV file name was lost when this document was
# extracted (it appears as "[Link]"); point this at the message/label CSV
# shown under INPUT below.
DATA_FILE = '[Link]'

msg = pd.read_csv(DATA_FILE, names=['message', 'label'])
print("dimensions of dataset", msg.shape)
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
x = msg.message
y = msg.labelnum
print(x)
print(y)

xtrain, xtest, ytrain, ytest = train_test_split(x, y)
# NOTE(review): the four printed names were lost in extraction; this order is
# assumed.
print(xtrain.shape)
print(xtest.shape)
print(ytrain.shape)
print(ytest.shape)

count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)  # reuse the training vocabulary

# get_feature_names() was removed from newer scikit-learn releases in favour
# of get_feature_names_out(); use whichever this installation provides.
if hasattr(count_vect, "get_feature_names_out"):
    feature_names = list(count_vect.get_feature_names_out())
else:
    feature_names = count_vect.get_feature_names()
print(feature_names)
df = pd.DataFrame(xtrain_dtm.toarray(), columns=feature_names)
print(df)
print(xtrain_dtm)

# Separate name for the classifier; earlier it overwrote the DataFrame `df`.
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

print('accuracy metrics')
print('accuracy of the classifier is', metrics.accuracy_score(ytest, predicted))
print('confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))
print('recall and precision')
# Both values are printed; earlier only the precision appeared here.
print(metrics.recall_score(ytest, predicted))
print(metrics.precision_score(ytest, predicted))
INPUT[EXCEL]:[Link]
I love this sandwich,pos
This is an amazing place,pos
I feel very good about these beers,pos
This is my best work,pos
what an awesome view,pos
I do not like this restaurant,neg
I am tired of this stuff,neg
I can't deal with this,neg
He is my sworn enemy,neg
My boss is horrible,neg
This is an awesome place,pos
I do not like the taste of this juice,neg
I love to dance,pos
I am sick and tired of this place,neg
What a great holiday,pos
That is a bad locality to stay,neg
We will have good fun tomorrow,pos
I went to my enemy's house today,neg
OUTPUT:
[13 rows x 45 columns]
(0, 25) 1
(0, 1) 1
(0, 2) 1
(0, 17) 1
(0, 34) 1
(1, 4) 1
(1, 25) 1
(1, 2) 1
(1, 17) 1
.
.
.
(11, 34) 1
(12, 26) 1
(12, 19) 1
(12, 23) 1
(12, 8) 1
(12, 34) 1
accuracy metrics
accuracy of the classifier is 0.6
confusion matrix
[[2 0]
[2 1]]
recall and precision
1.0
LAB 7 [BAYESIAN NETWORK]:
PROGRAM:
# Bayesian network for the "Asia" chest-clinic example.
# NOTE(review): this uses the pomegranate < 1.0 API (DiscreteDistribution,
# ConditionalProbabilityTable, State, BayesianNetwork.bake) — confirm the
# installed version.
from pomegranate import (
    BayesianNetwork,
    ConditionalProbabilityTable,
    DiscreteDistribution,
    State,
)

# Each conditional table row is [parent value(s)..., own value, probability].
Asia = DiscreteDistribution({'True': 0.5, 'False': 0.5})
Tuberculosis = ConditionalProbabilityTable(
    [['True', 'True', 0.2],
     ['True', 'False', 0.8],
     ['False', 'True', 0.1],
     # NOTE(review): was 0.98, which made this row pair sum to 1.08; set to
     # the complement of the 'True' entry — confirm the intended values.
     ['False', 'False', 0.9]], [Asia])
smoking = DiscreteDistribution({'True': 0.5, 'False': 0.5})
Lung = ConditionalProbabilityTable(
    [['True', 'True', 0.75],
     ['True', 'False', 0.25],
     ['False', 'True', 0.02],
     ['False', 'False', 0.98]], [smoking])
Bronchitis = ConditionalProbabilityTable(
    [['True', 'True', 0.92],
     ['True', 'False', 0.08],
     ['False', 'True', 0.03],
     # NOTE(review): was 0.98 (row pair summed to 1.01); set to the
     # complement of the 'True' entry — confirm the intended values.
     ['False', 'False', 0.97]], [smoking])
# Deterministic OR of its two parents.
Tuberculosis_or_cancer = ConditionalProbabilityTable(
    [['True', 'True', 'True', 1.0],
     ['True', 'True', 'False', 0.0],
     ['True', 'False', 'True', 1.0],
     ['True', 'False', 'False', 0.0],
     ['False', 'True', 'True', 1.0],
     ['False', 'True', 'False', 0.0],
     # With both parents False the OR is False; these two rows were inverted.
     ['False', 'False', 'True', 0.0],
     ['False', 'False', 'False', 1.0]], [Tuberculosis, Lung])
Xray = ConditionalProbabilityTable(
    [['True', 'True', 0.885],
     ['True', 'False', 0.115],
     ['False', 'True', 0.04],
     ['False', 'False', 0.96]], [Tuberculosis_or_cancer])
dyspnea = ConditionalProbabilityTable(
    [['True', 'True', 'True', 0.96],
     ['True', 'True', 'False', 0.04],
     ['True', 'False', 'True', 0.89],
     ['True', 'False', 'False', 0.11],
     ['False', 'True', 'True', 0.96],
     ['False', 'True', 'False', 0.04],
     ['False', 'False', 'True', 0.89],
     ['False', 'False', 'False', 0.11]], [Tuberculosis_or_cancer, Bronchitis])

s0 = State(Asia, name="Asia")
s1 = State(Tuberculosis, name="Tuberculosis")
s2 = State(smoking, name="smoker")
s3 = State(Lung, name="Lung")
s4 = State(Bronchitis, name="Bronchitis")
s5 = State(Tuberculosis_or_cancer, name="Tuberculosis_or_cancer")
s6 = State(Xray, name="Xray")
s7 = State(dyspnea, name="dyspnea")

network = BayesianNetwork("asia")
network.add_nodes(s0, s1, s2, s3, s4, s5, s6, s7)
# One edge per parent listed in the tables above (parent -> child).
network.add_edge(s0, s1)
network.add_edge(s2, s3)
network.add_edge(s2, s4)
network.add_edge(s1, s5)
network.add_edge(s3, s5)
network.add_edge(s5, s6)
network.add_edge(s5, s7)
network.add_edge(s4, s7)
network.bake()

# The evidence key must match the State name exactly (case included).
print(network.predict_proba({'Tuberculosis': 'True'}))
OUTPUT :
LAB 8 [EM-KMEANS]:
PROGRAM:
# Compare K-Means with EM (Gaussian mixture) clustering on the Iris data.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as sm
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture


def align_clusters(true_labels, cluster_ids):
    """Relabel every cluster with the majority true class of its members.

    Cluster ids are arbitrary, so they must be mapped onto class labels
    before an accuracy figure means anything.
    """
    true_labels = np.asarray(true_labels)
    cluster_ids = np.asarray(cluster_ids)
    aligned = np.empty_like(true_labels)
    for cluster in np.unique(cluster_ids):
        members = cluster_ids == cluster
        aligned[members] = np.bincount(true_labels[members]).argmax()
    return aligned


iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

model = KMeans(n_clusters=3)
model.fit(X)

# EM clustering: fit a three-component Gaussian mixture.
gmm = GaussianMixture(n_components=3)
em_labels = gmm.fit_predict(X)

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real classification')

plt.subplot(1, 3, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means clustering')

plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[em_labels], s=40)
plt.title('EM clustering')

targets = y.Targets.to_numpy()
acc = sm.accuracy_score(targets, align_clusters(targets, model.labels_))
print(acc * 100)
em_acc = sm.accuracy_score(targets, align_clusters(targets, em_labels))
print(em_acc * 100)

plt.show()
OUTPUT:
LAB 9 [K-NEAREST NEIGHBOR ALGORITHM]:
PROGRAM:
# k-nearest-neighbour classification of the Iris data set (k = 5).
from sklearn import datasets
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
iris_data = iris.data
iris_labels = iris.target
print(iris_data)
print(iris_labels)

# 70/30 split; no random_state is set, so results vary from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    iris_data, iris_labels, test_size=0.30)

classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

print('confusion matrix is as follows')
print(confusion_matrix(y_test, y_pred))
print('Accuracy metrices')
print(classification_report(y_test, y_pred))
OUTPUT:
confusion matrix is as follows
[[17 0 0]
[ 0 14 1]
[ 0 1 12]]
Accuracy metrices
precision recall f1-score support
0 1.00 1.00 1.00 17
1 0.93 0.93 0.93 15
2 0.92 0.92 0.92 13
avg / total 0.96 0.96 0.96 45
LAB 10 [LOCALLY WEIGHTED REGRESSION ALGORITHM]:
PROGRAM:
# numpy is imported as np1 because the rest of this program uses that alias.
import matplotlib.pyplot as plt
import numpy as np1
import pandas as pd
def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix for *point*.

    Entry (j, j) is ``exp(-||point - xmat[j]||^2 / (2 k^2))``.

    Args:
        point: one row of the design matrix (the query point).
        xmat: (m, n) design matrix; ndarray or np.matrix.
        k: bandwidth; a larger k gives a smoother fit.

    Returns:
        An (m, m) np.matrix of weights.
    """
    # Distances are measured against the xmat argument. Earlier the function
    # read a module-level `x`, so it only worked when that global existed.
    rows = np1.asarray(xmat, dtype=float)
    query = np1.asarray(point, dtype=float).ravel()
    diffs = rows - query
    squared = np1.sum(diffs * diffs, axis=1)
    return np1.asmatrix(np1.diag(np1.exp(squared / (-2.0 * k ** 2))))


def localweight(point, xmat, ymat, k):
    """Solve the weighted least-squares problem centred on *point*.

    Args:
        point: query row of the design matrix.
        xmat: (m, n) design matrix.
        ymat: targets, either shape (m,) or a 1 x m matrix.
        k: kernel bandwidth.

    Returns:
        The (n, 1) coefficient vector as an np.matrix.
    """
    rows = np1.asarray(xmat, dtype=float)
    targets = np1.asarray(ymat, dtype=float).reshape(-1, 1)
    wei = np1.asarray(kernel(point, xmat, k))
    xtw = rows.T @ wei
    # Solve (X^T W X) w = X^T W y; this avoids forming the explicit inverse.
    w = np1.linalg.solve(xtw @ rows, xtw @ targets)
    return np1.asmatrix(w)


def localweightregression(xmat, ymat, k):
    """Predict every training target with locally weighted regression.

    Args:
        xmat: (m, n) design matrix (first column normally all ones).
        ymat: targets, either shape (m,) or a 1 x m matrix.
        k: kernel bandwidth.

    Returns:
        A 1-D ndarray holding the m predictions.
    """
    rows = np1.asarray(xmat, dtype=float)
    m = rows.shape[0]
    ypred = np1.zeros(m)
    for i in range(m):
        w = np1.asarray(localweight(rows[i], xmat, ymat, k))
        ypred[i] = (rows[i] @ w).item()
    return ypred
# NOTE(review): the real CSV file name was lost when this document was
# extracted (it appears as "[Link]"); point this at a CSV with `total_bill`
# and `tip` columns.
DATA_FILE = '[Link]'

data = pd.read_csv(DATA_FILE)
bill = np1.array(data.total_bill)
tip = np1.array(data.tip)

# np1.asmatrix replaces np1.mat, which newer NumPy releases no longer provide.
mbill = np1.asmatrix(bill)
mtip = np1.asmatrix(tip)
m = np1.shape(mbill)[1]
one = np1.asmatrix(np1.ones(m))
x = np1.hstack((one.T, mbill.T))  # design matrix: [1, total_bill]

ypred = localweightregression(x, mtip, 2)

# Sort by bill so the fitted curve is drawn from left to right.
sortindex = bill.argsort()

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(bill[sortindex], ypred[sortindex], color='red', linewidth=3)
plt.xlabel('total bill')
plt.ylabel('tip')
plt.show()
OUTPUT: