0% found this document useful (0 votes)
15 views

ML Lab Programs

This summarizes a series of machine learning programs from different labs: 1. The first lab program uses a Find-S algorithm to generate the most specific hypothesis from a CSV file of data. 2. The second lab program uses a Candidate Elimination algorithm to learn hypotheses from a CSV file. 3. The third lab program implements the ID3 decision tree algorithm to classify data from a CSV file. 4. The fourth lab program demonstrates backpropagation for neural network training on sample data. 5. The fifth lab program uses the Gaussian Naive Bayes classifier from scikit-learn to classify Iris data.
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
15 views

ML Lab Programs

This summarizes a series of machine learning programs from different labs: 1. The first lab program uses a Find-S algorithm to generate the most specific hypothesis from a CSV file of data. 2. The second lab program uses a Candidate Elimination algorithm to learn hypotheses from a CSV file. 3. The third lab program implements the ID3 decision tree algorithm to classify data from a CSV file. 4. The fourth lab program demonstrates backpropagation for neural network training on sample data. 5. The fifth lab program uses the Gaussian Naive Bayes classifier from scikit-learn to classify Iris data.
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 21

ML LAB PROGRAMS

LAB1 [FIND-S ALGORITHM] :

PROGRAM:

import csv


def find_s(rows, positive_label="TRUE"):
    """Return the most specific hypothesis covering every positive example.

    Each row is a list of attribute values followed by the class label in
    the last position.  The hypothesis starts as all '0' (nothing seen yet);
    the first positive example is copied in, and every later positive
    example turns each disagreeing attribute into '?'.

    The label column is identified by position, so an attribute whose value
    happens to equal the positive label is still treated as an attribute.
    The hypothesis width is taken from the first row instead of being fixed.

    Returns a list with one entry per attribute, or [] when there are no rows.
    """
    # csv.reader yields [] for blank lines; they carry no example.
    rows = [row for row in rows if row]
    if not rows:
        return []

    hypothesis = ['0'] * (len(rows[0]) - 1)
    for row in rows:
        if row[-1] != positive_label:
            continue  # Find-S ignores negative examples
        for idx, value in enumerate(row[:-1]):
            if hypothesis[idx] == '0':
                hypothesis[idx] = value
            elif hypothesis[idx] != value:
                hypothesis[idx] = '?'
    return hypothesis


def main(path='file1.csv'):
    """Read the training CSV, echo every row and print the hypothesis."""
    with open(path, 'r', newline='') as f:
        your_list = list(csv.reader(f))
    for row in your_list:
        print(row)
    print("Most Specific Hypothesis")
    # Wrapped in a list to keep the [[...]] shape of the printed result.
    print([find_s(your_list)])


if __name__ == "__main__":
    main()
INPUT[EXCEL]:file1.csv
sunny,warm,normal,strong,warm,same,TRUE
sunny,warm,high,strong,warm,same,TRUE
rainy,cold,high,strong,warm,change,FALSE
sunny,warm,high,strong,cold,change,TRUE
OUTPUT:
['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'TRUE']
['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'TRUE']
['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'FALSE']
['sunny', 'warm', 'high', 'strong', 'cold', 'change', 'TRUE']
Most Specific Hypothesis
[['sunny', 'warm', '?', 'strong', '?', '?']]

LAB2 [CANDIDATE ELIMINATION ALGORITHM]:


PROGRAM:
def candidate_elimination(dataarr):
    """Run the lab's simplified candidate-elimination pass over `dataarr`.

    `dataarr` is a list of rows; each row is a list of attribute strings
    followed by the label ("1" positive, "0" negative) in the last column.
    Row 0 is skipped, exactly as the original loop `range(1, rows)` did.
    NOTE(review): the sample input shown for this lab has no header line and
    simply repeats its first example -- confirm that skipping row 0 is intended.

    Prints the initial hypotheses and S/G after every processed row.
    Returns (shypo, ghypo): the specific hypothesis and the list of general
    hypotheses.  Returns ([], []) for an empty table.
    """
    if not dataarr:
        return [], []

    rows = len(dataarr)
    cols = len(dataarr[0])
    shypo = ['0'] * (cols - 1)
    ghypo = [['?'] * (cols - 1)]
    print("initial specific hypothesis is: ", shypo)
    print("initial general hypothesis is: ", ghypo)

    for x in range(1, rows):
        lst = dataarr[x]

        if lst[cols - 1] == "1":
            for i in range(cols - 1):
                if shypo[i] == lst[i]:
                    continue
                # First value seen is copied; any later mismatch generalizes.
                shypo[i] = '?' if shypo[i] != '0' else lst[i]
                if shypo[i] == '?':
                    # Drop general hypotheses that still constrain attribute i.
                    # Rebuilt in one step: removing from a list while
                    # iterating over it skips the element after each removal.
                    ghypo[:] = [g for g in ghypo if g[i] == '?']

        elif lst[cols - 1] == "0":
            for i in range(cols - 1):
                if lst[i] != shypo[i] and shypo[i] != '?':
                    temp_list = ['?'] * i + [shypo[i]] + ['?'] * (cols - 2 - i)
                    if temp_list not in ghypo:
                        ghypo.append(temp_list)

        print("S", [x], "=", shypo)
        print("G", [x], "=", ghypo)

    return shypo, ghypo


def main(path='laba2.csv'):
    """Load the comma-separated training file and print the learned S and G."""
    dataarr = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line:  # a blank line would otherwise become [''] and crash
                dataarr.append(line.split(','))

    shypo, ghypo = candidate_elimination(dataarr)
    # The index is the row count rather than a hard-coded 4.
    print("S[%d]=" % len(dataarr), shypo)
    print("G[%d]=" % len(dataarr), ghypo)
    print("\nFinal hypothesis is: ", shypo)


if __name__ == "__main__":
    main()
INPUT[EXCEL]:lab2.csv
sunny,warm,normal,strong,warm,same,1
sunny,warm,normal,strong,warm,same,1
rainy,cold,high,strong,warm,change,0
sunny,warm,high,strong,cool,change,1
OUTPUT:
initial specific hypothesis is: ['0', '0', '0', '0', '0', '0']
initial general hypothesis is: [['?', '?', '?', '?', '?', '?']]
S [1] = ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']
G [1] = [['?', '?', '?', '?', '?', '?']]
S [2] = ['sunny', 'warm', '?', 'strong', '?', '?']
G [2] = [['?', '?', '?', '?', '?', '?']]
S [3] = ['sunny', 'warm', '?', 'strong', '?', '?']
G [3] = [['?', '?', '?', '?', '?', '?'], ['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
S[4]= ['sunny', 'warm', '?', 'strong', '?', '?']
G[4]= [['?', '?', '?', '?', '?', '?'], ['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]

Final hypothesis is: ['sunny', 'warm', '?', 'strong', '?', '?']


LAB3 [ID3 ALGORITHM]:
PROGRAM:
ID3.PY
import numpy as np
import math
from data_loader import read_data
class Node:
    """One node of the ID3 decision tree.

    An internal node carries the name of the attribute it splits on and a
    list of (attribute value, child Node) pairs.  A leaf is created with an
    empty attribute and gets its class label stored in `answer`.
    """

    def __init__(self, attribute):
        self.attribute = attribute
        # (value, child) tuples, filled in by the tree builder
        self.children = []
        # "" means "not a leaf"; print_tree tests this to decide what to print
        self.answer = ""
def subtables(data,col,delete):
    """Split `data` into one sub-table per distinct value of column `col`.

    data   -- 2-D NumPy array of rows
    col    -- index of the column to partition on
    delete -- when true, column `col` is removed from every sub-table

    Returns (items, tables): `items` is the sorted array of distinct values
    in the column and `tables` maps each of those values to the rows that
    carry it, in their original order.

    Rows are selected with a boolean mask so the sub-tables keep the dtype
    of `data`.  The earlier fixed "S32" buffer converted str cells to bytes
    on Python 3 and cut values off at 32 characters.
    """
    items = np.unique(data[:, col])
    tables = {}
    for item in items:
        subset = data[data[:, col] == item]
        if delete:
            subset = np.delete(subset, col, 1)
        tables[item] = subset
    return items, tables
def entropy(S):
    """Return the Shannon entropy, in bits, of the labels in array `S`.

    The result is always a plain float: 0.0 when `S` is empty or holds a
    single distinct value, otherwise -sum(p * log2(p)) over the relative
    frequency p of each distinct value.
    """
    _, counts = np.unique(S, return_counts=True)
    if counts.size <= 1:
        return 0.0
    probs = counts / float(S.size)
    return float(-np.sum(probs * np.log2(probs)))
def gain_ratio(data,col):
    """Return the gain ratio obtained by splitting `data` on column `col`.

    gain ratio = (entropy of the label column - weighted entropy of the
    label column inside each sub-table) / intrinsic value of the split,
    where the label is the last column of `data`.

    An attribute with a single distinct value has intrinsic value 0; it
    cannot separate anything, so 0.0 is returned instead of dividing by zero.
    """
    items, tables = subtables(data, col, delete=False)
    total_size = float(data.shape[0])

    remainder = 0.0   # weighted entropy left after the split
    intrinsic = 0.0   # split information of the attribute itself
    for item in items:
        subset = tables[item]
        ratio = subset.shape[0] / total_size
        # np.sum collapses the result whether entropy() hands back a scalar
        # or a one-element array.
        remainder += ratio * float(np.sum(entropy(subset[:, -1])))
        intrinsic -= ratio * math.log(ratio, 2)

    if intrinsic == 0:
        return 0.0
    gain = float(np.sum(entropy(data[:, -1]))) - remainder
    return gain / intrinsic
def create_node(data,metadata):
    """Recursively build the decision tree for `data`.

    data     -- 2-D array; the last column is the class label
    metadata -- attribute names, aligned with the columns of `data`

    Returns a Node.  A leaf (attribute "") is produced when every row has the
    same label, or when no attribute column is left to split on; in the
    latter case the most frequent label is used.  Otherwise the attribute
    with the highest gain ratio becomes the split and one child is built per
    distinct value of that attribute.
    """
    classes, class_counts = np.unique(data[:, -1], return_counts=True)
    if classes.shape[0] == 1:
        node = Node("")
        node.answer = classes[0]
        return node

    if data.shape[1] <= 1:
        # Only the label column remains but the labels still disagree:
        # np.argmax over an empty gains array would raise, so fall back to
        # the majority label.
        node = Node("")
        node.answer = classes[np.argmax(class_counts)]
        return node

    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)

    node = Node(metadata[split])
    # The chosen attribute is removed from both the names and the sub-tables
    # so that column indices stay aligned further down the tree.
    metadata = np.delete(metadata, split, 0)
    items, tables = subtables(data, split, delete=True)
    for item in items:
        child = create_node(tables[item], metadata)
        node.children.append((item, child))
    return node
def empty(size):
    """Return a string of `size` spaces ("" when `size` is zero or negative)."""
    return " " * size
def print_tree(node,level):
    """Print the subtree rooted at `node`, indented by `level` spaces.

    A leaf (non-empty `answer`) prints its answer.  An internal node prints
    its attribute name, then each branch value one level deeper, followed by
    that branch's subtree two levels deeper.
    """
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)
# Driver: load the training table, build the decision tree and print it.
# read_data returns the header names and the remaining rows of the CSV file.
metadata,traindata=read_data("lab3.csv")
# create_node indexes the table as data[:, -1], so it needs an array, not a list.
data=np.array(traindata)
node=create_node(data,metadata)
# Start printing at indentation level 0.
print_tree(node,0)
data_loader.py
import csv
def read_data(filename):
    """Read a comma-separated file with a header line.

    Returns (metadata, traindata): `metadata` is the list of column names
    from the first line and `traindata` is the list of remaining rows, each
    a list of strings.  Blank lines are skipped.  An empty file yields
    ([], []) instead of raising StopIteration.
    """
    # newline='' is what the csv module asks for when it is handed a file.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        metadata = list(next(datareader, []))
        traindata = [row for row in datareader if row]
    return (metadata, traindata)
INPUT[EXCEL]:lab3.csv
OUTLOOK,AIR_TEMP,HUMIDITY,WIND,ENJOYSPORT
SUNNY,HOT,HIGH,WEAK,NO
SUNNY,HOT,HIGH,STRONG,NO
OVERCAST,HOT,HIGH,WEAK,YES
RAIN,MILD,HIGH,WEAK,YES
RAIN,COOL,NORMAL,WEAK,YES
RAIN,COOL,NORMAL,STRONG,NO
OVERCAST,COOL,NORMAL,STRONG,YES
SUNNY,MILD,HIGH,WEAK,NO
SUNNY,COOL,NORMAL,WEAK,YES
RAIN,MILD,NORMAL,WEAK,YES
SUNNY,MILD,NORMAL,STRONG,YES
OVERCAST,MILD,HIGH,STRONG,YES
OVERCAST,HOT,NORMAL,WEAK,YES
RAIN,MILD,HIGH,STRONG,NO
OUTPUT:
('', 'OUTLOOK')
(' ', 'OVERCAST')
(' ', 'YES')
(' ', 'RAIN')
(' ', 'WIND')
(' ', 'STRONG')
(' ', 'NO')
(' ', 'WEAK')
(' ', 'YES')
(' ', 'SUNNY')
(' ', 'HUMIDITY')
(' ', 'HIGH')
(' ', 'NO')
(' ', 'NORMAL')
(' ', 'YES')
LAB4 [BACK PROPAGATION ALGORITHM]:
PROGRAM:
import numpy as np


def sigmoid(x):
    """Logistic activation 1 / (1 + e^-x), applied element-wise."""
    return 1/(1+np.exp(-x))


def derivatives_sigmoid(x):
    """Derivative of the sigmoid, expressed in terms of its OUTPUT `x`."""
    return x*(1-x)


def train_network(x, y, epoch=7000, lr=0.1, hiddenlayer_neurons=3, seed=None):
    """Train a one-hidden-layer sigmoid network with back-propagation.

    x     -- (samples, features) inputs
    y     -- (samples, outputs) targets in the 0..1 range
    epoch -- number of full-batch gradient steps
    lr    -- learning rate
    hiddenlayer_neurons -- width of the hidden layer
    seed  -- seed for the initial random weights (None: unseeded)

    Returns the network's predictions for `x` after training.

    Each epoch runs the forward pass and then propagates the output error
    back through both layers and updates the weights and biases.  Without
    that update step the weights stay at their random starting values and
    nothing is learned.
    """
    rng = np.random.RandomState(seed)
    inputlayer_neurons = x.shape[1]
    output_neurons = y.shape[1]
    wh = rng.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
    bh = rng.uniform(size=(1, hiddenlayer_neurons))
    wout = rng.uniform(size=(hiddenlayer_neurons, output_neurons))
    bout = rng.uniform(size=(1, output_neurons))

    def forward():
        hlayer_act = sigmoid(np.dot(x, wh) + bh)
        output = sigmoid(np.dot(hlayer_act, wout) + bout)
        return hlayer_act, output

    for _ in range(epoch):
        hlayer_act, output = forward()

        # Error signal at the output, then pushed back to the hidden layer.
        d_output = (y - output) * derivatives_sigmoid(output)
        d_hiddenlayer = d_output.dot(wout.T) * derivatives_sigmoid(hlayer_act)

        # Gradient step on weights and biases.
        wout += hlayer_act.T.dot(d_output) * lr
        bout += d_output.sum(axis=0, keepdims=True) * lr
        wh += x.T.dot(d_hiddenlayer) * lr
        bh += d_hiddenlayer.sum(axis=0, keepdims=True) * lr

    return forward()[1]


if __name__ == "__main__":
    x=np.array(([2,9],[1,5],[3,6]),dtype=float)
    y=np.array(([92],[86],[89]),dtype=float)
    # Scale every input column by its maximum and the targets to 0..1.
    x=x/np.amax(x,axis=0)
    y=y/100
    output=train_network(x,y,epoch=7000,lr=0.1,hiddenlayer_neurons=3)
    print("Input:\n"+str(x))
    print("Actual output:\n"+str(y))
    print("Predicted output:\n",output)
OUTPUT:
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual output:
[[0.92]
[0.86]
[0.89]]
('Predicted output:\n', array([[0.87550341],
[0.86351179],
[0.87336642]]))
LAB 5 [NAÏVE BAYESIAN CLASSIFIER]:
PROGRAM:
# Gaussian Naive Bayes on the Iris data set: split, fit, predict, score.
from sklearn.datasets import load_iris
iris=load_iris()
# Feature matrix and class labels of the data set.
x=iris.data
y=iris.target
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
# test_size=0.4 holds out 40% of the samples; random_state=2 fixes the shuffle.
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.4,random_state=2)
# NOTE: both pairs below share one label, but the second line of each pair
# prints the class labels (ytrain / ytest), not the features.
print("training data",xtrain)
print("training data",ytrain)
print("testing data",xtest)
print("testing data",ytest)
gnb=GaussianNB()
gnb.fit(xtrain,ytrain)
y_pred=gnb.predict(xtest)
from sklearn import metrics
# accuracy_score is multiplied by 100 to report a percentage.
print("accuracy is",metrics.accuracy_score(ytest,y_pred)*100)

OUTPUT:
('accuracy is', 93.33333333333333)

LAB 6 [BAYESIAN TEXT CLASSIFIER MODEL]:


PROGRAM:
# Multinomial Naive Bayes text classifier on a "message,label" file.
import pandas as pd
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

msg=pd.read_csv('laba6.txt',names=['message','label'])
print("dimensions of dataset",msg.shape)
# Encode the textual label as 1 (pos) / 0 (neg).
msg['labelnum']=msg.label.map({'pos':1,'neg':0})
x=msg.message
y=msg.labelnum
print(x)
print(y)

xtrain,xtest,ytrain,ytest=train_test_split(x,y)
print(xtest.shape)
print(xtrain.shape)
print(ytest.shape)
print(ytrain.shape)

# Bag-of-words counts: vocabulary is learned on the training split only.
count_vect=CountVectorizer()
xtrain_dtm=count_vect.fit_transform(xtrain)
xtest_dtm=count_vect.transform(xtest)
# get_feature_names() was removed from recent scikit-learn releases in favour
# of get_feature_names_out(); use whichever this installation provides.
if hasattr(count_vect,'get_feature_names_out'):
    feature_names=list(count_vect.get_feature_names_out())
else:
    feature_names=count_vect.get_feature_names()
print(feature_names)
df=pd.DataFrame(xtrain_dtm.toarray(),columns=feature_names)
print(df)
print(xtrain_dtm)

# Kept in its own name so the DataFrame above is not overwritten.
clf=MultinomialNB().fit(xtrain_dtm,ytrain)
predicted=clf.predict(xtest_dtm)

print('accuracy metrics')
print('accuracy of the classifier is',metrics.accuracy_score(ytest,predicted))
print('confusion matrix')
print(metrics.confusion_matrix(ytest,predicted))
# The heading promises both values, so both are printed: recall, then precision.
print('recall and precision')
print(metrics.recall_score(ytest,predicted))
print(metrics.precision_score(ytest,predicted))
INPUT[EXCEL]:lab6.txt
I love this sandwich,pos
This is an amazing place,pos
I feel very good about these beers,pos
This is my best work,pos
what an awesome view,pos
I do not like this restaurant,neg
I am tired of this stuff,neg
I can't deal with this,neg
He is my sworn enemy,neg
My boss is horrible,neg
This is an awesome place,pos
I do not like the taste of this juice,neg
I love to dance,pos
I am sick and tired of this place,neg
What a great holiday,pos
That is a bad locality to stay,neg
We will have good fun tomorrow,pos
I went to my enemy's house today,neg
OUTPUT:
[13 rows x 45 columns]
(0, 25) 1
(0, 1) 1
(0, 2) 1
(0, 17) 1
(0, 34) 1
(1, 4) 1
(1, 25) 1
(1, 2) 1
(1, 17) 1
.
.
.
(11, 34) 1
(12, 26) 1
(12, 19) 1
(12, 23) 1
(12, 8) 1
(12, 34) 1
accuracy metrics
accuracy of the classifier is 0.6
confusion matrix
[[2 0]
[2 1]]
recall and precision
1.0

LAB 7 [BAYESIAN NETWORK]:


PROGRAM:
from pomegranate import *
# Bayesian network for the "Asia" chest-clinic example.
# NOTE: DiscreteDistribution / ConditionalProbabilityTable / State /
# BayesianNetwork are the pre-1.0 pomegranate API.
# Every CPT row is [parent value(s)..., own value, probability]; the rows
# that share the same parent values must sum to 1.
Asia=DiscreteDistribution({'True':0.5,'False':0.5})
Tuberculosis=ConditionalProbabilityTable(
    [['True','True',0.2],
     ['True','False',0.8],
     ['False','True',0.1],
     # NOTE(review): was 0.98, which made this pair sum to 1.08; taken as the
     # complement of 0.1 -- confirm against the intended table.
     ['False','False',0.9]],[Asia])

smoking=DiscreteDistribution({'True':0.5,'False':0.5})
Lung=ConditionalProbabilityTable(
    [['True','True',0.75],
     ['True','False',0.25],
     ['False','True',0.02],
     ['False','False',0.98]],[smoking])

Bronchitis=ConditionalProbabilityTable(
    [['True','True',0.92],
     ['True','False',0.08],
     ['False','True',0.03],
     # NOTE(review): was 0.98, which made this pair sum to 1.01; taken as the
     # complement of 0.03 -- confirm against the intended table.
     ['False','False',0.97]],[smoking])

# Deterministic OR of its two parents.
Tuberculosis_or_cancer=ConditionalProbabilityTable(
    [['True','True','True',1.0],
     ['True','True','False',0.0],
     ['True','False','True',1.0],
     ['True','False','False',0.0],
     ['False','True','True',1.0],
     ['False','True','False',0.0],
     # Both parents False: the OR is False with certainty.
     ['False','False','True',0.0],
     ['False','False','False',1.0]],[Tuberculosis,Lung])

Xray=ConditionalProbabilityTable(
    [['True','True',0.885],
     ['True','False',0.115],
     ['False','True',0.04],
     ['False','False',0.96]],[Tuberculosis_or_cancer])

dyspnea=ConditionalProbabilityTable(
    [['True','True','True',0.96],
     ['True','True','False',0.04],
     ['True','False','True',0.89],
     ['True','False','False',0.11],
     ['False','True','True',0.96],
     ['False','True','False',0.04],
     ['False','False','True',0.89],
     ['False','False','False',0.11]],[Tuberculosis_or_cancer,Bronchitis])

s0=State(Asia,name="Asia")
s1=State(Tuberculosis,name="Tuberculosis")
s2=State(smoking,name="smoker")
s3=State(Lung,name="Lung")
s4=State(Bronchitis,name="Bronchitis")
s5=State(Tuberculosis_or_cancer,name="Tuberculosis_or_cancer")
s6=State(Xray,name="Xray")
s7=State(dyspnea,name="dyspnea")

network=BayesianNetwork("asia")
network.add_nodes(s0,s1,s2,s3,s4,s5,s6,s7)
# One edge per parent listed in the tables above (parent -> child).
network.add_edge(s0,s1)
network.add_edge(s2,s3)
network.add_edge(s2,s4)
network.add_edge(s1,s5)
network.add_edge(s3,s5)
network.add_edge(s5,s6)
network.add_edge(s5,s7)
network.add_edge(s4,s7)
network.bake()
# Evidence keys are the State names given above (case-sensitive).
print(network.predict_proba({'Tuberculosis':'True'}))
OUTPUT :

LAB 8 [EM-KMEANS]:
PROGRAM:
# K-Means clustering of the Iris data, compared with the true classes.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import sklearn.metrics as sm

iris=datasets.load_iris()
X=pd.DataFrame(iris.data)
X.columns=['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']
y=pd.DataFrame(iris.target)
y.columns=['Targets']

model=KMeans(n_clusters=3)
model.fit(X)

# Cluster ids are arbitrary: cluster 0 need not be class 0.  Relabel every
# cluster with the most frequent true class among its members so that the
# colours and the accuracy below compare like with like.
aligned_labels=np.empty_like(model.labels_)
for cluster in np.unique(model.labels_):
    members=model.labels_==cluster
    aligned_labels[members]=np.bincount(iris.target[members]).argmax()

plt.figure(figsize=(14,7))
colormap=np.array(['red','lime','black'])

# Left: the true classes (this panel is not a clustering result).
plt.subplot(1,2,1)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y.Targets],s=40)
plt.title('Real classification')

# Right: the K-Means assignment after relabelling.
plt.subplot(1,2,2)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[aligned_labels],s=40)
plt.title('K-Means clustering')

acc=sm.accuracy_score(iris.target,aligned_labels)
print(acc*100)
# Without show() a plain script exits before the figure is displayed.
plt.show()
OUTPUT:

LAB 9 [K-NEAREST NEIGHBOR ALGORITHM]:


PROGRAM:
# k-nearest-neighbour classification of the Iris data set.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn import datasets
iris=datasets.load_iris()
iris_data=iris.data
iris_labels=iris.target
print(iris_data)
print(iris_labels)
# test_size=0.30 holds out 30% of the samples; no random_state is given, so
# the split (and the figures printed below) vary between runs.
x_train,x_test,y_train,y_test=train_test_split(iris_data,iris_labels,test_size=0.30)
# Classify by vote among the 5 nearest training samples.
classifier=KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train,y_train)
y_pred=classifier.predict(x_test)
print('confusion matrix is as follows')
print(confusion_matrix(y_test,y_pred))
print('Accuracy metrices')
print(classification_report(y_test,y_pred))
OUTPUT:
confusion matrix is as follows
[[17 0 0]
[ 0 14 1]
[ 0 1 12]]
Accuracy metrices
precision recall f1-score support

0 1.00 1.00 1.00 17


1 0.93 0.93 0.93 15
2 0.92 0.92 0.92 13

avg / total 0.96 0.96 0.96 45

LAB 10 [LOCALLY WEIGHTED REGRESSION ALGORITHM]:


PROGRAM:
import matplotlib.pyplot as plt
import pandas as pd
#import numpy.linalg as np
import numpy as np1
#from scipy.stats.stats import pearsonr

def kernel(point,xmat,k):
    """Return the diagonal Gaussian weight matrix for one query point.

    point -- the query row (1 x n matrix or length-n array)
    xmat  -- (m x n) matrix of training rows
    k     -- bandwidth; smaller values concentrate weight on near neighbours

    Returns an (m x m) np.matrix whose j-th diagonal entry is
    exp(-|point - xmat[j]|^2 / (2 k^2)).

    The distances are taken from the `xmat` argument; the function no longer
    depends on a module-level variable named `x` being defined.
    """
    rows = np1.asarray(xmat)
    diffs = rows - np1.asarray(point)
    sq_dist = np1.sum(diffs * diffs, axis=1)
    # asmatrix keeps the matrix type callers multiply with `*`.
    return np1.asmatrix(np1.diag(np1.exp(sq_dist / (-2.0 * k ** 2))))
def localweight(point,xmat,ymat,k):
    """Solve the weighted least-squares fit around `point`.

    point -- the query row
    xmat  -- (m x n) design matrix
    ymat  -- (1 x m) row matrix of targets
    k     -- kernel bandwidth

    Returns the (n x 1) coefficient matrix (X^T W X)^-1 X^T W y, where W is
    the weight matrix produced by kernel().  The design matrix is taken from
    the `xmat` argument rather than from a module-level `x`.
    """
    wei = kernel(point, xmat, k)
    xmat = np1.asmatrix(xmat)
    ymat = np1.asmatrix(ymat)
    w = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return w
def localweightregression(xmat,ymat,k):
    """Predict a target for every row of `xmat` by locally weighted regression.

    A separate weighted fit is solved for each row, and that row is then
    evaluated with its own coefficients.

    Returns a 1-D array with one prediction per row of `xmat`.
    """
    xmat = np1.asmatrix(xmat)
    m = np1.shape(xmat)[0]
    ypred = np1.zeros(m)
    for i in range(m):
        # The product is a 1x1 matrix; .item() extracts the scalar explicitly
        # instead of relying on implicit array-to-scalar conversion.
        ypred[i] = (xmat[i] * localweight(xmat[i], xmat, ymat, k)).item()
    return ypred
# Driver: fit tip against total_bill with locally weighted regression and plot.
data=pd.read_csv('10data.csv')
bill=np1.array(data.total_bill)
tip=np1.array(data.tip)
# np.asmatrix replaces the np.mat alias, which NumPy 2.0 no longer provides.
mbill=np1.asmatrix(bill)
mtip=np1.asmatrix(tip)
m=np1.shape(mbill)[1]
one=np1.asmatrix(np1.ones(m))
# Design matrix: a column of ones (intercept) next to the bill column.
x=np1.hstack((one.T,mbill.T))
ypred=localweightregression(x,mtip,2)
# Sort by bill so the fitted curve is drawn left to right.
sortindex=bill.argsort()
xsort=x[sortindex]
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
ax.scatter(bill,tip,color='green')
ax.plot(np1.asarray(xsort[:,1]).ravel(),ypred[sortindex],color='red',linewidth=3)
plt.xlabel('total bill')
plt.ylabel('tip')
# Without show() a plain script exits before the figure is displayed.
plt.show()
OUTPUT:

You might also like