0% found this document useful (0 votes)
19 views25 pages

Practical - 1

lab manual of basics of machine learning

Uploaded by

Rushil Beladiya
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
19 views25 pages

Practical - 1

lab manual of basics of machine learning

Uploaded by

Rushil Beladiya
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 25

Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 1
AIM : Implement and demonstrate the FIND-S algorithm for finding the
most specific hypothesis based on a given set of training data samples. Read
the training data from a .CSV file.

Code:
def find_s(examples, positive_label='Yes'):
    """Return the most specific hypothesis consistent with the positive examples.

    Each example is a sequence whose last element is the class label and
    whose other elements are attribute values.  The hypothesis starts as all
    'ϕ' (matches nothing); the first positive example is copied into it and
    every later positive example turns each disagreeing attribute into '?'.
    Negative examples are ignored.

    Args:
        examples: sequence of rows, each ``[attr_0, ..., attr_k, label]``.
        positive_label: label value that marks a positive example.

    Returns:
        A list with one entry per attribute (the width is taken from the
        first row).  An empty list is returned when ``examples`` is empty.
    """
    if not examples:
        return []

    # One slot per attribute; the final column of every row is the label.
    hypothesis = ['ϕ'] * (len(examples[0]) - 1)

    for example in examples:
        if example[-1] != positive_label:
            continue  # only positive examples generalise the hypothesis
        for i, value in enumerate(example[:-1]):
            if hypothesis[i] == 'ϕ':
                # First positive example: adopt its value verbatim.
                hypothesis[i] = value
            elif hypothesis[i] != value:
                # Conflicting value: generalise this attribute.
                hypothesis[i] = '?'

    return hypothesis
# Example usage: four training rows of six attribute values each; the last
# element of every row is the class label that find_s checks.
data = [
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes'],
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes'],
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No'],
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']
]
# Run FIND-S on the rows above and show the resulting hypothesis.
hypothesis = find_s(data)
print("Final hypothesis:", hypothesis)

Output:

BAIT,SURAT Page 1
Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 2
AIM : For a given set of training data examples stored in a .CSV file,
implement and demonstrate the Candidate-Elimination algorithm. Output a
description of the set of all hypotheses consistent with the training examples.

Code :
import numpy as np
import pandas as pd

# Load the training examples from the CSV file.
data = pd.read_csv('enjoysport.csv')
# Every column except the last holds attribute values ...
concepts = np.array(data.iloc[:, 0:-1])
# ... and the last column holds the target label of each example.
target = np.array(data.iloc[:, -1])

def learn(concepts, target):
    """Run the simplified Candidate-Elimination pass used in this practical.

    ``specific_h`` starts as a copy of the first example and ``general_h`` as
    an n x n grid of '?'.  Examples labelled "yes" generalise ``specific_h``;
    examples labelled "no" write the current specific value onto the diagonal
    of ``general_h`` for every attribute where the example disagrees.  The
    state is printed after each example.

    Args:
        concepts: indexable collection of attribute rows; ``concepts[0]``
            must provide ``.copy()`` (list or NumPy row).
        target: labels aligned with ``concepts``; only the exact strings
            "yes" and "no" are acted on.

    Returns:
        ``(specific_h, general_h)`` where rows of ``general_h`` that consist
        solely of '?' have been dropped.
    """
    # NOTE(review): seeding from concepts[0] presumes the first example is
    # positive; confirm against the data set in use.
    specific_h = concepts[0].copy()
    print("initialization of specific_h \n", specific_h)
    n_attrs = len(specific_h)
    general_h = [["?" for _ in range(n_attrs)] for _ in range(n_attrs)]
    print("initialization of general_h \n", general_h)

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            print("If instance is Positive ")
            for x in range(n_attrs):
                if h[x] != specific_h[x]:
                    # Disagreement on a positive example: generalise both.
                    specific_h[x] = '?'
                    general_h[x][x] = '?'

        if target[i] == "no":
            print("If instance is Negative ")
            for x in range(n_attrs):
                if h[x] != specific_h[x]:
                    # This attribute separates the negative example from the
                    # specific hypothesis, so keep it as a constraint.
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print(" step {}".format(i + 1))
        print(specific_h)
        print(general_h)
        print("\n")
        print("\n")

    # Drop rows that constrain nothing.  The check works for any number of
    # attributes instead of comparing against a fixed six-element row.
    general_h = [row for row in general_h if any(v != '?' for v in row)]
    return specific_h, general_h

# Run the learner on the loaded data and print both resulting hypotheses.
s_final, g_final = learn(concepts, target)

print("Final Specific_h:", s_final, sep="\n")


print("Final General_h:", g_final, sep="\n")

Output :

BAIT,SURAT Page 3
Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 3
AIM : Write a program to demonstrate the working of the decision tree
based ID3 algorithm. Use an appropriate data set for building the decision
tree and apply this knowledge to classify a new sample.

Code:
import numpy as np
import math
import csv
def read_data(filename):
    """Read a comma-separated file into a header list and data rows.

    Args:
        filename: path of the CSV file; the first row is the header.

    Returns:
        ``(metadata, traindata)``: the header names as a list and the
        remaining rows as a list of lists of strings.  Both are empty when
        the file has no rows.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        metadata = list(next(datareader, []))
        traindata = list(datareader)

    return (metadata, traindata)

class Node:
    """One node of the decision tree.

    An internal node carries the name of the attribute it splits on and a
    list of ``(value, child)`` pairs; a leaf has a non-empty ``answer``.
    """

    def __init__(self, attribute):
        # Name of the attribute tested at this node ("" for leaves).
        self.attribute = attribute
        # (attribute value, child Node) pairs appended by the tree builder.
        self.children = []
        # Class label for a leaf; stays "" on internal nodes.
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    """Partition ``data`` by the distinct values found in column ``col``.

    Args:
        data: 2-D NumPy array of examples.
        col: index of the column to split on.
        delete: when true, column ``col`` is removed from every sub-table.

    Returns:
        ``(items, tables)``: the sorted unique values of the column and a
        dict mapping each value to the rows that hold it, in original row
        order.  Sub-tables keep the dtype of ``data``; they are no longer
        forced into 32-byte byte strings, which truncated long values and
        turned text into ``bytes``.
    """
    items = np.unique(data[:, col])
    tables = {}

    for value in items:
        # A boolean mask selects the matching rows in one step.
        rows = data[data[:, col] == value]
        if delete:
            rows = np.delete(rows, col, 1)
        tables[value] = rows

    return items, tables

def entropy(S):
    """Return the Shannon entropy (base 2) of the labels in ``S``.

    Args:
        S: 1-D array-like of class labels.

    Returns:
        0 when ``S`` holds at most one distinct label, otherwise
        ``-sum(p * log2(p))`` over the label frequencies, as a plain float.
    """
    S = np.asarray(S)
    items, counts = np.unique(S, return_counts=True)

    if items.size <= 1:
        return 0

    probs = counts / float(S.size)
    # Every p is > 0 because it comes from an observed label.
    return float(-np.sum(probs * np.log2(probs)))

def gain_ratio(data, col):
    """Return the gain ratio obtained by splitting ``data`` on column ``col``.

    Gain ratio is the information gain of the split divided by the split's
    intrinsic value.

    Args:
        data: 2-D NumPy array whose last column is the class label.
        col: index of the attribute column to evaluate.

    Returns:
        The gain ratio as a float; 0.0 when the column holds a single value
        (intrinsic value 0), which previously caused a division by zero.
    """
    items, tables = subtables(data, col, delete=False)

    total_size = float(data.shape[0])
    remainder = 0.0  # weighted entropy left after the split
    iv = 0.0         # intrinsic value of the split

    for value in items:
        subset = tables[value]
        ratio = subset.shape[0] / total_size
        # np.sum collapses either a scalar or a 1-element array to a scalar.
        remainder += ratio * float(np.sum(entropy(subset[:, -1])))
        iv -= ratio * math.log(ratio, 2)

    gain = float(np.sum(entropy(data[:, -1]))) - remainder

    if iv == 0:
        return 0.0
    return gain / iv

def create_node(data, metadata):
    """Recursively build the decision tree for ``data``.

    Args:
        data: 2-D NumPy array; the last column is the class label and the
            remaining columns are attributes.
        metadata: attribute names aligned with the columns of ``data``.

    Returns:
        A ``Node``: a leaf (``answer`` set) when all rows share one label or
        no attribute column is left, otherwise an internal node named after
        the attribute with the highest gain ratio, with one child per
        distinct value of that attribute.
    """
    classes, counts = np.unique(data[:, -1], return_counts=True)

    if classes.shape[0] == 1:
        node = Node("")
        node.answer = classes[0]
        return node

    if data.shape[1] == 1:
        # Attributes are exhausted but labels still disagree: fall back to
        # the majority label instead of calling argmax on an empty array.
        node = Node("")
        node.answer = classes[np.argmax(counts)]
        return node

    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)

    split = np.argmax(gains)

    node = Node(metadata[split])
    # The chosen attribute is removed from both the names and the sub-tables
    # so that the two stay aligned in the recursive calls.
    metadata = np.delete(metadata, split, 0)

    items, tables = subtables(data, split, delete=True)

    for value in items:
        child = create_node(tables[value], metadata)
        node.children.append((value, child))

    return node
def empty(size):
    """Return a string of ``size`` spaces (empty when ``size`` <= 0)."""
    return " " * size

def print_tree(node, level):
    """Print the tree rooted at ``node``, indenting by depth.

    Args:
        node: a tree node with ``answer``, ``attribute`` and ``children``.
        level: indentation width for this node; attribute values are printed
            at ``level + 1`` and child nodes at ``level + 2``.
    """
    if node.answer != "":
        # Leaf: print the class label and stop.
        print(empty(level), node.answer)
        return

    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

# Read the header names and data rows from the CSV file.
metadata, traindata = read_data("id3.csv")


# Convert the rows to a NumPy array, build the tree and print it from the root.
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)

Output:

BAIT,SURAT Page 7
Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 4
AIM : Build an Artificial Neural Network by implementing the Back
propagation algorithm and test the same using appropriate data sets.

Code:
import numpy as np
# Three training samples with two input values each, and one target per sample.
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X/np.amax(X,axis=0) # divide each column by that column's maximum
y = y/100 # scale the targets by 100

#Sigmoid Function
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Works element-wise on NumPy arrays as well as on scalars.
    """
    return 1 / (1 + np.exp(-x))

#Derivative of Sigmoid Function


def derivatives_sigmoid(x):
    """Return ``x * (1 - x)``.

    This equals the derivative of the sigmoid when ``x`` is already a
    sigmoid *output* (an activation), not the pre-activation input.
    """
    return x * (1 - x)

#Variable initialization
epoch = 5000  # number of training iterations
lr = 0.1  # learning rate
inputlayer_neurons = 2  # number of features in data set
hiddenlayer_neurons = 3  # number of hidden layer neurons
output_neurons = 1  # number of neurons at output layer

#weight and bias initialization
# np.random.uniform fills each array of the requested shape with values
# drawn uniformly from [0, 1).
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):

    #Forward Propogation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    #Backpropagation
    EO = y - output  # error at the output layer
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)  # error propagated back to the hidden layer

    #how much hidden layer wts contributed to error
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # dotproduct of nextlayererror and currentlayerop
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    # The biases were initialised but never trained; update them with the
    # per-layer deltas summed over the samples.
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input: \n" + str(X))

print("Actual Output: \n" + str(y))

print("Predicted Output: \n" ,output)

Output:

BAIT,SURAT Page 9
Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 5
AIM : Write a program to implement the naïve Bayesian classifier for a
sample training data set stored as a .CSV file. Compute the accuracy of the
classifier, considering few test data sets.

Code:
import csv
import random
import math

def loadcsv(filename):
    """Load a purely numeric CSV file as a list of rows of floats.

    Args:
        filename: path of the CSV file; every field must parse as a float.

    Returns:
        A list with one list of floats per non-empty line.

    Raises:
        ValueError: if a field cannot be converted to ``float``.
    """
    # The context manager closes the file, which was previously left open.
    with open(filename, "r", newline="") as csvfile:
        # Blank lines come back from csv.reader as empty rows; skip them.
        return [[float(x) for x in row] for row in csv.reader(csvfile) if row]

def splitdataset(dataset, splitratio):
    """Randomly split ``dataset`` into a training part and a test part.

    Args:
        dataset: sequence of rows; it is not modified.
        splitratio: fraction of rows (0 to 1) that go to the training set.

    Returns:
        ``[trainset, testset]`` where ``trainset`` has
        ``int(len(dataset) * splitratio)`` randomly chosen rows and
        ``testset`` holds the remaining rows in their original order.

    Raises:
        ValueError: if ``splitratio`` is outside ``[0, 1]``.
    """
    if not 0 <= splitratio <= 1:
        raise ValueError("splitratio must be between 0 and 1")

    trainsize = int(len(dataset) * splitratio)
    trainset = []
    remaining = list(dataset)  # work on a copy so the caller's list is untouched
    while len(trainset) < trainsize:
        # Move one randomly chosen row from the pool into the training set.
        index = random.randrange(len(remaining))
        trainset.append(remaining.pop(index))
    return [trainset, remaining]

def separatebyclass(dataset):
    """Group the rows of ``dataset`` by their class label.

    Args:
        dataset: sequence of rows whose last element is the class label.

    Returns:
        A dict mapping each label to the list of rows carrying it, in the
        order the rows appear in ``dataset``.
    """
    separated = {}
    for vector in dataset:
        separated.setdefault(vector[-1], []).append(vector)
    return separated

def mean(numbers):
    """Return the arithmetic mean of ``numbers``.

    Raises:
        ZeroDivisionError: if ``numbers`` is empty.
    """
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    """Return the sample standard deviation of ``numbers``.

    The variance divides by ``len(numbers) - 1``.

    Raises:
        ZeroDivisionError: if ``numbers`` has fewer than two values.
    """
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    """Return ``(mean, stdev)`` for every attribute column of ``dataset``.

    Args:
        dataset: sequence of equal-length rows whose last element is the
            class label.

    Returns:
        A list with one ``(mean, stdev)`` tuple per attribute column; the
        label column is excluded.
    """
    # zip(*dataset) turns rows into columns; drop the label column before
    # computing statistics instead of computing and then discarding them.
    attribute_columns = list(zip(*dataset))[:-1]
    return [(mean(column), stdev(column)) for column in attribute_columns]

def summarizebyclass(dataset):
    """Return the per-attribute ``(mean, stdev)`` summaries for each class.

    Args:
        dataset: sequence of rows whose last element is the class label.

    Returns:
        A dict mapping each class label to the list produced by
        ``summarize`` for the rows of that class.
    """
    separated = separatebyclass(dataset)
    return {
        classvalue: summarize(instances)
        for classvalue, instances in separated.items()
    }

def calculateprobability(x, mean, stdev):
    """Return the Gaussian probability density at ``x``.

    Args:
        x: value at which to evaluate the density.
        mean: mean of the normal distribution.
        stdev: standard deviation of the normal distribution.

    Raises:
        ZeroDivisionError: if ``stdev`` is 0.
    """
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateclassprobabilities(summaries, inputvector):
    """Return the product of per-attribute Gaussian densities for each class.

    Args:
        summaries: dict mapping a class label to a list of ``(mean, stdev)``
            tuples, one per attribute.
        inputvector: row whose first ``len(summaries[label])`` entries are
            the attribute values; trailing entries are not read.

    Returns:
        A dict mapping each class label to the product of the densities of
        the attribute values under that class's summaries.
    """
    probabilities = {}
    for classvalue, classsummaries in summaries.items():
        probabilities[classvalue] = 1
        for i, (mean, stdev) in enumerate(classsummaries):
            x = inputvector[i]  # attribute i of the row being classified
            probabilities[classvalue] *= calculateprobability(x, mean, stdev)
    return probabilities

def predict(summaries, inputvector):
    """Return the class label with the highest probability for a row.

    Args:
        summaries: per-class ``(mean, stdev)`` summaries.
        inputvector: row to classify.

    Returns:
        The label whose value in ``calculateclassprobabilities`` is largest
        (the first such label on ties), or ``None`` when ``summaries`` is
        empty.
    """
    probabilities = calculateclassprobabilities(summaries, inputvector)
    return max(probabilities, key=probabilities.get, default=None)

def getpredictions(summaries, testset):
    """Return the predicted class label for every row of ``testset``.

    Args:
        summaries: per-class ``(mean, stdev)`` summaries.
        testset: sequence of rows to classify.

    Returns:
        A list of labels, one per row, in the order of ``testset``.
    """
    return [predict(summaries, row) for row in testset]

def getaccuracy(testset, predictions):
    """Return the percentage of rows whose label matches its prediction.

    Args:
        testset: sequence of rows whose last element is the true label.
        predictions: predicted labels aligned with ``testset``.

    Returns:
        Accuracy in percent (0.0 to 100.0); 0.0 for an empty ``testset``
        instead of a division by zero.
    """
    if not testset:
        return 0.0

    correct = sum(
        1 for i, row in enumerate(testset) if row[-1] == predictions[i]
    )
    return (correct / float(len(testset))) * 100.0

def main():
    """Train and evaluate the Gaussian naive Bayes classifier.

    Loads the CSV file, splits it randomly into training and test rows,
    summarises the training rows per class, predicts the test rows and
    prints the split sizes and the accuracy.
    """
    filename = 'pima-indians-diabetes.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)

    trainingset, testset = splitdataset(dataset, splitratio)

    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingset), len(testset)))

    # prepare model
    summaries = summarizebyclass(trainingset)

    # test model: predict the test rows from the training summaries
    predictions = getpredictions(summaries, testset)
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()
Output:

BAIT,SURAT Page 13
Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 6
AIM : Assuming a set of documents that need to be classified, use the naïve
Bayesian Classifier model to perform this task. Built-in Java classes/API can
be used to write the program. Calculate the accuracy, precision, and recall
for your data set.

Code:
import pandas as pd

# Read the file with explicit column names: the text and its label.
msg=pd.read_csv('naivetext.csv',names=['message','label'])

print('The dimensions of the dataset',msg.shape)

# Encode the label as a number: 'pos' -> 1, 'neg' -> 0.
msg['labelnum']=msg.label.map({'pos':1,'neg':0})
X=msg.message
y=msg.labelnum

print(X)
print(y)

from sklearn.model_selection import train_test_split


# No test_size or random_state is passed, so scikit-learn's defaults apply.
xtrain,xtest,ytrain,ytest=train_test_split(X,y)
print ('\n The total number of Training Data :',ytrain.shape)
print ('\n The total number of Test Data :',ytest.shape)

from sklearn.feature_extraction.text import CountVectorizer


# Learn the vocabulary from the training messages only, then reuse it to
# encode the test messages as a document-term matrix.
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm=count_vect.transform(xtest)
print('\n The words or Tokens in the text documents \n')
# get_feature_names() was removed from scikit-learn; get_feature_names_out()
# is its replacement.  tolist() keeps the printed value a plain list.
feature_names = count_vect.get_feature_names_out().tolist()
print(feature_names)
df=pd.DataFrame(xtrain_dtm.toarray(),columns=feature_names)

from sklearn.naive_bayes import MultinomialNB


# Fit the classifier on the training matrix and predict the test matrix.
clf = MultinomialNB().fit(xtrain_dtm,ytrain)
predicted = clf.predict(xtest_dtm)

from sklearn import metrics


# Compare the predictions with the true test labels.
print("\n Accuracy of the classifer is",metrics.accuracy_score(ytest,predicted))

print('\n Confusion matrix')

BAIT,SURAT Page 14
Basics of Machine Learning [1010207718] [2107020701005]

# Confusion matrix, precision and recall of the predictions on the test labels.
print(metrics.confusion_matrix(ytest,predicted))
print('\n The value of Precision' ,metrics.precision_score(ytest,predicted))
print('\n The value of Recall' ,metrics.recall_score(ytest,predicted))

Output:

BAIT,SURAT Page 15
Basics of Machine Learning [1010207718] [2107020701005]

BAIT,SURAT Page 16
Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 7
AIM : Write a program to construct a Bayesian network considering medical
data. Use this model to demonstrate the diagnosis of heart patients using
standard Heart Disease Data Set. You can use Java/Python ML library
classes/API.

Code:
import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianNetwork
from pgmpy.inference import VariableElimination

# Load the data set and turn every '?' cell into NaN.
heartDisease = pd.read_csv('7-dataset.csv')
heartDisease = heartDisease.replace('?',np.nan)

print('Sample instances from the dataset are given below')


print(heartDisease.head())

print('\n Attributes and datatypes')


print(heartDisease.dtypes)

# Network structure as (parent, child) edges.  The statement had been split
# across lines in the middle of a string literal and no longer parsed.
model = BayesianNetwork([
    ('age', 'heartdisease'),
    ('gender', 'heartdisease'),
    ('exang', 'heartdisease'),
    ('cp', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'chol'),
])
print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease,estimator=MaximumLikelihoodEstimator)

print('\n Inferencing with Bayesian Network:')


# Inference engine built on the fitted model.
HeartDiseasetest_infer = VariableElimination(model)

print('\n 1. Probability of HeartDisease given evidence= restecg')


# Query heartdisease with restecg fixed to 1.
q1=HeartDiseasetest_infer.query(variables=['heartdisease'],evidence={'restecg':1})
print(q1)

print('\n 2. Probability of HeartDisease given evidence= cp ')


# Query heartdisease with cp fixed to 2.
q2=HeartDiseasetest_infer.query(variables=['heartdisease'],evidence={'cp':2})
print(q2)

BAIT,SURAT Page 17
Basics of Machine Learning [1010207718] [2107020701005]

Output:

BAIT,SURAT Page 18
Basics of Machine Learning [1010207718] [2107020701005]

BAIT,SURAT Page 19
Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 8
AIM : Apply EM algorithm to cluster a set of data stored in a .CSV file. Use
the same data set for clustering using the k-Means algorithm. Compare the
results of these two algorithms and comment on the quality of clustering.
You can add Java/Python ML library classes/API in the program.

Code:
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import sklearn.metrics as metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Column names handed to read_csv through names=.
names = ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width', 'Class']

dataset = pd.read_csv("8-dataset.csv", names=names)

# Feature matrix: every column except the last.
X = dataset.iloc[:, :-1]

# Integer id for each class name found in the last column.
label = {'Iris-setosa': 0,'Iris-versicolor': 1, 'Iris-virginica': 2}

y = [label[c] for c in dataset.iloc[:, -1]]

plt.figure(figsize=(14,7))
# One colour per integer id; indexed below with label and cluster arrays.
colormap=np.array(['red','lime','black'])

# REAL PLOT
plt.subplot(1,3,1)
plt.title('Real')
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y])

# K-PLOT
model=KMeans(n_clusters=3, random_state=0).fit(X)
plt.subplot(1,3,2)
plt.title('KMeans')
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[model.labels_])

# NOTE(review): model.labels_ are cluster ids, compared here directly with
# the class ids in y; the scores are only meaningful if the two numberings
# happen to line up -- confirm, or map cluster ids to classes first.
print('The accuracy score of K-Mean: ',metrics.accuracy_score(y, model.labels_))


print('The Confusion matrixof K-Mean:\n',metrics.confusion_matrix(y, model.labels_))

BAIT,SURAT Page 20
Basics of Machine Learning [1010207718] [2107020701005]

# GMM PLOT
gmm=GaussianMixture(n_components=3, random_state=0).fit(X)
y_cluster_gmm=gmm.predict(X)
plt.subplot(1,3,3)
plt.title('GMM Classification')
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y_cluster_gmm])

# NOTE(review): y_cluster_gmm holds component ids, compared here directly
# with the class ids in y; confirm the numberings line up before trusting
# these scores.
print('The accuracy score of EM: ',metrics.accuracy_score(y, y_cluster_gmm))


print('The Confusion matrix of EM:\n ',metrics.confusion_matrix(y, y_cluster_gmm))
plt.show()
Output:

BAIT,SURAT Page 21
Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 9
AIM : Write a program to implement k-Nearest Neighbor algorithm to
classify the iris data set. Print both correct and wrong predictions.
Java/Python ML library classes can be used for this problem.

Code:
import numpy as np

import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Column names handed to read_csv through names=.
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']


dataset = pd.read_csv("9-dataset.csv", names=names)
# Features are every column except the last; the last column is the label.
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
print(X.head())
# Hold out 10% of the rows for testing (no random_state is passed).
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.10)

# 5-nearest-neighbour classifier fitted on the training rows.
classifier = KNeighborsClassifier(n_neighbors=5).fit(Xtrain, ytrain)

ypred = classifier.predict(Xtest)

# Print one table row per test sample: true label, predicted label and
# whether the two agree.
print ("\n-------------------------------------------------------------------------")
print ('%-25s %-25s %-25s' % ('Original Label', 'Predicted Label', 'Correct/Wrong'))
print ("-------------------------------------------------------------------------")
# zip pairs each true label with its prediction, replacing the manual counter.
for label, predicted in zip(ytest, ypred):
    print ('%-25s %-25s' % (label, predicted), end="")
    if (label == predicted):
        print (' %-25s' % ('Correct'))
    else:
        print (' %-25s' % ('Wrong'))
print ("-------------------------------------------------------------------------")
print("\nConfusion Matrix:\n",metrics.confusion_matrix(ytest, ypred))
print ("-------------------------------------------------------------------------")
print("\nClassification Report:\n",metrics.classification_report(ytest, ypred))
print ("-------------------------------------------------------------------------")
print('Accuracy of the classifer is %0.2f' % metrics.accuracy_score(ytest,ypred))
print ("-------------------------------------------------------------------------")

BAIT,SURAT Page 22
Basics of Machine Learning [1010207718] [2107020701005]

Output:

BAIT,SURAT Page 23
Basics of Machine Learning [1010207718] [2107020701005]

PRACTICAL – 10
AIM : Implement the non-parametric Locally Weighted Regression
algorithm in order to fit data points. Select appropriate data set for your
experiment and draw graphs.

Code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix for a query point.

    Row ``j`` of ``xmat`` receives the weight
    ``exp(-||point - xmat[j]||^2 / (2 * k**2))``.

    Args:
        point: query row (1 x n matrix or length-n array).
        xmat: m x n matrix of training rows.
        k: bandwidth; smaller values make the weights fall off faster.

    Returns:
        An m x m ``np.matrix`` with the weights on the diagonal and zeros
        elsewhere.
    """
    # The distances use the xmat argument; the function previously read a
    # module-level X and ignored the matrix it was given.
    diffs = np.asarray(xmat) - np.asarray(point)
    sq_dist = np.sum(diffs * diffs, axis=1)
    return np.asmatrix(np.diag(np.exp(sq_dist / (-2.0 * k ** 2))))

def localWeight(point, xmat, ymat, k):
    """Return the locally weighted least-squares coefficients for ``point``.

    Solves ``(X^T W X)^-1 X^T W y`` where ``W`` is the weight matrix
    returned by ``kernel(point, xmat, k)``.

    Args:
        point: query row.
        xmat: m x n ``np.matrix`` of training rows.
        ymat: 1 x m ``np.matrix`` of targets.
        k: kernel bandwidth.

    Returns:
        An n x 1 matrix of coefficients.
    """
    wei = kernel(point, xmat, k)
    # Use the xmat argument throughout; the formula previously read a
    # module-level X instead.
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    """Predict every training row with its own locally weighted fit.

    Args:
        xmat: m x n ``np.matrix`` of training rows.
        ymat: 1 x m ``np.matrix`` of targets.
        k: kernel bandwidth.

    Returns:
        A length-m array with the prediction for each row of ``xmat``.
    """
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        # The product is a 1 x 1 matrix; take its single element rather than
        # assigning the matrix to a scalar slot.
        ypred[i] = (xmat[i] * localWeight(xmat[i], xmat, ymat, k))[0, 0]
    return ypred

# load data points


data = pd.read_csv('10-dataset.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)

# Wrap both arrays as row matrices; the column of ones is added further below.


mbill = np.asmatrix(bill)
mtip = np.asmatrix(tip)

BAIT,SURAT Page 24
Basics of Machine Learning [1010207718] [2107020701005]

# Build the design matrix: a column of ones next to the bill column.
m = np.shape(mbill)[1]
one = np.asmatrix(np.ones(m))
X = np.hstack((one.T, mbill.T))

# set k here
ypred = localWeightRegression(X, mtip, 0.5)
# Order the rows by the bill column so the fitted curve is drawn left to right.
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

# Scatter the raw points and draw the fitted values over them.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show();

Output:

BAIT,SURAT Page 25

You might also like