Practical - 1
Practical - 1
PRACTICAL – 1
AIM : Implement and demonstrate the FIND-S algorithm for finding the
most specific hypothesis based on a given set of training data samples. Read
the training data from a .CSV file.
Code:
def find_s(examples):
    """Return the most specific hypothesis consistent with the positive examples.

    Implements FIND-S: start from the most specific hypothesis (every
    attribute set to 'ϕ') and generalize it just enough to cover each
    positive example. Negative examples are ignored.

    Args:
        examples: Sequence of rows. Each row holds the attribute values
            followed by the class label as its last element. A row is
            positive when its label equals "yes" (case-insensitive).

    Returns:
        A list with one constraint per attribute: a concrete value, '?'
        (any value is acceptable), or 'ϕ' (no positive example was seen).
    """
    # With no rows the attribute count is unknown, so keep the historical
    # six-attribute default.
    n_attributes = len(examples[0]) - 1 if len(examples) else 6

    # Initialize hypothesis to the most specific hypothesis
    hypothesis = ['ϕ'] * n_attributes

    for example in examples:
        *attributes, label = example
        if str(label).strip().lower() != 'yes':
            continue  # FIND-S never specializes on negative examples
        for index, value in zip(range(n_attributes), attributes):
            if hypothesis[index] == 'ϕ':
                # First positive example: adopt its value as the constraint.
                hypothesis[index] = value
            elif hypothesis[index] != value:
                # Conflicting values across positives: generalize.
                hypothesis[index] = '?'
    return hypothesis
# Demonstration run: each training row lists six attribute values
# followed by a Yes/No label in the last position.
data = [
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes'],
    ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes'],
    ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No'],
    ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes'],
]

# Learn the hypothesis from the rows above and report it.
hypothesis = find_s(data)
print(f"Final hypothesis: {hypothesis}")
Output:
BAIT,SURAT Page 1
Basics of Machine Learning [1010207718] [2107020701005]
PRACTICAL – 2
AIM : For a given set of training data examples stored in a .CSV file,
implement and demonstrate the Candidate-Elimination algorithm. Output a
description of the set of all hypotheses consistent with the training examples.
Code :
import numpy as np
import pandas as pd
# Load the training examples from the CSV file.
data = pd.read_csv('enjoysport.csv')
# Every column except the last is taken as the attribute values of an example.
concepts = np.array(data.iloc[:, 0:-1])
# The last column is taken as the target label of each example.
target = np.array(data.iloc[:, -1])
for i, h in enumerate(concepts):
if target[i] == "yes":
print("If instance is Positive ")
for x in range(len(specific_h)):
if h[x] != specific_h[x]:
specific_h[x] = '?'
general_h[x][x] = '?'
if target[i] == "no":
print("If instance is Negative ")
for x in range(len(specific_h)):
if h[x] != specific_h[x]:
general_h[x][x] = specific_h[x]
else:
general_h[x][x] = '?'
BAIT,SURAT Page 2
Basics of Machine Learning [1010207718] [2107020701005]
indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
for i in indices:
general_h.remove(['?', '?', '?', '?', '?', '?'])
return specific_h, general_h
Output :
BAIT,SURAT Page 3
Basics of Machine Learning [1010207718] [2107020701005]
PRACTICAL – 3
AIM : Write a program to demonstrate the working of the decision tree
based ID3 algorithm. Use an appropriate data set for building the decision
tree and apply this knowledge to classify a new sample
Code:
import numpy as np
import math
import csv
def read_data(filename):
    """Read a comma-separated file and split it into header and data rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(metadata, traindata)`` tuple. ``metadata`` is the list of
        column names taken from the first row (empty if the file is
        empty); ``traindata`` is the list of remaining rows, each a list
        of strings.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        # Default to an empty header instead of raising StopIteration
        # when the file has no rows at all.
        metadata = list(next(datareader, []))
        traindata = list(datareader)
    return metadata, traindata
class Node:
    """A tree node labelled with an attribute.

    The node is printed as its attribute label.
    """

    def __init__(self, attribute):
        """Create a node for ``attribute`` with no children and no answer."""
        # Label shown when the node is converted to a string.
        self.attribute = attribute
        # Child entries; starts empty and is filled in by the caller.
        self.children = []
        # Starts as an empty string; set by the caller when needed.
        self.answer = ""

    def __str__(self):
        """Return the attribute label as a string."""
        # str() guards against non-str labels, for which __str__ would
        # otherwise raise TypeError.
        return str(self.attribute)
for x in range(items.shape[0]):
for y in range(data.shape[0]):
if data[y, col] == items[x]:
count[x] += 1
BAIT,SURAT Page 4
Basics of Machine Learning [1010207718] [2107020701005]
for x in range(items.shape[0]):
dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
pos = 0
for y in range(data.shape[0]):
if data[y, col] == items[x]:
dict[items[x]][pos] = data[y]
pos += 1
if delete:
dict[items[x]] = np.delete(dict[items[x]], col, 1)
def entropy(S):
    """Return the base-2 Shannon entropy of the values in ``S``.

    Args:
        S: Array-like of labels (any dtype ``np.unique`` can handle).

    Returns:
        ``0`` when ``S`` holds at most one distinct value (including the
        empty case); otherwise ``-sum(p * log2(p))`` as a float, where
        ``p`` is the relative frequency of each distinct value.
    """
    S = np.asarray(S)
    items, occurrences = np.unique(S, return_counts=True)
    if items.size <= 1:
        return 0
    # Relative frequency of each distinct label.
    proportions = occurrences / (S.size * 1.0)
    return float(-np.sum(proportions * np.log2(proportions)))
total_size = data.shape[0]
entropies = np.zeros((items.shape[0], 1))
intrinsic = np.zeros((items.shape[0], 1))
for x in range(items.shape[0]):
ratio = dict[items[x]].shape[0] / (total_size * 1.0)
entropies[x] = ratio * entropy(dict[items[x]][:, -1])
intrinsic[x] = ratio * math.log(ratio, 2)
BAIT,SURAT Page 5
Basics of Machine Learning [1010207718] [2107020701005]
iv = -1 * sum(intrinsic)
for x in range(entropies.shape[0]):
total_entropy -= entropies[x]
return total_entropy / iv
split = np.argmax(gains)
node = Node(metadata[split])
metadata = np.delete(metadata, split, 0)
for x in range(items.shape[0]):
child = create_node(dict[items[x]], metadata)
node.children.append((items[x], child))
return node
def empty(size):
    """Return a string made of ``size`` space characters.

    Args:
        size: Number of spaces to produce; zero or negative yields "".

    Returns:
        The padding string.
    """
    # String repetition replaces the character-by-character loop.
    return " " * size
BAIT,SURAT Page 6
Basics of Machine Learning [1010207718] [2107020701005]
print_tree(n, level + 2)
Output:
BAIT,SURAT Page 7
Basics of Machine Learning [1010207718] [2107020701005]
PRACTICAL – 4
AIM : Build an Artificial Neural Network by implementing the Back
propagation algorithm and test the same using appropriate data sets.
Code:
import numpy as np
# Raw samples: three rows with two input features each, and one target
# value per row.
X = np.array([[2, 9], [1, 5], [3, 6]], dtype=float)
y = np.array([[92], [86], [89]], dtype=float)

# Scale every feature column by that column's own maximum
# (axis=0 takes the maximum down the rows).
X = X / X.max(axis=0)
# Scale the targets by a constant factor of 100.
y = y / 100
#Sigmoid Function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)``.

    Args:
        x: A scalar or NumPy array; arrays are processed element-wise.

    Returns:
        The sigmoid of ``x``, with the same shape as the input.
    """
    return 1 / (1 + np.exp(-x))
# Training settings: number of training iterations and the learning rate.
epoch, lr = 5000, 0.1

# Layer sizes: input features in the data set, hidden-layer neurons,
# and neurons at the output layer.
inputlayer_neurons, hiddenlayer_neurons, output_neurons = 2, 3, 1
#Forward Propagation
# NOTE(review): wh, bh, wout and bout are not defined in the visible
# source; presumably they are the hidden/output weights and biases and
# these statements run inside the training loop. Confirm.
# Hidden layer: weighted sum of the inputs, plus bias, through sigmoid.
hinp1=np.dot(X,wh)
hinp=hinp1 + bh
hlayer_act = sigmoid(hinp)
# Output layer: weighted sum of the hidden activations, plus bias.
outinp1=np.dot(hlayer_act,wout)
outinp= outinp1+ bout
BAIT,SURAT Page 8
Basics of Machine Learning [1010207718] [2107020701005]
# Network prediction: sigmoid of the output-layer pre-activation.
output = sigmoid(outinp)
#Backpropagation
# Error at the output: target minus prediction.
EO = y-output
# NOTE(review): derivatives_sigmoid is not defined in the visible source.
outgrad = derivatives_sigmoid(output)
# Output delta: error scaled element-wise by the gradient term.
d_output = EO* outgrad
# Output delta multiplied by the transposed output weights.
EH = d_output.dot(wout.T)
Output:
BAIT,SURAT Page 9
Basics of Machine Learning [1010207718] [2107020701005]
PRACTICAL – 5
AIM : Write a program to implement the naïve Bayesian classifier for a
sample training data set stored as a .CSV file. Compute the accuracy of the
classifier, considering few test data sets.
Code:
import csv
import random
import math
def loadcsv(filename):
    """Load a purely numeric CSV file as a list of rows of floats.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per non-blank line; each entry is the list
        of that line's fields converted to ``float``.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    # The context manager closes the file; newline='' lets the csv
    # module handle line endings itself.
    with open(filename, "r", newline="") as csvfile:
        # converting strings into numbers for processing; blank lines
        # (which csv.reader yields as empty rows) are skipped
        return [[float(x) for x in row] for row in csv.reader(csvfile) if row]
def separatebyclass(dataset):
    """Group the rows of ``dataset`` by their last element.

    Args:
        dataset: Iterable of rows (sequences); the last element of each
            row is used as its class value.

    Returns:
        A dict mapping each class value to the list of rows carrying it,
        in their original order.
    """
    separated = {}  # class value -> rows belonging to that class
    for vector in dataset:
        separated.setdefault(vector[-1], []).append(vector)
    return separated
def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Args:
        numbers: Non-empty sized collection of numbers.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    return sum(numbers) / float(len(numbers))
def stdev(numbers):
    """Return the sample standard deviation (n - 1 denominator).

    Args:
        numbers: Sized collection holding at least two numbers.

    Raises:
        ZeroDivisionError: If ``numbers`` has fewer than two elements.
    """
    # The average is computed inline so this function does not depend on
    # a sibling helper.
    avg = sum(numbers) / float(len(numbers))
    variance = sum(pow(x - avg, 2) for x in numbers) / float(len(numbers) - 1)
    return math.sqrt(variance)
def summarizebyclass(dataset):
    """Summarize the rows of ``dataset`` separately for each class value.

    Args:
        dataset: Rows accepted by ``separatebyclass``.

    Returns:
        A dict mapping each class value to the result of calling
        ``summarize`` on the rows of that class.
    """
    separated = separatebyclass(dataset)
    # NOTE(review): summarize is not visible here; per the original
    # comment it computes the mean and std for each class value. Confirm.
    return {
        classvalue: summarize(instances)
        for classvalue, instances in separated.items()
    }
BAIT,SURAT Page 11
Basics of Machine Learning [1010207718] [2107020701005]
def main():
filename = 'pima-indians-diabetes.csv'
splitratio = 0.67
dataset = loadcsv(filename);
BAIT,SURAT Page 12
Basics of Machine Learning [1010207718] [2107020701005]
predictions = getpredictions(summaries, testset) # find the predictions of test data with the training data
accuracy = getaccuracy(testset, predictions)
print('Accuracy of the classifier is : {0}%'.format(accuracy))
main()
Output:
BAIT,SURAT Page 13
Basics of Machine Learning [1010207718] [2107020701005]
PRACTICAL – 6
AIM : Assuming a set of documents that need to be classified, use the naïve
Bayesian Classifier model to perform this task. Built-in Java classes/API can
be used to write the program. Calculate the accuracy, precision, and recall
for your data set.
Code:
import pandas as pd
# Read the CSV, naming its two columns 'message' and 'label'.
msg=pd.read_csv('naivetext.csv',names=['message','label'])
# Add a numeric label column: 'pos' maps to 1 and 'neg' maps to 0.
msg['labelnum']=msg.label.map({'pos':1,'neg':0})
# X holds the message column, y the numeric labels.
X=msg.message
y=msg.labelnum
print(X)
print(y)
BAIT,SURAT Page 14
Basics of Machine Learning [1010207718] [2107020701005]
# NOTE(review): metrics, ytest and predicted are not defined in the
# visible source; presumably metrics is sklearn.metrics. Confirm.
# Report the confusion matrix, precision and recall for the predictions.
print(metrics.confusion_matrix(ytest,predicted))
print('\n The value of Precision' ,metrics.precision_score(ytest,predicted))
print('\n The value of Recall' ,metrics.recall_score(ytest,predicted))
Output:
BAIT,SURAT Page 15
Basics of Machine Learning [1010207718] [2107020701005]
BAIT,SURAT Page 16
Basics of Machine Learning [1010207718] [2107020701005]
PRACTICAL – 7
AIM : Write a program to construct a Bayesian network considering medical
data. Use this model to demonstrate the diagnosis of heart patients using
standard Heart Disease Data Set. You can use Java/Python ML library
classes/API.
Code:
import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianNetwork
from pgmpy.inference import VariableElimination
heartDisease = pd.read_csv('7-dataset.csv')
heartDisease = heartDisease.replace('?',np.nan)
# Network structure as (parent, child) edges: age, gender, exang and cp
# point to heartdisease, which in turn points to restecg and chol.
model = BayesianNetwork([
    ('age', 'heartdisease'),
    ('gender', 'heartdisease'),
    ('exang', 'heartdisease'),
    ('cp', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'chol'),
])
print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease,estimator=MaximumLikelihoodEstimator)
BAIT,SURAT Page 17
Basics of Machine Learning [1010207718] [2107020701005]
Output:
BAIT,SURAT Page 18
Basics of Machine Learning [1010207718] [2107020701005]
BAIT,SURAT Page 19
Basics of Machine Learning [1010207718] [2107020701005]
PRACTICAL – 8
AIM : Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use
the same data set for clustering using the k-Means algorithm. Compare the
results of these two algorithms and comment on the quality of clustering.
You can add Java/Python ML library classes/API in the program.
Code:
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import sklearn.metrics as metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): dataset and y are not defined in the visible source;
# y is used below to index colormap, so it presumably holds integer
# class codes. Confirm.
# Features: every column of dataset except the last.
X = dataset.iloc[:, :-1]
plt.figure(figsize=(14,7))
# One colour per class/cluster index (0, 1, 2).
colormap=np.array(['red','lime','black'])
# REAL PLOT
# First of three side-by-side panels: points coloured by y.
plt.subplot(1,3,1)
plt.title('Real')
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y])
# K-PLOT
# Second panel: points coloured by the fitted k-means cluster labels.
model=KMeans(n_clusters=3, random_state=0).fit(X)
plt.subplot(1,3,2)
plt.title('KMeans')
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[model.labels_])
BAIT,SURAT Page 20
Basics of Machine Learning [1010207718] [2107020701005]
# GMM PLOT
# Fit a three-component Gaussian mixture and assign each row of X to a
# component.
gmm=GaussianMixture(n_components=3, random_state=0).fit(X)
y_cluster_gmm=gmm.predict(X)
# Third panel: points coloured by the predicted mixture component.
plt.subplot(1,3,3)
plt.title('GMM Classification')
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y_cluster_gmm])
BAIT,SURAT Page 21
Basics of Machine Learning [1010207718] [2107020701005]
PRACTICAL – 9
AIM : Write a program to implement k-Nearest Neighbor algorithm to
classify the iris data set. Print both correct and wrong predictions.
Java/Python ML library classes can be used for this problem.
Code:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
# Predict labels for the test inputs, then print one row per test sample
# showing the original label, the predicted label and whether they match.
# NOTE(review): classifier, Xtest and ytest are not defined in the
# visible source.
ypred = classifier.predict(Xtest)
print("\n-------------------------------------------------------------------------")
print('%-25s %-25s %-25s' % ('Original Label', 'Predicted Label', 'Correct/Wrong'))
print("-------------------------------------------------------------------------")
# Walk labels and predictions in lockstep instead of keeping a manual
# index counter.
for label, predicted in zip(ytest, ypred):
    verdict = 'Correct' if label == predicted else 'Wrong'
    print('%-25s %-25s %-25s' % (label, predicted, verdict))
print("-------------------------------------------------------------------------")
print("\nConfusion Matrix:\n", metrics.confusion_matrix(ytest, ypred))
print("-------------------------------------------------------------------------")
print("\nClassification Report:\n", metrics.classification_report(ytest, ypred))
print("-------------------------------------------------------------------------")
print('Accuracy of the classifer is %0.2f' % metrics.accuracy_score(ytest, ypred))
print("-------------------------------------------------------------------------")
BAIT,SURAT Page 22
Basics of Machine Learning [1010207718] [2107020701005]
Output:
BAIT,SURAT Page 23
Basics of Machine Learning [1010207718] [2107020701005]
PRACTICAL – 10
AIM : Implement the non-parametric Locally Weighted Regression
algorithm in order to fit data points. Select appropriate data set for your
experiment and draw graphs.
Code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
BAIT,SURAT Page 24
Basics of Machine Learning [1010207718] [2107020701005]
# NOTE(review): mbill, mtip, bill, tip and localWeightRegression are not
# defined in the visible source.
# m is the length of mbill's second axis.
m = np.shape(mbill)[1]
# Row matrix of m ones.
one = np.asmatrix(np.ones(m))
# Design matrix: a column of ones placed beside mbill transposed.
X = np.hstack((one.T, mbill.T))
# set k here
ypred = localWeightRegression(X, mtip, 0.5)
# Row order that sorts X by its second column, so the fitted curve is
# drawn from left to right.
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Raw observations in green, fitted values in red.
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show();
Output:
BAIT,SURAT Page 25