Machine Learning - Lab Manual
EX. NO: 1
DATE :
IMPLEMENTATION OF CANDIDATE-ELIMINATION ALGORITHM
AIM:
To implement and demonstrate the Candidate-Elimination algorithm, for a given set of
training data examples stored in a .CSV file, to output a description of the set of all hypotheses
consistent with the training examples.
ALGORITHM:
PROGRAM:
dataset.csv
import numpy as np
import pandas as pd

# Load data from a CSV file (a raw string avoids backslash escapes in the Windows path)
data = pd.read_csv(r'E:\BALA\AI\Lab programs\pgms\dataset.csv')
print(data)
def learn(concepts, target):
    '''
    learn() implements the learning phase of the Candidate-Elimination algorithm.
    Arguments:
        concepts - a data frame with all the features
        target - a data frame with the corresponding output values
    '''
    # Initialise S0 with the first instance from concepts
    # .copy() creates a new array instead of a reference to the same memory
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print(specific_h)
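The listing above stops after initialising specific_h. A minimal completion of learn(), following the standard Candidate-Elimination update rules, might look like the sketch below; it assumes the last CSV column holds the target and that positive examples are labelled 'yes' (adjust to the actual labels in dataset.csv):

def learn(concepts, target):
    specific_h = concepts[0].copy()
    # G starts as the most general hypothesis: one all-'?' row per attribute
    general_h = [['?' for _ in range(len(specific_h))] for _ in range(len(specific_h))]
    for i, h in enumerate(concepts):
        if target[i].lower() == "yes":            # positive example: generalise S
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        else:                                     # negative example: specialise G
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
    # Drop the rows of G that were never specialised
    general_h = [h for h in general_h if h != ['?'] * len(specific_h)]
    return specific_h, general_h

concepts = np.array(data.iloc[:, :-1])
target = np.array(data.iloc[:, -1])
s_final, g_final = learn(concepts, target)
print("Final Specific_h:", s_final)
print("Final General_h:", g_final)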
EX. NO: 2
DATE :
CONSTRUCTION OF A DECISION TREE USING THE ID3 ALGORITHM
AIM:
To build a decision tree using the ID3 algorithm to classify a new sample using Python.
ALGORITHM:
PROGRAM:
import numpy as np
import math
import csv
def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
        return (metadata, traindata)
class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""
def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)
    for count in counts:
        sums += -1 * count * math.log(count, 2)
    return sums
def gain_ratio(data, col):
    items, subsets = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))
    for x in range(items.shape[0]):
        ratio = subsets[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(subsets[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)
    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]
    return total_entropy / iv
def create_node(data, metadata):
    # If every remaining row carries the same class label, return a leaf node
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
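Note that gain_ratio() calls a subtables() helper that does not appear in the listing. A minimal sketch consistent with that call (returning the unique values of the attribute and a dictionary mapping each value to its matching rows) might be:

def subtables(data, col, delete):
    # Unique values taken by the attribute in the given column
    items = np.unique(data[:, col])
    subsets = {}
    for item in items:
        rows = data[data[:, col] == item]
        if delete:
            # Drop the attribute column once it has been used for a split
            rows = np.delete(rows, col, axis=1)
        subsets[item] = rows
    return items, subsets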
OUTPUT:
RESULT:
Thus the program to implement the decision tree based ID3 algorithm using Python was executed and verified successfully.
EX. NO: 3
DATE :
IMPLEMENTATION OF BACK PROPAGATION ALGORITHM TO BUILD AN
ARTIFICIAL NEURAL NETWORK
AIM:
To implement the Back Propagation algorithm to build an Artificial Neural Network.
ALGORITHM:
PROGRAM:
from math import exp
from random import seed, random

# Initialize a network with one hidden layer and one output layer
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    hidden_layer = [{'weights': [random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [random() for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network
# Calculate neuron activation for an input
def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights)-1):
        activation += weights[i] * inputs[i]
    return activation
# Transfer neuron activation
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))
# Forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs
# Calculate the derivative of a neuron output
def transfer_derivative(output):
    return output * (1.0 - output)
# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network)-1:
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(neuron['output'] - expected[j])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])
# Update network weights with error
def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] -= l_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] -= l_rate * neuron['delta']
# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            sum_error += sum([(expected[i] - outputs[i]) ** 2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))
# Test training backprop algorithm
seed(1)
dataset = [[2.7810836,2.550537003,0],
[1.465489372,2.362125076,0],
[3.396561688,4.400293529,0],
[1.38807019,1.850220317,0],
[3.06407232,3.005305973,0],
[7.627531214,2.759262235,1],
[5.332441248,2.088626775,1],
[6.922596716,1.77106367,1],
[8.675418651,-0.242068655,1],
[7.673756466,3.508563011,1]]
n_inputs = len(dataset[0]) - 1
n_outputs = len(set([row[-1] for row in dataset]))
network = initialize_network(n_inputs, 2, n_outputs)
train_network(network, dataset, 0.5, 20, n_outputs)
for layer in network:
    print(layer)
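The trained network can also be used for prediction. The helper below is not part of the original listing; it simply takes the class with the largest output activation:

# Make a prediction with the trained network
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

for row in dataset:
    print('Expected=%d, Got=%d' % (row[-1], predict(network, row)))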
OUTPUT:
>epoch=0, lrate=0.500, error=6.350
>epoch=1, lrate=0.500, error=5.531
>epoch=2, lrate=0.500, error=5.221
>epoch=3, lrate=0.500, error=4.951
>epoch=4, lrate=0.500, error=4.519
>epoch=5, lrate=0.500, error=4.173
>epoch=6, lrate=0.500, error=3.835
>epoch=7, lrate=0.500, error=3.506
>epoch=8, lrate=0.500, error=3.192
>epoch=9, lrate=0.500, error=2.898
>epoch=10, lrate=0.500, error=2.626
>epoch=11, lrate=0.500, error=2.377
>epoch=12, lrate=0.500, error=2.153
>epoch=13, lrate=0.500, error=1.953
>epoch=14, lrate=0.500, error=1.774
>epoch=15, lrate=0.500, error=1.614
>epoch=16, lrate=0.500, error=1.472
>epoch=17, lrate=0.500, error=1.346
>epoch=18, lrate=0.500, error=1.233
>epoch=19, lrate=0.500, error=1.132
[{'weights': [-1.4688375095432327, 1.850887325439514, 1.0858178629550297], 'output': 0.029980305604426185, 'delta': 0.0059546604162323625}, {'weights': [0.37711098142462157, -0.0625909894552989, 0.2765123702642716], 'output': 0.9456229000211323, 'delta': -0.0026279652850863837}]
[{'weights': [2.515394649397849, -0.3391927502445985, -0.9671565426390275], 'output': 0.23648794202357587, 'delta': 0.04270059278364587}, {'weights': [-2.5584149848484263, 1.0036422106209202, 0.42383086467582715], 'output': 0.7790535202438367, 'delta': -0.03803132596437354}]
RESULT:
Thus the Back Propagation algorithm to build an Artificial Neural Network was implemented successfully.
EX.NO: 4
DATE:
IMPLEMENTATION OF NAÏVE BAYESIAN CLASSIFIER FOR A SAMPLE DATA SET
AIM:
To implement the Naïve Bayesian classifier for the Tennis data set and to compute its accuracy on test data.
ALGORITHM:
Here we have P(Sunny|Yes) = 3/9 = 0.33, P(Sunny) = 5/14 = 0.36 and P(Yes) = 9/14 = 0.64.
Now, P(Yes|Sunny) = P(Sunny|Yes) * P(Yes) / P(Sunny) = 0.33 * 0.64 / 0.36 = 0.60, which is the higher probability; the snippet below verifies this arithmetic.
4. Exit.
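The Bayes computation above can be checked in one line of Python:

# Bayes rule check: P(Yes|Sunny) = P(Sunny|Yes) * P(Yes) / P(Sunny)
print((3 / 9) * (9 / 14) / (5 / 14))  # prints 0.6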
PROGRAM:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB

# A raw string avoids backslash escapes in the Windows path
data = pd.read_csv(r"E:\BALA\AI\Lab programs\pgms\Tennis.csv")
print("The first 5 values of the data are:\n", data.head())
# Obtain the training features and the training output
X = data.iloc[:, :-1]
print("\nThe first 5 values of the train data are:\n", X.head())
y = data.iloc[:, -1]
print("\nThe first 5 values of the train output are:\n", y.head())
print("\nNow the Train data is :\n",X.head())
le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train output is\n",y)
classifier = GaussianNB()
classifier.fit(X_train,y_train)
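A single new day can then be classified as sketched below. The encoded feature values are assumptions; the actual integer codes depend on how LabelEncoder numbered the values in each column:

# Hypothetical encoded sample, e.g. (Outlook, Temperature, Humidity, Windy)
sample = [[2, 1, 0, 0]]
pred = classifier.predict(sample)
print("Predicted class:", le_PlayTennis.inverse_transform(pred))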
OUTPUT:
RESULT:
Thus the program to implement the Naïve Bayesian classifier and to compute its accuracy using Python was executed and verified successfully.
EX. NO: 5
DATE :
CLASSIFICATION OF DOCUMENTS USING THE NAÏVE BAYESIAN CLASSIFIER MODEL
AIM:
To classify a set of documents using the Naïve Bayesian classifier and to measure the accuracy and precision.
ALGORITHM:
PROGRAM:
import numpy as np
from sklearn.datasets import fetch_20newsgroups

# categories is not defined in the original listing; this subset of the
# 20 newsgroups category names is an assumption (any valid subset works)
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True)
twenty_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True)
print(len(twenty_train.data))
print(len(twenty_test.data))
print(twenty_train.target_names)
print("\n".join(twenty_train.data[0].split("\n")))
print(twenty_train.target[0])
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

count_vect = CountVectorizer()
X_train_tf = count_vect.fit_transform(twenty_train.data)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_tf)
print(X_train_tfidf.shape)
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

mod = MultinomialNB()
mod.fit(X_train_tfidf, twenty_train.target)
X_test_tf = count_vect.transform(twenty_test.data)
X_test_tfidf = tfidf_transformer.transform(X_test_tf)
predicted = mod.predict(X_test_tfidf)
print(classification_report(twenty_test.target, predicted, target_names=twenty_test.target_names))
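As a design note, the vectoriser, TF-IDF transformer and classifier can also be chained into a single scikit-learn Pipeline, which keeps the train and test transformations consistent automatically:

from sklearn.pipeline import Pipeline

text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])
text_clf.fit(twenty_train.data, twenty_train.target)
print("Pipeline accuracy:", text_clf.score(twenty_test.data, twenty_test.target))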
RESULT:
Thus the accuracy and precision were measured using the Naïve Bayesian classifier model.
EX. NO: 6
DATE :
CONSTRUCTION OF A BAYESIAN NETWORK TO DIAGNOSE CORONA
INFECTION USING STANDARD WHO DATA SET
AIM:
To construct a Bayesian network to diagnose corona infection using WHO data set.
ALGORITHM:
PROGRAM:
import pandas as pd
covid_19_data=pd.read_csv("/content/corona.csv")
covid_19_data
import warnings
warnings.filterwarnings("ignore",category=FutureWarning)
covid_19_data=pd.read_csv("/content/corona.csv")
print()
print()
import numpy as np
covid_19_data.replace(to_replace='?',value=np.NaN,inplace=True)
print(covid_19_data.describe(include='all'))
print()
print(covid_19_data["Loss of Taste/Smell"].value_counts())
print(covid_19_data.isnull().sum())
import seaborn as sns
sns.countplot(x="Loss of Taste/Smell",data=covid_19_data,linewidth=3)
covid_19_data['Patient ID'].fillna(covid_19_data['Patient ID'].mode()[0],inplace=True)
X=covid_19_data.drop(['Travel History'],axis=1)
y=covid_19_data.Cough
X=X[['Patient ID','Age']]
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)
from sklearn.naive_bayes import GaussianNB
NB_classifier=GaussianNB()
NB_classifier.fit(X_train,y_train)
y_predict=NB_classifier.predict(X_test)
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,y_predict)
sns.heatmap(cm,annot=True,cmap='Blues')
from sklearn.metrics import classification_report
print(classification_report(y_test,y_predict))
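The listing above approximates the diagnosis with a Gaussian Naïve Bayes classifier, which is a special case of a Bayesian network. An explicit network over a couple of symptom columns could be sketched with the pgmpy library as below; the edges and the evidence values are assumptions, and the named columns must hold discrete values in the actual data set:

# Sketch only: assumes pgmpy is installed and the columns are discrete
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

model = BayesianNetwork([('Sore Throat', 'Cough'), ('Loss of Taste/Smell', 'Cough')])
model.fit(covid_19_data[['Sore Throat', 'Loss of Taste/Smell', 'Cough']],
          estimator=MaximumLikelihoodEstimator)
infer = VariableElimination(model)
print(infer.query(variables=['Cough'],
                  evidence={'Sore Throat': 1, 'Loss of Taste/Smell': 1}))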
OUTPUT:
RESULT:
Thus the program to diagnose corona infection using a Bayesian network was implemented successfully using Python.
EX. NO: 7
DATE :
COMPARISON OF CLUSTERING IN EM ALGORITHM AND K-MEANS
ALGORITHM USING THE SAME DATA SETS
AIM:
To compare the clustering in EM algorithm and K-means algorithm using the same data
sets.
ALGORITHM:
1. Expectation step (E - step): It involves the estimation (guess) of all missing values in the
dataset so that after completing this step, there should not be any missing value.
2. Maximization step (M - step): This step involves the use of estimated data in the E-step
and updating the parameters.
3. Repeat E-step and M-step until the convergence of the values occurs.
PROGRAM:
from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
from sklearn.datasets import load_iris
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
dataset=load_iris()
# print(dataset)
X=pd.DataFrame(dataset.data)
X.columns=['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']
y=pd.DataFrame(dataset.target)
y.columns=['Targets']
# print(X)
plt.figure(figsize=(14,7))
colormap=np.array(['red','lime','black'])
# REAL PLOT
plt.subplot(1,3,1)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y.Targets],s=40)
plt.title('Real')
# K-PLOT
plt.subplot(1,3,2)
model=KMeans(n_clusters=3)
model.fit(X)
predY=np.choose(model.labels_,[0,1,2]).astype(np.int64)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[predY],s=40)
plt.title('KMeans')
# GMM PLOT
scaler=preprocessing.StandardScaler()
scaler.fit(X)
xsa=scaler.transform(X)
xs=pd.DataFrame(xsa,columns=X.columns)
gmm=GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm=gmm.predict(xs)
plt.subplot(1,3,3)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y_cluster_gmm],s=40)
plt.title('GMM Classification')
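Since cluster numbers are arbitrary, a label-permutation-invariant score such as the Adjusted Rand Index (available through sklearn.metrics, already imported above as sm) gives a fair numeric comparison of the two algorithms:

# Adjusted Rand Index: 1.0 means perfect agreement with the true labels
print("KMeans ARI:", sm.adjusted_rand_score(y.Targets, predY))
print("GMM ARI   :", sm.adjusted_rand_score(y.Targets, y_cluster_gmm))
plt.show()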
OUTPUT:
RESULT:
Thus the program to compare clustering by the EM algorithm and the K-Means algorithm on the same data set was executed successfully.
EX. NO: 8
DATE :
IMPLEMENTATION OF K-NEAREST NEIGHBOUR ALGORITHM TO CLASSIFY
THE IRIS DATA SET
AIM:
To implement K-Nearest Neighbour algorithm to classify iris data set.
ALGORITHM:
1. Load the iris data set and split it into training and test sets.
2. Choose the number of neighbours K (here K = 1).
3. For each test sample, compute its distance to every training sample.
4. Select the K nearest training samples and assign the majority class among them as the prediction.
5. Compare the predictions with the actual labels and report the accuracy, as in the program below.
PROGRAM:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np
dataset=load_iris()
X_train,X_test,y_train,y_test=train_test_split(dataset["data"],dataset["target"],random_state=0)
kn=KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train,y_train)
for i in range(len(X_test)):
    x = X_test[i]
    x_new = np.array([x])
    prediction = kn.predict(x_new)
    print("TARGET=", y_test[i], dataset["target_names"][y_test[i]],
          "PREDICTED=", prediction, dataset["target_names"][prediction])
print(kn.score(X_test, y_test))
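The fitted classifier can also label a single new measurement; the four values below are made-up sepal/petal lengths and widths in cm:

# Classify one hypothetical flower: [sepal length, sepal width, petal length, petal width]
sample = np.array([[5.0, 3.4, 1.5, 0.2]])
print("Predicted:", dataset["target_names"][kn.predict(sample)[0]])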
OUTPUT:
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [2] ['virginica']
0.9736842105263158
RESULT:
Thus the program for the K-Nearest Neighbour algorithm was implemented successfully using the iris data set.
EX. NO: 9
DATE :
IMPLEMENTATION OF THE NON-PARAMETRIC LOCALLY WEIGHTED REGRESSION ALGORITHM
AIM:
To implement the non-parametric Locally Weighted Regression algorithm in order to fit
data points.
ALGORITHM:
1. Read the given data sample to X and the curve (linear or non-linear) to Y.
2. Set the value of the smoothening (free) parameter f, the fraction of points used for each local fit.
3. For each point of interest x0, weight every point with the tricube kernel w = (1 - |d|^3)^3 applied to the scaled distances d.
4. Determine the local model parameter β(x0) = (XᵀWX)⁻¹XᵀWy by weighted least squares.
5. Repeat the fit, down-weighting points with large residuals, until the estimate is robust.
6. Prediction = x0*β
PROGRAM:
from math import ceil
import numpy as np
from scipy import linalg

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2
    return yest
import math
n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)
import matplotlib.pyplot as plt
plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()
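As a cross-check, statsmodels ships a reference LOWESS implementation; assuming statsmodels is installed, the sketch below plots its fit for comparison (frac plays the role of f above):

import statsmodels.api as smapi

# Returns an (n, 2) array of (sorted x, smoothed y)
smoothed = smapi.nonparametric.lowess(y, x, frac=0.25, it=3)
plt.figure()
plt.plot(x, y, "r.")
plt.plot(smoothed[:, 0], smoothed[:, 1], "g-")
plt.title("statsmodels lowess")
plt.show()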
OUTPUT:
RESULT:
Thus the non-parametric Locally Weighted Regression algorithm to fit data points was
implemented successfully.