ML Lab Programs
1) Aim: Illustrate and demonstrate the working model and principle of the Find-S algorithm.
Program: For a given set of training data examples stored in a .CSV file, implement and demonstrate the Find-S algorithm to output the most specific hypothesis consistent with the training examples.
import csv

def loadCsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)  # separate the header row from the data rows
    return dataset, headers

def print_hypothesis(h):
    print('<', end='')
    for i in range(0, len(h) - 1):  # skip the target column
        print(h[i], end=',')
    print('>')

def findS():
    dataset, features = loadCsv("/content/sports1.csv")
    rows = len(dataset)
    cols = len(dataset[0])
    flag = 0
    for x in range(0, rows):
        t = dataset[x]
        if t[-1] == '1' and flag == 0:
            # the first positive example initializes the hypothesis
            flag = 1
            h = dataset[x]
        elif t[-1] == '1':
            # generalize every attribute that disagrees with the hypothesis
            for y in range(cols):
                if h[y] != t[y]:
                    h[y] = '?'
    print("The maximally specific hypothesis for the given training examples:")
    print_hypothesis(h)

findS()
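The code assumes a CSV whose last column is the class label ('1' marks a positive example). For reference, a sports1.csv consistent with the hypothesis printed in the output (and with the instance shown in Program 2) might look like the following; the header names are assumptions:
sky,airtemp,humidity,wind,water,forecast,enjoysport
sunny,warm,normal,strong,warm,same,1
sunny,warm,high,strong,warm,same,1
rainy,cold,high,strong,warm,change,0
sunny,warm,high,strong,cool,change,1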
Output
2) Aim: Demonstrate the working model and principle of the Candidate-Elimination algorithm.
Program: For a given set of training data examples stored in a .CSV file, implement and demonstrate the
Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training
examples.
import numpy as np
import pandas as pd
df = pd.read_csv("/content/sports1.csv")
concept=np.array(df.iloc[:,0:-1])
target=np.array(df.iloc[:,-1])
def learn(concept, target):
    specific_h = concept[0].copy()
    print("Most specific", specific_h)
    general_h = [["?" for _ in range(len(specific_h))] for _ in range(len(specific_h))]
    print("General", general_h)
    for i, h in enumerate(concept):
        print("Instance", i + 1, "is", h)
        if target[i] == 1:  # positive example: generalize the specific boundary
            print("Instance is positive")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        else:  # negative example: specialize the general boundary
            print("Instance is negative")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("Specific boundary", i + 1, specific_h)
        print("General boundary", i + 1, general_h)
        print("\n")

learn(concept, target)
OUTPUT
Most specific ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
General [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Instance 1 is ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
Instance is positive
Specific boundary 1 ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
General boundary 1 [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
3) Aim: To construct a decision tree from training data sets under the supervised learning concept.
Program: Write a program to demonstrate the working of the decision tree based ID3
algorithm. Use an appropriate data set for building the decision tree and apply this knowledge
to classify a new sample.
#Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from sklearn import tree
df=pd.read_csv("/content/Iris.csv")
df.head()
df.drop('Id',axis=1,inplace=True)
df.head()
le = LabelEncoder()
df['Species']= le.fit_transform(df['Species'])
df['Species'].unique()
X=df.iloc[:,:4]
y=df.iloc[:,4:]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state=123)
clf=DecisionTreeClassifier(criterion='entropy', splitter='best', max_leaf_nodes=3)
clf.fit(X_train,y_train.values.ravel())
y_pred=clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
confusion_matrix(y_test, y_pred)
fn=['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm'] # column names of the dataset
cn=['Iris-setosa','Iris-versicolor','Iris-virginica'] # names of the classes to be classified
fig, axes = plt.subplots(nrows = 1,ncols = 1,figsize = (2,2), dpi=200)
tree.plot_tree(clf,
               feature_names=fn,
               class_names=cn,
               filled=True);
species_check = clf.predict([[4.7, 3.2, 1.3, 0.2]])[0]
print(cn[species_check])
Output
4) Aim: To understand the working principle of an Artificial Neural Network with the feed-forward and back-propagation principles.
Program: Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.
import numpy as np

# Input dataset
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # two inputs [sleep, study]
y = np.array(([92], [86], [89]), dtype=float)        # one output [Expected % in Exams]

# Normalize the input features
X = X / np.amax(X, axis=0)  # normalize features in X
y = y / 100                 # normalize output to be in the range [0, 1]

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid function (expressed in terms of the activation)
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000              # number of training iterations
lr = 0.1                  # learning rate
inputlayer_neurons = 2    # number of features in the dataset
hiddenlayer_neurons = 3   # number of neurons in the hidden layer
output_neurons = 1        # number of neurons in the output layer

# Weight and bias initialization
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))  # weights between input and hidden layer
bh = np.random.uniform(size=(1, hiddenlayer_neurons))                   # bias for hidden layer
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))    # weights between hidden and output layer
bout = np.random.uniform(size=(1, output_neurons))                      # bias for output layer

# Training the neural network
for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output                               # error at output
    outgrad = derivatives_sigmoid(output)         # gradient of output
    d_output = EO * outgrad                       # delta output
    EH = d_output.dot(wout.T)                     # error at hidden layer
    # How much the hidden layer weights contributed to the error
    hiddengrad = derivatives_sigmoid(hlayer_act)  # gradient of hidden layer
    d_hiddenlayer = EH * hiddengrad               # delta hidden layer

    # Update weights and biases
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

# Print results
print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" + str(output))
Output
5) Aim: Demonstrate a text classifier using the Naïve Bayes classifier algorithm.
Program: Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test data sets.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.metrics import classification_report

df = pd.read_csv('/content/naive.csv', header=None)
X = df.drop([8], axis=1)  # features: all columns except the last (column 8)
Y = df[8]                 # target: column 8
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
gn = GaussianNB()
gn.fit(x_train, y_train)
y_pred = gn.predict(x_test)
print(len(x_test))
print(len(x_train))
print(metrics.accuracy_score(y_test, y_pred) * 100)
print(classification_report(y_test, y_pred))
Output
6) Aim: Implement and demonstrate a classification algorithm using the Support Vector Machine (SVM) algorithm.
Program: Implement and demonstrate the working of the SVM algorithm for classification.
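The program listing for this experiment is missing from this record. Below is a minimal sketch consistent with the output that follows, assuming the same Iris CSV used in Program 3 (/content/Iris.csv), a 70/30 train/test split (giving 45 test samples), and a linear kernel; the kernel choice and random_state are assumptions.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

df = pd.read_csv("/content/Iris.csv")  # assumed path, as in Program 3
df.drop('Id', axis=1, inplace=True)
df.info()  # prints the DataFrame summary shown below

X = df.iloc[:, :4]  # the four measurement columns as features
y = df['Species']   # class label
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)  # 45 test samples

clf = SVC(kernel='linear')  # kernel choice is an assumption
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))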
OUTPUT
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   SepalLengthCm  150 non-null    float64
 1   SepalWidthCm   150 non-null    float64
 2   PetalLengthCm  150 non-null    float64
 3   PetalWidthCm   150 non-null    float64
 4   Species        150 non-null    object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
              precision    recall  f1-score   support

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.97        45
weighted avg       0.98      0.98      0.98        45
7) Aim: Implement and demonstrate the working model of the k-Means clustering algorithm with the Expectation-Maximization (EM) concept.
Program: Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using the k-Means algorithm. Compare the results of these two algorithms and comment on the quality of clustering. You can use Python ML library classes/API in the program.
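The program listing is likewise missing from this record. The following is a minimal sketch of the comparison on the Iris data, assuming sklearn's built-in dataset; the exact train/test split and cluster-to-class re-mapping behind the 45-sample confusion matrix in the output below are not recoverable from this record.
import numpy as np
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import confusion_matrix, classification_report

iris = datasets.load_iris()  # assumed data source
X, y = iris.data, iris.target
print('class: 0-Iris-Setosa, 1- Iris-Versicolour, 2- Iris-Virginica')
print(y)  # true class labels, as printed below

# k-Means clustering
km = KMeans(n_clusters=3, random_state=0).fit(X)
print('k-Means labels:\n', km.labels_)

# EM clustering via a Gaussian mixture model
gmm = GaussianMixture(n_components=3, random_state=0).fit(X)
gmm_labels = gmm.predict(X)
print('Confusion Matrix')
print(confusion_matrix(y, gmm_labels))  # cluster ids may need re-mapping to class labels
print('Accuracy Metrics')
print(classification_report(y, gmm_labels))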
OUTPUT
class: 0-Iris-Setosa, 1- Iris-Versicolour, 2- Iris-Virginica
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
Confusion Matrix
[[18 0 0]
[ 0 16 1]
[ 0 0 10]]
Accuracy Metrics
precision recall f1-score support
8) Aim: Implement the non-parametric Locally Weighted Regression algorithm.
Program: Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select an appropriate data set for your experiment and draw graphs.
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot

# Enable Bokeh to display the plots in the notebook
output_notebook()

def local_regression(x0, X, Y, tau):
    # Add bias term to the query point x0
    x0 = np.r_[1, x0]
    # Add bias term to X
    X = np.c_[np.ones(len(X)), X]
    # Fit model: normal equations with kernel weighting
    xw = X.T * radial_kernel(x0, X, tau)    # X transpose * W
    beta = np.linalg.pinv(xw @ X) @ xw @ Y  # @ is matrix multiplication
    # Predict the value at x0
    return x0 @ beta

def radial_kernel(x0, X, tau):
    # Radial (Gaussian) kernel weight function
    return np.exp(np.sum((X - x0) ** 2, axis=1) / (-2 * tau * tau))

# Generate dataset
n = 1000
X = np.linspace(-3, 3, num=n)
print("The Data Set (10 Samples) X :\n", X[:10])
Y = np.log(np.abs(X ** 2 - 1) + .5)
print("The Fitting Curve Data Set (10 Samples) Y :\n", Y[:10])
# Jitter X
X += np.random.normal(scale=.1, size=n)
print("Jittered (10 Samples) X :\n", X[:10])

# Domain for prediction
domain = np.linspace(-3, 3, num=300)
print("X0 Domain Space (10 Samples) :\n", domain[:10])

def plot_lwr(tau):
    # Prediction through locally weighted regression over the domain
    prediction = [local_regression(x0, X, Y, tau) for x0 in domain]
    plot = figure(width=400, height=400)
    plot.title.text = f'tau={tau}'
    plot.scatter(X, Y, alpha=.3)
    plot.line(domain, prediction, line_width=2, color='red')
    return plot

# Create and display the plots for several bandwidths
plots = [
    [plot_lwr(10.), plot_lwr(1.)],
    [plot_lwr(0.1), plot_lwr(0.01)]
]
show(gridplot(plots))
OUTPUT
9) Aim: Demonstrate and analyse the result sets obtained from the Bayesian belief network principle.
Program: Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set. You can use Python ML library classes/API.
import pandas as pd
import numpy as np
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
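# Note: newer pgmpy releases rename BayesianModel to BayesianNetwork; this listing assumes an older pgmpy version.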
from pgmpy.inference import VariableElimination
import networkx as nx
import matplotlib.pyplot as plt
df=pd.read_csv("medical dataset.csv")
df.head()
print(df.info())
print(pd.unique(df['age']))
model = BayesianModel([('age','heartdisease'),('gender','heartdisease'),('exang','heartdisease'),('cp','heartdisease'),('heartdisease','restecg'),('heartdisease','chol')])
model.fit(df,estimator=MaximumLikelihoodEstimator)
print('\n Inferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)
print('\n 1. Probability of HeartDisease given evidence= gender')
q1=HeartDisease_infer.query(variables=['heartdisease'],evidence={'gender':1})
print(q1)
print('\n 2. Probability of HeartDisease given evidence= restecg')
q2=HeartDisease_infer.query(variables=['heartdisease'],evidence={'restecg':1})
print(q2)
graph = nx.DiGraph(model.edges())
nx.draw_networkx(graph,with_labels=True)
#plot
plt.show()
OUTPUT