Machine Learning Techniques Lab: Session 2023-24, Even Semester
B.Tech. Semester VI
List of Experiments (continued)
6. Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to perform this task. (08/04/2024)
7. Write a program to construct a Bayesian network. (15/04/2024)
8. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using k-Means algorithm. (22/04/2024)
9. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions. (29/04/2024)
10. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select an appropriate data set for your experiment and draw graphs. (29/04/2024)
Experiment 1
Objective: Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data from a .CSV file.
Solution
import csv

def find_s_algorithm(data):
    # Initialize the hypothesis with the first positive training instance
    hypothesis = None
    for instance in data:
        if instance[-1] == 'Yes':
            hypothesis = list(instance[:-1])
            break
    if hypothesis is None:
        return None                      # no positive examples in the data
    # Iterate over the remaining training instances
    for instance in data:
        if instance[-1] == 'Yes':        # consider positive examples only
            for i in range(len(hypothesis)):
                # If an attribute value disagrees with the hypothesis, generalize it
                if hypothesis[i] != instance[i]:
                    hypothesis[i] = '?'  # '?' matches any value
    return hypothesis

def main():
    # Load training data from CSV file
    with open('training_data.csv', 'r') as file:
        reader = csv.reader(file)
        data = list(reader)
    # Apply FIND-S algorithm to find the most specific hypothesis
    hypothesis = find_s_algorithm(data)
    # Print the most specific hypothesis
    print("The most specific hypothesis is:", hypothesis)

if __name__ == "__main__":
    main()
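The program expects a training_data.csv file in the working directory. A minimal file consistent with the output below is the classic EnjoySport data; the exact contents are an assumption:

Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes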
Output
The most specific hypothesis is: ['Sunny', 'Warm', '?', 'Strong', '?', '?']
Experiment 2
Objective: For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate Elimination algorithm to output a description of the set of all
hypotheses consistent with the training examples.
Solution
import csv

def candidate_elimination_algorithm(data):
    n = len(data[0]) - 1
    # Initialize S to the first training example (assumed positive)
    # and G to the single most general hypothesis
    specific_h = list(data[0][:-1])
    general_h = [['?'] * n]
    for instance in data:
        attributes, label = instance[:-1], instance[-1]
        if label == 'Yes':
            # Positive example: generalize S just enough to cover it
            for i in range(n):
                if specific_h[i] != attributes[i]:
                    specific_h[i] = '?'
            # Remove from G any hypothesis that no longer covers the example
            general_h = [g for g in general_h
                         if all(g[i] == '?' or g[i] == attributes[i] for i in range(n))]
        else:
            # Negative example: specialize G minimally so it excludes the example
            new_general_h = []
            for g in general_h:
                for i in range(n):
                    if g[i] == '?' and specific_h[i] != '?' and specific_h[i] != attributes[i]:
                        h = g.copy()
                        h[i] = specific_h[i]
                        new_general_h.append(h)
            general_h = new_general_h
    return specific_h, general_h

def main():
    # Load training data from CSV file
    with open('training_data.csv', 'r') as file:
        data = list(csv.reader(file))
    specific_h, general_h = candidate_elimination_algorithm(data)
    print("Final specific hypothesis (S):", specific_h)
    print("Final general hypotheses (G):", general_h)

if __name__ == "__main__":
    main()
Output
Experiment 3
Objective: Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.
Solution
import numpy as np

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = {}

def entropy(class_labels):
    _, counts = np.unique(class_labels, return_counts=True)
    probabilities = counts / len(class_labels)
    return -np.sum(probabilities * np.log2(probabilities))

def information_gain(data, attribute, target):
    # Entropy reduction from splitting the data on the given attribute
    subsets = {v: [t for a, t in zip(data[attribute], data[target]) if a == v]
               for v in set(data[attribute])}
    remainder = sum(len(s) / len(data[target]) * entropy(s) for s in subsets.values())
    return entropy(data[target]) - remainder

def id3(data, attributes, target):
    labels = data[target]
    if len(set(labels)) == 1:              # pure node: return the class label
        return labels[0]
    if not attributes:                     # no attributes left: return the majority class
        return max(set(labels), key=labels.count)
    best = max(attributes, key=lambda a: information_gain(data, a, target))
    node = Node(best)
    for value in set(data[best]):
        rows = [i for i, v in enumerate(data[best]) if v == value]
        subset = {column: [data[column][i] for i in rows] for column in data}
        node.children[value] = id3(subset, [a for a in attributes if a != best], target)
    return node

def tree_to_dict(tree):
    # Convert the Node tree into nested dicts for printing
    if not isinstance(tree, Node):
        return tree
    return {'attribute': tree.attribute,
            'children': {v: tree_to_dict(c) for v, c in tree.children.items()}}

def predict(tree, sample):
    if not isinstance(tree, Node):         # leaf: the class label itself
        return tree
    attribute_value = sample[tree.attribute]
    if attribute_value not in tree.children:
        return "Can't classify"
    return predict(tree.children[attribute_value], sample)

def main():
    # Define the dataset
    data = {
        'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 'Sunny', 'Sunny',
                    'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain'],
        'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild',
                        'Mild', 'Mild', 'Hot', 'Mild'],
        'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal',
                     'Normal', 'Normal', 'High', 'Normal', 'High'],
        'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Weak', 'Weak',
                 'Strong', 'Strong', 'Weak', 'Strong'],
        'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes',
                       'Yes', 'No']
    }
    tree = id3(data, ['Outlook', 'Temperature', 'Humidity', 'Wind'], 'PlayTennis')
    print("Decision Tree:")
    print(tree_to_dict(tree))
    sample = {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Weak'}
    print(f"Predicted class for sample {sample}: {predict(tree, sample)}")

if __name__ == "__main__":
    main()
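As a quick sanity check of the entropy function: the full PlayTennis label set (9 Yes, 5 No) has entropy −(9/14)·log₂(9/14) − (5/14)·log₂(5/14) ≈ 0.940 bits, which the function reproduces:

print(round(entropy(['Yes'] * 9 + ['No'] * 5), 3))  # prints 0.94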
Output
Decision Tree:
{'attribute': 'Outlook', 'children': {'Sunny': {'attribute': 'Humidity', 'children': {'High': 'No', 'Normal':
'Yes'}}, 'Overcast': 'Yes', 'Rain': {'attribute': 'Wind', 'children': {'Weak': 'Yes', 'Strong': 'No'}}}}
Predicted class for sample {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind':
'Weak'}: No
Experiment 4
Objective: Build an Artificial Neural Network by implementing the Backpropagation algorithm and test it using an appropriate data set.
Solution
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # Randomly initialize the weights of the hidden and output layers
        self.W1 = np.random.randn(input_size, hidden_size)
        self.W2 = np.random.randn(hidden_size, output_size)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward(self, X):
        self.hidden = self.sigmoid(X @ self.W1)
        return self.sigmoid(self.hidden @ self.W2)

    def train(self, X, y, epochs=10000, learning_rate=0.5):
        for _ in range(epochs):
            output = self.forward(X)
            # Backpropagate the squared error through both layers
            delta_output = (y - output) * output * (1 - output)
            delta_hidden = (delta_output @ self.W2.T) * self.hidden * (1 - self.hidden)
            self.W2 += learning_rate * self.hidden.T @ delta_output
            self.W1 += learning_rate * X.T @ delta_hidden

    def predict(self, X):
        # Threshold the network output at 0.5 to obtain class labels
        return (self.forward(X) > 0.5).astype(int)

# Example usage
if __name__ == "__main__":
    np.random.seed(42)  # seed for reproducible weight initialization (assumed)
    # Example dataset for binary classification (XOR)
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])
    neural_net = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
    neural_net.train(X, y)
    # Predict
    predictions = neural_net.predict(X)
    print("Predictions:", predictions)
Output
Experiment 5
Objective: Write a program to implement the naïve Bayesian classifier and compute the accuracy of the classifier.
Solution
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# Calculate class priors and likelihoods (helper functions are sketched below)
prior_probabilities = calculate_priors(y_train)
feature_likelihoods = calculate_likelihoods(X_train, y_train)

# Make predictions
predictions = predict(X_test, prior_probabilities, feature_likelihoods)

# Compute accuracy
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy:.2f}")
Output
Accuracy: 1.00
Experiment 6
Objective: Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier
model to perform this task.
Solution
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Load dataset
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
train_data = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)
test_data = fetch_20newsgroups(subset='test', categories=categories, shuffle=True, random_state=42)

# Vectorize the documents (TF-IDF assumed; a CountVectorizer works equally well here)
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_data.data)
X_test = vectorizer.transform(test_data.data)

# Train the naive Bayesian classifier
clf = MultinomialNB()
clf.fit(X_train, train_data.target)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate with weighted averaging across the four categories
accuracy = accuracy_score(test_data.target, y_pred)
precision = precision_score(test_data.target, y_pred, average='weighted')
recall = recall_score(test_data.target, y_pred, average='weighted')

# Print results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
Output
Accuracy: 0.9340878828229028
Precision: 0.9347763292524094
Recall: 0.9340878828229028
Experiment 7
Objective: Write a program to construct a Bayesian network.
Solution
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import BayesianEstimator

# Define the dataset (you can replace this with your own dataset)
data = pd.DataFrame({
    'A': [0, 1, 1, 0, 0, 1, 1, 0, 1, 0],
    'B': [0, 0, 1, 1, 0, 1, 0, 1, 1, 0],
    'C': [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
    'D': [0, 0, 1, 1, 1, 0, 1, 1, 1, 0],
    'Class': ['No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No']
})

# Define the network structure (an assumed naive structure: every attribute points to Class)
model = BayesianNetwork([('A', 'Class'), ('B', 'Class'), ('C', 'Class'), ('D', 'Class')])

# Estimate the conditional probability tables from the data
model.fit(data, estimator=BayesianEstimator, prior_type='BDeu')

# Print the learned CPDs
for cpd in model.get_cpds():
    print(cpd)
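To query the fitted network, pgmpy's VariableElimination can be applied to the model constructed above; the evidence values are illustrative only:

from pgmpy.inference import VariableElimination

# Query P(Class | A=1, B=0) on the fitted model
inference = VariableElimination(model)
posterior = inference.query(variables=['Class'], evidence={'A': 1, 'B': 0})
print(posterior)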
Output
Experiment 8
Objective: Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using k-Means algorithm.
Solution
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt

# Load the data set from a .CSV file (file name and two-feature layout assumed)
X = pd.read_csv('data.csv').values

# Number of clusters
k = 3

# k-Means clustering
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(X)

# EM clustering via a Gaussian Mixture Model
gmm = GaussianMixture(n_components=k, random_state=42)
em_labels = gmm.fit_predict(X)

# Plot both clusterings side by side
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], c=kmeans_labels, cmap='viridis', edgecolor='k')
plt.title('k-Means Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.colorbar()
plt.subplot(1, 2, 2)
plt.scatter(X[:, 0], X[:, 1], c=em_labels, cmap='viridis', edgecolor='k')
plt.title('EM (GMM) Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.colorbar()
plt.tight_layout()
plt.show()
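If no suitable .CSV file is at hand, a synthetic two-feature file can be generated first; the file name data.csv, the three blobs, and their spread are all illustrative choices:

import pandas as pd
from sklearn.datasets import make_blobs

# Write 300 points drawn from three Gaussian blobs to data.csv
X, _ = make_blobs(n_samples=300, centers=3, cluster_std=1.0, random_state=42)
pd.DataFrame(X, columns=['Feature 1', 'Feature 2']).to_csv('data.csv', index=False)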
Output
Experiment 9
Objective: Write a program to implement k-Nearest Neighbour algorithm to classify the iris data
set. Print both correct and wrong predictions.
Solution
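Only the final reporting lines of this solution survive below. A minimal sketch of the preceding steps, assuming k = 3 and an 80/20 split (both assumptions, chosen to match the 30 test samples reported in the output); the lines that follow it then report the totals:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the iris data set and hold out 20% for testing
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the k-Nearest Neighbour classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Print each prediction, marked correct or wrong
correct_predictions = wrong_predictions = 0
for actual, predicted in zip(y_test, y_pred):
    if actual == predicted:
        correct_predictions += 1
        print(f"Correct: predicted {predicted}, actual {actual}")
    else:
        wrong_predictions += 1
        print(f"Wrong: predicted {predicted}, actual {actual}")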
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)
print("Number of correct predictions:", correct_predictions)
print("Number of wrong predictions:", wrong_predictions)
Output
Accuracy: 1.0
Number of correct predictions: 30
Number of wrong predictions: 0
Experiment 10
Objective: Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select an appropriate data set for your experiment and draw graphs.
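At each query point \(x_q\), locally weighted regression solves an ordinary least-squares problem in which nearby points receive larger weights:

\[
w_i = \exp\!\left(-\frac{(x_i - x_q)^2}{2\tau^2}\right), \qquad
\theta = (X^\top W X)^{-1} X^\top W y,
\]

where \(W = \mathrm{diag}(w_1, \dots, w_m)\) and the bandwidth \(\tau\) controls how local the fit is.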
Solution
import numpy as np
import matplotlib.pyplot as plt

def locally_weighted_regression(x, y, query_point, tau):
    # Gaussian kernel weights centred on the query point
    m = x.shape[0]
    weights = np.exp(-((x - query_point) ** 2) / (2 * tau * tau))
    W = np.diag(weights)
    # Design matrix with a bias column
    X = np.ones((m, 2))
    X[:, 1] = x
    # Weighted least squares: theta = (X^T W X)^(-1) X^T W y
    theta = np.linalg.pinv(X.T @ W @ X) @ X.T @ W @ y
    return theta[0] + theta[1] * query_point

# Generate a noisy sine curve as the data set
np.random.seed(0)
X = np.linspace(0, 10, 100)
y = np.sin(X) + np.random.normal(0, 0.1, 100)

# Fit locally at every query point and plot the result (tau = 0.5 is an assumed bandwidth)
tau = 0.5
y_pred = np.array([locally_weighted_regression(X, y, q, tau) for q in X])
plt.scatter(X, y, s=10, label='Data')
plt.plot(X, y_pred, color='red', label=f'LWR fit (tau={tau})')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Locally Weighted Regression')
plt.legend()
plt.show()