ML File


FIND-S ALGORITHM

import csv

def more_general(h1, h2):
    # True when hypothesis h1 is at least as general as h2, attribute by attribute
    return all(h1[i] == '?' or h1[i] == h2[i] for i in range(len(h1)))

def find_s(data):
    # FIND-S generalises the most specific hypothesis over the positive examples only.
    # Assumption: the class label sits in the first column and 'Yes' marks a positive example.
    positives = [row[1:] for row in data if row[0].strip().lower() == 'yes']
    most_specific = list(positives[0])
    for attrs in positives[1:]:
        current = ['?' if attrs[i] != most_specific[i] else most_specific[i]
                   for i in range(len(most_specific))]
        if more_general(current, most_specific):
            most_specific = current
    return most_specific

with open('training_data.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    data = list(reader)

print("Most Specific Hypothesis:", find_s(data))


CANDIDATE ELIMINATION ALGORITHM

import csv

def more_general(h1, h2):
    # True when hypothesis h1 is at least as general as h2, attribute by attribute
    return all(h1[i] == '?' or h1[i] == h2[i] for i in range(len(h1)))

def find_s(data):
    # Specific boundary S: generalise over the positive examples only.
    # Assumption: the class label sits in the first column and 'Yes' marks a positive example.
    positives = [row[1:] for row in data if row[0].strip().lower() == 'yes']
    most_specific = list(positives[0])
    for attrs in positives[1:]:
        most_specific = ['?' if attrs[i] != most_specific[i] else most_specific[i]
                         for i in range(len(most_specific))]
    return most_specific

def find_g(data, s):
    # General boundary G: start from the all-'?' hypothesis and minimally specialise it
    # against each negative example, drawing replacement values from the specific boundary s.
    n = len(s)
    general = [['?'] * n]
    for row in data:
        if row[0].strip().lower() == 'yes':
            continue
        attrs = row[1:]
        specialised = []
        for g in general:
            if not all(g[i] == '?' or g[i] == attrs[i] for i in range(n)):
                specialised.append(g)  # g already rejects this negative example
                continue
            for i in range(n):
                if g[i] == '?' and s[i] != '?' and s[i] != attrs[i]:
                    h = list(g)
                    h[i] = s[i]
                    specialised.append(h)
        general = specialised
    # Keep only hypotheses that remain more general than the specific boundary
    return [g for g in general if more_general(g, s)]

with open('training_data.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    data = list(reader)

s = find_s(data)
g = find_g(data, s)
print("Version space boundaries consistent with the training examples:")
print("Specific boundary S:", s)
print("General boundary G:", g)


DECISION TREE – ID3

import math

def entropy(data, target_attr):
    # Shannon entropy of the class label distribution in `data`
    val_freq = {}
    for record in data:
        val_freq[record[target_attr]] = val_freq.get(record[target_attr], 0) + 1
    return sum(-freq/len(data) * math.log2(freq/len(data)) for freq in val_freq.values())

def info_gain(data, attr, target_attr):
    # Information gain = entropy before the split minus the expected entropy after it
    base_entropy = entropy(data, target_attr)
    attr_vals = set(record[attr] for record in data)
    exp_entropy = sum((len([rec for rec in data if rec[attr] == val]) / len(data)) *
                      entropy([rec for rec in data if rec[attr] == val], target_attr)
                      for val in attr_vals)
    return base_entropy - exp_entropy

def id3(data, attrs, target_attr):
    base_entropy = entropy(data, target_attr)
    if base_entropy == 0:
        # All remaining examples share one class: return it as a leaf
        return next(iter(set(record[target_attr] for record in data)))
    elif len(attrs) == 0:
        # No attributes left to split on: return the majority class
        return max(set(record[target_attr] for record in data),
                   key=[record[target_attr] for record in data].count)
    else:
        # Split on the attribute with the highest information gain
        attr_gains = [info_gain(data, attr, target_attr) for attr in attrs]
        selected_attr = attrs[attr_gains.index(max(attr_gains))]
        node = {attributes[selected_attr]: {}}  # use the attribute name (from the global list) as the node key
        attr_values = set(record[selected_attr] for record in data)
        for value in attr_values:
            new_data = [record for record in data if record[selected_attr] == value]
            new_attrs = attrs.copy()
            new_attrs.remove(selected_attr)
            node[attributes[selected_attr]][value] = id3(new_data, new_attrs, target_attr)
        return node

def classify(tree, sample):
    for attr, values in tree.items():
        value = sample[attributes.index(attr)]
        if value in values:
            child = values[value]
            return classify(child, sample) if isinstance(child, dict) else child

# Play Tennis training data: Outlook, Temperature, Humidity, Wind ('True' = strong), Play Tennis
data = [['Sunny', 'Hot', 'High', 'False', 'No'],
        ['Sunny', 'Hot', 'High', 'True', 'No'],
        ['Overcast', 'Hot', 'High', 'False', 'Yes'],
        ['Rain', 'Mild', 'High', 'False', 'Yes'],
        ['Rain', 'Cool', 'Normal', 'False', 'Yes'],
        ['Rain', 'Cool', 'Normal', 'True', 'No'],
        ['Overcast', 'Cool', 'Normal', 'True', 'Yes'],
        ['Sunny', 'Mild', 'High', 'False', 'No'],
        ['Sunny', 'Cool', 'Normal', 'False', 'Yes'],
        ['Rain', 'Mild', 'Normal', 'False', 'Yes'],
        ['Sunny', 'Mild', 'Normal', 'True', 'Yes'],
        ['Overcast', 'Mild', 'High', 'True', 'Yes'],
        ['Overcast', 'Hot', 'Normal', 'False', 'Yes'],
        ['Rain', 'Mild', 'High', 'True', 'No']]

attributes = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play Tennis']

tree = id3(data, [i for i in range(len(data[0]) - 1)], len(data[0]) - 1)
print("Decision Tree:", tree)

sample = ['Rain', 'Mild', 'High', 'False']  # a wind value is included so the sample covers every attribute
prediction = classify(tree, sample)
print("Prediction for sample", sample, ":", prediction)


Backpropagation algorithm --

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load the iris data and one-hot encode the class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target
y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training data only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Network dimensions and randomly initialised parameters
input_size, hidden_size, output_size = 4, 8, 3
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_derivative = lambda x: x * (1 - x)  # derivative expressed in terms of the sigmoid output

def backpropagation(X, y, W1, b1, W2, b2, learning_rate=0.1, epochs=10000):
    for _ in range(epochs):
        # Forward pass
        layer1 = sigmoid(np.dot(X, W1) + b1)
        layer2 = sigmoid(np.dot(layer1, W2) + b2)
        # Backward pass: propagate the output error through the network
        error = y - layer2
        delta2 = error * sigmoid_derivative(layer2)
        W2_grad, b2_grad = np.dot(layer1.T, delta2), np.sum(delta2, axis=0, keepdims=True)
        delta1 = np.dot(delta2, W2.T) * sigmoid_derivative(layer1)
        W1_grad, b1_grad = np.dot(X.T, delta1), np.sum(delta1, axis=0, keepdims=True)
        # Update the weights and biases
        W2 += learning_rate * W2_grad
        b2 += learning_rate * b2_grad
        W1 += learning_rate * W1_grad
        b1 += learning_rate * b1_grad
    return W1, b1, W2, b2

W1, b1, W2, b2 = backpropagation(X_train, y_train, W1, b1, W2, b2)

def predict(X, W1, b1, W2, b2):
    layer1 = sigmoid(np.dot(X, W1) + b1)
    layer2 = sigmoid(np.dot(layer1, W2) + b2)
    return np.argmax(layer2, axis=1)

y_pred = predict(X_test, W1, b1, W2, b2)
accuracy = np.mean(y_pred == np.argmax(y_test, axis=1))
print(f"Test accuracy: {accuracy * 100:.2f}%")


naïve Bayesian classifier—

import csv
import math

def load_data(filename):
    return [row for row in csv.reader(open(filename))]

def split_data(data):
    features = [row[:-1] for row in data]
    labels = [row[-1] for row in data]
    return features, labels

def is_number(x):
    # True if the string can be parsed as a number (handles decimals, unlike str.isdigit)
    try:
        float(x)
        return True
    except ValueError:
        return False

def get_stats(values):
    # Mean and standard deviation of a numeric feature column
    values = [float(x) for x in values]
    mean = sum(values) / len(values)
    stdev = (sum((x - mean) ** 2 for x in values) / len(values)) ** 0.5
    return mean, stdev

def pdf(x, mean, stdev):
    # Gaussian probability density; a tiny floor on stdev avoids division by zero
    stdev = max(stdev, 1e-6)
    return math.exp(-((x - mean) ** 2) / (2 * stdev ** 2)) / (stdev * math.sqrt(2 * math.pi))

def prob_cat(value, values):
    # Relative frequency of a categorical value, with add-one smoothing to avoid log(0)
    return (values.count(value) + 1) / (len(values) + len(set(values)))

def train(features, labels):
    label_counts = {label: labels.count(label) for label in set(labels)}
    feature_stats = []
    for i in range(len(features[0])):
        stats = {}
        for label in set(labels):
            # Values of the i-th feature restricted to rows of this class
            column = [row[i] for row, lab in zip(features, labels) if lab == label]
            if all(is_number(x) for x in column):
                stats[label] = get_stats(column)  # numeric feature -> (mean, stdev)
            else:
                stats[label] = column             # categorical feature -> raw values
        feature_stats.append(stats)
    return label_counts, feature_stats

def classify(feature_vector, label_counts, feature_stats):
    total = sum(label_counts.values())
    label_probs = {label: math.log(count / total) for label, count in label_counts.items()}
    for i, feature_value in enumerate(feature_vector):
        for label, stats in feature_stats[i].items():
            if isinstance(stats, tuple):
                mean, stdev = stats
                label_probs[label] += math.log(pdf(float(feature_value), mean, stdev))
            else:
                label_probs[label] += math.log(prob_cat(feature_value, stats))
    return max(label_probs.items(), key=lambda x: x[1])[0]

def compute_accuracy(test_features, test_labels, label_counts, feature_stats):
    correct = sum(classify(fv, label_counts, feature_stats) == label
                  for fv, label in zip(test_features, test_labels))
    return correct / len(test_labels)

training_data = load_data('training_data.csv')
features, labels = split_data(training_data)
label_counts, feature_stats = train(features, labels)

test_data = load_data('test_data.csv')
test_features, test_labels = split_data(test_data)
accuracy = compute_accuracy(test_features, test_labels, label_counts, feature_stats)
print(f"Accuracy: {accuracy * 100:.2f}%")


Bayesian network—

import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load the heart disease data (UCI Cleveland attributes; the file name is assumed)
heart_data = pd.read_csv('heart.csv')

# Construct the Bayesian network: each recorded attribute is a parent of the target 'num'
model = BayesianModel([('age', 'num'), ('sex', 'num'), ('cp', 'num'), ('trestbps', 'num'),
                       ('chol', 'num'), ('fbs', 'num'), ('restecg', 'num'), ('thalach', 'num'),
                       ('exang', 'num'), ('oldpeak', 'num'), ('slope', 'num'), ('ca', 'num'),
                       ('thal', 'num')])
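
An optional pre-processing sketch (an assumption, not part of the original listing): pgmpy's maximum-likelihood estimation treats every column as a discrete variable, so binning the continuous columns keeps the conditional probability tables small. If this step is used, the patient evidence further down must be binned with the same cut points.

# Bin the continuous columns into four discrete levels (illustrative only)
for col in ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']:
    heart_data[col] = pd.cut(heart_data[col], bins=4, labels=False)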

# Estimate the parameters (CPDs) of the Bayesian network by maximum likelihood
model.fit(heart_data, estimator=MaximumLikelihoodEstimator)

# Perform inference with variable elimination
inference = VariableElimination(model)

# Example: predict the probability of heart disease for a new patient
# (every evidence value must be a state that occurs in the training data)
patient_data = {'age': 50, 'sex': 1, 'cp': 3, 'trestbps': 130, 'chol': 250, 'fbs': 0,
                'restecg': 0, 'thalach': 180, 'exang': 0, 'oldpeak': 0.8, 'slope': 2,
                'ca': 0, 'thal': 3}

# Query the posterior distribution of 'num' given the patient's attributes
query = inference.query(variables=['num'], evidence=patient_data)

# Print the distribution; the last state of 'num' is the most severe diagnosis
print(query)
print(f"Probability of heart disease (last state of 'num'): {query.values[-1]:.2f}")


EM algorithm—

import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Load the dataset from a CSV file
data = pd.read_csv('dataset.csv')
X = data.iloc[:, :-1].values  # Features
y = data.iloc[:, -1].values   # Labels (for evaluation purposes)

# EM Clustering
print("EM Clustering:")
em = GaussianMixture(n_components=3, covariance_type='full', max_iter=100, random_state=42)
em_labels = em.fit_predict(X)
em_score = silhouette_score(X, em_labels)
print(f"Silhouette Score: {em_score:.2f}")

# k-Means Clustering
print("\nk-Means Clustering:")
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(X)
kmeans_score = silhouette_score(X, kmeans_labels)
print(f"Silhouette Score: {kmeans_score:.2f}")

# Compare the results
print("\nComparison:")
if em_score > kmeans_score:
    print("EM algorithm performs better than k-Means for this dataset.")
else:
    print("k-Means algorithm performs better than EM for this dataset.")


k-Nearest Neighbour algorithm—

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a kNN classifier object
knn = KNeighborsClassifier(n_neighbors=3)

# Train the classifier
knn.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print correct and wrong predictions
print("\nCorrect Predictions:")
correct_indices = [i for i, (pred, true) in enumerate(zip(y_pred, y_test)) if pred == true]
for i in correct_indices:
    print(f"Instance {i+1}: Predicted: {iris.target_names[y_pred[i]]} (Correct: {iris.target_names[y_test[i]]})")

print("\nWrong Predictions:")
wrong_indices = [i for i, (pred, true) in enumerate(zip(y_pred, y_test)) if pred != true]
for i in wrong_indices:
    print(f"Instance {i+1}: Predicted: {iris.target_names[y_pred[i]]} (Correct: {iris.target_names[y_test[i]]})")

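
A brief illustrative follow-up (not in the original listing): cross-validation on the training split is one way to check whether k=3 is a reasonable choice.

from sklearn.model_selection import cross_val_score
for k in (1, 3, 5, 7):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), X_train, y_train, cv=5)
    print(f"k={k}: mean CV accuracy {scores.mean():.2f}")
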
non-parametric Locally Weighted Regression algorithm—

import numpy as np
import matplotlib.pyplot as plt

# Sample dataset: a noisy sine curve
X = np.linspace(-3, 3, 50)
y = np.sin(X) + np.random.normal(0, 0.2, len(X))

# Function to calculate the weight for a given x and x_i (Gaussian kernel with bandwidth tau)
def weight(x, x_i, tau):
    return np.exp(-(x - x_i)**2 / (2 * tau**2))

# LWR function: fit a weighted linear model around the query point x and return its prediction
def lwr(x, X, y, tau):
    weights = np.array([weight(x, x_i, tau) for x_i in X])
    W = np.diag(weights)
    X_mat = np.vstack([np.ones(len(X)), X]).T
    theta = np.linalg.pinv(X_mat.T @ W @ X_mat) @ (X_mat.T @ W @ y)
    return theta[0] + theta[1] * x  # predicted value at x: intercept + slope * x

# LWR predictions
tau = 1.0
y_pred = [lwr(x_val, X, y, tau) for x_val in X]

# Plot the data and LWR fit
plt.figure(figsize=(10, 6))
plt.scatter(X, y, label='Data Points', color='b', marker='o')
plt.plot(X, y_pred, label='LWR Fit', color='r')
plt.xlabel('X')
plt.ylabel('y')
plt.title('Locally Weighted Regression')
plt.legend()
plt.show()
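
As an illustrative extra (not in the original listing), re-running the fit with a smaller bandwidth shows how tau controls how local the regression is: small values hug the noise, large values smooth it out.

tau_small = 0.3
plt.figure(figsize=(10, 6))
plt.scatter(X, y, label='Data Points', color='b', marker='o')
plt.plot(X, [lwr(x_val, X, y, tau_small) for x_val in X], label=f'LWR Fit (tau={tau_small})', color='g')
plt.legend()
plt.show()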
