ML File


FIND-S ALGORITHM

import csv

def more_general(h1, h2):
    # True when hypothesis h1 is at least as general as h2, attribute by attribute
    return all(h1[i] == '?' or h1[i] == h2[i] for i in range(len(h1)))

def find_s(data):
    # FIND-S generalises the most specific hypothesis over the positive examples only.
    # Assumption: the class label sits in the first column and 'Yes' marks a positive example.
    positives = [row[1:] for row in data if row[0].strip().lower() == 'yes']
    most_specific = list(positives[0])
    for attrs in positives[1:]:
        current = ['?' if attrs[i] != most_specific[i] else most_specific[i]
                   for i in range(len(most_specific))]
        if more_general(current, most_specific):
            most_specific = current
    return most_specific

with open('training_data.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    data = list(reader)

print("Most Specific Hypothesis:", find_s(data))


CANDIDATE ELIMINATION ALGORITHM

import csv

def more_general(h1, h2):
    # True when hypothesis h1 is at least as general as h2, attribute by attribute
    return all(h1[i] == '?' or h1[i] == h2[i] for i in range(len(h1)))

def find_s(data):
    # Specific boundary S: generalise over the positive examples only.
    # Assumption: the class label sits in the first column and 'Yes' marks a positive example.
    positives = [row[1:] for row in data if row[0].strip().lower() == 'yes']
    most_specific = list(positives[0])
    for attrs in positives[1:]:
        most_specific = ['?' if attrs[i] != most_specific[i] else most_specific[i]
                         for i in range(len(most_specific))]
    return most_specific

def find_g(data, s):
    # General boundary G: start from the all-'?' hypothesis and minimally specialise it
    # against each negative example, drawing replacement values from the specific boundary s.
    n = len(s)
    general = [['?'] * n]
    for row in data:
        if row[0].strip().lower() == 'yes':
            continue
        attrs = row[1:]
        specialised = []
        for g in general:
            if not all(g[i] == '?' or g[i] == attrs[i] for i in range(n)):
                specialised.append(g)  # g already rejects this negative example
                continue
            for i in range(n):
                if g[i] == '?' and s[i] != '?' and s[i] != attrs[i]:
                    h = list(g)
                    h[i] = s[i]
                    specialised.append(h)
        general = specialised
    # Keep only hypotheses that remain more general than the specific boundary
    return [g for g in general if more_general(g, s)]

with open('training_data.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    data = list(reader)

s = find_s(data)
g = find_g(data, s)
print("Version space boundaries consistent with the training examples:")
print("Specific boundary S:", s)
print("General boundary G:", g)


DECISION TREE – ID3

import math

def entropy(data, target_attr):
    # Shannon entropy of the class label distribution in `data`
    val_freq = {}
    for record in data:
        val_freq[record[target_attr]] = val_freq.get(record[target_attr], 0) + 1
    return sum(-freq/len(data) * math.log2(freq/len(data)) for freq in val_freq.values())

def info_gain(data, attr, target_attr):
    # Information gain = entropy before the split minus the expected entropy after it
    base_entropy = entropy(data, target_attr)
    attr_vals = set(record[attr] for record in data)
    exp_entropy = sum((len([rec for rec in data if rec[attr] == val]) / len(data)) *
                      entropy([rec for rec in data if rec[attr] == val], target_attr)
                      for val in attr_vals)
    return base_entropy - exp_entropy

def id3(data, attrs, target_attr):
    base_entropy = entropy(data, target_attr)
    if base_entropy == 0:
        # All remaining examples share one class: return it as a leaf
        return next(iter(set(record[target_attr] for record in data)))
    elif len(attrs) == 0:
        # No attributes left to split on: return the majority class
        return max(set(record[target_attr] for record in data),
                   key=[record[target_attr] for record in data].count)
    else:
        # Split on the attribute with the highest information gain
        attr_gains = [info_gain(data, attr, target_attr) for attr in attrs]
        selected_attr = attrs[attr_gains.index(max(attr_gains))]
        node = {attributes[selected_attr]: {}}  # use the attribute name (from the global list) as the node key
        attr_values = set(record[selected_attr] for record in data)
        for value in attr_values:
            new_data = [record for record in data if record[selected_attr] == value]
            new_attrs = attrs.copy()
            new_attrs.remove(selected_attr)
            node[attributes[selected_attr]][value] = id3(new_data, new_attrs, target_attr)
        return node

def classify(tree, sample):
    for attr, values in tree.items():
        value = sample[attributes.index(attr)]
        if value in values:
            child = values[value]
            return classify(child, sample) if isinstance(child, dict) else child

# Play Tennis training data: Outlook, Temperature, Humidity, Wind ('True' = strong), Play Tennis
data = [['Sunny', 'Hot', 'High', 'False', 'No'],
        ['Sunny', 'Hot', 'High', 'True', 'No'],
        ['Overcast', 'Hot', 'High', 'False', 'Yes'],
        ['Rain', 'Mild', 'High', 'False', 'Yes'],
        ['Rain', 'Cool', 'Normal', 'False', 'Yes'],
        ['Rain', 'Cool', 'Normal', 'True', 'No'],
        ['Overcast', 'Cool', 'Normal', 'True', 'Yes'],
        ['Sunny', 'Mild', 'High', 'False', 'No'],
        ['Sunny', 'Cool', 'Normal', 'False', 'Yes'],
        ['Rain', 'Mild', 'Normal', 'False', 'Yes'],
        ['Sunny', 'Mild', 'Normal', 'True', 'Yes'],
        ['Overcast', 'Mild', 'High', 'True', 'Yes'],
        ['Overcast', 'Hot', 'Normal', 'False', 'Yes'],
        ['Rain', 'Mild', 'High', 'True', 'No']]

attributes = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play Tennis']

tree = id3(data, [i for i in range(len(data[0]) - 1)], len(data[0]) - 1)
print("Decision Tree:", tree)

sample = ['Rain', 'Mild', 'High', 'False']  # a wind value is included so the sample covers every attribute
prediction = classify(tree, sample)
print("Prediction for sample", sample, ":", prediction)


Backpropagation algorithm --

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load the iris data and one-hot encode the class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target
y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training data only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Network dimensions and randomly initialised parameters
input_size, hidden_size, output_size = 4, 8, 3
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_derivative = lambda x: x * (1 - x)  # derivative expressed in terms of the sigmoid output

def backpropagation(X, y, W1, b1, W2, b2, learning_rate=0.1, epochs=10000):
    for _ in range(epochs):
        # Forward pass
        layer1 = sigmoid(np.dot(X, W1) + b1)
        layer2 = sigmoid(np.dot(layer1, W2) + b2)
        # Backward pass: propagate the output error through the network
        error = y - layer2
        delta2 = error * sigmoid_derivative(layer2)
        W2_grad, b2_grad = np.dot(layer1.T, delta2), np.sum(delta2, axis=0, keepdims=True)
        delta1 = np.dot(delta2, W2.T) * sigmoid_derivative(layer1)
        W1_grad, b1_grad = np.dot(X.T, delta1), np.sum(delta1, axis=0, keepdims=True)
        # Update the weights and biases
        W2 += learning_rate * W2_grad
        b2 += learning_rate * b2_grad
        W1 += learning_rate * W1_grad
        b1 += learning_rate * b1_grad
    return W1, b1, W2, b2

W1, b1, W2, b2 = backpropagation(X_train, y_train, W1, b1, W2, b2)

def predict(X, W1, b1, W2, b2):
    layer1 = sigmoid(np.dot(X, W1) + b1)
    layer2 = sigmoid(np.dot(layer1, W2) + b2)
    return np.argmax(layer2, axis=1)

y_pred = predict(X_test, W1, b1, W2, b2)
accuracy = np.mean(y_pred == np.argmax(y_test, axis=1))
print(f"Test accuracy: {accuracy * 100:.2f}%")


naïve Bayesian classifier—

import csv
import math

def load_data(filename):
    return [row for row in csv.reader(open(filename))]

def split_data(data):
    features = [row[:-1] for row in data]
    labels = [row[-1] for row in data]
    return features, labels

def is_number(x):
    # True if the string can be parsed as a number (handles decimals, unlike str.isdigit)
    try:
        float(x)
        return True
    except ValueError:
        return False

def get_stats(values):
    # Mean and standard deviation of a numeric feature column
    values = [float(x) for x in values]
    mean = sum(values) / len(values)
    stdev = (sum((x - mean) ** 2 for x in values) / len(values)) ** 0.5
    return mean, stdev

def pdf(x, mean, stdev):
    # Gaussian probability density; a tiny floor on stdev avoids division by zero
    stdev = max(stdev, 1e-6)
    return math.exp(-((x - mean) ** 2) / (2 * stdev ** 2)) / (stdev * math.sqrt(2 * math.pi))

def prob_cat(value, values):
    # Relative frequency of a categorical value, with add-one smoothing to avoid log(0)
    return (values.count(value) + 1) / (len(values) + len(set(values)))

def train(features, labels):
    label_counts = {label: labels.count(label) for label in set(labels)}
    feature_stats = []
    for i in range(len(features[0])):
        stats = {}
        for label in set(labels):
            # Values of the i-th feature restricted to rows of this class
            column = [row[i] for row, lab in zip(features, labels) if lab == label]
            if all(is_number(x) for x in column):
                stats[label] = get_stats(column)  # numeric feature -> (mean, stdev)
            else:
                stats[label] = column             # categorical feature -> raw values
        feature_stats.append(stats)
    return label_counts, feature_stats

def classify(feature_vector, label_counts, feature_stats):
    total = sum(label_counts.values())
    label_probs = {label: math.log(count / total) for label, count in label_counts.items()}
    for i, feature_value in enumerate(feature_vector):
        for label, stats in feature_stats[i].items():
            if isinstance(stats, tuple):
                mean, stdev = stats
                label_probs[label] += math.log(pdf(float(feature_value), mean, stdev))
            else:
                label_probs[label] += math.log(prob_cat(feature_value, stats))
    return max(label_probs.items(), key=lambda x: x[1])[0]

def compute_accuracy(test_features, test_labels, label_counts, feature_stats):
    correct = sum(classify(fv, label_counts, feature_stats) == label
                  for fv, label in zip(test_features, test_labels))
    return correct / len(test_labels)

training_data = load_data('training_data.csv')
features, labels = split_data(training_data)
label_counts, feature_stats = train(features, labels)

test_data = load_data('test_data.csv')
test_features, test_labels = split_data(test_data)
accuracy = compute_accuracy(test_features, test_labels, label_counts, feature_stats)
print(f"Accuracy: {accuracy * 100:.2f}%")


Bayesian network—

import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load the heart disease data (UCI Cleveland attributes; the file name is assumed)
heart_data = pd.read_csv('heart.csv')

# Construct the Bayesian network: each recorded attribute is a parent of the target 'num'
model = BayesianModel([('age', 'num'), ('sex', 'num'), ('cp', 'num'), ('trestbps', 'num'),
                       ('chol', 'num'), ('fbs', 'num'), ('restecg', 'num'), ('thalach', 'num'),
                       ('exang', 'num'), ('oldpeak', 'num'), ('slope', 'num'), ('ca', 'num'),
                       ('thal', 'num')])
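
An optional pre-processing sketch (an assumption, not part of the original listing): pgmpy's maximum-likelihood estimation treats every column as a discrete variable, so binning the continuous columns keeps the conditional probability tables small. If this step is used, the patient evidence further down must be binned with the same cut points.

# Bin the continuous columns into four discrete levels (illustrative only)
for col in ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']:
    heart_data[col] = pd.cut(heart_data[col], bins=4, labels=False)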

# Estimate the parameters (CPDs) of the Bayesian network by maximum likelihood
model.fit(heart_data, estimator=MaximumLikelihoodEstimator)

# Perform inference with variable elimination
inference = VariableElimination(model)

# Example: predict the probability of heart disease for a new patient
# (every evidence value must be a state that occurs in the training data)
patient_data = {'age': 50, 'sex': 1, 'cp': 3, 'trestbps': 130, 'chol': 250, 'fbs': 0,
                'restecg': 0, 'thalach': 180, 'exang': 0, 'oldpeak': 0.8, 'slope': 2,
                'ca': 0, 'thal': 3}

# Query the posterior distribution of 'num' given the patient's attributes
query = inference.query(variables=['num'], evidence=patient_data)

# Print the distribution; the last state of 'num' is the most severe diagnosis
print(query)
print(f"Probability of heart disease (last state of 'num'): {query.values[-1]:.2f}")


EM algorithm—

import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Load the dataset from a CSV file
data = pd.read_csv('dataset.csv')
X = data.iloc[:, :-1].values  # Features
y = data.iloc[:, -1].values   # Labels (for evaluation purposes)

# EM Clustering
print("EM Clustering:")
em = GaussianMixture(n_components=3, covariance_type='full', max_iter=100, random_state=42)
em_labels = em.fit_predict(X)
em_score = silhouette_score(X, em_labels)
print(f"Silhouette Score: {em_score:.2f}")

# k-Means Clustering
print("\nk-Means Clustering:")
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(X)
kmeans_score = silhouette_score(X, kmeans_labels)
print(f"Silhouette Score: {kmeans_score:.2f}")

# Compare the results
print("\nComparison:")
if em_score > kmeans_score:
    print("EM algorithm performs better than k-Means for this dataset.")
else:
    print("k-Means algorithm performs better than EM for this dataset.")


k-Nearest Neighbour algorithm—

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a kNN classifier object
knn = KNeighborsClassifier(n_neighbors=3)

# Train the classifier
knn.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print correct and wrong predictions
print("\nCorrect Predictions:")
correct_indices = [i for i, (pred, true) in enumerate(zip(y_pred, y_test)) if pred == true]
for i in correct_indices:
    print(f"Instance {i+1}: Predicted: {iris.target_names[y_pred[i]]} (Correct: {iris.target_names[y_test[i]]})")

print("\nWrong Predictions:")
wrong_indices = [i for i, (pred, true) in enumerate(zip(y_pred, y_test)) if pred != true]
for i in wrong_indices:
    print(f"Instance {i+1}: Predicted: {iris.target_names[y_pred[i]]} (Correct: {iris.target_names[y_test[i]]})")

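
A brief illustrative follow-up (not in the original listing): cross-validation on the training split is one way to check whether k=3 is a reasonable choice.

from sklearn.model_selection import cross_val_score
for k in (1, 3, 5, 7):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), X_train, y_train, cv=5)
    print(f"k={k}: mean CV accuracy {scores.mean():.2f}")
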
non-parametric Locally Weighted Regression algorithm—

import numpy as np
import matplotlib.pyplot as plt

# Sample dataset: a noisy sine curve
X = np.linspace(-3, 3, 50)
y = np.sin(X) + np.random.normal(0, 0.2, len(X))

# Function to calculate the weight for a given x and x_i (Gaussian kernel with bandwidth tau)
def weight(x, x_i, tau):
    return np.exp(-(x - x_i)**2 / (2 * tau**2))

# LWR function: fit a weighted linear model around the query point x and return its prediction
def lwr(x, X, y, tau):
    weights = np.array([weight(x, x_i, tau) for x_i in X])
    W = np.diag(weights)
    X_mat = np.vstack([np.ones(len(X)), X]).T
    theta = np.linalg.pinv(X_mat.T @ W @ X_mat) @ (X_mat.T @ W @ y)
    return theta[0] + theta[1] * x  # predicted value at x: intercept + slope * x

# LWR predictions
tau = 1.0
y_pred = [lwr(x_val, X, y, tau) for x_val in X]

# Plot the data and LWR fit
plt.figure(figsize=(10, 6))
plt.scatter(X, y, label='Data Points', color='b', marker='o')
plt.plot(X, y_pred, label='LWR Fit', color='r')
plt.xlabel('X')
plt.ylabel('y')
plt.title('Locally Weighted Regression')
plt.legend()
plt.show()
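
As an illustrative extra (not in the original listing), re-running the fit with a smaller bandwidth shows how tau controls how local the regression is: small values hug the noise, large values smooth it out.

tau_small = 0.3
plt.figure(figsize=(10, 6))
plt.scatter(X, y, label='Data Points', color='b', marker='o')
plt.plot(X, [lwr(x_val, X, y, tau_small) for x_val in X], label=f'LWR Fit (tau={tau_small})', color='g')
plt.legend()
plt.show()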
