
Aml - Lab (1-6)

The document contains six experiments related to advanced machine learning techniques, including Bayesian probability, k-means clustering, k-nearest neighbors classification, decision tree classification, and linear regression. Each experiment includes a description, Python code implementation, and output results. The experiments demonstrate various machine learning concepts and their practical applications using synthetic and real datasets.

20A33601P ADVANCED MACHINE LEARNING LAB

EXPERIMENT: 1
The probability that it is Friday and that a student is absent is 3% (since there are 5
school days in a week). The probability that it is Friday is 20%. What is the probability that a
student is absent given that today is Friday? Apply Bayes' rule in Python to get the result.
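
By Bayes' rule for conditional probability, P(Absent | Friday) = P(Friday and Absent) / P(Friday) = 0.03 / 0.20 = 0.15, which is the value the program below computes.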

PROGRAM:
# Function to calculate conditional probability using Bayes' Rule
def calculate_conditional_probability(P_joint, P_event):
    """
    Calculate P(A | B) using Bayes' Rule:
    P(A | B) = P(A and B) / P(B)

    :param P_joint: Probability of A and B (P(A and B))
    :param P_event: Probability of B (P(B))
    :return: Conditional probability P(A | B)
    """
    return P_joint / P_event

# Given probabilities
P_Friday_and_Absent = 0.03  # P(Friday and Absent)
P_Friday = 0.20             # P(Friday)

# Calculate P(Absent | Friday)
P_Absent_given_Friday = calculate_conditional_probability(P_Friday_and_Absent, P_Friday)

# Output the result
print(f"The probability that a student is absent given that today is Friday is: {P_Absent_given_Friday:.2f}")

OUTPUT:
The probability that a student is absent given that today is Friday is: 0.15
EXPERIMENT: 2
Create a k-means clustering algorithm from scratch in Python.
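
K-means seeks k centroids that minimise the within-cluster sum of squared distances, J = Σ_k Σ_{x ∈ C_k} ||x − μ_k||². The implementation below does this iteratively: it assigns each point to its nearest centroid, recomputes each centroid as the mean of its assigned points, and stops when the centroids move by less than a tolerance or the maximum number of iterations is reached.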

PROGRAM:
import numpy as np
import matplotlib.pyplot as plt

class KMeans:
    def __init__(self, k=3, max_iters=100, tolerance=1e-4):
        """
        K-Means clustering algorithm.

        Args:
            k (int): Number of clusters.
            max_iters (int): Maximum number of iterations.
            tolerance (float): Tolerance for convergence.
        """
        self.k = k
        self.max_iters = max_iters
        self.tolerance = tolerance
        self.centroids = None
        self.labels = None

    def fit(self, data):
        """
        Fits the K-Means model to the data.

        Args:
            data (numpy.ndarray): Input data array of shape (n_samples, n_features).
        """
        # Randomly initialize centroids
        np.random.seed(42)
        random_indices = np.random.permutation(data.shape[0])[:self.k]
        self.centroids = data[random_indices]

        for i in range(self.max_iters):
            # Assign clusters
            self.labels = self._assign_clusters(data)

            # Compute new centroids
            new_centroids = self._compute_centroids(data)

            # Check for convergence
            if np.all(np.abs(new_centroids - self.centroids) < self.tolerance):
                break
            self.centroids = new_centroids

    def _assign_clusters(self, data):
        """
        Assigns each data point to the nearest centroid.

        Args:
            data (numpy.ndarray): Input data array.

        Returns:
            numpy.ndarray: Cluster labels for each data point.
        """
        distances = np.linalg.norm(data[:, np.newaxis] - self.centroids, axis=2)
        return np.argmin(distances, axis=1)

    def _compute_centroids(self, data):
        """
        Computes the centroids of the clusters.

        Args:
            data (numpy.ndarray): Input data array.

        Returns:
            numpy.ndarray: New centroids.
        """
        return np.array([data[self.labels == i].mean(axis=0) for i in range(self.k)])

    def predict(self, data):
        """
        Predicts the cluster for each data point.

        Args:
            data (numpy.ndarray): Input data array.

        Returns:
            numpy.ndarray: Cluster labels.
        """
        return self._assign_clusters(data)

# Example usage
if __name__ == "__main__":
    # Generate synthetic data
    from sklearn.datasets import make_blobs
    data, _ = make_blobs(n_samples=300, centers=3, cluster_std=0.6, random_state=42)

    # Apply K-Means
    kmeans = KMeans(k=3)
    kmeans.fit(data)

    # Visualize the results
    plt.scatter(data[:, 0], data[:, 1], c=kmeans.labels, cmap='viridis', alpha=0.5)
    plt.scatter(kmeans.centroids[:, 0], kmeans.centroids[:, 1], s=300, c='red', marker='x')
    plt.title("K-Means Clustering")
    plt.show()

OUTPUT:
EXPERIMENT: 3
Implement k-nearest neighbours classification using Python.
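The classifier predicts a query point's label by majority vote among the k training points closest to it under the Euclidean distance d(x, x_i) = ||x − x_i||₂, as implemented below.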
PROGRAM:
import numpy as np
from collections import Counter

class KNearestNeighbors:
    def __init__(self, k=3):
        """
        K-Nearest Neighbors classifier.

        Args:
            k (int): Number of neighbors to consider.
        """
        self.k = k
        self.data = None
        self.labels = None

    def fit(self, X, y):
        """
        Fit the training data.

        Args:
            X (numpy.ndarray): Training data of shape (n_samples, n_features).
            y (numpy.ndarray): Labels of shape (n_samples,).
        """
        self.data = X
        self.labels = y

    def predict(self, X):
        """
        Predict the class labels for given input data.

        Args:
            X (numpy.ndarray): Input data of shape (n_samples, n_features).

        Returns:
            numpy.ndarray: Predicted class labels.
        """
        predictions = [self._predict_single_point(x) for x in X]
        return np.array(predictions)

    def _predict_single_point(self, x):
        """
        Predict the label for a single data point.

        Args:
            x (numpy.ndarray): A single input data point.

        Returns:
            int/str: Predicted class label.
        """
        # Compute distances to all training points
        distances = np.linalg.norm(self.data - x, axis=1)

        # Get indices of the k nearest neighbors
        k_indices = np.argsort(distances)[:self.k]

        # Get the labels of the k nearest neighbors
        k_labels = self.labels[k_indices]

        # Majority vote
        most_common = Counter(k_labels).most_common(1)
        return most_common[0][0]

# Example usage
if __name__ == "__main__":
    # Generate synthetic dataset
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score

    # Create synthetic data
    X, y = make_classification(n_samples=200, n_features=2, n_classes=2, n_redundant=0,
                               random_state=42)

    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Instantiate and train the k-NN model
    knn = KNearestNeighbors(k=5)
    knn.fit(X_train, y_train)

    # Predict on the test set
    predictions = knn.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, predictions)
    print(f"Accuracy: {accuracy:.2f}")

    # Visualize the decision boundary
    import matplotlib.pyplot as plt

    h = 0.02  # step size in the mesh
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.coolwarm)
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k', cmap=plt.cm.coolwarm)
    plt.title("k-NN Decision Boundary")
    plt.show()

OUTPUT:

Accuracy: 0.83
EXPERIMENT: 4
Given the following data, which specifies the classification for nine combinations of VAR1
and VAR2, predict a classification for a case where VAR1 = 0.906 and VAR2 = 0.606, using the
result of k-means clustering with 3 means (i.e., 3 centroids).

VAR1   VAR2   CLASS
1.713  1.586  0
0.180  1.786  1
0.353  1.240  1
0.940  1.566  0
1.486  0.759  1
1.266  1.106  0
1.540  0.419  1
0.459  1.799  1
0.773  0.186  1
PROGRAM:
import numpy as np
from sklearn.cluster import KMeans
from collections import Counter

# Given data
data = np.array([
[1.713, 1.586],
[0.180, 1.786],
[0.353, 1.240],
[0.940, 1.566],
[1.486, 0.759],
[1.266, 1.106],
[1.540, 0.419],
[0.459, 1.799],
[0.773, 0.186]
])

labels = np.array([0, 1, 1, 0, 1, 0, 1, 1, 1]) # Corresponding classes

# Step 1: Apply k-means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(data)
centroids = kmeans.cluster_centers_

# Step 2: Assign each data point to a cluster
cluster_labels = kmeans.labels_

# Step 3: Map clusters to majority class
cluster_to_class = {}
for cluster in np.unique(cluster_labels):
    cluster_points_labels = labels[cluster_labels == cluster]
    majority_class = Counter(cluster_points_labels).most_common(1)[0][0]
    cluster_to_class[cluster] = majority_class

# Step 4: Predict for new point
new_point = np.array([[0.906, 0.606]])
predicted_cluster = kmeans.predict(new_point)[0]
predicted_class = cluster_to_class[predicted_cluster]
print(f"The predicted class for VAR1=0.906 and VAR2=0.606 is: {predicted_class}")

OUTPUT:
The predicted class for VAR1=0.906 and VAR2=0.606 is: 1
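
To see how this prediction was reached, a few extra print statements could be appended to the program above (an optional check, not part of the original listing):

# Optional inspection of the fitted model (hypothetical addition)
print("Centroids:\n", centroids)                       # coordinates of the 3 cluster centres
print("Cluster -> majority class:", cluster_to_class)  # mapping used for the final prediction
print("Cluster of the new point:", predicted_cluster)  # centroid nearest to (0.906, 0.606)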
EXPERIMENT: 5
The following training examples map descriptions of individuals onto high, medium,
and low credit-worthiness.

Income   Recreation  Job         Status   Age group  Homeowner  Risk
Medium   skiing      design      single   twenties   no         High risk
High     golf        trading     married  forties    yes        Low risk
Low      speedway    transport   married  thirties   yes        Med risk
Medium   football    banking     single   thirties   yes        Low risk
High     flying      media       married  fifties    yes        High risk
Low      football    security    single   twenties   no         Med risk
Medium   golf        media       single   thirties   yes        Med risk
Medium   golf        transport   married  forties    yes        Low risk
High     skiing      banking     single   thirties   yes        High risk
Low      golf        unemployed  married  forties    yes        High risk

Input attributes are (from left to right) income, recreation, job, status, age group, and
home-owner. Find the unconditional probability of ‘golf’ and the conditional probability of
‘single’ given ‘Med risk’ in the dataset.
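
The two probabilities asked for can be read directly from the training table. A minimal sketch, assuming the same data dictionary that is defined in the program below:

import pandas as pd

# Assumes `data` is the dictionary of training examples from the program below
df = pd.DataFrame(data)

# Unconditional probability of 'golf'
p_golf = (df["Recreation"] == "golf").mean()                  # 4 of 10 examples -> 0.40

# Conditional probability of 'single' given 'Med risk'
med_risk = df[df["Risk"] == "Med risk"]
p_single_given_med = (med_risk["Status"] == "single").mean()  # 2 of 3 'Med risk' examples -> 0.67

print(f"P(golf) = {p_golf:.2f}")
print(f"P(single | Med risk) = {p_single_given_med:.2f}")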
PROGRAM:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt

# Create a DataFrame from the given data
data = {
    "Income": ["Medium", "High", "Low", "Medium", "High", "Low", "Medium", "Medium",
               "High", "Low"],
    "Recreation": ["skiing", "golf", "speedway", "football", "flying", "football", "golf",
                   "golf", "skiing", "golf"],
    "Job": ["design", "trading", "transport", "banking", "media", "security", "media",
            "transport", "banking", "unemployed"],
    "Status": ["single", "married", "married", "single", "married", "single", "single",
               "married", "single", "married"],
    "Age group": ["twenties", "forties", "thirties", "thirties", "fifties", "twenties",
                  "thirties", "forties", "thirties", "forties"],
    "Homeowner": ["no", "yes", "yes", "yes", "yes", "no", "yes", "yes", "yes", "yes"],
    "Risk": ["High risk", "Low risk", "Med risk", "Low risk", "High risk", "Med risk",
             "Med risk", "Low risk", "High risk", "High risk"],
}

df = pd.DataFrame(data)

# Encode categorical features and target variable
label_encoders = {}
for column in df.columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Split features and target
X = df.drop("Risk", axis=1)
y = df["Risk"]

# Train a decision tree classifier
clf = DecisionTreeClassifier(criterion="entropy", random_state=42)
clf.fit(X, y)

# Plot the decision tree
plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=X.columns, class_names=label_encoders["Risk"].classes_,
          filled=True)
plt.title("Decision Tree for Credit-Worthiness Classification")
plt.show()

# Predict risk for a new individual
new_individual = pd.DataFrame({
    "Income": ["Medium"],
    "Recreation": ["golf"],
    "Job": ["trading"],
    "Status": ["single"],
    "Age group": ["thirties"],
    "Homeowner": ["no"],
})

# Encode the new individual
for column in new_individual.columns:
    new_individual[column] = label_encoders[column].transform(new_individual[column])

# Predict the risk
predicted_risk = clf.predict(new_individual)
predicted_risk_label = label_encoders["Risk"].inverse_transform(predicted_risk)
print(f"Predicted risk: {predicted_risk_label[0]}")
OUTPUT:

Predicted risk: Med risk


EXPERIMENT: 6
Implement linear regression using Python.
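The model below is trained with batch gradient descent: at each iteration it computes predictions y_hat = Xw + b and updates the parameters as w <- w - α·(1/n)·Xᵀ(y_hat - y) and b <- b - α·(1/n)·Σ(y_hat_i - y_i), where α is the learning rate and n is the number of training samples.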
PROGRAM:
import numpy as np

class LinearRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """
        Train the linear regression model using gradient descent.

        Args:
            X (numpy.ndarray): Feature matrix (n_samples, n_features).
            y (numpy.ndarray): Target vector (n_samples,).
        """
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Gradient descent
        for _ in range(self.n_iterations):
            y_predicted = np.dot(X, self.weights) + self.bias
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        """
        Predict target values for given input data.

        Args:
            X (numpy.ndarray): Feature matrix (n_samples, n_features).

        Returns:
            numpy.ndarray: Predicted target values.
        """
        return np.dot(X, self.weights) + self.bias

# Example usage
if __name__ == "__main__":
    # Generate synthetic data
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error

    np.random.seed(42)
    X = 2 * np.random.rand(100, 1)
    y = 4 + 3 * X.squeeze() + np.random.randn(100)  # Linear relation with noise

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model
    model = LinearRegression(learning_rate=0.1, n_iterations=1000)
    model.fit(X_train, y_train)

    # Predict on test set
    y_pred = model.predict(X_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse:.2f}")

    # Visualize the results
    import matplotlib.pyplot as plt
    plt.scatter(X_test, y_test, color="blue", label="Actual")
    plt.plot(X_test, y_pred, color="red", label="Predicted")
    plt.legend()
    plt.title("Linear Regression Results")
    plt.xlabel("X")
    plt.ylabel("y")
    plt.show()
OUTPUT:
Mean Squared Error: 0.65
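
As an optional sanity check (not part of the original listing), the learned parameters can be printed; they should be close to the slope 3 and intercept 4 used to generate the synthetic data:

# Hypothetical addition: compare learned parameters with the generating model y = 4 + 3x + noise
print("Learned weight:", model.weights[0])  # expected to be close to 3
print("Learned bias:", model.bias)          # expected to be close to 4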
