Program Code:
# Sample dataset
dataset = [2, 4, 7, 9, 12, 14, 56, 43] # Dataset for clustering
# No of clusters = 2
k1, k2 = [], [] # Lists to store the data points for two clusters
# Initial means (arbitrarily chosen data points as initial means)
m1, m2 = dataset[2], dataset[4] # m1=7, m2=12
no_of_iter = 1 # Counter to track the number of iterations
# 1st Iteration - Assign each data point to the nearest mean
n = len(dataset) # Length of dataset
for i in range(n):  # Loop over each data point in dataset
    # Check which mean is closer to the current data point
    if abs(m1 - dataset[i]) <= abs(m2 - dataset[i]):
        # Assign to cluster k1 if closer to m1
        k1.append(dataset[i])  # Add the data point to cluster k1
    else:
        # Assign to cluster k2 if closer to m2
        k2.append(dataset[i])  # Add the data point to cluster k2
print("Iteration No.", no_of_iter)
print(k1, k2)
# Update the means of both clusters
m1 = sum(k1) / len(k1) # New mean of cluster k1
m2 = sum(k2) / len(k2) # New mean of cluster k2
# Keep iterating until clusters do not change (convergence)
k1old, k2old = [], [] # Previous values of k1 and k2 to check for convergence
while k1old != k1 and k2old != k2:  # Loop until clusters stop changing
    # Store the current clusters for comparison in the next iteration
    k1old, k2old = k1, k2
    k1, k2 = [], []  # Reset the clusters for the next iteration
    no_of_iter += 1  # Increment iteration counter
    print("Iteration No.", no_of_iter)  # Print iteration number
    # Reassign each data point to the nearest mean
    for i in range(n):  # Loop over each data point in dataset
        if abs(m1 - dataset[i]) <= abs(m2 - dataset[i]):  # which mean is closer
            k1.append(dataset[i])  # Add the data point to cluster k1
        else:
            k2.append(dataset[i])  # Add the data point to cluster k2
    # Update the means of both clusters again
    m1 = sum(k1) / len(k1)  # New mean of cluster k1
    m2 = sum(k2) / len(k2)  # New mean of cluster k2
    print(m1, m2)  # Print the updated means for this iteration
    print(k1, k2)  # Print the data points in each cluster for this iteration
# Final means and clusters after convergence
print("##############################################")
print("Final Clusters and their means :")
print("Cluster #1 :", k1, "With mean :", m1)
print("Cluster #1 :", k2, "With mean :", m2)
print("##############################################")
Output:
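For comparison, the same two-cluster split can be produced with scikit-learn's KMeans. The snippet below is a minimal illustrative sketch, not part of the assignment program: it assumes scikit-learn is installed, and since k-means++ initialisation differs from the hand-picked initial means used above, the cluster numbering (and, in principle, the final grouping) may differ.

import numpy as np
from sklearn.cluster import KMeans

data = np.array([2, 4, 7, 9, 12, 14, 56, 43]).reshape(-1, 1)  # 1-D points as a column vector
km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(data)
print("Labels:", km.labels_)                             # cluster index assigned to each point
print("Cluster centers:", km.cluster_centers_.ravel())   # compare with the converged means above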
Program Code:
from itertools import combinations
# Define the dataset
transactions = [
['I1', 'I2', 'I5'],
['I2', 'I4'],
['I2', 'I3'],
['I1', 'I2', 'I4'],
['I1', 'I3'],
['I2', 'I3'],
['I1', 'I3'],
['I1', 'I2', 'I3', 'I5'],
['I1', 'I2', 'I3']
]
# Function to calculate support
def calculate_support(itemset, transactions):
    count = sum(1 for transaction in transactions
                if set(itemset).issubset(set(transaction)))
    return count / len(transactions)
# Function to generate candidate itemsets
def generate_candidates(prev_freq_sets, k):
    candidates = []
    prev_freq_sets = list(prev_freq_sets)
    for i in range(len(prev_freq_sets)):
        for j in range(i + 1, len(prev_freq_sets)):
            candidate = sorted(set(prev_freq_sets[i]) | set(prev_freq_sets[j]))
            if len(candidate) == k and candidate not in candidates:
                candidates.append(candidate)
    return candidates
# Function to run the Apriori algorithm
def apriori(transactions, min_support):
    items = sorted(
        set(item for transaction in transactions for item in transaction))
    current_itemsets = [[item] for item in items]  # Generate 1-itemsets
    freq_itemsets = []
    k = 1
    while current_itemsets:
        current_freq_itemsets = []
        for itemset in current_itemsets:
            support = calculate_support(itemset, transactions)
            if support >= min_support:
                current_freq_itemsets.append(itemset)
                print(f"Frequent {k}-itemset: {itemset}, support: {support:.2f}")
        freq_itemsets.extend(current_freq_itemsets)
        k += 1
        current_itemsets = generate_candidates(current_freq_itemsets, k)
    return freq_itemsets
# Function to generate association rules and recommend a single item
def generate_rules_and_recommend(freq_itemsets, transactions, min_confidence):
    best_rule = None
    best_confidence = 0
    for itemset in freq_itemsets:
        if len(itemset) > 1:
            for i in range(1, len(itemset)):
                for antecedent in combinations(itemset, i):
                    consequent = set(itemset) - set(antecedent)
                    antecedent_support = calculate_support(antecedent, transactions)
                    itemset_support = calculate_support(itemset, transactions)
                    confidence = itemset_support / antecedent_support
                    if confidence >= min_confidence:
                        print(f"Rule: {set(antecedent)} -> {consequent}, "
                              f"confidence: {confidence:.2f}")
                        if confidence > best_confidence:
                            best_confidence = confidence
                            best_rule = (set(antecedent), consequent)
    # Recommend the consequent of the rule with the highest confidence
    if best_rule:
        antecedent, consequent = best_rule
        print(f"\nRecommendation: Based on {antecedent}, you should choose "
              f"{consequent} with confidence {best_confidence:.2f}")
    else:
        print("No suitable recommendation found based on the given confidence threshold.")
# Set minimum support and confidence
min_support = 0.2
min_confidence = 0.6
# Run Apriori algorithm
frequent_itemsets = apriori(transactions, min_support)
# Generate association rules and recommend an item based on confidence
print("\nAssociation Rules and Recommendation:")
generate_rules_and_recommend(frequent_itemsets, transactions, min_confidence)
Output:
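As a cross-check, the same frequent itemsets and rules can be mined with the mlxtend library. The sketch below is illustrative only: it reuses the transactions list defined above and assumes pandas and mlxtend are installed; exact column layout of the result follows mlxtend's API.

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

te = TransactionEncoder()
onehot = te.fit(transactions).transform(transactions)  # one-hot encode the transactions
df = pd.DataFrame(onehot, columns=te.columns_)

freq = apriori(df, min_support=0.2, use_colnames=True)              # frequent itemsets
rules = association_rules(freq, metric="confidence", min_threshold=0.6)
print(freq)
print(rules[["antecedents", "consequents", "support", "confidence"]])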
Program Code:
import numpy as np
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import load_iris
def plot_dendrogram(model, **kwargs):
    # Create linkage matrix and then plot the dendrogram
    # create the counts of samples under each node
    counts = np.zeros(model.children_.shape[0])
    n_samples = len(model.labels_)
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count
    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]
    ).astype(float)
    # Plot the corresponding dendrogram
    dendrogram(linkage_matrix, **kwargs)
iris = load_iris()
X = iris.data
# setting distance_threshold=0 ensures we compute the full tree.
model = AgglomerativeClustering(distance_threshold=0, n_clusters=None)
model = model.fit(X)
plt.title("Hierarchical Clustering Dendrogram")
# plot the top three levels of the dendrogram
plot_dendrogram(model, truncate_mode="level", p=3)
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()
Output:
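An alternative worth noting: the linkage matrix can be computed directly with SciPy instead of being reconstructed from the fitted AgglomerativeClustering model. The sketch below reuses X, plt, and dendrogram from the program above and assumes Ward linkage (also AgglomerativeClustering's default), so the resulting dendrogram should be comparable.

from scipy.cluster.hierarchy import linkage

Z = linkage(X, method="ward")              # linkage matrix for the same iris data
plt.title("Hierarchical Clustering Dendrogram (SciPy linkage)")
dendrogram(Z, truncate_mode="level", p=3)  # top three levels, as above
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()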
Program Code:
import math
import numpy as np
# normalize the matrix (make it a probability matrix (all cols sum to 1))
def normalizeAdjacencyMatrix(A):
    n = len(A)  # n = num of rows/cols in A
    for j in range(len(A[0])):
        sumOfCol = 0
        for i in range(len(A)):
            sumOfCol += A[i][j]
        if sumOfCol == 0:  # adjust for dangling nodes (columns of zeros)
            for val in range(n):
                A[val][j] = 1 / n
        else:
            for val in range(n):
                A[val][j] = A[val][j] / sumOfCol
    return A
# implement damping matrix using formula
# M = dA + (1-d)(1/n)Q, where Q is an array of 1's and d is the damping factor
def dampingMatrix(A):
    n = len(A)  # n = num of rows/cols in A
    dampingFactor = 0.85
    Q = [[1 / n] * n] * n
    arrA = np.array(A)
    arrQ = np.array(Q)
    # create damping matrix
    arrM = np.add(dampingFactor * arrA, (1 - dampingFactor) * arrQ)
    return arrM
# find eigenvector corresponding to eigenvalue 1
def findSteadyState(M, n):
    # find eigenvalues and eigenvectors
    eigenValues, evectors = np.linalg.eig(M)
    lstEVals = []
    for val in eigenValues:
        lstEVals.append(np.round(val))
    # find eigenvector with eigenvalue 1
    idxWithEval1 = lstEVals.index(1)
    steadyStateVector = evectors[:, idxWithEval1]
    # normalize steady state vector so its components sum to 1
    lstVersionSteadyState = []
    sumOfComps = 0
    returnVector = []
    for val in steadyStateVector:
        sumOfComps += val
        lstVersionSteadyState.append(val)
    for val in lstVersionSteadyState:
        returnVector.append(val / sumOfComps)
    return returnVector
def pageRank(A):
    n = len(A)  # n = num of rows/cols in A
    A = normalizeAdjacencyMatrix(A)
    M = dampingMatrix(A)
    # find steady state vector
    steadyStateVectorOfA = findSteadyState(M, n)
    return steadyStateVectorOfA
# TEST CASES
print("\nPage Rank Examples")
# 1) (corresponds to directed graph (1) on readme.md)
matrix1 = [[0, 1, 0, 0],
[0, 0, 0, 0],
[0, 1, 0, 1],
[0, 0, 1, 0]]
print("1) matrix 1 = \n", np.array(matrix1))
print("steady state vector: ")
print(pageRank(matrix1))
# expected output: [0.077, 0.054, 0.441, 0.429]
# 2)
matrix2 = [[0, 0, 1, 0, 0, 0, 0, 0],
[1, 0, 0, 1, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1],
[0, 0, 0, 1, 1, 0, 0, 1],
[0, 0, 0, 0, 0, 1, 0, 0]]
print("\n2) matrix 2 = \n", np.array(matrix2))
print("steady state vector: ")
print(pageRank(matrix2))
# expected output: [0.03037, 0.0536, 0.02735, 0.0617, 0.1621, 0.2836, 0.2419, 0.1393]
Output:
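The eigenvector computation above can also be cross-checked by power iteration: because the damping matrix is column-stochastic with strictly positive entries, repeatedly applying it to any probability vector converges to the same steady state. The sketch below is illustrative only; it reuses normalizeAdjacencyMatrix, dampingMatrix, and np from the program above, and the function name and iteration count are arbitrary choices.

def pageRankPowerIteration(A, num_iter=100):
    n = len(A)
    M = dampingMatrix(normalizeAdjacencyMatrix(A))
    v = np.full(n, 1 / n)   # start from the uniform distribution
    for _ in range(num_iter):
        v = M @ v           # one step of the random-surfer update
    return v

print(pageRankPowerIteration([[0, 1, 0, 0],
                              [0, 0, 0, 0],
                              [0, 1, 0, 1],
                              [0, 0, 1, 0]]))
# should be close to the eigenvector-based steady state vector for matrix 1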