Prac7 8 9 10

Program Code:

# Sample dataset
dataset = [2, 4, 7, 9, 12, 14, 56, 43]  # Dataset for clustering

# Number of clusters = 2
k1, k2 = [], []  # Lists to store the data points of the two clusters
# Initial means (arbitrarily chosen data points as initial means)
m1, m2 = dataset[2], dataset[4]  # m1 = 7, m2 = 12
no_of_iter = 1  # Counter to track the number of iterations

# 1st iteration - assign each data point to the nearest mean
n = len(dataset)  # Length of the dataset
for i in range(n):  # Loop over each data point in the dataset
    # Check which mean is closer to the current data point
    if abs(m1 - dataset[i]) <= abs(m2 - dataset[i]):
        k1.append(dataset[i])  # Assign to cluster k1 if closer to m1
    else:
        k2.append(dataset[i])  # Assign to cluster k2 if closer to m2
print("Iteration No.", no_of_iter)
print(k1, k2)

# Update the means of both clusters
m1 = sum(k1) / len(k1)  # New mean of cluster k1
m2 = sum(k2) / len(k2)  # New mean of cluster k2

# Keep iterating until the clusters do not change (convergence)
k1old, k2old = [], []  # Previous values of k1 and k2, used to check for convergence

while k1old != k1 and k2old != k2:  # Loop until the clusters stop changing
    # Store the current clusters for comparison in the next iteration
    k1old, k2old = k1, k2
    k1, k2 = [], []  # Reset the clusters for the next iteration
    no_of_iter += 1  # Increment the iteration counter
    print("Iteration No.", no_of_iter)  # Print the iteration number

    # Reassign each data point to the nearest mean
    for i in range(n):  # Loop over each data point in the dataset
        if abs(m1 - dataset[i]) <= abs(m2 - dataset[i]):  # which mean is closer
            k1.append(dataset[i])  # Add the data point to cluster k1
        else:
            k2.append(dataset[i])  # Add the data point to cluster k2

    # Update the means of both clusters again
    m1 = sum(k1) / len(k1)  # New mean of cluster k1
    m2 = sum(k2) / len(k2)  # New mean of cluster k2
    print(m1, m2)  # Print the current means of both clusters
    print(k1, k2)  # Print the current data points in each cluster

# Final means and clusters after convergence
print("##############################################")
print("Final Clusters and their means :")
print("Cluster #1 :", k1, "With mean :", m1)
print("Cluster #2 :", k2, "With mean :", m2)
print("##############################################")

Output:
Program Code:
from itertools import combinations

# Define the dataset
transactions = [
    ['I1', 'I2', 'I5'],
    ['I2', 'I4'],
    ['I2', 'I3'],
    ['I1', 'I2', 'I4'],
    ['I1', 'I3'],
    ['I2', 'I3'],
    ['I1', 'I3'],
    ['I1', 'I2', 'I3', 'I5'],
    ['I1', 'I2', 'I3']
]

# Function to calculate support
def calculate_support(itemset, transactions):
    count = sum(1 for transaction in transactions
                if set(itemset).issubset(set(transaction)))
    return count / len(transactions)

# Function to generate candidate itemsets
def generate_candidates(prev_freq_sets, k):
    candidates = []
    prev_freq_sets = list(prev_freq_sets)
    for i in range(len(prev_freq_sets)):
        for j in range(i + 1, len(prev_freq_sets)):
            # Join step: union of two previous frequent itemsets
            candidate = sorted(set(prev_freq_sets[i]) | set(prev_freq_sets[j]))
            if len(candidate) == k and candidate not in candidates:
                candidates.append(candidate)
    return candidates

# Function to run the Apriori algorithm
def apriori(transactions, min_support):
    items = sorted(set(item for transaction in transactions for item in transaction))
    current_itemsets = [[item] for item in items]  # Generate 1-itemsets
    freq_itemsets = []
    k = 1
    while current_itemsets:
        current_freq_itemsets = []
        for itemset in current_itemsets:
            support = calculate_support(itemset, transactions)
            if support >= min_support:
                current_freq_itemsets.append(itemset)
                print(f"Frequent {k}-itemset: {itemset}, support: {support:.2f}")
        freq_itemsets.extend(current_freq_itemsets)
        k += 1
        current_itemsets = generate_candidates(current_freq_itemsets, k)

    return freq_itemsets

# Function to generate association rules and recommend a single item
def generate_rules_and_recommend(freq_itemsets, transactions, min_confidence):
    best_rule = None
    best_confidence = 0

    for itemset in freq_itemsets:
        if len(itemset) > 1:
            for i in range(1, len(itemset)):
                for antecedent in combinations(itemset, i):
                    consequent = set(itemset) - set(antecedent)
                    antecedent_support = calculate_support(antecedent, transactions)
                    itemset_support = calculate_support(itemset, transactions)
                    confidence = itemset_support / antecedent_support
                    if confidence >= min_confidence:
                        print(f"Rule: {set(antecedent)} -> {consequent}, "
                              f"confidence: {confidence:.2f}")
                        if confidence > best_confidence:
                            best_confidence = confidence
                            best_rule = (set(antecedent), consequent)

    # Recommend the item with the highest confidence
    if best_rule:
        antecedent, consequent = best_rule
        print(f"\nRecommendation: Based on {antecedent}, you should choose "
              f"{consequent} with confidence {best_confidence:.2f}")
    else:
        print("No suitable recommendation found based on the given confidence threshold.")

# Set minimum support and confidence
min_support = 0.2
min_confidence = 0.6

# Run the Apriori algorithm
frequent_itemsets = apriori(transactions, min_support)

# Generate association rules and recommend an item based on confidence
print("\nAssociation Rules and Recommendation:")
generate_rules_and_recommend(frequent_itemsets, transactions, min_confidence)
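
As a small worked example of the two measures used above: the itemset {I1, I2} appears in 4 of the 9 transactions, so support({I1, I2}) = 4/9 ≈ 0.44, and since I1 appears in 6 transactions, confidence(I1 -> I2) = support({I1, I2}) / support({I1}) = (4/9) / (6/9) ≈ 0.67. The short check below simply recomputes these two numbers with the calculate_support function defined above.

# Worked check of support and confidence on the same nine transactions
print(calculate_support(['I1', 'I2'], transactions))   # ≈ 0.44
print(calculate_support(['I1', 'I2'], transactions)
      / calculate_support(['I1'], transactions))       # ≈ 0.67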
Output:
Program Code:
import numpy as np
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import load_iris

def plot_dendrogram(model, **kwargs):
    # Create the linkage matrix and then plot the dendrogram
    # create the counts of samples under each node
    counts = np.zeros(model.children_.shape[0])
    n_samples = len(model.labels_)
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]
    ).astype(float)

    # Plot the corresponding dendrogram
    dendrogram(linkage_matrix, **kwargs)

iris = load_iris()
X = iris.data

# setting distance_threshold=0 ensures we compute the full tree
model = AgglomerativeClustering(distance_threshold=0, n_clusters=None)
model = model.fit(X)

plt.title("Hierarchical Clustering Dendrogram")
# plot the top three levels of the dendrogram
plot_dendrogram(model, truncate_mode="level", p=3)
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()
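
The dendrogram above only visualizes the merge tree; it does not assign samples to a fixed number of clusters. If flat labels are also wanted, a second AgglomerativeClustering fit can be run with n_clusters set explicitly. This is an optional addition, not part of the prescribed program, and the choice of three clusters is an assumption (the iris data has three species).

# Optional: flat cluster labels for the same data (assumes 3 clusters)
flat_model = AgglomerativeClustering(n_clusters=3)
labels = flat_model.fit_predict(X)
print(labels[:10])  # cluster index of the first ten samples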

Output:
Program Code:
import math
import numpy as np

# Normalize the matrix (make it a probability matrix: all columns sum to 1)
def normalizeAdjacencyMatrix(A):
    n = len(A)  # n = number of rows/cols in A
    for j in range(len(A[0])):
        sumOfCol = 0
        for i in range(len(A)):
            sumOfCol += A[i][j]
        if sumOfCol == 0:  # adjust for dangling nodes (columns of zeros)
            for val in range(n):
                A[val][j] = 1 / n
        else:
            for val in range(n):
                A[val][j] = A[val][j] / sumOfCol
    return A

# Build the damping matrix using the formula
# M = dA + (1-d)(1/n)Q, where Q is a matrix of 1's and d is the damping factor
# (the matrix Q constructed below already carries the 1/n factor in its entries)
def dampingMatrix(A):
    n = len(A)  # n = number of rows/cols in A
    dampingFactor = 0.85
    Q = [[1 / n] * n] * n
    arrA = np.array(A)
    arrQ = np.array(Q)
    arrM = np.add(dampingFactor * arrA, (1 - dampingFactor) * arrQ)  # damping matrix
    return arrM

# Find the eigenvector corresponding to eigenvalue 1
def findSteadyState(M, n):
    # find eigenvectors
    evectors = np.linalg.eig(M)[1]
    # find eigenvalues
    eigenValues = np.linalg.eig(M)[0]
    lstEVals = []
    for val in eigenValues:
        lstEVals.append(np.round(val))
    # find the eigenvector with eigenvalue 1
    idxWithEval1 = lstEVals.index(1)
    steadyStateVector = evectors[:, idxWithEval1]
    # normalize the steady-state vector so its components sum to 1
    lstVersionSteadyState = []
    sumOfComps = 0
    returnVector = []
    for val in steadyStateVector:
        sumOfComps += val
        lstVersionSteadyState.append(val)
    for val in lstVersionSteadyState:
        returnVector.append(val / sumOfComps)
    return returnVector

def pageRank(A):
    n = len(A)  # n = number of rows/cols in A
    A = normalizeAdjacencyMatrix(A)
    M = dampingMatrix(A)
    # find the steady-state vector
    steadyStateVectorOfA = findSteadyState(M, n)
    return steadyStateVectorOfA

# TEST CASES
print("\nPage Rank Examples")

# 1) (corresponds to directed graph (1) on readme.md)
matrix1 = [[0, 1, 0, 0],
           [0, 0, 0, 0],
           [0, 1, 0, 1],
           [0, 0, 1, 0]]
print("1) matrix 1 = \n", np.array(matrix1))
print("steady state vector: ")
print(pageRank(matrix1))
# expected output: [0.077, 0.054, 0.441, 0.429]

# 2)
matrix2 = [[0, 0, 1, 0, 0, 0, 0, 0],
           [1, 0, 0, 1, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0],
           [1, 1, 1, 0, 0, 0, 0, 0],
           [0, 1, 0, 0, 0, 1, 0, 0],
           [0, 0, 0, 0, 0, 0, 1, 1],
           [0, 0, 0, 1, 1, 0, 0, 1],
           [0, 0, 0, 0, 0, 1, 0, 0]]
print("\n2) matrix 2 = \n", np.array(matrix2))
print("steady state vector: ")
print(pageRank(matrix2))
# expected output: [0.03037, 0.0536, 0.02735, 0.0617, 0.1621, 0.2836, 0.2419, 0.1393]
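
An alternative way to approximate the same steady-state vector without an eigen-decomposition is power iteration: repeatedly multiply a rank vector by the damping matrix until it stops changing. The sketch below is optional and reuses the functions defined above; the iteration count of 100 is an arbitrary assumption that is more than enough for these small matrices.

# Optional cross-check via power iteration (uses the same column-stochastic convention as above)
def pageRankPowerIteration(A, iterations=100):
    M = dampingMatrix(normalizeAdjacencyMatrix(A))
    v = np.ones(len(A)) / len(A)   # start from a uniform rank vector
    for _ in range(iterations):
        v = M @ v                  # one multiplication per step
    return v                       # components already sum to 1

print(pageRankPowerIteration([[0, 1, 0, 0],
                              [0, 0, 0, 0],
                              [0, 1, 0, 1],
                              [0, 0, 1, 0]]))  # should be close to [0.077, 0.054, 0.441, 0.429]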
Output:
