# Subspace Clustering
import pandas as pd
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import davies_bouldin_score
from scipy.spatial.distance import cdist
class PROCLUS:
    """Container for the configuration and fitted state of a PROCLUS run."""

    def __init__(self, k, l, max_iter=10):
        """
        PROCLUS Subspace Clustering Algorithm

        :param k: Number of clusters
        :param l: Number of dimensions per cluster
        :param max_iter: Maximum iterations for convergence
        """
        # Configuration, stored exactly as supplied by the caller.
        self.k = k
        self.l = l
        self.max_iter = max_iter
        # Fitted state: nothing is computed at construction time, so these
        # stay None until other code assigns them.
        self.medoids = None
        self.subspaces = None
        self.clusters = None
# NOTE(review): this fragment reads `self` and `data` and ends in `return`, so
# it is presumably the body of `PROCLUS.fit(self, data)` (the driver script
# calls `proclus.fit(data_scaled)`). The `def` line and the original
# indentation are not present in this extract -- TODO restore from the source.
# NOTE(review): `self.medoids` is None after `__init__` and nothing visible
# here initialises it before the loop -- confirm where medoids are seeded.
# Iterate at most `self.max_iter` times.
for _ in range(self.max_iter):
# Cluster assignment for the current state; the driver script later iterates
# this result as a sequence of per-cluster collections of row indices.
clusters = self.assign_points(data)
# Recompute and store the subspaces from the current assignment.
self.subspaces = self.compute_subspaces(data, clusters)
# Candidate medoids derived from the current assignment.
new_medoids = self.update_medoids(data, clusters)
# Stop early once the medoids no longer change between iterations.
if np.all(new_medoids == self.medoids):
break
self.medoids = new_medoids
# The assignment from the last executed iteration is the result.
return clusters
# NOTE(review): fragment of a larger function. `valid_clusters`, `inter_dists`,
# `intra_dists` and `data` are defined outside this extract, and the enclosing
# `def` line and original indentation are missing -- TODO restore from source.
# Visit every unordered pair (i, j) of valid clusters with i < j.
for i in range(len(valid_clusters)):
for j in range(i + 1, len(valid_clusters)):
# Append the smallest pairwise Euclidean distance between the rows selected
# by cluster i and the rows selected by cluster j (presumably each entry of
# `valid_clusters` holds row indices into `data` -- confirm).
inter_dists.append(np.min(cdist(data[valid_clusters[i]],
data[valid_clusters[j]], metric='euclidean')))
# Guard: if either distance list is empty, return infinity here.
if len(inter_dists) == 0 or len(intra_dists) == 0:
return float("inf") # Return infinity if calculation is
# not possible
# Standardise the features before clustering. `df` is defined outside this
# extract; StandardScaler's defaults centre each column and scale it to unit
# variance.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(df)
print(data_scaled)

# Run PROCLUS
k = 5  # Number of clusters
l = 3  # Subspace dimensionality
proclus = PROCLUS(k, l)
clusters = proclus.fit(data_scaled)

# Assign Labels
# `clusters` is consumed as a sequence of per-cluster collections of row
# indices; every listed row receives the position of its cluster as its label.
# NOTE(review): rows that appear in no cluster keep the initial 0.0 and are
# then indistinguishable from members of cluster 0 -- confirm that
# `fit` always assigns every row.
labels = np.zeros(len(data_scaled))
for cluster_id, cluster in enumerate(clusters):
    for index in cluster:
        labels[index] = cluster_id