# 21MIC0107 Da4
# 21MIC0107 Da4
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering, KMeans
from sklearn.metrics import silhouette_score
# 21MIC0107 Anirudh G
# 1. Dataset Preparation
# Pull in the Iris dataset and wrap its measurements in a DataFrame whose
# columns are named after the dataset's features.
iris = datasets.load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# 21MIC0107 Anirudh G
# Data Cleaning & Preprocessing
# Fit the scaler on X, then transform that same data; the scaled result is
# what every later clustering step consumes.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
# 21MIC0107 Anirudh G
# 2. Hierarchical Clustering Implementation
# Compute the Ward linkage over the scaled data, then render it as a
# dendrogram on a 12x6 figure.
linkage_matrix = linkage(X_scaled, method='ward')
plt.figure(figsize=(12, 6))
dendrogram(linkage_matrix)
plt.xlabel("Samples")
plt.ylabel("Distance")
plt.title("Dendrogram for Hierarchical Clustering")
plt.show()
# 21MIC0107 Anirudh G
# Applying Agglomerative Clustering
# Three clusters with Ward linkage; fit_predict returns one label per row
# of X_scaled.
hierarchical = AgglomerativeClustering(n_clusters=3, linkage='ward')
y_hierarchical = hierarchical.fit_predict(X_scaled)
# 21MIC0107 Anirudh G
# 3. Visualization of Hierarchical Clustering
# Scatter the first two columns of the scaled data, colored by cluster label.
# NOTE: the plotted values come from X_scaled, while the axis labels reuse
# the raw feature names.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x=X_scaled[:, 0],
    y=X_scaled[:, 1],
    hue=y_hierarchical,
    palette='viridis',
)
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.title("Hierarchical Clustering")
plt.show()
# 21MIC0107 Anirudh G
# 4. K-Means Clustering Implementation
# Three clusters; random_state is pinned to 42 so repeated runs use the same
# seed.
kmeans = KMeans(n_clusters=3, random_state=42)
y_kmeans = kmeans.fit_predict(X_scaled)
# 21MIC0107 Anirudh G
# Visualization of K-Means Clustering
# Same view as the hierarchical plot: first two columns of the scaled data,
# colored by the K-Means label.
# NOTE: the plotted values come from X_scaled, while the axis labels reuse
# the raw feature names.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x=X_scaled[:, 0],
    y=X_scaled[:, 1],
    hue=y_kmeans,
    palette='coolwarm',
)
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.title("K-Means Clustering")
plt.show()
# 21MIC0107 Anirudh G
# Silhouette Score Comparison
# Score both labelings against the same scaled data so the two numbers are
# directly comparable.
silhouette_hierarchical = silhouette_score(X_scaled, y_hierarchical)
silhouette_kmeans = silhouette_score(X_scaled, y_kmeans)
# Each message is assembled from adjacent literals so that no single string
# literal spans more than one physical line.
print(
    "Silhouette Score (Hierarchical Clustering): "
    f"{silhouette_hierarchical:.2f}"
)
print(
    "Silhouette Score (K-Means Clustering): "
    f"{silhouette_kmeans:.2f}"
)
# 21MIC0107 Anirudh G
# Conclusion
# Report whichever method has the strictly higher silhouette score; a tie
# falls through to the K-Means branch.
if silhouette_hierarchical > silhouette_kmeans:
    print(
        "Hierarchical Clustering is more effective based on the "
        "Silhouette Score."
    )
else:
    print(
        "K-Means Clustering is more effective based on the "
        "Silhouette Score."
    )