# Source: Ass - 11.ipynb (Google Colab notebook export)
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances, cosine_similarity
from sklearn.preprocessing import StandardScaler, Binarizer
from scipy.spatial.distance import pdist, squareform, jaccard
# Parzen-window (Gaussian-kernel) density estimate of the "alcohol" feature.
# One curve is drawn per bandwidth in `bandwidths` so the effect of the
# smoothing parameter h can be compared on a single figure.
# `alcohol`, `alcohol_range` and `bandwidths` are defined outside this cell;
# presumably `alcohol_range` is an (m, 1) evaluation grid -- TODO confirm.
plt.figure(figsize=(12, 8))

# The training column is identical for every bandwidth, so reshape it once
# instead of on every iteration (the estimator is fitted on a 2D column).
alcohol_column = alcohol.reshape(-1, 1)

for h in bandwidths:
    kde = KernelDensity(kernel='gaussian', bandwidth=h).fit(alcohol_column)
    # score_samples yields log-densities; exponentiate to plot the density.
    log_density = kde.score_samples(alcohol_range)
    plt.plot(alcohol_range, np.exp(log_density), label=f'Bandwidth h={h}')

plt.title('Parzen Window Density Estimation for "alcohol" Feature')
plt.xlabel('Alcohol')
plt.ylabel('Density')
plt.legend()
plt.grid(True)
plt.show()
# k-nearest-neighbour density estimate of the "alcohol" feature.
# For each k in `k_values`, the density at a query point x is
#     p(x) = k / (n * V),   V = 2 * r_k(x)
# where n is the number of samples and r_k(x) is the distance from x to its
# k-th nearest sample (an interval of radius r has length 2r in 1D).
# `alcohol`, `alcohol_range` and `k_values` are defined outside this cell.
plt.figure(figsize=(12, 8))

# Loop-invariant inputs: the 2D training column and the sample count.
alcohol_column = alcohol.reshape(-1, 1)
n_samples = len(alcohol)

for k in k_values:
    nbrs = NearestNeighbors(n_neighbors=k).fit(alcohol_column)
    distances, _ = nbrs.kneighbors(alcohol_range)
    # The last column holds the distance to the k-th (farthest) neighbour.
    # NOTE(review): if a query point coincides with k or more samples this
    # radius is 0 and the division below produces inf -- confirm whether
    # `alcohol_range` can hit sample values exactly.
    kth_distance = distances[:, -1]
    # Volume of the 1D ball is 2*distance in 1D
    density = k / (n_samples * 2 * kth_distance)
    plt.plot(alcohol_range, density, label=f'k={k}')

plt.title('Nearest Neighbor Density Estimation for "alcohol" Feature')
plt.xlabel('Alcohol')
plt.ylabel('Density')
plt.legend()
plt.grid(True)
plt.show()
# Hierarchical Clustering
# Print the result of data.head() to inspect the structure of the input
# before clustering. `data` is defined outside this cell; per the original
# note it is a dataframe, so this shows its first few rows.
print(data.head())
# Agglomerative Clustering
# Fit one 5-cluster model per linkage criterion in `linkage_methods` and
# obtain the cluster label assigned to each row of `features` (both names
# are defined outside this cell).
for method in linkage_methods:
    # The distance argument is deliberately omitted: `affinity` was
    # deprecated in scikit-learn 1.2 and removed in 1.4 (renamed `metric`),
    # so passing affinity='euclidean' raises TypeError on current releases.
    # Euclidean distance is the default under either name, so leaving it out
    # gives the same clustering on both old and new versions.
    cluster_model = AgglomerativeClustering(n_clusters=5, linkage=method)
    # NOTE(review): cluster_labels is overwritten on every iteration and is
    # not used within the visible part of this cell -- confirm the rest of
    # the loop body (e.g. plotting/scoring) was not lost.
    cluster_labels = cluster_model.fit_predict(features)
while current_clusters:
cluster = current_clusters.pop(0)
clusters.append(cluster)
return clusters