Ass6 (DMDS)
Ass6 (DMDS)
Set A
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
# Drop every row that contains a NaN value, modifying df in place.
# NOTE(review): df is not defined in the visible excerpt — presumably it is
# loaded from the dataset earlier; confirm against the full source.
df.dropna(inplace=True)
# Select the clustering features: every column except the 'CUST_ID' identifier.
features = df.drop(columns=['CUST_ID']).values # Using .values to get numpy array
# NOTE(review): the loop below appears to be an excerpt from the body of
# kmeans(); its enclosing def, its indentation, and the step that computes
# new_centroids are not visible here — confirm against the full source.
for _ in range(max_iters):
# Calculate distances from data points to centroids.
# Assumes X is (n_samples, n_features) and centroids is (k, n_features), so the
# subtraction broadcasts to one difference vector per point/centroid pair and
# the norm over axis=2 reduces each to a single distance — TODO confirm shapes.
distances = nm.linalg.norm(X[:, nm.newaxis] - centroids, axis=2)
# Carry the updated centroids into the next iteration.
centroids = new_centroids
# Run K-Means on the selected features; the call returns a (labels, centroids) pair.
# NOTE(review): k is not defined in the visible excerpt.
labels, centroids = kmeans(features, k)
URL: https://gist.github.com/akuks/2e9b08cebef0181b583a1dff4a97f8a1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
# Optional: plot the clusters (use only Age and one gender column for
# visualization).
# Scatter-plot the clustering result: Age on the x-axis, one encoded column on
# the y-axis, and one colour per cluster label.
# NOTE(review): features_encoded, df and labels are defined outside the visible
# excerpt.
# Take the second column of the encoded frame; this assumes column 0 is 'Age'.
gender_column = features_encoded.columns[1] # Assuming the first column is 'Age'
plt.figure(figsize=(10, 6))
# c=labels colours each point by its cluster label using the 'viridis' colormap.
plt.scatter(df['Age'], features_encoded[gender_column], c=labels, cmap='viridis',
marker='o', edgecolor='k')
plt.title('Hierarchical Agglomerative Clustering of Customers')
plt.xlabel('Age')
plt.ylabel(gender_column) # Update the y-label to match the gender column
# The colorbar provides the legend for the label-to-colour mapping.
plt.colorbar(label='Cluster')
plt.grid(True) # Add grid for better readability
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# NOTE(review): this loop appears to be an excerpt of a larger K-Means routine;
# its body indentation and the step that computes new_centroids are not visible
# here — confirm against the full source.
for _ in range(n_iterations):
# Step 3.2: Assign clusters based on closest centroid
# Pairwise point-to-centroid distances. Assumes X is a pandas object of shape
# (n_samples, n_features) and centroids is (k, n_features) — TODO confirm.
distances = np.linalg.norm(X.to_numpy()[:, np.newaxis] - centroids, axis=2)
# For each row of distances, take the index of the smallest entry.
labels = np.argmin(distances, axis=1)
# Carry the updated centroids into the next iteration.
centroids = new_centroids
URL: https://github.com/TrainingByPackt/Data-Science-with-Python/blob/master/Chapter01/Data/Wholesale%20customers%20data.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
Set C
1. Write a Python program to implement agglomerative clustering on a synthetic
dataset (use the built-in Iris dataset).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import AgglomerativeClustering
from sklearn.decomposition import PCA