0% found this document useful (0 votes)
11 views1 page

KMeans Clustering

Uploaded by

rampage4630
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
11 views1 page

KMeans Clustering

Uploaded by

rampage4630
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/1

import pandas as pd

from sklearn.cluster import KMeans


from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Step 1: Read the input CSV into a DataFrame
# The path is a placeholder; point it at the real dataset before running.
dataset_path = 'your_dataset.csv'
data = pd.read_csv(dataset_path)

# Step 2: Preprocess the data
# For simplicity, this assumes the dataset contains only numerical features,
# which are standardized before clustering. Non-numeric columns are not
# handled here and must be dropped or encoded beforehand.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

# Step 3: Implement KMeans clustering
k = 3  # Specify the number of clusters
# random_state fixes the centroid initialization so repeated runs produce the
# same cluster assignment; n_init is set explicitly because its default value
# differs between scikit-learn releases.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
kmeans.fit(scaled_data)

# Step 4: Visualize the clusters (optional)
# Note: This part is only feasible for 2D or 3D data.
# If you have more dimensions, you need to reduce dimensionality before
# visualization. Only the first two scaled features are plotted here.
centers = kmeans.cluster_centers_
# Samples, colored by the cluster label assigned to each one.
plt.scatter(scaled_data[:, 0], scaled_data[:, 1], c=kmeans.labels_, cmap='viridis')
# Cluster centers, drawn larger and in red so they stand out from the samples.
plt.scatter(centers[:, 0], centers[:, 1], s=300, c='red')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('K-means Clustering')
plt.show()

# Step 5 (optional): Score the clustering
# The silhouette score is used here as the example metric; it is computed
# from the scaled samples and the label assigned to each of them.
from sklearn.metrics import silhouette_score

cluster_labels = kmeans.labels_
silhouette_avg = silhouette_score(scaled_data, cluster_labels)
print("Silhouette Score:", silhouette_avg)

You might also like