0% found this document useful (0 votes)
8 views3 pages

K-Means Algorithm

Uploaded by

Bikram Karmakar
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
8 views3 pages

K-Means Algorithm

Uploaded by

Bikram Karmakar
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 3

K-means

Q. Cluster the Iris data set using the k-means clustering algorithm. Choose the value of k
using the Elbow method.

Code:

import os
os.environ["OMP_NUM_THREADS"] = "1"
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import accuracy_score
from scipy.stats import mode

# Read the Iris CSV (path is relative to the working directory) and preview
# the first rows so the column names can be checked by eye.
data = pd.read_csv('Iris.csv')
print("The Iris dataset:", data.head(), sep="\n")

# Two features are used for clustering: sepal length and petal width.
x1 = data['SepalLengthCm'].to_numpy()
x2 = data['PetalWidthCm'].to_numpy()
X = np.column_stack((x1, x2))  # one row per sample, one column per feature

# The species column is the ground truth used when scoring the clusters.
true_labels = data['Species']

# Encode each species name as an integer, numbered in order of first appearance.
species_names = true_labels.unique()
label_mapping = dict(zip(species_names, range(len(species_names))))
numeric_labels = true_labels.map(label_mapping)

# Scatter the two selected features to inspect the raw data before clustering.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(x1, x2)
ax.set_title('Dataset Before Clustering')
ax.set_xlabel('SepalLengthCm')
ax.set_ylabel('PetalWidthCm')
plt.show()

# Elbow method: fit k-means once per candidate k and record the WCSS
# (within-cluster sum of squares), which scikit-learn exposes as `inertia_`.
wcss = []
K_range = range(1, 11)  # candidate k values 1..10
for k in K_range:
    # Fixed random_state so repeated runs give the same curve.
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
# Draw WCSS against k; the bend ("elbow") in this curve is what the user
# reads off to choose the number of clusters.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(K_range, wcss, 'bo-')  # blue circle markers joined by a solid line
ax.set_title('Elbow Method to Determine Optimal k')
ax.set_xlabel('Number of Clusters (k)')
ax.set_ylabel('WCSS')
ax.grid(True)
plt.show()

# Ask the user for k after they have inspected the Elbow graph.
# Re-prompt on anything that is not a positive integer, rather than crashing
# with ValueError here or failing later when KMeans is given k < 1.
prompt = "Based on the Elbow graph, enter the optimal number of clusters (k): "
while True:
    raw = input(prompt)
    try:
        optimal_k = int(raw)
    except ValueError:
        print("Please enter a whole number.")
        continue
    if optimal_k >= 1:
        break
    print("The number of clusters must be at least 1.")

# Final model: cluster the data with the k chosen by the user.
# fit() returns the fitted estimator, so construction and fitting are chained.
kmeans = KMeans(n_clusters=optimal_k, random_state=42).fit(X)
predicted_labels = kmeans.labels_  # cluster index assigned to each sample

# Score the clustering against the known species (optional sanity check).
# Cluster ids need not match the species codes, so each cluster is relabelled
# with the most common true label among its members before comparing.
mapped_labels = np.zeros_like(predicted_labels)
for cluster_id in range(optimal_k):
    mask = (predicted_labels == cluster_id)
    if not mask.any():
        continue  # no samples in this cluster, so there is nothing to relabel
    mapped_labels[mask] = mode(numeric_labels[mask]).mode

# Fraction of samples whose relabelled cluster equals their true species code.
accuracy = accuracy_score(numeric_labels, mapped_labels)
print(f"Clustering Accuracy: {accuracy * 100:.2f}%")

# Show the samples coloured by assigned cluster, with the fitted cluster
# centres drawn on top as large red markers.
centroids = kmeans.cluster_centers_
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(X[:, 0], X[:, 1], c=predicted_labels, cmap='viridis', alpha=0.5)
ax.scatter(centroids[:, 0], centroids[:, 1], s=300, c='red', label='Centroids')
ax.set_title('Clusters and Centroids')
ax.set_xlabel('SepalLengthCm')
ax.set_ylabel('PetalWidthCm')
ax.legend()
ax.grid(True)
plt.show()

You might also like