# Lab 12
# SAP ID: 500107715
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
# DBSCAN clustering of the numeric columns of the AI content impact dataset.
# Pipeline: load CSV -> keep numeric columns -> mean-impute missing values
# -> standardize -> cluster with DBSCAN -> scatter-plot the first two
# standardized features, coloured by cluster label.

# Load dataset
df = pd.read_csv('/content/Global_AI_Content_Impact_Dataset.csv')

# Drop non-numeric columns (if any)
df_numeric = df.select_dtypes(include=[np.number])

# Handle missing values: replace each NaN with its column mean.
# The result is reassigned instead of using fillna(inplace=True) so the fill
# never operates in place on a frame derived from df.
df_numeric = df_numeric.fillna(df_numeric.mean())

# Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_numeric)

# Apply DBSCAN (eps and min_samples can be tuned)
dbscan = DBSCAN(eps=0.5, min_samples=5)
db_labels = dbscan.fit_predict(X_scaled)

# Plotting (first two features for visualization)
plt.figure(figsize=(10, 6))

# One colour per distinct label; noise (label = -1) will be plotted in black
unique_labels = set(db_labels)
colors = [
    plt.cm.Spectral(each)
    for each in np.linspace(0, 1, len(unique_labels))
]

# Iterate the labels in sorted order so the label -> colour pairing does not
# depend on set iteration order.
for k, col in zip(sorted(unique_labels), colors):
    if k == -1:
        col = [0, 0, 0, 1]  # Black color for noise

    # Boolean mask selecting the rows assigned to label k
    class_member_mask = (db_labels == k)
    xy = X_scaled[class_member_mask]
    plt.scatter(xy[:, 0], xy[:, 1], c=[col], s=20)

plt.title('DBSCAN Clustering')
plt.xlabel('Feature 1 (scaled)')
plt.ylabel('Feature 2 (scaled)')
plt.grid(True)
plt.show()