# Lab 10
# SAP ID: 500107715
# Roll No: R2142220916
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
# Isolation Forest anomaly detection on the numeric columns of the dataset,
# visualised as a scatter plot of the first two standardized features.

# Load dataset
df = pd.read_csv('/content/Global_AI_Content_Impact_Dataset.csv')

# Drop non-numeric columns (if any); only numeric data is scaled and scored.
df_numeric = df.select_dtypes(include=[np.number])

# Handle missing values (simple fill for this example): each column's NaNs
# are replaced by that column's mean. The result is re-assigned rather than
# filled in place so the filled frame is an explicit new object.
df_numeric = df_numeric.fillna(df_numeric.mean())

# Standardize the data (zero mean, unit variance per column)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_numeric)

# Apply Isolation Forest directly on the scaled data.
# contamination=0.05 -> roughly 5% of the rows are flagged as anomalies;
# random_state is fixed so the run is reproducible.
iso_forest = IsolationForest(contamination=0.05, random_state=42)
# fit_predict returns 1 for inliers (normal) and -1 for outliers (anomalies).
labels = iso_forest.fit_predict(X_scaled)

# Plotting (use first two features for visualization).
# NOTE: this needs at least two numeric columns in the dataset.
plt.figure(figsize=(10, 6))
colors = np.array(['#377eb8', '#e41a1c'])  # blue for normal, red for anomaly
# Index 0 (blue) for normal points, index 1 (red) for anomalies: the mask
# must test for -1, the outlier label, not for 1.
is_anomaly = (labels == -1).astype(int)
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=colors[is_anomaly], s=20)
plt.title('Isolation Forest Anomaly Detection')
plt.xlabel('Feature 1 (scaled)')
plt.ylabel('Feature 2 (scaled)')
plt.grid(True)
plt.show()