# Code 1
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, ShuffleSplit
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
# ----------------------
# Load Datasets
# ----------------------
# read_csv accepts exactly one path per call, so each file is read separately
# and the resulting frames are stacked row-wise into a single DataFrame.
# NOTE(review): assumes the three files share the same columns - confirm.
LIMITED_POWER_FILES = [r'/F0L.csv', r'/F1L.csv', r'/F5L.csv']
df1 = pd.concat(
    [pd.read_csv(path) for path in LIMITED_POWER_FILES],
    ignore_index=True,  # rebuild a clean 0..n-1 index across the stacked files
)
# NOTE(review): df2 is loaded but not used in the visible part of this script.
df2 = pd.read_csv(r'/F0M.csv')
# ----------------------
# Data Visualization: Fault Distribution
# ----------------------
# value_counts() orders classes by frequency while unique() orders them by
# first appearance, so the wedge labels are taken from the counts' own index
# to keep every wedge paired with the class it actually represents.
fault_counts = df1['label'].value_counts()
plt.figure(figsize=(5, 5))
plt.pie(
    fault_counts,
    labels=fault_counts.index,
    autopct='%2.1f%%',  # '%%' renders a literal percent sign after the value
    colors=sns.color_palette("pastel"),
)
plt.title("Fault Distribution in Limited Power Dataset")
plt.show()
# ----------------------
# Feature Correlation Heatmap
# ----------------------
# The last column is the target; column 0 is skipped and everything in
# between is used as the feature matrix.
X = df1.iloc[:, 1:-1]  # Features
Y = df1.iloc[:, -1]  # Target Labels
plt.figure(figsize=(12, 10))
feature_corr = X.corr()  # pairwise correlation between feature columns
sns.heatmap(feature_corr, fmt=".2f", cmap='coolwarm', annot=True)
plt.title("Feature Correlation Heatmap")
plt.show()
# ----------------------
# Data Preprocessing
# ----------------------
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)
# Scaling statistics are learned from the training split only and then
# reused unchanged on the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# ----------------------
# PCA: Dimensionality Reduction
# ----------------------
# Report how much variance the first i principal components retain.
# PCA is fitted once with every available component; the running sum of the
# per-component ratios gives the cumulative figure for each i, so there is no
# need to refit the model for every component count.
MAX_REPORTED_COMPONENTS = 13
pca = PCA()
pca.fit(X_train_scaled)
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
# Slicing caps the report at the number of components the data supports, so
# a dataset with fewer than 13 features no longer raises.
for i, explained in enumerate(cumulative_variance[:MAX_REPORTED_COMPONENTS], start=1):
    print(f"Cumulative explained variance for {i} components: {explained:.4f}")
# Apply PCA with 2 components
# Project the scaled training data onto its first two principal components
# and colour every point by its fault label.
pca_2 = PCA(n_components=2)
X_pca_2D = pca_2.fit_transform(X_train_scaled)
principalDf = pd.DataFrame(X_pca_2D, columns=['PC1', 'PC2']).assign(
    Fault=np.array(y_train)
)
plt.figure(figsize=(10, 8))
sns.scatterplot(
    data=principalDf,
    x='PC1',
    y='PC2',
    hue='Fault',
    palette='Dark2',
    alpha=0.7,
)
plt.title("2D PCA Visualization")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.show()
# 3D PCA Visualization
# Project the scaled training data onto its first three principal components.
pca_3 = PCA(n_components=3)
X_pca_3D = pca_3.fit_transform(X_train_scaled)
principalDf = pd.DataFrame(data=X_pca_3D, columns=['PC1', 'PC2', 'PC3'])
principalDf['Fault'] = np.array(y_train)
# Draw the projection: one scatter call per fault class so that every class
# gets its own colour and legend entry regardless of the label dtype.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(projection='3d')
for fault, group in principalDf.groupby('Fault'):
    ax.scatter(group['PC1'], group['PC2'], group['PC3'], label=str(fault), alpha=0.7)
ax.set_title("3D PCA Visualization")
ax.set_xlabel("Principal Component 1")
ax.set_ylabel("Principal Component 2")
ax.set_zlabel("Principal Component 3")
ax.legend(title="Fault")
plt.show()
# ----------------------
# Train & Evaluate Models
# ----------------------
# Random Forest: 100 trees, seeded for reproducible results.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)
# SVM: the confusion-matrix plot and the final report further down both read
# y_pred_svm, so the classifier has to be trained and applied here.
svm_model = SVC(kernel='rbf', random_state=42)
svm_model.fit(X_train_scaled, y_train)
y_pred_svm = svm_model.predict(X_test_scaled)
# ----------------------
# Confusion Matrix Plot
# ----------------------
def plot_confusion_matrix(y_true, y_pred, model_name):
    """Draw the confusion matrix of one model as an annotated heatmap.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
        model_name: Name of the model, shown in the figure title.
    """
    # Use the union of true and predicted labels for both the matrix and the
    # tick labels: a class that only appears in the predictions would
    # otherwise add a row/column that has no matching tick label.
    class_labels = np.union1d(y_true, y_pred)
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)
    plt.figure(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",  # cells hold integer counts
        cmap="viridis",
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(f"Confusion Matrix - {model_name}")
    plt.show()
# Confusion matrices for both classifiers on the held-out test split.
plot_confusion_matrix(y_test, y_pred_rf, model_name="Random Forest")
plot_confusion_matrix(y_test, y_pred_svm, model_name="SVM")
# ----------------------
# Final Model Evaluations
# ----------------------
# For each model: overall accuracy followed by the per-class report.
print("\nRandom Forest Results:")
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Accuracy: {rf_accuracy:.4f}")
rf_report = classification_report(y_test, y_pred_rf)
print(rf_report)

print("\nSVM Results:")
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print(f"Accuracy: {svm_accuracy:.4f}")
svm_report = classification_report(y_test, y_pred_svm)
print(svm_report)