Ass - 10.ipynb - Colab
species
0 setosa
1 setosa
2 setosa
3 setosa
4 setosa
Features: Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
'petal width (cm)'],
dtype='object')
Label: species
# Part 1: Statistical Features and Feature Selection

# Z-score normalization: center each iris feature to mean 0, std 1.
z_score_scaler = StandardScaler()
scaled_values = z_score_scaler.fit_transform(iris_df)
iris_df_zscore = pd.DataFrame(scaled_values, columns=iris.feature_names)

# Summarize per-feature relevance using the previously computed
# mean and variance (from statistical_summary_df — defined in an
# earlier cell; assumed indexed by feature name).
feature_relevance = {
    'Feature': iris.feature_names,
    'Mean': statistical_summary_df['Mean'],
    'Variance': statistical_summary_df['Variance'],
}
feature_relevance_df = pd.DataFrame(feature_relevance)
print("\nFeature Relevance based on Mean and Variance:\n", feature_relevance_df)
Statistical Features:
Mean Median Variance Standard Deviation Minimum \
sepal length (cm) 5.843333 5.80 0.685694 0.828066 4.3
sepal width (cm) 3.057333 3.00 0.189979 0.435866 2.0
petal length (cm) 3.758000 4.35 3.116278 1.765298 1.0
petal width (cm) 1.199333 1.30 0.581006 0.762238 0.1
Maximum
sepal length (cm) 7.9
sepal width (cm) 4.4
petal length (cm) 6.9
petal width (cm) 2.5
# Pattern recognition
# Step 4: Evaluation

# Evaluate the grid-searched KNN model on the held-out, scaled test set.
knn_best = knn_grid.best_estimator_
y_pred_knn = knn_best.predict(X_test_scaled)

# Evaluate the grid-searched SVM model on the same test set.
svm_best = svm_grid.best_estimator_
y_pred_svm = svm_best.predict(X_test_scaled)

# Tabulate accuracy / precision / recall / F1 for both models.
# FIX: the 'Precision' line was truncated in the original export
# ("average='weigh") — restored to average='weighted' to match the
# other metrics in this cell.
metrics = {
    'Model': ['KNN', 'SVM'],
    'Accuracy': [accuracy_score(y_test, y_pred_knn),
                 accuracy_score(y_test, y_pred_svm)],
    'Precision': [precision_score(y_test, y_pred_knn, average='weighted'),
                  precision_score(y_test, y_pred_svm, average='weighted')],
    'Recall': [recall_score(y_test, y_pred_knn, average='weighted'),
               recall_score(y_test, y_pred_svm, average='weighted')],
    'F1 Score': [f1_score(y_test, y_pred_knn, average='weighted'),
                 f1_score(y_test, y_pred_svm, average='weighted')]
}
metrics_df = pd.DataFrame(metrics)
print("\nEvaluation Metrics:\n", metrics_df)
# Step 4: Evaluation
# NOTE(review): this cell duplicates the evaluation cell above —
# presumably re-run/copied in the notebook; kept for fidelity.

# Evaluate the grid-searched KNN model on the held-out, scaled test set.
knn_best = knn_grid.best_estimator_
y_pred_knn = knn_best.predict(X_test_scaled)

# Evaluate the grid-searched SVM model on the same test set.
svm_best = svm_grid.best_estimator_
y_pred_svm = svm_best.predict(X_test_scaled)

# Tabulate accuracy / precision / recall / F1 for both models.
# FIX: the 'Precision' line was truncated in the original export
# ("average='weig") — restored to average='weighted' to match the
# other metrics in this cell.
metrics = {
    'Model': ['KNN', 'SVM'],
    'Accuracy': [accuracy_score(y_test, y_pred_knn),
                 accuracy_score(y_test, y_pred_svm)],
    'Precision': [precision_score(y_test, y_pred_knn, average='weighted'),
                  precision_score(y_test, y_pred_svm, average='weighted')],
    'Recall': [recall_score(y_test, y_pred_knn, average='weighted'),
               recall_score(y_test, y_pred_svm, average='weighted')],
    'F1 Score': [f1_score(y_test, y_pred_knn, average='weighted'),
                 f1_score(y_test, y_pred_svm, average='weighted')]
}
metrics_df = pd.DataFrame(metrics)
print("\nEvaluation Metrics:\n", metrics_df)
# Step 6: Discussion
# Select the row of metrics_df with the highest accuracy as the winner
# and report its metrics (two-decimal formatting, same layout as before).
top_idx = metrics_df['Accuracy'].idxmax()
best_model = metrics_df.loc[top_idx]
print("\nBest Model:")
print(f"Model: {best_model['Model']}")
for column in ('Accuracy', 'Precision', 'Recall', 'F1 Score'):
    print(f"{column}: {best_model[column]:.2f}")
Evaluation Metrics:
Model Accuracy Precision Recall F1 Score
0 KNN 1.000000 1.000000 1.000000 1.000000
1 SVM 0.977778 0.979365 0.977778 0.977745
Best Model:
Model: KNN
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
# Deliverables:
# Z-score normalization of the iris features (repeat of the Part 1 step,
# kept as a stand-alone deliverable cell).
z_score_scaler = StandardScaler()
normalized = z_score_scaler.fit_transform(iris_df)
iris_df_zscore = pd.DataFrame(normalized, columns=iris.feature_names)