import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (precision_score, recall_score, f1_score,
                             classification_report, confusion_matrix)
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.95, 0.05], flip_y=0.01, random_state=42)
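The training and scoring code that produced the metrics below is not included in the extract. A minimal sketch of the assumed steps (an 80/20 split and a default logistic regression; both the split size and random_state are assumptions) is:

# Assumed steps: split the data, fit a logistic regression, score the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                     random_state=42)
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1-score:", f1_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))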
Precision: 0.72
Recall: 0.32142857142857145
F1-score: 0.4444444444444444
Classification Report:
precision recall f1-score support
# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", cm)
import seaborn as sns
plt.figure(figsize=(6,4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Not Fraud', 'Fraud'],
            yticklabels=['Not Fraud', 'Fraud'])
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()
Confusion Matrix:
 [[937   7]
 [ 38  18]]
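These counts are consistent with the scores printed above: reading the matrix as TP = 18, FP = 7, FN = 38 for the fraud class gives precision = 18 / (18 + 7) = 0.72 and recall = 18 / (18 + 38) ≈ 0.321.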
QUESTION 2:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_curve, classification_report
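The data generation and model fitting that feed the threshold plot below are not shown in the extract. A minimal sketch, assuming the same kind of imbalanced dataset as before (weights=[0.9, 0.1] is an assumption, consistent with the 908/92 support in the report) and an SVC with probability estimates enabled, is:

# Assumed setup: imbalanced data, probability-enabled SVC, and the PR-curve arrays
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.9, 0.1], flip_y=0.01, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                     random_state=42)

model = SVC(probability=True, random_state=42)
model.fit(X_train, y_train)
y_scores = model.predict_proba(X_test)[:, 1]

# precision and recall have one more entry than thresholds,
# hence the [:-1] slicing in the plot below
precision, recall, thresholds = precision_recall_curve(y_test, y_scores)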
plt.figure(figsize=(8,6))
plt.plot(thresholds, precision[:-1], label="Precision")
plt.plot(thresholds, recall[:-1], label="Recall")
plt.xlabel("Decision Threshold")
plt.ylabel("Score")
plt.title("Precision and Recall vs Threshold")
plt.legend()
plt.grid()
plt.show()
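The code that produces the report below is also missing from the extract; under the same assumptions (y_scores and y_test as defined in the sketch above), thresholding the probabilities at 0.5 and printing the report would look like:

# Assumed: convert probabilities to hard labels at the default 0.5 threshold
y_pred_05 = (y_scores >= 0.5).astype(int)
print("Classification Report at threshold=0.5:\n",
      classification_report(y_test, y_pred_05))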
Classification Report at threshold=0.5:
              precision    recall  f1-score   support

           0       0.95      0.98      0.96       908
           1       0.66      0.46      0.54        92

    accuracy                           0.93      1000
   macro avg       0.80      0.72      0.75      1000
weighted avg       0.92      0.93      0.92      1000
QUESTION 3:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (precision_recall_curve, classification_report,
                             accuracy_score)
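The two models being compared in the plot below are not defined in the extract. A minimal sketch, assuming a baseline logistic regression and a second model with class_weight='balanced' fit on the same imbalanced data (dataset parameters are assumptions carried over from the other questions), is:

# Assumed setup: baseline vs. class-weighted logistic regression on the same split
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.9, 0.1], flip_y=0.01, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                     random_state=42)

baseline = LogisticRegression(random_state=42)
baseline.fit(X_train, y_train)
precision, recall, _ = precision_recall_curve(
    y_test, baseline.predict_proba(X_test)[:, 1])

weighted = LogisticRegression(class_weight='balanced', random_state=42)
weighted.fit(X_train, y_train)
precision_w, recall_w, _ = precision_recall_curve(
    y_test, weighted.predict_proba(X_test)[:, 1])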
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, marker='.', label="Baseline Model")
plt.plot(recall_w, precision_w, marker='.', linestyle='dashed',
         label="Class Weighted Model")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Comparison of Precision-Recall Curves")
plt.legend()
plt.grid()
plt.show()
QUESTION 4:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (precision_recall_curve, classification_report,
                             accuracy_score)
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.9, 0.1], flip_y=0.01, random_state=42)
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                     random_state=42)
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)
y_scores = model.predict_proba(X_test)[:, 1]
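The extract jumps from the probability scores straight to the plot; the precision-recall curve and the "optimal point" index it references must be computed first. A minimal sketch of the assumed step (picking the threshold that maximises F1 along the curve) is:

# Assumed: locate the threshold with the highest F1 score along the PR curve
precision, recall, thresholds = precision_recall_curve(y_test, y_scores)
f1_scores = 2 * precision * recall / (precision + recall + 1e-12)  # avoid 0/0
best_idx = np.argmax(f1_scores[:-1])  # the last PR point has no threshold
print("Best threshold:", thresholds[best_idx])
print("Precision:", precision[best_idx], "Recall:", recall[best_idx])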
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, marker='.', label="Precision-Recall Curve")
plt.scatter(recall[best_idx], precision[best_idx], marker='o', color='red',
            label="Optimal Point")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve with Optimal Threshold")
plt.legend()
plt.grid()
plt.show()