0% found this document useful (0 votes)
0 views · 6 pages

Import As Import As From Import From Import From Import From Import

The document contains code for implementing and evaluating various classification models using Logistic Regression and Support Vector Machine (SVM) on synthetic datasets. It includes metrics such as precision, recall, F1-score, and confusion matrices, along with visualizations of precision-recall curves. Additionally, it explores the impact of class weighting and optimizing decision thresholds on model performance.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOC, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
0 views · 6 pages

Import As Import As From Import From Import From Import From Import

The document contains code for implementing and evaluating various classification models using Logistic Regression and Support Vector Machine (SVM) on synthetic datasets. It includes metrics such as precision, recall, F1-score, and confusion matrices, along with visualizations of precision-recall curves. Additionally, it explores the impact of class weighting and optimizing decision thresholds on model performance.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOC, PDF, TXT or read online on Scribd
You are on page 1/ 6

QUESTION 1:

# Question 1 setup: imports, an imbalanced synthetic dataset, and a
# train/test split.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# The name list is parenthesised so it can legally span several lines; an
# import line that ends on a bare comma is a SyntaxError.
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix,
)

# 5000 samples, 20 features, two classes requested at a 95% / 5% ratio.
# flip_y=0.01 asks for 1% of the labels to be assigned at random, and
# random_state pins the generator so reruns produce the same data.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    n_classes=2,
    weights=[0.95, 0.05],
    flip_y=0.01,
    random_state=42,
)

# Hold out 20% of the rows for evaluation, again with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Fit a default (unweighted) logistic regression and predict the held-out rows.
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Compute all three scores first, keyed by the heading they are printed under,
# then print them in the same order.
scores = {
    "Precision:": precision_score(y_test, y_pred),
    "Recall:": recall_score(y_test, y_pred),
    "F1-score:": f1_score(y_test, y_pred),
}
precision, recall, f1 = scores.values()
for heading, score in scores.items():
    print(heading, score)

# Per-class breakdown of the same predictions.
report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)

Precision: 0.72
Recall: 0.32142857142857145
F1-score: 0.4444444444444444

Classification Report:
precision recall f1-score support

0 0.96 0.99 0.98 944


1 0.72 0.32 0.44 56

accuracy 0.95 1000


macro avg 0.84 0.66 0.71 1000
weighted avg 0.95 0.95 0.95 1000

# Confusion Matrix
# Rows are the true labels and columns the predicted labels, matching the
# axis titles set below.
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", cm)
import seaborn as sns

# Both axes use the same two class names, in label order 0, 1.
class_names = ['Not Fraud', 'Fraud']

plt.figure(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,       # write the count inside every cell
    fmt='d',          # counts are integers
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

Confusion Matrix:
[[937 7]
[ 38 18]]
QUESTION 2:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_curve, classification_report

# Question 2 data: same generator settings as before except for a 90% / 10%
# class ratio.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    n_classes=2,
    weights=[0.9, 0.1],
    flip_y=0.01,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Linear-kernel SVM; probability=True is required for predict_proba.
svm_model = SVC(kernel='linear', probability=True)
svm_model.fit(X_train, y_train)

# Column 1 of predict_proba is the score for class 1.
y_scores = svm_model.predict_proba(X_test)[:, 1]

# Precision and recall at every candidate decision threshold.
precision, recall, thresholds = precision_recall_curve(y_test, y_scores)
# Figure 1: precision against recall.
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, marker='.', label="Precision-Recall Curve")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve for SVM")
plt.legend()
plt.grid()
plt.show()

# Hard predictions at a fixed cut-off on the class-1 score.
threshold = 0.5
at_or_above = y_scores >= threshold
y_pred = at_or_above.astype(int)
print(f"\nClassification Report at threshold={threshold}:\n")
print(classification_report(y_test, y_pred))

# Figure 2: precision and recall as functions of the threshold.
# Both arrays carry one more entry than `thresholds`, so the last one is
# dropped to make the lengths line up.
plt.figure(figsize=(8, 6))
for curve, curve_label in ((precision, "Precision"), (recall, "Recall")):
    plt.plot(thresholds, curve[:-1], label=curve_label)
plt.xlabel("Decision Threshold")
plt.ylabel("Score")
plt.title("Precision and Recall vs Threshold")
plt.legend()
plt.grid()
plt.show()
Classification Report at threshold=0.5:
precision recall f1-score support
0 0.95 0.98 0.96 908
1 0.66 0.46 0.54 92
accuracy 0.93 1000
macro avg 0.80 0.72 0.75 1000
weighted avg 0.92 0.93 0.92 1000
QUESTION 3:
# Question 3 setup: imports, an imbalanced synthetic dataset, and a
# train/test split.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# The name list is parenthesised so it can legally span several lines; an
# import line that ends on a bare comma is a SyntaxError.
from sklearn.metrics import (
    precision_recall_curve,
    classification_report,
    accuracy_score,
)

# 5000 samples, 20 features, two classes requested at a 90% / 10% ratio,
# with 1% of labels assigned at random (flip_y) and a fixed seed.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    n_classes=2,
    weights=[0.9, 0.1],
    flip_y=0.01,
    random_state=42,
)

# Hold out 20% of the rows for evaluation. The split is performed once: with
# random_state fixed, a second identical call would only recompute the same
# partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Baseline: logistic regression without any class weighting.
logreg_baseline = LogisticRegression(random_state=42)
logreg_baseline.fit(X_train, y_train)

# Hard labels feed the report; class-1 probabilities feed the PR curve.
y_pred_baseline = logreg_baseline.predict(X_test)
y_prob_baseline = logreg_baseline.predict_proba(X_test)[:, 1]

# The threshold array returned by precision_recall_curve is not needed here.
precision, recall, _ = precision_recall_curve(y_test, y_prob_baseline)

print("BASELINE MODEL (Logistic Regression):")
print(f"Accuracy: {accuracy_score(y_test, y_pred_baseline):.4f}")
print(classification_report(y_test, y_pred_baseline))

# Precision-recall curve of the baseline on its own.
plt.figure(figsize=(8, 6))
plt.plot(
    recall,
    precision,
    marker='.',
    label="Baseline Model",
)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve for Logistic Regression")
plt.legend()
plt.grid()
plt.show()

BASELINE MODEL (Logistic Regression):


Accuracy: 0.9280
precision recall f1-score support
0 0.95 0.97 0.96 908
1 0.64 0.49 0.56 92
accuracy 0.93 1000
macro avg 0.80 0.73 0.76 1000
weighted avg 0.92 0.93 0.92 1000
# CLASS-WEIGHTED MODEL
# Same estimator as the baseline, but fitted with class_weight='balanced'.
logreg_weighted = LogisticRegression(
    class_weight='balanced',
    random_state=42,
)
logreg_weighted.fit(X_train, y_train)

# Hard labels for the report, class-1 probabilities for the curve.
y_pred_weighted = logreg_weighted.predict(X_test)
y_prob_weighted = logreg_weighted.predict_proba(X_test)[:, 1]

# Precision-recall curve of the weighted model (thresholds discarded).
precision_w, recall_w, _ = precision_recall_curve(
    y_test, y_prob_weighted
)

# Report the weighted model in the same format as the baseline.
print("MODEL WITH CLASS WEIGHTING:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_weighted):.4f}")
print(classification_report(y_test, y_pred_weighted))

MODEL WITH CLASS WEIGHTING:


Accuracy: 0.8410
precision recall f1-score support
0 0.98 0.84 0.91 908
1 0.35 0.85 0.50 92
accuracy 0.84 1000
macro avg 0.67 0.84 0.70 1000
weighted avg 0.92 0.84 0.87 1000

# Overlay the baseline and class-weighted precision-recall curves on one
# figure so they can be compared directly.
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, marker='.', label="Baseline Model")
# The label is kept as a single-line literal: a quoted string broken across
# two physical lines is an unterminated string and does not parse.
plt.plot(
    recall_w,
    precision_w,
    marker='.',
    linestyle='dashed',
    label="Class Weighted Model",
)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Comparison of Precision-Recall Curves")
plt.legend()
plt.grid()
plt.show()

QUESTION 4:
# Question 4 setup: imports, data, a fitted logistic regression, and its
# precision-recall curve.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# The name list is parenthesised so it can legally span several lines; an
# import line that ends on a bare comma is a SyntaxError.
from sklearn.metrics import (
    precision_recall_curve,
    classification_report,
    accuracy_score,
)

# 5000 samples, 20 features, two classes requested at a 90% / 10% ratio,
# with 1% of labels assigned at random (flip_y) and a fixed seed.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    n_classes=2,
    weights=[0.9, 0.1],
    flip_y=0.01,
    random_state=42,
)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

# Column 1 of predict_proba is the score for class 1.
y_scores = model.predict_proba(X_test)[:, 1]

# `precision` and `recall` each hold one more entry than `thresholds`.
precision, recall, thresholds = precision_recall_curve(y_test, y_scores)


# Pick the decision threshold that gives the highest precision.
#
# precision_recall_curve returns len(thresholds) + 1 precision values; the
# final one is a fixed 1.0 that has no corresponding threshold. Searching
# the full array can therefore select that last slot whenever no real
# threshold reaches a precision of 1.0, and thresholds[best_idx] would then
# raise IndexError. Dropping the final entry keeps the index valid for
# `thresholds`, and for `precision` / `recall` as well. When a real
# threshold does reach 1.0 the chosen index is unchanged.
best_idx = np.argmax(precision[:-1])  # Index of maximum precision
best_threshold = thresholds[best_idx]
# Single-line f-string: a quoted string broken across two physical lines is
# an unterminated literal and does not parse.
print(f"Optimal Decision Threshold for Maximum Precision: {best_threshold:.4f}")

# Re-label the test rows using the chosen cut-off on the class-1 score.
y_pred_optimized = (y_scores >= best_threshold).astype(int)

print("Model Evaluation with Optimized Threshold:")
print(classification_report(y_test, y_pred_optimized))

Optimal Decision Threshold for Maximum Precision: 0.9439


Model Evaluation with Optimized Threshold:
precision recall f1-score support
0 0.92 1.00 0.96 908
1 1.00 0.09 0.16 92
accuracy 0.92 1000
macro avg 0.96 0.54 0.56 1000
weighted avg 0.92 0.92 0.88 1000

# Precision-recall curve with the selected operating point highlighted.
plt.figure(figsize=(8, 6))
plt.plot(
    recall,
    precision,
    marker='.',
    label="Precision-Recall Curve",
)

# Coordinates of the chosen threshold on the curve.
optimal_recall = recall[best_idx]
optimal_precision = precision[best_idx]
plt.scatter(
    optimal_recall,
    optimal_precision,
    marker='o',
    color='red',
    label="Optimal Point",
)

plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve with Optimal Threshold")
plt.legend()
plt.grid()
plt.show()

You might also like