E21CSEU0770 Lab4
E21CSEU0770 Lab4
ipynb - Colaboratory
# Imports shared by the cells below.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree


# Load the Iris dataset and collect features and label in a single DataFrame:
# one column per feature name, plus a 'target' column with the class label.
iris = load_iris()

df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target

# Default, untuned decision tree.
# NOTE(review): no visible cell uses `clf`; the grid search constructs its own
# estimator. Kept because other cells of the notebook may rely on it.
clf = DecisionTreeClassifier()
# Hyperparameter search space for the decision tree.
# 4 depths x 4 split sizes x 3 leaf sizes = 48 candidate combinations.
param_grid = {
    'max_depth': [2, 4, 6, 8],
    'min_samples_split': [2, 4, 6, 8],
    'min_samples_leaf': [1, 2, 3],
}
# Exhaustive search over param_grid, scoring each candidate by 5-fold
# cross-validated accuracy on the Iris features.
# random_state is fixed because a decision tree permutes features at every
# split; without a seed, ties between equally good splits can be broken
# differently from run to run, so the reported best parameters would not be
# reproducible.
grid_search = GridSearchCV(
    DecisionTreeClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(df.drop('target', axis=1), df['target'])

# Best parameter combination and its mean cross-validated accuracy.
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Hyperparameters:", best_params)
print("Best Score (Accuracy):", best_score)
q2
import pandas as pd

# Read the credit-card dataset; creditcard.csv is looked up relative to the
# current working directory.
# Note: this rebinds `df`, replacing the Iris DataFrame built earlier.
df = pd.read_csv('creditcard.csv')

# Quick look at the data: (rows, columns) and per-column summary statistics.
print("Dataset Shape:", df.shape)
print("Statistical Summary:")
print(df.describe())
V5 V6 V7 V8 V9 \
count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05
mean 9.604066e-16 1.487313e-15 -5.556467e-16 1.213481e-16 -2.406331e-15
std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00
min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01
25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01
50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02
75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01
max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01
Class
count 284807.000000
mean 0.001727
std 0.041527
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
[8 rows x 31 columns]
# Separate the predictors from the label: every column except 'Class' is a
# feature, and 'Class' is the target.
X = df.drop('Class', axis=1)
y = df['Class']

print("Features Shape (X):", X.shape)
print("Target Shape (y):", y.shape)
Features Shape (X): (284807, 30)
Target Shape (y): (284807,)
# Decision tree with class_weight="balanced", which weights classes inversely
# to their frequency. The positive class is rare here: the mean of 'Class' in
# the summary above is about 0.0017.
dt_model_balanced = DecisionTreeClassifier(class_weight="balanced")

# NOTE(review): `rkf` is not defined in any cell visible in this file, so this
# line raises NameError unless it is created beforehand. The recorded output
# holds 10 scores, so it is presumably a splitter producing 10 train/test
# splits (e.g. RepeatedKFold with n_splits=5, n_repeats=2) -- confirm against
# the original notebook.
# NOTE(review): the tree is not seeded, so the scores vary between runs.
roc_auc_scores_balanced = cross_val_score(dt_model_balanced, X, y, cv=rkf, scoring='roc_auc')
print("Balanced Class ROC-AUC Scores:", roc_auc_scores_balanced)
print("Mean ROC-AUC (Balanced Class):", roc_auc_scores_balanced.mean())
Balanced Class ROC-AUC Scores: [0.87726965 0.84671016 0.8976954 0.86718867 0.86725901 0.85985931
0.91980655 0.85700217 0.87741033 0.89766021]
Mean ROC-AUC (Balanced Class): 0.8767861446135539
https://colab.research.google.com/drive/18ZAzdOE2FZtW5RKLs5wsWGw5syPidvh-#printMode=true 3/4
15/09/2023, 15:44 E21CSEU0962_Lab4.ipynb - Colaboratory
https://colab.research.google.com/drive/18ZAzdOE2FZtW5RKLs5wsWGw5syPidvh-#printMode=true 4/4