Fraud Transaction Detection - Ipynb - Colab - Rameshkumar
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
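The cell that loads the CSV file into credit_card_data is missing from this export; a minimal sketch, assuming the standard credit card CSV file name (the path 'creditcard.csv' is an assumption):
# loading the dataset into a pandas DataFrame (file name is an assumption)
credit_card_data = pd.read_csv('creditcard.csv')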
credit_card_data.head()
[Output: first 5 rows × 31 columns of credit_card_data (Time, V1–V28, Amount, Class)]
credit_card_data.tail()
[Output: last 5 rows × 31 columns of credit_card_data (indices 15931–15935, Time 27369–27371)]
# dataset information
credit_card_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15936 entries, 0 to 15935
Data columns (total 31 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Time 15936 non-null int64
1 V1 15936 non-null float64
2 V2 15936 non-null float64
3 V3 15936 non-null float64
4 V4 15936 non-null float64
5 V5 15936 non-null float64
6 V6 15936 non-null float64
7 V7 15936 non-null float64
8 V8 15936 non-null float64
9 V9 15936 non-null float64
10 V10 15936 non-null float64
11 V11 15936 non-null float64
12 V12 15936 non-null float64
13 V13 15936 non-null float64
14 V14 15936 non-null float64
15 V15 15936 non-null float64
16 V16 15936 non-null float64
17 V17 15936 non-null float64
18 V18 15936 non-null float64
19 V19 15936 non-null float64
20 V20 15936 non-null float64
21 V21 15936 non-null float64
22 V22 15936 non-null float64
23 V23 15935 non-null float64
24 V24 15935 non-null float64
25 V25 15935 non-null float64
26 V26 15935 non-null float64
27 V27 15935 non-null float64
28 V28 15935 non-null float64
29 Amount 15935 non-null float64
30 Class 15935 non-null float64
dtypes: float64(30), int64(1)
memory usage: 3.8 MB
credit_card_data.isnull().sum()
Time 0
V1 0
V2 0
V3 0
V4 0
V5 0
V6 0
V7 0
V8 0
V9 0
V10 0
V11 0
V12 0
V13 0
V14 0
V15 0
V16 0
V17 0
V18 0
V19 0
V20 0
V21 0
V22 0
V23 1
V24 1
V25 1
V26 1
V27 1
V28 1
Amount 1
Class 1
dtype: int64
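The counts above show exactly one row with missing values (columns V23 through Class). The original notebook does not show how this row is handled; one option, given here only as a hedged sketch, is to drop it before the analysis:
# optional: drop the single incomplete row (this step is an assumption, not shown above)
credit_card_data = credit_card_data.dropna()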
credit_card_data['Class'].value_counts()
0.0 15862
1.0 73
Name: Class, dtype: int64
# separating the legit (Class 0) and fraudulent (Class 1) transactions
legit = credit_card_data[credit_card_data.Class == 0]
fraud = credit_card_data[credit_card_data.Class == 1]
print(legit.shape)
print(fraud.shape)
(15862, 31)
(73, 31)
legit.Amount.describe()
count 15862.000000
mean 66.280151
std 188.898885
min 0.000000
25% 5.522500
50% 15.950000
75% 53.890000
max 7712.430000
Name: Amount, dtype: float64
fraud.Amount.describe()
count 73.000000
mean 90.307123
std 271.634360
min 0.000000
25% 1.000000
50% 1.000000
75% 99.990000
max 1809.680000
Name: Amount, dtype: float64
# compare the mean of each feature for the two classes
credit_card_data.groupby('Class').mean()
[Output: per-feature means grouped by Class; 2 rows × 30 columns]
# under-sampling: combine a random sample of legit transactions with all 73 fraud transactions
legit_sample = legit.sample(n=492)
new_dataset = pd.concat([legit_sample, fraud], axis=0)
new_dataset.head()
[Output: first 5 rows × 31 columns of new_dataset (sampled legit transactions)]
new_dataset.tail()
[Output: last 5 rows × 31 columns of new_dataset (fraud transactions)]
new_dataset['Class'].value_counts()
0.0 492
1.0 73
Name: Class, dtype: int64
new_dataset.groupby('Class').mean()
[Output: per-feature means of new_dataset grouped by Class; 2 rows × 30 columns]
X = new_dataset.drop(columns='Class', axis=1)
Y = new_dataset['Class']
print(X)
Time V1 V2 V3 V4 V5 V6 \
117 76 -1.024576 0.522289 1.787699 0.202672 -1.140803 -0.137831
8137 11032 1.302043 -0.261503 -0.722756 -0.853728 1.700463 3.426329
12944 22750 1.244831 -0.323488 0.669388 -0.759828 -0.533733 0.174007
15403 26790 -0.510225 0.181672 1.449477 -1.939204 0.559337 -0.697591
1056 803 1.152021 -0.404516 0.864618 -0.026721 -0.880554 -0.112351
... ... ... ... ... ... ... ...
15566 26961 -23.237920 13.487386 -25.188773 6.261733 -17.345188 -4.534989
15736 27163 -23.914101 13.765942 -25.733734 6.290918 -17.784824 -4.572498
15751 27187 -24.590245 14.044567 -26.278701 6.320089 -18.224513 -4.609968
15781 27219 -25.266355 14.323254 -26.823673 6.349248 -18.664251 -4.647403
15810 27252 -25.942434 14.601998 -27.368650 6.378395 -19.104033 -4.684806
[remaining column blocks omitted]
[565 rows x 30 columns]
print(Y)
117 0.0
8137 0.0
12944 0.0
15403 0.0
1056 0.0
...
15566 1.0
15736 1.0
15751 1.0
15781 1.0
15810 1.0
Name: Class, Length: 565, dtype: float64
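The train/test split cell is missing from this export; a minimal sketch, assuming a stratified 80/20 split (the test_size, stratify and random_state values are assumptions):
# splitting the data into training data & testing data (parameter values are assumptions)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)
print(X.shape, X_train.shape, X_test.shape)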
Model Training
Logistic Regression
model = LogisticRegression()
model.fit(X_train, Y_train)
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
LogisticRegression()
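The ConvergenceWarning above means lbfgs hit its iteration limit; as the message suggests, either raise max_iter or scale the features. A minimal sketch of the scaling option (a suggested remedy, not part of the original notebook):
# optional remedy for the convergence warning: scale the features inside a pipeline
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
scaled_model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
scaled_model.fit(X_train, Y_train)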
Model Evaluation
Accuracy Score
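The evaluation cells did not survive this export, so no accuracy figures are reproduced here; a minimal sketch of the usual accuracy check with accuracy_score, assuming the variable names used above:
# accuracy on the training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy on Training data : ', training_data_accuracy)
# accuracy on the test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score on Test Data : ', test_data_accuracy)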