Diabetes Prediction - Logistic Regression - Jupyter Notebook
Diabetes Prediction - Logistic Regression - Jupyter Notebook
[1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score, plot_co
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
In [2]:
data = pd.read_csv('diabetes.csv')
data.head()
Out[2]:
1 1 85 66 29 0 26.6 0.35
3 1 89 66 23 94 28.1 0.167
In [3]:
data.groupby('Outcome').mean()
Out[3]:
Outcome
In [4]:
X=data.drop(columns='Outcome',axis=1)
y=data['Outcome']
print(X.shape,y.shape)
(768, 8) (768,)
where u is the mean of the training samples or zero if with_mean=False, and s is the standard deviation
of the training samples or one if with_std=False.
In [5]:
scalar=StandardScaler()
X_standard=scalar.fit_transform(X)
print(X_standard)
1.4259954 ]
-0.19067191]
-0.10558415]
...
-0.27575966]
1.17073215]
-0.87137393]]
In [6]:
n,y_test=train_test_split(X,y,random_state=2)
andard_test,y_standard_train,y_standard_test=train_test_split(X_standard,y,random_state=2)
In [7]:
lr=LogisticRegression()
lr_standard=LogisticRegression()
In [8]:
lr.fit(X_train,y_train)
lr_standard.fit(X_standard_train,y_standard_train)
C:\Users\SUPER\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.p
y:762: ConvergenceWarning: lbfgs failed to converge (status=1):
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html (https://scik
it-learn.org/stable/modules/preprocessing.html)
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regre
ssion (https://scikit-learn.org/stable/modules/linear_model.html#logistic-re
gression)
n_iter_i = _check_optimize_result(
Out[8]:
LogisticRegression()
In [9]:
y_pred = lr.predict(X_test)
y_standard_pred = lr_standard.predict(X_standard_test)
print(accuracy_score(y_test,y_pred))
print(accuracy_score(y_test,y_standard_pred))
0.7604166666666666
0.765625
In [10]:
plt.figure(dpi=150)
sns.heatmap(data.corr(),annot=True)
Out[10]:
<AxesSubplot:>
In [11]:
Out[11]:
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x22b8a65d
4f0>
In [ ]: