vertopal.com_python2025
vertopal.com_python2025
#roll no 08
# PGDM-A
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Parenthesised so the import can span several lines; the exported form
# broke after the trailing comma, which is a syntax error.
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import seaborn as sns
# Load the dataset from a CSV file (relative path, so it is resolved
# against the current working directory) and preview the first rows.
df = pd.read_csv('Diabitiese.csv')
df.head()
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
# Preview the last rows of the frame.
df.tail()
767 1 93 70 31 0 30.4
# Summary statistics per column, then the (rows, columns) shape.
df.describe()
df.shape
(769, 9)
# Count missing values in each column.
df.isnull().sum()
Pregnancies 0
Glucose 0
BloodPressure 0
SkinThickness 0
Insulin 0
BMI 0
DiabetesPedigreeFunction 0
Age 0
Outcome 0
dtype: int64
# Plot BloodPressure against Pregnancies (relplot draws a scatter plot
# unless another kind is requested).
sns.relplot(x='Pregnancies',y='BloodPressure',data=df)
<seaborn.axisgrid.FacetGrid at 0x14628ebf3b0>
# Plot BloodPressure against DiabetesPedigreeFunction.
sns.relplot(x='DiabetesPedigreeFunction',y='BloodPressure',data=df)
<seaborn.axisgrid.FacetGrid at 0x14628f7e390>
# Replace any missing values with 0, modifying df in place, then print it.
# NOTE(review): the isnull().sum() output recorded in this export reports
# 0 missing values for every column, so this fill looks like a no-op for
# this dataset -- confirm it is still wanted.
df.fillna(0, inplace=True)
print(df)
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
767 1 93 70 31 0 30.4
# Display the two objects that are later passed to train_test_split.
# NOTE(review): `train` and `test` are never assigned in the visible
# export; the cell that defined them appears to have been lost. The
# output recorded for `test` is named "Insulin", so presumably
# test = df['Insulin'] -- confirm and restore both assignments.
train
test
0 0
1 0
2 0
3 94
4 168
...
764 0
765 112
766 0
767 0
768 1
Name: Insulin, Length: 769, dtype: int64
# Hold out 30% of the rows for evaluation; random_state pins the shuffle
# so the split is reproducible.
# NOTE: despite their names, `train` is passed as the feature set and
# `test` as the target (first and second positional arguments).
# The exported line was hard-wrapped inside the literal 0.3 ("0." / "3"),
# which is a syntax error; the call is wrapped inside its parentheses here.
X_train, X_test, y_train, y_test = train_test_split(
    train, test, test_size=0.3, random_state=2
)
X_train, X_test, y_train, y_test
# Create a linear regression estimator with default settings and
# display it.
regression = LinearRegression()
regression
LinearRegression()
# Fit the regression on the training split.
regression.fit(X_train,y_train)
LinearRegression()
# Predict the target for the held-out rows and display the predictions.
predict = regression.predict(X_test)
predict
# score() reports the coefficient of determination (R^2) on the held-out
# split. NOTE(review): the value recorded in this export is about 0.055,
# i.e. the fit explains very little of the target's variance.
regression.score(X_test,y_test)
0.054992667713272936
# Set up cufflinks so that DataFrame.iplot() can be used offline.
import cufflinks as cf
# NOTE(review): init_notebook_mode is not imported anywhere in the visible
# export, so this line raises NameError as written. It is presumably
# plotly.offline.init_notebook_mode -- confirm and restore that import.
init_notebook_mode(connected=True)
cf.go_offline()
# Plot the frame with pandas' own .plot() (the recorded output is a
# matplotlib Axes object).
df.plot()
<Axes: >
# Interactive chart of the whole frame using iplot's default settings.
df.iplot()
<Axes: xlabel='BloodPressure'>
# Interactive iplot charts of the dataset.
df.iplot(kind='bar')
# Non-null count per column, then column totals, as bar charts.
df.count().iplot(kind='bar')
df.sum().iplot(kind='bar')
df.iplot(kind='box')
df['Pregnancies'].iplot(kind='hist', bins=25)
df.iplot(kind='hist')
# The next two calls were split in the middle of a string literal by the
# export's hard line wrap ('marker' + 's', and 'Outcome' + closing
# quote), which is a syntax error. The literals are rejoined and the
# calls are wrapped inside their parentheses instead.
df.iplot(
    kind='scatter',
    x='Pregnancies',
    y='BloodPressure',
    mode='markers',
    size=20,
)
df.iplot(
    kind='bubble',
    x='Pregnancies',
    y='BloodPressure',
    size='Outcome',
)
# NOTE(review): scatter_matrix is not a built-in pandas DataFrame method;
# presumably it is the one cufflinks attaches on import -- confirm.
df.scatter_matrix()
# x: every column except the last one. y: the single column at
# position 6 (the recorded output names it DiabetesPedigreeFunction).
# NOTE(review): position 6 is not the last column, so y's column is still
# present in x. If y is meant to be the target for x, that leaks the
# target into the features -- confirm the intended columns.
x = df.iloc[:, :-1]
y = df.iloc[:, 6]
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
767 1 93 70 31 0 30.4
DiabetesPedigreeFunction Age
0 0.627 50
1 0.351 31
2 0.672 32
3 0.167 21
4 2.288 33
.. ... ...
764 0.340 27
765 0.245 30
766 0.349 47
767 0.315 23
768 0.252 55
0 0.627
1 0.351
2 0.672
3 0.167
4 2.288
...
764 0.340
765 0.245
766 0.349
767 0.315
768 0.252
Name: DiabetesPedigreeFunction, Length: 769, dtype: float64
# Preview the first rows of x.
x.head()
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
DiabetesPedigreeFunction Age
0 0.627 50
1 0.351 31
2 0.672 32
3 0.167 21
4 2.288 33
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
767 1 93 70 31 0 30.4
DiabetesPedigreeFunction Age
0 0.627 50
1 0.351 31
2 0.672 32
3 0.167 21
4 2.288 33
.. ... ...
764 0.340 27
765 0.245 30
766 0.349 47
767 0.315 23
768 0.252 55
# Summary statistics for y.
y.describe()
count 769.000000
mean 0.471590
std 0.331208
min 0.078000
25% 0.244000
50% 0.371000
75% 0.626000
max 2.420000
Name: DiabetesPedigreeFunction, dtype: float64
# NOTE(review): the dict literal below is truncated in this export (its
# entries and closing brace are missing), so this cell cannot run as
# written. Restore the original data before using it.
data = {
df = pd.DataFrame(data)
# NOTE(review): "Insulin" is selected both as a feature in X and as the
# target y, so the target is part of the model input -- confirm the
# intended feature/target columns.
X = df[["Pregnancies", "Insulin"]]
y = df["Insulin"]
model = LogisticRegression()
# NOTE(review): X and y are not split anywhere in the visible code;
# X_train / y_train here are whatever an earlier split produced. A
# train_test_split(X, y, ...) cell appears to be missing -- confirm.
model.fit(X_train, y_train)
LogisticRegression()
# Predict class labels for the held-out features.
# NOTE(review): the accuracy, confusion-matrix and classification-report
# output recorded after this line has no matching code in the export;
# the statements that printed it appear to have been lost.
y_pred = model.predict(X_test)
Accuracy: 1.0
Confusion Matrix:
[[1 0]
[0 2]]
Classification Report:
precision recall f1-score support
accuracy 1.00 3
macro avg 1.00 1.00 1.00 3
weighted avg 1.00 1.00 1.00 3
# Print the fitted model's intercept and coefficients.
print("\nIntercept:", model.intercept_)
print("Coefficients:", model.coef_)
Intercept: [-1.83267869]
Coefficients: [[0.69083358 0.79529279]]