Set 3
Set 3
12. Use the Pima Indians Diabetes data set and perform bivariate analysis.
BIVARIATE ANALYSIS
[]
# Bivariate setup: one predictor column (Glucose) against the Outcome column.
# `split` is taken from NumPy's public namespace; the original import went
# through the private `numpy.lib.shape_base` path, which NumPy 2.x no longer
# exposes.
from numpy import split
from sklearn.model_selection import train_test_split

# Double brackets keep both selections as 2-D DataFrames.
x = df[['Glucose']]
y = df[['Outcome']]

# Hold out 20% of the rows for testing; random_state=0 makes the split
# repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)
Linear Regression
[]
# Fit a linear regression on the training split, then print the fitted
# coefficient(s) followed by the intercept.
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and fitting can be chained.
regressor = LinearRegression().fit(x_train, y_train)
for fitted_param in (regressor.coef_, regressor.intercept_):
    print(fitted_param)
account_circle
[[0.00693575]]
[-0.4785685]
[]
# Compute the test-set predictions here so the plot does not rely on a
# `y_pred` variable that this section never assigns.
y_pred = regressor.predict(x_test)

# Training observations in green, fitted regression line in black.
plt.scatter(x_train, y_train, color='g')
plt.plot(x_test, y_pred, color='k')
account_circle
[]
# Seaborn regression plot of Outcome against Glucose, drawn from `df`;
# ci=None is passed so no confidence interval is requested.
sns.lmplot(data=df, x='Glucose', y='Outcome', ci=None)
account_circle
[]
# Error metrics for the single-feature model on the held-out rows.
from sklearn import metrics

# `predictions` is not assigned anywhere earlier in this section, so derive
# it from the fitted bivariate model before scoring.
predictions = regressor.predict(x_test)

print(metrics.mean_absolute_error(y_test, predictions))
print(metrics.mean_squared_error(y_test, predictions))
Multivariate Analysis
[]
# Multivariate setup: all eight predictor columns against the Outcome column.
from sklearn.model_selection import train_test_split

x = df[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
        'BMI', 'DiabetesPedigreeFunction', 'Age']]
y = df[['Outcome']]

# Re-split with the new feature matrix. Without this, x_train/x_test would
# still hold the Glucose-only split from the bivariate section, and the model
# below would not see the eight features selected above. Same test_size and
# random_state as the bivariate split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)
[]
# Fit a linear regression on the current training split, then print the
# fitted coefficients followed by the intercept.
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(x_train, y_train)
for fitted_param in (model.coef_, model.intercept_):
    print(fitted_param)
account_circle
[[ 0.00899394 0.00574161 -0.00171114 -0.00025321 -0.0001265 0.01617347
0.07290169 0.0062645 ]]
[-1.0013847]
[]
# With several predictor columns there is no single x-axis to draw a fitted
# line against, and plt.scatter rejects an x/y pair of different sizes.
# Plot actual vs. predicted Outcome on the held-out rows instead.
y_pred = model.predict(x_test)

plt.scatter(y_test, y_pred, color='r')
# Green reference diagonal: points on this line are predicted exactly.
plt.plot([0, 1], [0, 1], color='g')
plt.xlabel('Actual Outcome')
plt.ylabel('Predicted Outcome')
[]
# Error metrics for the multi-feature model on the held-out rows.
from sklearn import metrics

# `predictions` is not assigned anywhere earlier in this section, so derive
# it from the fitted model before scoring.
predictions = model.predict(x_test)

print(metrics.mean_absolute_error(y_test, predictions))
print(metrics.mean_squared_error(y_test, predictions))