0% found this document useful (0 votes)
53 views4 pages

ML - Lab-6.ipynb - Colab

Uploaded by

22b01a45a0
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
53 views4 pages

ML - Lab-6.ipynb - Colab

Uploaded by

22b01a45a0
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 4

EXPERIMENT-6

Write a program to implement Logistic Regression for the given dataset and compute the accuracy of the classifier.

### LOGISTIC REGRESSION
# Data pre-processing, model fitting and evaluation for the
# Social_Network_Ads dataset.

# Importing libraries (grouped at the top of the cell so that re-running
# the cell always starts from a clean set of names).
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Importing the dataset
data_set = pd.read_csv('/content/Social_Network_Ads.csv')

# Extracting independent and dependent variables.
# Columns 0 and 1 are used as features and column 2 as the label
# (presumably Age, EstimatedSalary and the purchase flag -- confirm against
# the CSV header before reusing this with a different file).
x = data_set.iloc[:, [0, 1]].values
y = data_set.iloc[:, 2].values

# Optional sanity checks while exploring the data:
#   data_set.shape, data_set.head(), data_set.info(),
#   data_set.describe(), data_set.isnull().any()

# Splitting the dataset into training and test sets (30% held out;
# random_state is fixed so the split is reproducible).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Feature scaling: the scaler is fitted on the training data only and then
# applied unchanged to the test data, so no test-set statistics leak into
# the model.
st_x = StandardScaler()
x_train = st_x.fit_transform(x_train)
x_test = st_x.transform(x_test)

# Frequency of each distinct value in the two named columns
print(data_set["Age"].value_counts())
print(data_set["EstimatedSalary"].value_counts())

# Fitting Logistic Regression to the training set
classifier = LogisticRegression(random_state=0)
classifier.fit(x_train, y_train)

# Predicting the test set result
y_pred = classifier.predict(x_test)

# Creating the confusion matrix.
# The result is stored under its own name: assigning it back to
# `confusion_matrix` would replace the imported function with an array and
# make any later call to confusion_matrix(...) fail.
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Test-set accuracy, recall and precision
ac = accuracy_score(y_test, y_pred)
print("Accuracy score", ac)
rc = recall_score(y_test, y_pred)
print("Recall score", rc)
pc = precision_score(y_test, y_pred)
print("Precision score", pc)

42 16

39 15
40 15
48 14
47 14
27 13
38 13
28 12
36 12
46 12
31 11
30 11
49 10
29 10
33 9
32 9
24 9
34 6
23 6 
52 6
25 6
58 6
53 5
57 5
22 5
18 5
21 4
50 4
54 4
55 3
56 3
51 3
43 3
44 2
Name: count, dtype: int64
EstimatedSalary
72000 12
80000 11
79000 10
75000 9
71000 9
..
123000 1
37000 1
115000 1
148000 1
139000 1
Name: count, Length: 117, dtype: int64
[[74 5]
[11 30]]
Accuracy score 0.8666666666666667
Recall score 0.7317073170731707
Precision score 0.8571428571428571

Start coding or generate with AI.

# Visualizing the training set result:
# the classifier's predicted class is drawn as filled regions over a dense
# grid, with the training points scattered on top, coloured by true label.
from matplotlib.colors import ListedColormap

x_set, y_set = x_train, y_train

# One colormap shared by the decision regions and the points.
region_cmap = ListedColormap(('purple', 'red'))

# Dense grid covering the data range with a margin of 1 on every side.
x1, x2 = nm.meshgrid(
    nm.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
    nm.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01),
)

# Predict a class for every grid point, then fold the flat predictions back
# into the grid's shape so contourf can draw them.
grid_points = nm.array([x1.ravel(), x2.ravel()]).T
mtp.contourf(
    x1, x2, classifier.predict(grid_points).reshape(x1.shape),
    alpha=0.75, cmap=region_cmap,
)
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())

for i, j in enumerate(nm.unique(y_set)):
    # `color=` (not `c=`) is used for the single RGBA tuple: with `c=`,
    # matplotlib cannot tell one colour from a sequence of values to be
    # colour-mapped and emits a UserWarning.
    mtp.scatter(
        x_set[y_set == j, 0], x_set[y_set == j, 1],
        color=region_cmap(i), label=j,
    )

mtp.title('Logistic Regression (Training set)')
# NOTE: x_set is x_train, so both axes show the values stored in x_train
# at the time this cell runs.
mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()

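<ipython-input-50-e33159ed9acd>:11: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*. Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.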
mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
## test
# Visualizing the test set result:
# the classifier's predicted class is drawn as filled regions over a dense
# grid, with the test points scattered on top, coloured by true label.
from matplotlib.colors import ListedColormap

x_set, y_set = x_test, y_test

# One colormap shared by the decision regions and the points.
region_cmap = ListedColormap(('blue', 'green'))

# Dense grid covering the data range with a margin of 1 on every side.
x1, x2 = nm.meshgrid(
    nm.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
    nm.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01),
)

# Predict a class for every grid point, then fold the flat predictions back
# into the grid's shape so contourf can draw them.
grid_points = nm.array([x1.ravel(), x2.ravel()]).T
mtp.contourf(
    x1, x2, classifier.predict(grid_points).reshape(x1.shape),
    alpha=0.75, cmap=region_cmap,
)
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())

for i, j in enumerate(nm.unique(y_set)):
    # `color=` (not `c=`) is used for the single RGBA tuple: with `c=`,
    # matplotlib cannot tell one colour from a sequence of values to be
    # colour-mapped and emits a UserWarning.
    mtp.scatter(
        x_set[y_set == j, 0], x_set[y_set == j, 1],
        color=region_cmap(i), label=j,
    )

mtp.title('Logistic Regression (Test set)')
# NOTE: x_set is x_test, so both axes show the values stored in x_test
# at the time this cell runs.
mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()

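<ipython-input-56-57183aa04161>:12: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*. Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.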
mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],

# Re-fit the logistic model with statsmodels on x and y to obtain
# coefficient tables (standard errors, z-scores, p-values, intervals).
import statsmodels.api as sm

# Add a constant column to the design matrix so the model gets an
# intercept term.
x1 = sm.add_constant(x)
print(x1)

logit_model = sm.Logit(endog=y, exog=x1)
result = logit_model.fit()

# Print both report layouts, summary2 first and then the classic summary.
for report in (result.summary2(), result.summary()):
    print(report)

[[1.0e+00 1.9e+01 1.9e+04]


[1.0e+00 3.5e+01 2.0e+04]
[1.0e+00 2.6e+01 4.3e+04]
...
[1.0e+00 5.0e+01 2.0e+04]
[1.0e+00 3.6e+01 3.3e+04]
[1.0e+00 4.9e+01 3.6e+04]]
Optimization terminated successfully.
Current function value: 0.346314
Iterations 8
Results: Logit
=================================================================
Model: Logit Method: MLE
Dependent Variable: y Pseudo R-squared: 0.469
Date: 2024-05-28 08:36 AIC: 283.0514
No. Observations: 400 BIC: 295.0258
Df Model: 2 Log-Likelihood: -138.53
Df Residuals: 397 LL-Null: -260.79
Converged: 1.0000 LLR p-value: 7.9949e-54
No. Iterations: 8.0000 Scale: 1.0000
-------------------------------------------------------------------
Coef. Std.Err. z P>|z| [0.025 0.975]
-------------------------------------------------------------------
const -12.4340 1.2998 -9.5662 0.0000 -14.9815 -9.8865
x1 0.2335 0.0259 9.0128 0.0000 0.1827 0.2843
x2 0.0000 0.0000 6.6127 0.0000 0.0000 0.0000
=================================================================
Logit Regression Results
==============================================================================
Dep. Variable: y No. Observations: 400
Model: Logit Df Residuals: 397
Method: MLE Df Model: 2
Date: Tue, 28 May 2024 Pseudo R-squ.: 0.4688
Time: 08:36:05 Log-Likelihood: -138.53
converged: True LL-Null: -260.79
Covariance Type: nonrobust LLR p-value: 7.995e-54
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const -12.4340 1.300 -9.566 0.000 -14.982 -9.886
x1 0.2335 0.026 9.013 0.000 0.183 0.284
x2 3.59e-05 5.43e-06 6.613 0.000 2.53e-05 4.65e-05
==============================================================================

You might also like