Name: Vihan Chorada
Branch: AIML B2
PRN: 21070126112
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.metrics import classification_report
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
df=pd.read_csv("/content/wine.csv")
df
     Wine  Alcohol  Malic.acid   Ash   Acl   Mg  Phenols  Flavanoids  ...
0       1    14.23        1.71  2.43  15.6  127     2.80        3.06  ...
1       1    13.20        1.78  2.14  11.2  100     2.65        2.76  ...
2       1    13.16        2.36  2.67  18.6  101     2.80        3.24  ...
3       1    14.37        1.95  2.50  16.8  113     3.85        3.49  ...
4       1    13.24        2.59  2.87  21.0  118     2.80        2.69  ...
..    ...      ...         ...   ...   ...  ...      ...         ...  ...
173     3    13.71        5.65  2.45  20.5   95     1.68        0.61  ...
174     3    13.40        3.91  2.48  23.0  102     1.80        0.75  ...
175     3    13.27        4.28  2.26  20.0  120     1.59        0.69  ...
176     3    13.17        2.59  2.37  20.0  120     1.65        0.68  ...
177     3    14.13        4.10  2.74  24.5   96     2.05        0.76  ...

178 rows × 14 columns
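Before modelling, it is worth confirming the shape, column types, and missing values. A minimal sketch of such a check, assuming the DataFrame df loaded above:

# Quick sanity checks on the loaded wine data (sketch; assumes df from the cell above)
print(df.shape)         # expect (178, 14): 13 features plus the 'Wine' label
print(df.dtypes)        # all feature columns should be numeric
print(df.isna().sum())  # missing values per column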
df[df.columns].value_counts()
Wine  Alcohol  Malic.acid  Ash   Acl   Mg   Phenols  Flavanoids  Nonflavanoid.phenols  Proanth  Color.int  Hue   OD    Proline
1     12.85    1.60        2.52  17.8  95   2.48     2.37        0.26                  1.46     3.93       1.09  3.63  1015       1
3     12.36    3.83        2.38  21.0  88   2.30     0.92        0.50                  1.04     7.65       0.56  1.58  520        1
2     12.60    1.34        1.90  18.5  88   1.45     1.36        0.29                  1.35     2.45       1.04  2.77  562        1
      12.64    1.36        2.02  16.8  100  2.02     1.41        0.53                  0.62     5.75       0.98  1.59  450        1
      12.67    0.98        2.24  18.0  99   2.20     1.94        0.30                  1.46     2.62       1.23  3.16  450        1
                                                     ...
      11.45    2.40        2.42  20.0  96   2.90     2.79        0.32                  1.83     3.25       0.80  3.39  625        1
      11.46    3.74        1.82  19.5  107  3.18     2.58        0.24                  3.58     2.90       0.75  2.81  562        1
      11.56    2.05        3.23  28.5  119  3.18     5.08        0.47                  1.87     6.00       0.93  3.69  465        1
      11.61    1.35        2.70  20.0  94   2.74     2.92        0.29                  2.49     2.65       0.96  3.26  680        1
3     14.34    1.68        2.70  25.0  98   2.80     1.31        0.53                  2.70     13.00      0.57  1.96  660        1
Length: 178, dtype: int64
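Because every full feature row is unique, value_counts() over all columns simply returns a count of 1 for each of the 178 samples. A more informative check is the balance of the target classes alone; a short sketch, assuming the label column is 'Wine' as above:

# Class distribution of the target column (sketch)
print(df['Wine'].value_counts())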
Logistic regression
X=df.iloc[:,1:14]
y=df['Wine']
X.columns
Index(['Alcohol', 'Malic.acid', 'Ash', 'Acl', 'Mg', 'Phenols', 'Flavanoids',
'Nonflavanoid.phenols', 'Proanth', 'Color.int', 'Hue', 'OD', 'Proline'],
dtype='object')
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
lr = LogisticRegression()
lr.fit(X_train, y_train)
LogisticRegression()
y_pred = lr.predict(X_test)
y_pred
array([1, 1, 3, 1, 2, 1, 2, 3, 2, 3, 1, 3, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2,
2, 3, 3, 3, 2, 2, 2, 1, 1, 2, 3, 1, 1, 1, 3, 3, 2, 3, 1, 2, 2, 3,
3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 3, 2, 2, 1, 2])
ac=lr.score(X_test, y_test)
ac
0.9830508474576272
print("Accuracy of Logistic Regression model is:",
lr.score(X_test, y_test)*100)
Accuracy of Logistic Regression model is: 98.30508474576271
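Accuracy alone can hide per-class behaviour. Since classification_report is already imported at the top of the notebook, a short sketch of a fuller evaluation of the model above, assuming y_test and y_pred as computed earlier:

# Per-class precision, recall and F1 for the plain logistic regression model (sketch)
print(classification_report(y_test, y_pred))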
Logistic regression classification with PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
pca_model = LogisticRegression()
pca_model.fit(X_train_pca,y_train)
pca_model
LogisticRegression()
pred=pca_model.predict(X_test_pca)
pred
array([1, 1, 3, 1, 2, 1, 2, 3, 2, 3, 2, 3, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2,
2, 3, 3, 3, 2, 2, 2, 1, 1, 2, 3, 1, 1, 1, 3, 3, 2, 3, 2, 2, 2, 2,
3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 3, 2, 2, 1, 2])
print("Accuracy of Logistic Regression model is:",
pca_model.score(X_test_pca,y_test)*100)
Accuracy of Logistic Regression model is: 96.61016949152543
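The small drop in accuracy relative to the full 13-feature model reflects information discarded by the projection. A sketch of checking how much variance the two retained components capture, assuming the fitted pca object above:

# Variance retained by the two principal components (sketch)
print(pca.explained_variance_ratio_)        # per-component share of the variance
print(pca.explained_variance_ratio_.sum())  # total variance kept by 2 components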
Logistic regression classification with LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components = 2)
X_train_lda = lda.fit_transform(X_train,y_train)
X_test_lda = lda.transform(X_test)
lda_model = LogisticRegression()
lda_model.fit(X_train_lda, y_train)  # fit on the LDA-transformed features
lda_model
LogisticRegression()
predii = lda_model.predict(X_test_lda)
predii
array([1, 1, 3, 1, 2, 1, 2, 3, 2, 3, 1, 3, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2,
2, 3, 3, 3, 2, 2, 2, 1, 1, 2, 3, 1, 1, 1, 3, 3, 2, 3, 1, 2, 2, 3,
3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 3, 2, 2, 1, 2])
print("Accuracy of Logistic Regression model is:",lda_model.score(X_test, y_test)*100)
Accuracy of Logistic Regression model is: 98.30508474576271
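seaborn and matplotlib are imported at the top of the notebook but not used; a sketch of visualising the 2-D LDA projection, which shows the class separation behind the high accuracy, assuming X_train_lda and y_train from above:

# Scatter plot of the training data in the 2-D LDA space, coloured by wine class (sketch)
sns.scatterplot(x=X_train_lda[:, 0], y=X_train_lda[:, 1], hue=y_train, palette='deep')
plt.xlabel('LD1')
plt.ylabel('LD2')
plt.title('Wine classes in the LDA-projected space')
plt.show()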