# Source: vertopal.com_task7 (notebook converted to text; cell outputs interleaved below)
# Target selection: keep the class label ("variety") as a one-column DataFrame.
# NOTE(review): `df` is loaded in an earlier cell not shown here.
y = df[['variety']]
# Inspect the selected target column
print(y)
variety
0 Setosa
1 Setosa
2 Setosa
3 Setosa
4 Setosa
.. ...
145 Virginica
146 Virginica
147 Virginica
148 Virginica
149 Virginica
# Split the dataset into training and testing sets (80% train, 20% test)
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    train_size=0.8, random_state=45)
# NOTE(review): the metrics below use `pr` (test-set predictions, s.predict(x_test));
# in this extraction that predict cell appears later in the file — confirm the
# cells actually ran in prediction-before-evaluation order.
# Confusion Matrix
cf = confusion_matrix(y_test, pr)
print("Confusion Matrix:\n", cf)
# Classification Report
cr = classification_report(y_test, pr)
print("Classification Report:\n", cr)
# Accuracy Score (fraction correct, scaled to a percentage)
print("Accuracy Score:", accuracy_score(y_test, pr) * 100)
Confusion Matrix:
[[11 0 0]
[ 0 7 0]
[ 0 2 10]]
Classification Report:
precision recall f1-score support
accuracy 0.93 30
macro avg 0.93 0.94 0.93 30
weighted avg 0.95 0.93 0.93 30
# Predicting the output for the training data using the trained model
f = s.predict(x_train)
# Checking the accuracy on the training data to assess potential overfitting.
# A very high accuracy on the training data but a low accuracy on the
# test data could indicate overfitting.
print('Training accuracy is', accuracy_score(y_train, f))
# Predicting the output for the test data to evaluate model performance on unseen data
pr = s.predict(x_test)
# Checking the accuracy on the testing data to see how well the model generalizes
print('Testing accuracy is', accuracy_score(y_test, pr))
# Example 2
# Load the dataset: "diabetes.csv" holds patients' health parameters
# together with their diabetes status.
df = pd.read_csv("diabetes.csv")  # read the CSV into a DataFrame
# Show the loaded data
print(df)
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
767 1 93 70 31 0 30.4
# Feature Selection
# Define independent variables (features) and dependent variable (target/class label).
# Dropping the 'Outcome' column leaves only the predictor columns in x.
x=df.drop(['Outcome'],axis=1)
print(x)
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
767 1 93 70 31 0 30.4
DiabetesPedigreeFunction Age
0 0.627 50
1 0.351 31
2 0.672 32
3 0.167 21
4 2.288 33
.. ... ...
763 0.171 63
764 0.340 27
765 0.245 30
766 0.349 47
767 0.315 23
# Target selection: keep the class label ("Outcome") as a one-column DataFrame.
y = df[['Outcome']]
# Inspect the selected target column
print(y)
Outcome
0 1
1 0
2 1
3 0
4 1
.. ...
763 0
764 0
765 0
766 1
767 0
# Split the dataset into training and testing sets (80% train, 20% test)
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    train_size=0.8, random_state=45)
210 2 81 60 22 0 27.7
607 1 92 62 25 41 19.5
544 1 88 78 29 76 32.0
643 4 90 0 0 0 28.0
DiabetesPedigreeFunction Age
727 0.433 22
327 0.200 37
721 0.289 21
210 0.290 25
683 0.536 27
.. ... ...
725 0.236 38
607 0.482 25
544 0.365 29
643 0.610 31
414 0.534 21
51 1 101 50 15 36 24.2
66 0 109 88 30 0 32.5
556 1 97 70 40 0 38.1
146 9 57 80 37 0 32.8
DiabetesPedigreeFunction Age
195 0.395 29
51 0.526 26
66 0.855 38
437 0.434 28
665 0.217 24
.. ... ...
246 0.258 41
556 0.218 30
298 0.412 46
339 0.331 41
146 0.096 41
[0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0
0 0
0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0
0 0
0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 0 0 0 0 0
1 0
0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
0 0
0 0 0 0 1 0]
# Evaluate the classifier on the held-out test set.
# NOTE(review): relies on `pr` = s.predict(x_test); in this extraction that
# predict step appears later in the file — confirm execution order.
cf = confusion_matrix(y_test, pr)        # rows = actual class, cols = predicted
cr = classification_report(y_test, pr)   # per-class precision/recall/f1
print("Confusion Matrix:\n", cf)
print("Classification Report:\n", cr)
# Accuracy expressed as a percentage
print("Accuracy Score:", accuracy_score(y_test, pr) * 100)
Confusion Matrix:
[[97 7]
[36 14]]
Classification Report:
precision recall f1-score support
# Predicting the output for the training data using the trained model
f = s.predict(x_train)
# Checking the accuracy on the training data to assess potential overfitting.
# A very high accuracy on the training data but a low accuracy on the
# test data could indicate overfitting.
print('Training accuracy is', accuracy_score(y_train, f))
# Predicting the output for the test data to evaluate model performance on unseen data
pr = s.predict(x_test)
# Checking the accuracy on the testing data to see how well the model generalizes
print('Testing accuracy is', accuracy_score(y_test, pr))