Female A S Breast Cancer Prediction Model
# importing libraries
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# reading data from the file
df=pd.read_csv("data.csv")
df.head()
{"type":"dataframe","variable_name":"df"}
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
# Column Non-Null Count Dtype
(569, 33)
(569, 32)
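The column count drops from 33 to 32 between these two shape outputs, so the empty trailing column (named Unnamed: 32 in the usual Kaggle export of this dataset, which is an assumption here) was presumably removed. A minimal sketch of that step:
# drop the all-NaN trailing column so only id, diagnosis and the 30 features remain
df = df.dropna(axis=1, how="all")
df.shape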
{"type":"dataframe"}
diagnosis
B 357
M 212
Name: count, dtype: int64
# plot the class balance of the diagnosis column
sns.countplot(x="diagnosis", data=df)
df.head()
{"type":"dataframe","variable_name":"df"}
sns.pairplot(df.iloc[:,1:5],hue="diagnosis")
<seaborn.axisgrid.PairGrid at 0x7af9d51ebac0>
# get the correlation
df.iloc[:,1:32].corr()
{"type":"dataframe"}
<Axes: >
# split the dataset into the feature matrix (X) and the target labels (Y)
X = df.iloc[:, 2:31].values   # numeric feature columns
Y = df.iloc[:, 1].values      # diagnosis column (M = malignant, B = benign)
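Later cells reference X_train, X_test, Y_train and Y_test, the printed labels are 0/1 integers, and the test set holds 114 of the 569 rows (20%), so a label-encoding and train/test-split step presumably ran here. A minimal sketch, with test_size and random_state as assumptions:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# encode the string labels (LabelEncoder sorts alphabetically, so B -> 0, M -> 1)
Y = LabelEncoder().fit_transform(Y)

# hold out 20% of the rows (114 of 569) for testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)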
# feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)   # fit the scaler on the training data only
X_test = sc.transform(X_test)         # apply the same scaling to the test data
# models / algorithms
def models(X_train, Y_train):
    # logistic regression
    from sklearn.linear_model import LogisticRegression
    log = LogisticRegression(random_state=0)
    log.fit(X_train, Y_train)

    # decision tree
    from sklearn.tree import DecisionTreeClassifier
    tree = DecisionTreeClassifier(random_state=0, criterion="entropy")
    tree.fit(X_train, Y_train)

    # random forest
    from sklearn.ensemble import RandomForestClassifier
    forest = RandomForestClassifier(random_state=0, criterion="entropy", n_estimators=10)
    forest.fit(X_train, Y_train)

    print('[0]logistic regression accuracy:', log.score(X_train, Y_train))
    print('[1]Decision tree accuracy:', tree.score(X_train, Y_train))
    print('[2]Random forest accuracy:', forest.score(X_train, Y_train))
    return log, tree, forest
model=models(X_train,Y_train)
[0]logistic regression accuracy: 0.9472527472527472
[1]Decision tree accuracy: 1.0
[2]Random forest accuracy: 1.0
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
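This is scikit-learn's standard lbfgs convergence warning. The usual remedy, not applied in the original run, is to raise the iteration limit when constructing the model:
log = LogisticRegression(random_state=0, max_iter=1000)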
# evaluate each model on the held-out test set
from sklearn.metrics import classification_report, accuracy_score
for i in range(len(model)):
    print("Model", i)
    print(classification_report(Y_test, model[i].predict(X_test)))
    print('Accuracy :', accuracy_score(Y_test, model[i].predict(X_test)))
Model 0
precision recall f1-score support
Accuracy : 0.956140350877193
Model 1
precision recall f1-score support
Accuracy : 0.956140350877193
Model 2
precision recall f1-score support
Accuracy : 0.9649122807017544
# prediction of random-forest
pred=model[2].predict(X_test)
print('Predicted values:')
print(pred)
print('Actual values:')
print(Y_test)
Predicted values:
[1 0 0 1 1 0 0 0 0 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0]
Actual values:
204 1
70 0
131 0
431 1
540 1
..
486 1
75 0
249 1
238 1
265 0
Length: 114, dtype: int64
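The final output line below is the list returned by joblib.dump, so the trained model was saved to disk, although the cell itself is missing. A minimal sketch, assuming the random forest (model[2]) is the one being exported:
import joblib

# persist the random-forest model; joblib.dump returns the list of files it wrote
joblib.dump(model[2], 'Feamle_Awareness_Breast_Cancer_prediction.joblib')

# the model can be reloaded later with joblib.load('Feamle_Awareness_Breast_Cancer_prediction.joblib')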
['Feamle_Awareness_Breast_Cancer_prediction.joblib']