0% found this document useful (0 votes)
8 views8 pages

Female A S Breast Cancer Prediction Model

Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
8 views8 pages

Female A S Breast Cancer Prediction Model

Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 8

MODEL 2: Breast Cancer Prediction Using Python

Importing libraries

# importing libraries
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# reading data from the file
df=pd.read_csv("data.csv")

df.head()

{"type":"dataframe","variable_name":"df"}

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
# Column Non-Null Count Dtype

0 id 569 non-null int64


1 diagnosis 569 non-null object
2 radius_mean 569 non-null float64
3 texture_mean 569 non-null float64
4 perimeter_mean 569 non-null float64
5 area_mean 569 non-null float64
6 smoothness_mean 569 non-null float64
7 compactness_mean 569 non-null float64
8 concavity_mean 569 non-null float64
9 concave points_mean 569 non-null float64
10 symmetry_mean 569 non-null float64
11 fractal_dimension_mean 569 non-null float64
12 radius_se 569 non-null float64
13 texture_se 569 non-null float64
14 perimeter_se 569 non-null float64
15 area_se 569 non-null float64
16 smoothness_se 569 non-null float64
17 compactness_se 569 non-null float64
18 concavity_se 569 non-null float64
19 concave points_se 569 non-null float64
20 symmetry_se 569 non-null float64
21 fractal_dimension_se 569 non-null float64
22 radius_worst 569 non-null float64
23 texture_worst 569 non-null float64
24 perimeter_worst 569 non-null float64
25 area_worst 569 non-null float64
26 smoothness_worst 569 non-null float64
27 compactness_worst 569 non-null float64
28 concavity_worst 569 non-null float64
29 concave points_worst 569 non-null float64
30 symmetry_worst 569 non-null float64
31 fractal_dimension_worst 569 non-null float64
32 Unnamed: 32 0 non-null float64
dtypes: float64(31), int64(1), object(1)
memory usage: 146.8+ KB

# return the size of dataset


df.shape

(569, 33)

# remove every column that contains null values (here: the empty 'Unnamed: 32' column)


df=df.dropna(axis=1)

# shape of dataset after removing the null column


df.shape

(569, 32)

# describe the dataset


df.describe()

{"type":"dataframe"}

# Get the count of malignant<M> and Benign<B> cells


df['diagnosis'].value_counts()

diagnosis
B 357
M 212
Name: count, dtype: int64

sns.countplot(df['diagnosis'],label="count")

<Axes: xlabel='count', ylabel='diagnosis'>


# label encoding(convert the value of M and B into 1 and 0)
from sklearn.preprocessing import LabelEncoder
labelencoder_Y = LabelEncoder()
df.iloc[:,1]=labelencoder_Y.fit_transform(df.iloc[:,1].values)

df.head()

{"type":"dataframe","variable_name":"df"}

sns.pairplot(df.iloc[:,1:5],hue="diagnosis")

<seaborn.axisgrid.PairGrid at 0x7af9d51ebac0>
# get the correlation
df.iloc[:,1:32].corr()

{"type":"dataframe"}

# visualize the correlation


plt.figure(figsize=(10,10))
sns.heatmap(df.iloc[:,1:10].corr(),annot=True,fmt=".0%")

<Axes: >
# split the dataset into independent (X, features) and dependent (Y, target) datasets
# The slice end is exclusive: after dropping the empty column the frame has
# 32 columns (indices 0..31), so 2:32 is required to keep the last feature
# (fractal_dimension_worst). The previous 2:31 silently dropped it.
X=df.iloc[:,2:32].values
# Column 1 is the label-encoded diagnosis.
Y=df.iloc[:,1].values

# splitting the data into training and test datasets


from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state is fixed so the split is reproducible.
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.20,random_state=0)

# feature scaling
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training set only and reuse its statistics for the
# test set. Fitting a second scaler on the test set would scale the two sets
# differently and use test-set statistics during preprocessing.
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

# models/ Algorithms

def models(X_train,Y_train):
    """Train three classifiers on the given data and return them.

    Fits a logistic regression, a decision tree and a random forest
    (10 trees), all with ``random_state=0``; the two tree-based models use
    the entropy criterion. After fitting, each model's accuracy on the
    *training* data is printed -- these figures are not a measure of
    generalisation and should not be read as test performance.

    Args:
        X_train: Training feature matrix.
        Y_train: Training labels.

    Returns:
        A tuple ``(log, tree, forest)`` of the fitted estimators, in that
        order.
    """
    # Imported locally, as in the original cell; the file does not import
    # these estimators at module level.
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier

    # logistic regression
    log=LogisticRegression(random_state=0)
    log.fit(X_train,Y_train)

    # Decision Tree
    tree=DecisionTreeClassifier(random_state=0,criterion="entropy")
    tree.fit(X_train,Y_train)

    # Random Forest
    forest=RandomForestClassifier(random_state=0,criterion="entropy",n_estimators=10)
    forest.fit(X_train,Y_train)

    # Training-set accuracy of each fitted model.
    print('[0]logistic regression accuracy:',log.score(X_train,Y_train))
    print('[1]Decision tree accuracy:',tree.score(X_train,Y_train))
    print('[2]Random forest accuracy:',forest.score(X_train,Y_train))

    return log,tree,forest

model=models(X_train,Y_train)
[0]logistic regression accuracy: 0.9472527472527472
[1]Decision tree accuracy: 1.0
[2]Random forest accuracy: 1.0

/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/
_logistic.py:469: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(

# testing the models/result

from sklearn.metrics import accuracy_score


from sklearn.metrics import classification_report

# Report per-class metrics and overall accuracy of every trained model on the
# held-out test set.
for i,clf in enumerate(model):
    # Predict once per model and reuse the result for both reports.
    predictions=clf.predict(X_test)
    print("Model",i)
    print(classification_report(Y_test,predictions))
    print('Accuracy : ',accuracy_score(Y_test,predictions))
Model 0
precision recall f1-score support

0 0.97 0.91 0.94 43


1 0.95 0.99 0.97 71

accuracy 0.96 114


macro avg 0.96 0.95 0.95 114
weighted avg 0.96 0.96 0.96 114

Accuracy : 0.956140350877193
Model 1
precision recall f1-score support

0 0.97 0.91 0.94 43


1 0.95 0.99 0.97 71

accuracy 0.96 114


macro avg 0.96 0.95 0.95 114
weighted avg 0.96 0.96 0.96 114

Accuracy : 0.956140350877193
Model 2
precision recall f1-score support

0 0.98 0.93 0.95 43


1 0.96 0.99 0.97 71

accuracy 0.96 114


macro avg 0.97 0.96 0.96 114
weighted avg 0.97 0.96 0.96 114

Accuracy : 0.9649122807017544

# prediction of random-forest
pred=model[2].predict(X_test)
print('Predicted values:')
print(pred)
print('Actual values:')
print(Y_test)

Predicted values:
[1 0 0 1 1 0 0 0 0 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1
1 0
1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0
1 0
1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1
1 0
1 1 0]
Actual values:
204 1
70 0
131 0
431 1
540 1
..
486 1
75 0
249 1
238 1
265 0
Length: 114, dtype: int64

from joblib import dump


dump(model[2],"Feamle_Awareness_Breast_Cancer_prediction.joblib")

['Feamle_Awareness_Breast_Cancer_prediction.joblib']

You might also like