0% found this document useful (0 votes)
96 views5 pages

23

The document loads breast cancer histopathology image data, preprocesses it, splits it into training and test sets, builds a convolutional neural network (CNN) for classification, trains the model for 10 epochs, and evaluates its performance on the test set. Key details:
- It loads over 63,000 image patches, separates them into class 0 and class 1, and samples an equal number from each class.
- The CNN contains convolutional, batch-normalization, max-pooling, dropout and dense layers.
- The model is trained for 10 epochs and achieves over 85% accuracy on the test set.

Uploaded by

Arpita Das
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
96 views5 pages

23

The document loads breast cancer histopathology image data, preprocesses it, splits it into training and test sets, builds a convolutional neural network (CNN) for classification, trains the model for 10 epochs, and evaluates its performance on the test set. Key details:
- It loads over 63,000 image patches, separates them into class 0 and class 1, and samples an equal number from each class.
- The CNN contains convolutional, batch-normalization, max-pooling, dropout and dense layers.
- The model is trained for 10 epochs and achieves over 85% accuracy on the test set.

Uploaded by

Arpita Das
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 5

import pandas as pd

import numpy as np
import os
from glob import glob
import random
import matplotlib.pylab as plt
# Collect the entries directly under the dataset root (the recorded output
# shows numbered sub-directories such as .../10295).
# glob() already returns a list, so no manual append loop is needed.
# `recursive=True` is kept from the original call; it only affects patterns
# containing '**', so it has no effect on this pattern.
mypaths = glob(
    '../input/breast-histopathology-images/IDC_regular_ps50_idx5/*',
    recursive=True,
)
print(mypaths[:5])
['../input/breast-histopathology-images/IDC_regular_ps50_idx5/10295',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/10304',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/12868',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/10274',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/12818']
# Work on a slice of at most 60 top-level directories instead of the whole dataset.
mp = mypaths[60:120]

# Gather every PNG found two levels below each selected directory
# (<directory>/<sub-directory>/<file>.png).
imagePatches = []
for folder in mp:
    imagePatches += glob(folder + '/*/*.png', recursive=True)

print('total no. of images selected from total images is ' + str(len(imagePatches)))
total no. of images selected from total images is 63535
# Split the patch paths by the class encoded in the file name suffix.
# 0 = no cancer: files ending in "class0.png".
class0 = [filename for filename in imagePatches if filename.endswith("class0.png")]
# 1 = cancer: every remaining file (anything that is not a class0 patch).
class1 = [filename for filename in imagePatches if not filename.endswith("class0.png")]

print('Among them, ' + str(len(class0)) + ' is clss0 and ' + str(len(class1)) + ' is class1')
Among them, 44738 is clss0 and 18797 is class1
# Balance the two classes by drawing the same number of patches from each.
# The size of the smaller class is used so random.sample() never asks for
# more items than a list holds (it raises ValueError in that case). When
# class1 is the smaller class this is identical to sampling len(class1).
n_per_class = min(len(class0), len(class1))
sampled_class0 = random.sample(class0, n_per_class)
sampled_class1 = random.sample(class1, n_per_class)
len(sampled_class0)
18797
from matplotlib.image import imread
import cv2

def get_image_arrays(data, label):
    """Load the PNG files listed in *data* and pair each image with *label*.

    Args:
        data: iterable of file paths; entries not ending in ``.png`` are skipped.
        label: value stored next to every loaded image.

    Returns:
        A list of ``[image, label]`` pairs. Each image is read with OpenCV as
        a colour image, resized to 50x50 and divided by 255.0.

    Raises:
        ValueError: if OpenCV cannot read one of the ``.png`` files.
    """
    img_arrays = []
    for path in data:
        if not path.endswith('.png'):
            continue
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports an unreadable file by returning None rather
            # than raising; fail here with the offending path instead of
            # letting cv2.resize fail with an opaque error.
            raise ValueError('could not read image: ' + str(path))
        img_sized = cv2.resize(img, (50, 50), interpolation=cv2.INTER_LINEAR)
        # NOTE: OpenCV returns channels in BGR order; the order is kept as-is.
        img_arrays.append([img_sized / 255.0, label])
    return img_arrays
# Load both balanced samples, tagging class0 patches with 0 and class1 patches with 1.
class0_array = get_image_arrays(data=sampled_class0, label=0)
class1_array = get_image_arrays(data=sampled_class1, label=1)
print('done')
done
# Sanity check: report how many [image, label] pairs each class holds.
for loaded in (class0_array, class1_array):
    print(len(loaded))
18797
18797
# Preview one class1 patch. The array comes from cv2.imread, which stores
# channels as BGR, while matplotlib expects RGB: reverse the channel axis
# for display only (the stored training data is left untouched).
plt.imshow(class1_array[10][0][:, :, ::-1])
<matplotlib.image.AxesImage at 0x7fbdf97b6ad0>

# Join the two labelled lists. Each element is an [image, label] pair, i.e. a
# ragged structure, so plain list concatenation is used: np.concatenate would
# first have to build an object ndarray from ragged sequences, which emits
# VisibleDeprecationWarning on older NumPy and is an error on newer releases.
# No explicit shuffle is done here; train_test_split (used later) shuffles
# by default.
combined_data = class0_array + class1_array
<string>:6: VisibleDeprecationWarning: Creating an ndarray from ragged nested
sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different
lengths or shapes) is deprecated. If you meant to do this, you must specify
'dtype=object' when creating the ndarray
# Separate the [image, label] pairs into a feature tensor and a label list.
# X becomes a single array of shape (n_samples, 50, 50, 3); y stays a plain list.
X = np.array([features for features, _ in combined_data]).reshape(-1, 50, 50, 3)
y = [label for _, label in combined_data]
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split reproducible.
split = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = split

# One-hot encode the integer labels (the recorded output shows two columns).
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
(28195, 50, 50, 3) (9399, 50, 50, 3) (28195, 2) (9399, 2)
import tensorflow as tf
from tensorflow import keras
# CNN classifier for 50x50x3 patches. The output shapes in the comments are
# taken from the recorded model.summary() output.
model = keras.models.Sequential([
    # Convolutional feature extractor.
    keras.layers.Conv2D(filters=100, kernel_size=(3, 3), strides=(1, 1),
                        activation='relu', input_shape=(50, 50, 3)),   # -> 48x48x100
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),          # -> 24x24x100
    keras.layers.Conv2D(filters=256, kernel_size=(2, 2), strides=(2, 2),
                        activation='relu', padding="same"),            # -> 12x12x256
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),          # -> 5x5x256
    keras.layers.Conv2D(filters=384, kernel_size=(2, 2), strides=(1, 1),
                        activation='relu', padding="same"),            # -> 5x5x384
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=384, kernel_size=(1, 1), strides=(1, 1),
                        activation='relu', padding="same"),            # -> 5x5x384
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=(1, 1), strides=(1, 1),
                        activation='relu', padding="same"),            # -> 5x5x256
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),          # -> 2x2x256
    keras.layers.Flatten(),                                            # -> 1024
    # Fully connected classifier head. The former input_shape=(12544,) on this
    # layer was dropped: Flatten yields 1024 features, and the input size of a
    # non-first layer is inferred from the previous layer anyway.
    keras.layers.Dense(9216, activation='relu'),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.5),
    # NOTE(review): two independent sigmoid units are trained against one-hot
    # labels, so the two scores need not sum to 1; a softmax output would
    # express the mutually exclusive classes directly - confirm before changing.
    keras.layers.Dense(2, activation='sigmoid'),
])
# Configure training: Adam with a very small step size, binary cross-entropy
# loss, and accuracy / recall / precision tracked as metrics.
# `learning_rate` replaces the deprecated `lr` alias, which newer
# TensorFlow/Keras releases warn about or reject.
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.000001),
    loss='binary_crossentropy',
    metrics=['accuracy', 'Recall', 'Precision'],
)
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 48, 48, 100) 2800
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 48, 100) 400
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 24, 24, 100) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 12, 12, 256) 102656
_________________________________________________________________
batch_normalization_1 (Batch (None, 12, 12, 256) 1024
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 256) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 5, 5, 384) 393600
_________________________________________________________________
batch_normalization_2 (Batch (None, 5, 5, 384) 1536
_________________________________________________________________
conv2d_3 (Conv2D) (None, 5, 5, 384) 147840
_________________________________________________________________
batch_normalization_3 (Batch (None, 5, 5, 384) 1536
_________________________________________________________________
conv2d_4 (Conv2D) (None, 5, 5, 256) 98560
_________________________________________________________________
batch_normalization_4 (Batch (None, 5, 5, 256) 1024
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 2, 2, 256) 0
_________________________________________________________________
flatten (Flatten) (None, 1024) 0
_________________________________________________________________
dense (Dense) (None, 9216) 9446400
_________________________________________________________________
dense_1 (Dense) (None, 4096) 37752832
_________________________________________________________________
dropout (Dropout) (None, 4096) 0
_________________________________________________________________
dense_2 (Dense) (None, 4096) 16781312
_________________________________________________________________
dropout_1 (Dropout) (None, 4096) 0
_________________________________________________________________
dense_3 (Dense) (None, 2) 8194
=================================================================
Total params: 64,739,714
Trainable params: 64,736,954
Non-trainable params: 2,760
_________________________________________________________________
# Train for 10 epochs and keep the per-epoch metrics in `history`.
# NOTE(review): the test split doubles as validation data, so the val_*
# figures are not an independent estimate.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)
Epoch 1/10
882/882 [==============================] - 34s 32ms/step - loss: 0.5803 - accuracy:
0.7263 - recall: 0.7124 - precision: 0.6990 - val_loss: 0.3933 - val_accuracy:
0.8317 - val_recall: 0.8215 - val_precision: 0.8398
Epoch 2/10
882/882 [==============================] - 27s 31ms/step - loss: 0.4179 - accuracy:
0.8272 - recall: 0.8228 - precision: 0.8213 - val_loss: 0.3665 - val_accuracy:
0.8424 - val_recall: 0.8302 - val_precision: 0.8506
Epoch 3/10
882/882 [==============================] - 27s 31ms/step - loss: 0.4048 - accuracy:
0.8338 - recall: 0.8275 - precision: 0.8275 - val_loss: 0.3594 - val_accuracy:
0.8437 - val_recall: 0.8360 - val_precision: 0.8513
Epoch 4/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3836 - accuracy:
0.8390 - recall: 0.8349 - precision: 0.8369 - val_loss: 0.3518 - val_accuracy:
0.8490 - val_recall: 0.8406 - val_precision: 0.8542
Epoch 5/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3745 - accuracy:
0.8478 - recall: 0.8459 - precision: 0.8421 - val_loss: 0.3468 - val_accuracy:
0.8520 - val_recall: 0.8432 - val_precision: 0.8571
Epoch 6/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3636 - accuracy:
0.8481 - recall: 0.8467 - precision: 0.8479 - val_loss: 0.3427 - val_accuracy:
0.8536 - val_recall: 0.8443 - val_precision: 0.8605
Epoch 7/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3530 - accuracy:
0.8562 - recall: 0.8535 - precision: 0.8526 - val_loss: 0.3385 - val_accuracy:
0.8553 - val_recall: 0.8472 - val_precision: 0.8613
Epoch 8/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3550 - accuracy:
0.8516 - recall: 0.8502 - precision: 0.8496 - val_loss: 0.3394 - val_accuracy:
0.8559 - val_recall: 0.8476 - val_precision: 0.8601
Epoch 9/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3488 - accuracy:
0.8564 - recall: 0.8554 - precision: 0.8538 - val_loss: 0.3362 - val_accuracy:
0.8581 - val_recall: 0.8514 - val_precision: 0.8619
Epoch 10/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3430 - accuracy:
0.8579 - recall: 0.8562 - precision: 0.8552 - val_loss: 0.3334 - val_accuracy:
0.8605 - val_recall: 0.8532 - val_precision: 0.8642
# Score the trained model on the held-out split. The returned list is ordered
# loss, accuracy, recall, precision (the loss followed by the compiled metrics).
e=model.evaluate(X_test,y_test)
294/294 [==============================] - 5s 16ms/step - loss: 0.3334 - accuracy:
0.8605 - recall: 0.8532 - precision: 0.8642
# First entry of the evaluation result is the test loss.
print(e[0])
0.33342963457107544
import matplotlib.pyplot as plt
# Plot the training curve against the validation ("test") curve, one figure
# per tracked quantity: accuracy first, then loss.
for metric, title in (('accuracy', 'Model Accuracy'), ('loss', 'Model Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# Predict on the test split; each row holds the model's two sigmoid outputs.
y_pred=model.predict(X_test)
# Notebook echo: the two scores for test sample 21.
y_pred[21]
array([0.60488063, 0.39073038], dtype=float32)
# Notebook echo: one-hot ground-truth label of test sample 21, for comparison with y_pred[21].
y_test[21]
array([0., 1.], dtype=float32)
# Collapse the two-column rows into class indices: 0 when the first column is
# strictly larger, otherwise 1 (so ties resolve to 1, as in the original loops).
Y_pred = [0 if scores[0] > scores[1] else 1 for scores in y_pred]
Y_test = [0 if onehot[0] > onehot[1] else 1 for onehot in y_test]
# Notebook echo: predicted class index of the first test sample.
Y_pred[0]
1
# Notebook echo: true class index of the first test sample.
Y_test[0]
1
from sklearn.metrics import classification_report, confusion_matrix
# Summarise test performance: the raw confusion matrix first, then the
# per-class precision / recall / F1 report.
cm = confusion_matrix(Y_test, Y_pred)
report = classification_report(Y_test, Y_pred, target_names=['Negative','Positive'])
print('Confusion Matrix', cm, 'Classification Report', report, sep='\n')
Confusion Matrix
[[4134 591]
[ 720 3954]]
Classification Report
precision recall f1-score support

Negative 0.85 0.87 0.86 4725


Positive 0.87 0.85 0.86 4674

accuracy 0.86 9399


macro avg 0.86 0.86 0.86 9399
weighted avg 0.86 0.86 0.86 9399

# Persist the trained model to disk (the .h5 suffix suggests HDF5 format).
model.save('/s/modelcnn.h5')
# Confusion matrix of true vs. predicted class indices.
confusionmatrix=confusion_matrix(Y_test, Y_pred)
# Notebook echo: shape of the matrix.
confusionmatrix.shape
(2, 2)
# Wrap the confusion matrix in a DataFrame whose rows and columns are both
# labelled with the class indices.
classes = [0, 1]
con_mat_df = pd.DataFrame(
    data=confusion_matrix(Y_test, Y_pred),
    index=classes,
    columns=classes,
)
con_mat_df
0 1
0 4134 591
1 720 3954
import seaborn as sns
# Draw the confusion matrix as an annotated heatmap with integer cell labels.
figure = plt.figure(figsize=(6, 6))
sns.heatmap(con_mat_df, annot=True, cmap=plt.cm.cool, fmt='d')
plt.ylabel('Actual')
plt.xlabel('Predicted')
# Run tight_layout only after the axis labels exist, so that they are taken
# into account when the layout is fitted to the figure.
plt.tight_layout()
plt.show()

You might also like