PINN-EMFNet for Breast Cancer Image Classification
import os
import numpy as np
import pandas as pd

base_path = "/kaggle/input/breast-cancer-detection/train"
categories = ["0", "1"]

# Walk each class folder and record every image path together with its label.
image_paths = []
labels = []
for category in categories:
    category_dir = os.path.join(base_path, category)
    for file_name in os.listdir(category_dir):
        image_paths.append(os.path.join(category_dir, file_name))
        labels.append(category)

df = pd.DataFrame({
    "image_path": image_paths,
    "label": labels
})
df.head()
image_path label
0 /kaggle/input/breast-cancer-detection/train/0/... 0
1 /kaggle/input/breast-cancer-detection/train/0/... 0
2 /kaggle/input/breast-cancer-detection/train/0/... 0
3 /kaggle/input/breast-cancer-detection/train/0/... 0
4 /kaggle/input/breast-cancer-detection/train/0/... 0
df.tail()
image_path label
2367 /kaggle/input/breast-cancer-detection/train/1/... 1
2368 /kaggle/input/breast-cancer-detection/train/1/... 1
2369 /kaggle/input/breast-cancer-detection/train/1/... 1
2370 /kaggle/input/breast-cancer-detection/train/1/... 1
2371 /kaggle/input/breast-cancer-detection/train/1/... 1
df.shape
(2372, 2)
df.columns
df.duplicated().sum()
df.isnull().sum()
image_path 0
label 0
dtype: int64
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2372 entries, 0 to 2371
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 image_path 2372 non-null object
1 label 2372 non-null object
dtypes: object(2)
memory usage: 37.2+ KB
df['label'].unique()
df['label'].value_counts()
label
0 1569
1 803
Name: count, dtype: int64
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(8, 6))
sns.countplot(data=df, x="label", palette="viridis")
plt.title("Distribution of Labels - Count Plot")
plt.xlabel("Tumor Type")
plt.ylabel("Count")
# Annotate each bar with its count.
for p in plt.gca().patches:
    plt.gca().annotate(f'{int(p.get_height())}',
                       (p.get_x() + p.get_width() / 2., p.get_height()),
                       ha='center', va='center', fontsize=11, color='black',
                       xytext=(0, 5),
                       textcoords='offset points')
plt.show()
label_counts = df['label'].value_counts()
plt.figure(figsize=(8, 6))
plt.pie(label_counts, labels=label_counts.index, autopct='%1.1f%%',
startangle=140, colors=sns.color_palette("viridis"))
plt.title("Distribution of Labels - Pie Chart")
plt.show()
import cv2
num_images = 5
plt.figure(figsize=(15, 12))
# Show the first few images; cv2 loads BGR, so convert to RGB for display.
for i, img_path in enumerate(df['image_path'][:num_images]):
    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    plt.subplot(1, num_images, i + 1)
    plt.imshow(img)
plt.show()
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler

label_encoder = LabelEncoder()
df['category_encoded'] = label_encoder.fit_transform(df['label'])
df = df[['image_path', 'category_encoded']]

# Oversample the minority class so both classes are equally represented.
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(df[['image_path']],
                                            df['category_encoded'])
df_resampled = X_resampled.copy()
df_resampled['category_encoded'] = y_resampled.to_numpy()
df_resampled
image_path category_encoded
0 /kaggle/input/breast-cancer-detection/train/0/... 0
1 /kaggle/input/breast-cancer-detection/train/0/... 0
2 /kaggle/input/breast-cancer-detection/train/0/... 0
3 /kaggle/input/breast-cancer-detection/train/0/... 0
4 /kaggle/input/breast-cancer-detection/train/0/... 0
... ... ...
3133 /kaggle/input/breast-cancer-detection/train/1/... 1
3134 /kaggle/input/breast-cancer-detection/train/1/... 1
3135 /kaggle/input/breast-cancer-detection/train/1/... 1
3136 /kaggle/input/breast-cancer-detection/train/1/... 1
3137 /kaggle/input/breast-cancer-detection/train/1/... 1
df_resampled['category_encoded'] = df_resampled['category_encoded'].astype(str)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense,
                                     Activation, Dropout, BatchNormalization)
from tensorflow.keras import regularizers
import warnings
warnings.filterwarnings("ignore")
print ('check')
check
batch_size = 16
img_size = (256, 256)
channels = 3
img_shape = (img_size[0], img_size[1], channels)
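The DataFrames train_df_new, valid_df_new, and test_df_new used by the generators below are never constructed in the excerpt. A minimal sketch, assuming a stratified 80/10/10 split of df_resampled with scikit-learn, which is consistent with the 2510 / 314 / 314 image counts reported later:

from sklearn.model_selection import train_test_split

# Assumed split; the original notebook does not show how these three
# DataFrames were created.
train_df_new, temp_df = train_test_split(
    df_resampled, test_size=0.2, random_state=42,
    stratify=df_resampled['category_encoded'])
valid_df_new, test_df_new = train_test_split(
    temp_df, test_size=0.5, random_state=42,
    stratify=temp_df['category_encoded'])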
tr_gen = ImageDataGenerator(rescale=1./255)
ts_gen = ImageDataGenerator(rescale=1./255)
train_gen_new = tr_gen.flow_from_dataframe(
train_df_new,
x_col='image_path',
y_col='category_encoded',
target_size=img_size,
class_mode='binary',
color_mode='rgb',
shuffle=True,
batch_size=batch_size
)
valid_gen_new = ts_gen.flow_from_dataframe(
valid_df_new,
x_col='image_path',
y_col='category_encoded',
target_size=img_size,
class_mode='binary',
color_mode='rgb',
shuffle=True,
batch_size=batch_size
)
test_gen_new = ts_gen.flow_from_dataframe(
test_df_new,
x_col='image_path',
y_col='category_encoded',
target_size=img_size,
class_mode='binary',
color_mode='rgb',
shuffle=False,
batch_size=batch_size
)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU is set for TensorFlow")
    except RuntimeError as e:
        print(e)
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
# Squeeze-and-excitation style channel attention: reweight the feature maps
# with a sigmoid-gated, globally average-pooled descriptor.
def attention_block(x, filters):
    attention = layers.GlobalAveragePooling2D()(x)
    attention = layers.Dense(filters, activation='sigmoid')(attention)
    attention = layers.Reshape((1, 1, filters))(attention)
    x = layers.multiply([x, attention])
    return x
# Classification head applied to the decoder output (see the model sketch below).
refined_output = layers.Flatten()(decoder_output)
refined_output = layers.Dense(1, activation='sigmoid')(refined_output)
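The code that assembles the encoder, attention blocks, decoder, and classification head into the model summarized below is not included in the excerpt. A minimal sketch, assuming a convolutional encoder-decoder consistent with the layer names and parameter counts in the printed summary, and reusing the attention_block helper and classification-head fragment above:

def build_model(input_shape=img_shape):
    inputs = layers.Input(shape=input_shape)

    # Encoder block 1: 3x3 conv -> BN -> ReLU -> 1x1 conv -> BN -> ReLU -> attention.
    x = layers.Conv2D(32, (3, 3), padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Conv2D(32, (1, 1), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = attention_block(x, 32)

    # Encoder block 2: same pattern with 64 filters.
    x = layers.Conv2D(64, (3, 3), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Conv2D(64, (1, 1), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = attention_block(x, 64)

    # Decoder stage: upsample 256 -> 512 and refine with a 3x3 conv.
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Conv2D(32, (3, 3), padding='same')(x)
    x = layers.BatchNormalization()(x)
    decoder_output = layers.ReLU()(x)

    # Classification head (the refined_output fragment shown earlier).
    refined_output = layers.Flatten()(decoder_output)
    refined_output = layers.Dense(1, activation='sigmoid')(refined_output)

    return models.Model(inputs, refined_output)

model = build_model()
model.summary()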
Model: "functional_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                      ┃ Output Shape         ┃   Param # ┃ Connected to                     ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer)          │ (None, 256, 256, 3)  │         0 │ -                                │
│ conv2d (Conv2D)                   │ (None, 256, 256, 32) │       896 │ input_layer[0][0]                │
│ batch_normalization (BatchNormal… │ (None, 256, 256, 32) │       128 │ conv2d[0][0]                     │
│ re_lu (ReLU)                      │ (None, 256, 256, 32) │         0 │ batch_normalization[0][0]        │
│ conv2d_1 (Conv2D)                 │ (None, 256, 256, 32) │     1,056 │ re_lu[0][0]                      │
│ batch_normalization_1 (BatchNorm… │ (None, 256, 256, 32) │       128 │ conv2d_1[0][0]                   │
│ re_lu_1 (ReLU)                    │ (None, 256, 256, 32) │         0 │ batch_normalization_1[0][0]      │
│ global_average_pooling2d (Global… │ (None, 32)           │         0 │ re_lu_1[0][0]                    │
│ dense (Dense)                     │ (None, 32)           │     1,056 │ global_average_pooling2d[0][0]   │
│ reshape (Reshape)                 │ (None, 1, 1, 32)     │         0 │ dense[0][0]                      │
│ multiply (Multiply)               │ (None, 256, 256, 32) │         0 │ re_lu_1[0][0], reshape[0][0]     │
│ conv2d_2 (Conv2D)                 │ (None, 256, 256, 64) │    18,496 │ multiply[0][0]                   │
│ batch_normalization_2 (BatchNorm… │ (None, 256, 256, 64) │       256 │ conv2d_2[0][0]                   │
│ re_lu_2 (ReLU)                    │ (None, 256, 256, 64) │         0 │ batch_normalization_2[0][0]      │
│ conv2d_3 (Conv2D)                 │ (None, 256, 256, 64) │     4,160 │ re_lu_2[0][0]                    │
│ batch_normalization_3 (BatchNorm… │ (None, 256, 256, 64) │       256 │ conv2d_3[0][0]                   │
│ re_lu_3 (ReLU)                    │ (None, 256, 256, 64) │         0 │ batch_normalization_3[0][0]      │
│ global_average_pooling2d_1 (Glob… │ (None, 64)           │         0 │ re_lu_3[0][0]                    │
│ dense_1 (Dense)                   │ (None, 64)           │     4,160 │ global_average_pooling2d_1[0][0] │
│ reshape_1 (Reshape)               │ (None, 1, 1, 64)     │         0 │ dense_1[0][0]                    │
│ multiply_1 (Multiply)             │ (None, 256, 256, 64) │         0 │ re_lu_3[0][0], reshape_1[0][0]   │
│ up_sampling2d (UpSampling2D)      │ (None, 512, 512, 64) │         0 │ multiply_1[0][0]                 │
│ conv2d_4 (Conv2D)                 │ (None, 512, 512, 32) │    18,464 │ up_sampling2d[0][0]              │
│ batch_normalization_4 (BatchNorm… │ (None, 512, 512, 32) │       128 │ conv2d_4[0][0]                   │
│ re_lu_4 (ReLU)                    │ (None, 512, 512, 32) │         0 │ batch_normalization_4[0][0]      │
│ flatten (Flatten)                 │ (None, 8388608)      │         0 │ re_lu_4[0][0]                    │
│ dense_2 (Dense)                   │ (None, 1)            │ 8,388,609 │ flatten[0][0]                    │
└───────────────────────────────────┴──────────────────────┴───────────┴──────────────────────────────────┘
from PIL import Image

# Wrap a Keras generator and drop any image that fails PIL verification,
# logging the failure instead of crashing the training loop.
def my_image_check(generator):
    while True:
        x, y = next(generator)
        new_x = []
        new_y = []
        for i in range(x.shape[0]):
            try:
                img = Image.fromarray((x[i] * 255).astype(np.uint8))
                img.verify()
                new_x.append(x[i])
                new_y.append(y[i])
            except Exception as e:
                bad_idx = generator.index + i
                path = (generator.dataframe['image_path'].iloc[bad_idx]
                        if bad_idx < len(generator.dataframe)
                        else 'Path information not available')
                print(f"Error loading image: {path}")
                print(f"Error: {e}")
        yield np.array(new_x), np.array(new_y)
tr_gen = ImageDataGenerator(rescale=1./255)
ts_gen = ImageDataGenerator(rescale=1./255)
train_generator = tr_gen.flow_from_dataframe(
train_df_new,
x_col='image_path',
y_col='category_encoded',
target_size=img_size,
class_mode='binary',
color_mode='rgb',
shuffle=True,
batch_size=batch_size
)
train_gen_new = my_image_check(train_generator)
valid_generator = ts_gen.flow_from_dataframe(
valid_df_new,
x_col='image_path',
y_col='category_encoded',
target_size=img_size,
class_mode='binary',
color_mode='rgb',
shuffle=True,
batch_size=batch_size
)
valid_gen_new = my_image_check(valid_generator)
test_generator = ts_gen.flow_from_dataframe(
test_df_new,
x_col='image_path',
y_col='category_encoded',
target_size=img_size,
class_mode='binary',
color_mode='rgb',
shuffle=False,
batch_size=batch_size
)
test_gen_new = my_image_check(test_generator)
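The compile step and the lr_scheduler, steps_per_epoch, and validation_steps names used by model.fit below are not defined anywhere in the excerpt. A minimal sketch, assuming Adam at the 1e-4 learning rate visible in the training log, binary cross-entropy, and a ReduceLROnPlateau callback whose exact parameters are not given in the original:

# Assumed configuration; only the 1e-4 learning rate is confirmed by the log.
model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])

lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                 patience=2, min_lr=1e-6)

# Step counts per epoch; the values actually used are not shown
# (the log below reports 313 steps per epoch).
steps_per_epoch = len(train_generator)
validation_steps = len(valid_generator)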
history = model.fit(
train_gen_new,
epochs=5,
validation_data=valid_gen_new,
steps_per_epoch=steps_per_epoch,
validation_steps=validation_steps,
callbacks=[lr_scheduler]
)
Found 2510 validated image filenames belonging to 2 classes.
Found 314 validated image filenames belonging to 2 classes.
Found 314 validated image filenames belonging to 2 classes.
Epoch 1/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 77s 149ms/step - accuracy: 0.5387 - loss: 33.5591 - val_accuracy: 0.5256 - val_loss: 6.0205 - learning_rate: 1.0000e-04
Epoch 2/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 85s 198ms/step - accuracy: 0.7713 - loss: 5.9724 - val_accuracy: 0.6536 - val_loss: 6.7114 - learning_rate: 1.0000e-04
Epoch 3/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 49s 155ms/step - accuracy: 0.8430 - loss: 3.7649 - val_accuracy: 0.7974 - val_loss: 4.8260 - learning_rate: 1.0000e-04
Epoch 4/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 49s 156ms/step - accuracy: 0.9245 - loss: 1.5245 - val_accuracy: 0.7353 - val_loss: 5.8694 - learning_rate: 1.0000e-04
Epoch 5/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 49s 156ms/step - accuracy: 0.9437 - loss: 0.9491 - val_accuracy: 0.7451 - val_loss: 7.8076 - learning_rate: 1.0000e-04
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.tight_layout()
plt.show()
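The test generator built earlier is never used in the excerpt. A minimal sketch of a held-out evaluation, assuming the same wrapped generator and one pass over the underlying iterator:

# Hypothetical evaluation step, not present in the original notebook.
test_steps = len(test_generator)
test_loss, test_acc = model.evaluate(test_gen_new, steps=test_steps, verbose=1)
print(f"Test accuracy: {test_acc:.4f} | Test loss: {test_loss:.4f}")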