Skin
Skin
import numpy as np
import pandas as pd
# Folders holding the training images of each class in the Kaggle dataset.
# Each assignment is kept on a single line: splitting it after `=` is a
# syntax error.
malignant_dir = '/kaggle/input/melanoma-cancer-dataset/train/Malignant'
benign_dir = '/kaggle/input/melanoma-cancer-dataset/train/Benign'
df.head()
image_path label
0 /kaggle/input/melanoma-cancer-dataset/train/Ma... Malignant
1 /kaggle/input/melanoma-cancer-dataset/train/Ma... Malignant
2 /kaggle/input/melanoma-cancer-dataset/train/Ma... Malignant
3 /kaggle/input/melanoma-cancer-dataset/train/Ma... Malignant
4 /kaggle/input/melanoma-cancer-dataset/train/Ma... Malignant
df.tail()
image_path label
11874 /kaggle/input/melanoma-cancer-dataset/train/Be... Benign
11875 /kaggle/input/melanoma-cancer-dataset/train/Be... Benign
11876 /kaggle/input/melanoma-cancer-dataset/train/Be... Benign
11877 /kaggle/input/melanoma-cancer-dataset/train/Be... Benign
11878 /kaggle/input/melanoma-cancer-dataset/train/Be... Benign
df.shape
(11879, 2)
df.columns
df.duplicated().sum()
df.isnull().sum()
image_path 0
label 0
dtype: int64
df['label'].unique()
df['label'].value_counts()
label
Benign 6289
Malignant 5590
Name: count, dtype: int64
# NOTE(review): no plotting call is visible between figure() and show(), so as
# written this displays an empty figure — confirm whether a plot call (for
# example a per-class count plot of df['label']) was lost here.
plt.figure(figsize=(15, 8))
plt.show()
# Pie chart of the class balance: one wedge per label, sized by its row count.
label_counts = df['label'].value_counts()

# Appearance options for the wedges, kept apart from the data arguments.
pie_style = dict(
    autopct='%1.1f%%',
    startangle=90,
    colors=['#ff9999', '#66b3ff'],
    shadow=True,
)

plt.figure(figsize=(8, 8))
plt.pie(label_counts, labels=label_counts.index, **pie_style)
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.title('Distribution of Malignant and Benign Melanoma Cases')
plt.show()
# `os` is not imported anywhere in the visible source, so bring it in here.
import os

# List the image files of each class and draw a small random sample from each.
malignant_images = os.listdir(malignant_dir)
benign_images = os.listdir(benign_dir)

# Sampling without replacement fails when more items are requested than exist,
# so cap the sample size at the number of files actually present.
sample_size = 5
malignant_sample = np.random.choice(
    malignant_images, min(sample_size, len(malignant_images)), replace=False)
benign_sample = np.random.choice(
    benign_images, min(sample_size, len(benign_images)), replace=False)

# NOTE(review): the code that draws the sampled images is not visible in the
# source; only the layout/show calls remain — confirm against the notebook.
plt.tight_layout()
plt.show()
# Resample the (image_path, label) rows with a RandomOverSampler; random_state
# pins the random draw so reruns give the same resampled set.
# NOTE(review): if this runs before the train/validation/test split, duplicated
# image paths can end up on both sides of the split — confirm the split order.
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(df[['image_path']],
df['label'])
import itertools
import pathlib
import shutil
import time
import warnings

import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from PIL import Image
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.applications import InceptionV3, Xception
from tensorflow.keras.layers import (
    Activation,
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    GaussianNoise,
    GlobalAveragePooling2D,
    MaxPooling2D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.preprocessing.image import ImageDataGenerator

sns.set_style('darkgrid')
warnings.filterwarnings("ignore")
print ('check')
check
# Input pipeline settings shared by the data generators and the models.
batch_size = 16
channels = 3
img_size = (224, 224)
# Full per-image shape: (height, width, channels).
img_shape = (*img_size, channels)
# Both generators only multiply pixel values by 1/255; no augmentation is set.
tr_gen = ImageDataGenerator(rescale=1./255)
ts_gen = ImageDataGenerator(rescale=1./255)

# Keyword arguments common to the train, validation and test flows.
flow_args = dict(
    x_col='image_path',
    y_col='label',
    target_size=img_size,
    class_mode='binary',
    color_mode='rgb',
    batch_size=batch_size,
)

train_gen_new = tr_gen.flow_from_dataframe(
    train_df_new, shuffle=True, **flow_args)
valid_gen_new = ts_gen.flow_from_dataframe(
    valid_df_new, shuffle=True, **flow_args)
# The test flow is not shuffled, so predictions stay aligned with
# test_gen_new.classes when the two are compared later.
test_gen_new = ts_gen.flow_from_dataframe(
    test_df_new, shuffle=False, **flow_args)
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
# Report whether TensorFlow can see at least one GPU device.
physical_devices = tf.config.list_physical_devices('GPU')
print("Using GPU" if physical_devices else "Using CPU")
Using GPU
def create_cnn_model(input_shape):
    """Build the baseline CNN: three Conv2D/MaxPooling2D stages and a dense head.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D layer.

    Returns:
        An uncompiled Sequential model ending in a single sigmoid unit.
    """
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu',
                            input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    # The second and third conv stages are restored from the printed model
    # summary (conv2d_1: 64 filters / 18,496 params, conv2d_2: 128 filters /
    # 73,856 params, which corresponds to 3x3 kernels).
    # NOTE(review): the 'relu' activation on these two layers is assumed to
    # match the first stage — the summary does not show activations.
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    # Single sigmoid output for the binary Benign/Malignant decision.
    model.add(layers.Dense(1, activation='sigmoid'))
    return model
# Build and compile the baseline CNN for binary classification.
# `input_shape` is not defined at module level in the visible source, so the
# model is built from img_shape, the (height, width, channels) tuple defined
# alongside the generator settings.
cnn_model = create_cnn_model(img_shape)
cnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
cnn_model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━
━━━━━┓
┃ Layer (type) ┃ Output Shape ┃
Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━
━━━━━┩
│ conv2d (Conv2D) │ (None, 222, 222, 32) │
896 │
├─────────────────────────────────┼────────────────────────┼──────────
─────┤
│ max_pooling2d (MaxPooling2D) │ (None, 111, 111, 32) │
0 │
├─────────────────────────────────┼────────────────────────┼──────────
─────┤
│ conv2d_1 (Conv2D) │ (None, 109, 109, 64) │
18,496 │
├─────────────────────────────────┼────────────────────────┼──────────
─────┤
│ max_pooling2d_1 (MaxPooling2D) │ (None, 54, 54, 64) │
0 │
├─────────────────────────────────┼────────────────────────┼──────────
─────┤
│ conv2d_2 (Conv2D) │ (None, 52, 52, 128) │
73,856 │
├─────────────────────────────────┼────────────────────────┼──────────
─────┤
│ max_pooling2d_2 (MaxPooling2D) │ (None, 26, 26, 128) │
0 │
├─────────────────────────────────┼────────────────────────┼──────────
─────┤
│ flatten (Flatten) │ (None, 86528) │
0 │
├─────────────────────────────────┼────────────────────────┼──────────
─────┤
│ dense (Dense) │ (None, 128) │
11,075,712 │
├─────────────────────────────────┼────────────────────────┼──────────
─────┤
│ dense_1 (Dense) │ (None, 1) │
129 │
└─────────────────────────────────┴────────────────────────┴──────────
─────┘
# Train for up to 10 epochs on train_gen_new, validating on valid_gen_new.
# NOTE(review): `early_stopping` is not defined anywhere in the visible source
# (only the EarlyStopping class is imported) — confirm its monitor/patience.
history = cnn_model.fit(
train_gen_new,
validation_data=valid_gen_new,
epochs=10,
callbacks=[early_stopping],
verbose=1
)
Epoch 1/10
# Draw training vs. validation curves, one figure per recorded metric.
for curve_key, axis_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[curve_key])
    plt.plot(history.history['val_' + curve_key])
    plt.title('Model ' + curve_key)
    plt.ylabel(axis_name)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Score the test set: outputs above 0.5 become class 1, the rest class 0.
test_labels = test_gen_new.classes
predictions = cnn_model.predict(test_gen_new)
predicted_labels = (predictions.ravel() > 0.5).astype(int)
def create_cnn_model(input_shape):
    """Build a regularized CNN: two conv stages with L2 penalties and dropout.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D layer.

    Returns:
        An uncompiled Sequential model ending in a single sigmoid unit.
    """
    model = models.Sequential()
    # NOTE(review): the opening of both Conv2D calls was lost from the source
    # (only the trailing kernel_regularizer argument survived). 32 and 64
    # filters with 3x3 'relu' kernels are assumed from the baseline CNN's first
    # two stages — confirm against the original notebook.
    model.add(layers.Conv2D(32, (3, 3), activation='relu',
                            input_shape=input_shape,
                            kernel_regularizer=regularizers.l2(0.001)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.25))
    model.add(layers.Conv2D(64, (3, 3), activation='relu',
                            kernel_regularizer=regularizers.l2(0.001)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.25))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu',
                           kernel_regularizer=regularizers.l2(0.001)))
    # Heavier dropout on the dense layer than on the conv stages.
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model
# Build, compile and train the regularized CNN.
# `input_shape` is not defined at module level in the visible source, so the
# model is built from img_shape, the (height, width, channels) tuple defined
# alongside the generator settings.
cnn_model = create_cnn_model(img_shape)
cnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# NOTE(review): `early_stopping` is not defined anywhere in the visible source
# (only the EarlyStopping class is imported) — confirm its monitor/patience.
history = cnn_model.fit(
    train_gen_new,
    validation_data=valid_gen_new,
    epochs=10,
    callbacks=[early_stopping],
    verbose=1,
)
Epoch 1/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 36s 43ms/step - accuracy: 0.6592 - loss:
1.1970 - val_accuracy: 0.7965 - val_loss: 0.6146
Epoch 2/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 20s 31ms/step - accuracy: 0.8189 - loss:
0.5164 - val_accuracy: 0.8243 - val_loss: 0.5465
Epoch 3/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 21s 33ms/step - accuracy: 0.8201 - loss:
0.4959 - val_accuracy: 0.7909 - val_loss: 0.5115
Epoch 4/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 19s 30ms/step - accuracy: 0.8231 - loss:
0.4594 - val_accuracy: 0.8450 - val_loss: 0.4743
Epoch 5/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 20s 32ms/step - accuracy: 0.8371 - loss:
0.4437 - val_accuracy: 0.8362 - val_loss: 0.4590
# Draw training vs. validation curves, one figure per recorded metric.
for curve_key, axis_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[curve_key])
    plt.plot(history.history['val_' + curve_key])
    plt.title('Model ' + curve_key)
    plt.ylabel(axis_name)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Score the test set: outputs above 0.5 become class 1, the rest class 0.
test_labels = test_gen_new.classes
predictions = cnn_model.predict(test_gen_new)
predicted_labels = (predictions > 0.5).astype(int).flatten()

# conf_matrix is never assigned in the visible source, so compute it here.
# Rows are true classes and columns are predicted classes, in index order.
conf_matrix = confusion_matrix(test_labels, predicted_labels)

# Take the tick labels from the generator's own class-index mapping. Keras
# assigns indices in alphanumeric order (Benign=0, Malignant=1), so the
# hard-coded ['Malignant', 'Benign'] labelled both axes the wrong way round.
class_names = sorted(test_gen_new.class_indices,
                     key=test_gen_new.class_indices.get)

plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()
def create_xception_model(input_shape):
    """Build a binary classifier on an ImageNet-pretrained Xception base.

    NOTE(review): the body of this function is missing from the source (only
    `return model` survived). The layers below copy the head used by
    create_inception_model with the base swapped for Xception — confirm
    against the original notebook.

    Args:
        input_shape: Shape of one input image, passed to the Xception base.

    Returns:
        An uncompiled Sequential model ending in a single sigmoid unit.
    """
    base_model = Xception(weights='imagenet',
                          input_shape=input_shape, include_top=False)
    model = Sequential()
    model.add(base_model)
    model.add(GaussianNoise(0.25))
    model.add(GlobalAveragePooling2D())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(GaussianNoise(0.25))
    model.add(Dropout(0.25))
    model.add(Dense(1, activation='sigmoid'))
    return model


# Instantiate the Xception model before compiling. Without this assignment the
# compile/fit calls below would keep training whatever cnn_model held before.
# img_shape is the (height, width, channels) tuple defined with the generator
# settings.
cnn_model = create_xception_model(img_shape)
cnn_model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# NOTE(review): `early_stopping` is not defined anywhere in the visible source
# (only the EarlyStopping class is imported) — confirm its monitor/patience.
history = cnn_model.fit(
    train_gen_new,
    validation_data=valid_gen_new,
    epochs=10,
    callbacks=[early_stopping],
    verbose=1,
)
Epoch 1/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 78s 97ms/step - accuracy: 0.7860 - loss:
0.4897 - val_accuracy: 0.8641 - val_loss: 0.3278
Epoch 2/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 42s 67ms/step - accuracy: 0.8590 - loss:
0.3347 - val_accuracy: 0.8768 - val_loss: 0.3074
Epoch 3/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 42s 66ms/step - accuracy: 0.8758 - loss:
0.2945 - val_accuracy: 0.8760 - val_loss: 0.3123
Epoch 4/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 83s 67ms/step - accuracy: 0.8856 - loss:
0.2739 - val_accuracy: 0.8752 - val_loss: 0.2933
Epoch 5/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 82s 67ms/step - accuracy: 0.8927 - loss:
0.2578 - val_accuracy: 0.8831 - val_loss: 0.2869
Epoch 6/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 42s 67ms/step - accuracy: 0.8926 - loss:
0.2599 - val_accuracy: 0.8847 - val_loss: 0.2827
Epoch 7/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 42s 66ms/step - accuracy: 0.9089 - loss:
0.2282 - val_accuracy: 0.8808 - val_loss: 0.2843
Epoch 8/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 42s 67ms/step - accuracy: 0.9093 - loss:
0.2160 - val_accuracy: 0.8696 - val_loss: 0.3361
Epoch 9/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 42s 66ms/step - accuracy: 0.9194 - loss:
0.2026 - val_accuracy: 0.8903 - val_loss: 0.2875
Epoch 10/10
629/629 ━━━━━━━━━━━━━━━━━━━━ 42s 67ms/step - accuracy: 0.9204 - loss:
0.1967 - val_accuracy: 0.8927 - val_loss: 0.2766
# Draw training vs. validation curves, one figure per recorded metric.
for curve_key, axis_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[curve_key])
    plt.plot(history.history['val_' + curve_key])
    plt.title('Model ' + curve_key)
    plt.ylabel(axis_name)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Score the test set: outputs above 0.5 become class 1, the rest class 0.
test_labels = test_gen_new.classes
predictions = cnn_model.predict(test_gen_new)
predicted_labels = (predictions > 0.5).astype(int).flatten()

# conf_matrix is never assigned in the visible source, so compute it here.
# Rows are true classes and columns are predicted classes, in index order.
conf_matrix = confusion_matrix(test_labels, predicted_labels)

# Take the tick labels from the generator's own class-index mapping. Keras
# assigns indices in alphanumeric order (Benign=0, Malignant=1), so the
# hard-coded ['Malignant', 'Benign'] labelled both axes the wrong way round.
class_names = sorted(test_gen_new.class_indices,
                     key=test_gen_new.class_indices.get)

plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()
def create_inception_model(input_shape):
    """Build a binary classifier on an ImageNet-pretrained InceptionV3 base.

    Args:
        input_shape: Shape of one input image, passed to the InceptionV3 base.

    Returns:
        An uncompiled Sequential model ending in a single sigmoid unit.
    """
    # include_top=False drops InceptionV3's own classification layers so the
    # custom head below can be attached.
    base_model = InceptionV3(weights='imagenet',
                             input_shape=input_shape, include_top=False)
    model = Sequential()
    model.add(base_model)
    model.add(GaussianNoise(0.25))
    model.add(GlobalAveragePooling2D())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(GaussianNoise(0.25))
    model.add(Dropout(0.25))
    model.add(Dense(1, activation='sigmoid'))
    return model


# Instantiate the Inception model before compiling. Without this assignment the
# compile/fit calls below would keep training whatever cnn_model held before.
# img_shape is the (height, width, channels) tuple defined with the generator
# settings.
cnn_model = create_inception_model(img_shape)
cnn_model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# NOTE(review): `early_stopping` is not defined anywhere in the visible source
# (only the EarlyStopping class is imported) — confirm its monitor/patience.
history = cnn_model.fit(
    train_gen_new,
    validation_data=valid_gen_new,
    epochs=10,
    callbacks=[early_stopping],
    verbose=1,
)
Epoch 1/10
# Draw training vs. validation curves, one figure per recorded metric.
for curve_key, axis_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[curve_key])
    plt.plot(history.history['val_' + curve_key])
    plt.title('Model ' + curve_key)
    plt.ylabel(axis_name)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Score the test set: outputs above 0.5 become class 1, the rest class 0.
test_labels = test_gen_new.classes
predictions = cnn_model.predict(test_gen_new)
predicted_labels = (predictions > 0.5).astype(int).flatten()

# conf_matrix is never assigned in the visible source, so compute it here.
# Rows are true classes and columns are predicted classes, in index order.
conf_matrix = confusion_matrix(test_labels, predicted_labels)

# Take the tick labels from the generator's own class-index mapping. Keras
# assigns indices in alphanumeric order (Benign=0, Malignant=1), so the
# hard-coded ['Malignant', 'Benign'] labelled both axes the wrong way round.
class_names = sorted(test_gen_new.class_indices,
                     key=test_gen_new.class_indices.get)

plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()
import time
# Single test image used for the timed prediction. The assignment is kept on
# one line: splitting it after `=` is a syntax error.
image_path = '/kaggle/input/melanoma-cancer-dataset/test/Malignant/5602.jpg'
# Run one prediction on image_path and unpack the two returned values.
# NOTE(review): predict_image_with_time is not defined anywhere in the visible
# source — confirm its definition and what the two return values contain.
prediction, prediction_time = predict_image_with_time(cnn_model,
image_path)
1. CNN Model:
• Benign:
– Precision: 0.86
– Recall: 0.88
– F1-score: 0.87
• Malignant:
– Precision: 0.88
– Recall: 0.85
– F1-score: 0.87
• Overall Accuracy: 0.87
• Macro Avg/Weighted Avg F1-score: 0.87
Analysis: The CNN model without regularization performs well, with balanced precision, recall,
and F1-score for both classes. Overall accuracy is 87%, and performance is consistent
across all metrics.
2. CNN Model with Regularization:
Analysis: Adding regularization hurt performance, especially for the Benign
class, whose recall fell sharply to 0.63. The Malignant class gained recall, but
at the cost of reduced precision. Overall accuracy dropped to 79% (F1-score 0.78), indicating that
the model's predictions are skewed toward the Malignant class rather than balanced across both classes.
3. Xception Model:
• Benign:
– Precision: 0.86
– Recall: 0.93
– F1-score: 0.89
• Malignant:
– Precision: 0.92
– Recall: 0.85
– F1-score: 0.88
• Overall Accuracy: 0.89
• Macro Avg/Weighted Avg F1-score: 0.89
Analysis: The Xception model shows strong performance with high precision and recall for both
Benign and Malignant classes. Accuracy of 89% and balanced f1-scores indicate good
generalization. This is a clear improvement over both the CNN and the regularized CNN models.
4. Inception Model:
• Benign:
– Precision: 0.90
– Recall: 0.87
– F1-score: 0.89
• Malignant:
– Precision: 0.88
– Recall: 0.90
– F1-score: 0.89
• Overall Accuracy: 0.89
• Macro Avg/Weighted Avg F1-score: 0.89
Analysis: The Inception model performs similarly to Xception, with 89% accuracy and nearly
equal precision, recall, and f1-scores across both classes. This model strikes a good balance
between both Benign and Malignant detection.
Conclusion:
• The CNN with regularization produced unbalanced predictions across the two classes, with
recall for the Benign class dropping to 0.63.
• Xception and Inception models achieved the best performance, with both reaching 89%
accuracy and well-balanced precision, recall, and f1-scores.
• Between Xception and Inception, the performance is very close, and either model could
be chosen based on other factors like computational efficiency or ease of deployment.
# Summary scores per model, listed in the same order as model_names.
# The list is named model_names rather than `models` so that it does not
# overwrite the `models` module imported from tensorflow.keras, which the CNN
# builder functions rely on.
model_names = ['CNN', 'CNN + Reg', 'Xception', 'Inception']
precision = [0.87, 0.82, 0.89, 0.89]
recall = [0.87, 0.79, 0.89, 0.89]
f1_score = [0.87, 0.78, 0.89, 0.89]
accuracy = [0.87, 0.79, 0.89, 0.89]

bar_width = 0.2
group_start = np.arange(len(model_names))

# One bar series per metric: (legend label, scores, bar colour).
series = [
    ('Precision', precision, 'b'),
    ('Recall', recall, 'g'),
    ('F1-Score', f1_score, 'r'),
    ('Accuracy', accuracy, 'c'),
]

plt.figure(figsize=(10, 6))
for offset, (metric_name, scores, colour) in enumerate(series):
    # Each series is shifted right by one bar width from the previous one.
    plt.bar(group_start + offset * bar_width, scores, color=colour,
            width=bar_width, edgecolor='grey', label=metric_name)

plt.xlabel('Models', fontweight='bold')
plt.ylabel('Scores', fontweight='bold')
plt.title('Comparative Performance of Models', fontweight='bold')
# Centre each tick under its group of bars. The bar centres run from 0 to
# (len(series) - 1) * bar_width, so the midpoint is half of that; an offset of
# a single bar_width sits left of centre when there are four bars.
plt.xticks(group_start + (len(series) - 1) * bar_width / 2, model_names)
plt.legend()
plt.show()