0% found this document useful (0 votes)
17 views16 pages

Oral Cancer

Uploaded by

Hakuna Matata
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views16 pages

Oral Cancer

Uploaded by

Hakuna Matata
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 16

DEEP LEARNING - AD3511

ORAL CANCER DETECTION

DONE BY,
MIRUTHUNJAYA J
MONIKA S M
AMAL SUNFIYA M K
ORAL CANCER DETECTION

import os

import numpy as np
import pandas as pd

# Root folder holding one sub-folder of images per class.
dataset_path = "/kaggle/input/dataset/train"
categories = ["Normal", "OSCC"]

# Rows of [file_path, label]; the label is the name of the class folder.
data = []

for category in categories:
    folder_path = os.path.join(dataset_path, category)
    label = category

    for file_name in os.listdir(folder_path):
        # Keep image files only; anything else in the folder is ignored.
        if file_name.endswith(('.png', '.jpg', '.jpeg')):
            file_path = os.path.join(folder_path, file_name)
            data.append([file_path, label])

# One row per image: where it lives on disk and which class it belongs to.
df = pd.DataFrame(data, columns=["file_path", "label"])
df.head()

                                           file_path   label
0  /kaggle/input/dataset/train/Normal/aug_506_590...  Normal
1  /kaggle/input/dataset/train/Normal/aug_35_9199...  Normal
2  /kaggle/input/dataset/train/Normal/aug_263_936...  Normal
3  /kaggle/input/dataset/train/Normal/aug_432_454...  Normal
4  /kaggle/input/dataset/train/Normal/aug_70_904.jpg  Normal

df.tail()

                                              file_path label
4941   /kaggle/input/dataset/train/OSCC/aug_373_318.jpg  OSCC
4942  /kaggle/input/dataset/train/OSCC/aug_694_4500.jpg  OSCC
4943  /kaggle/input/dataset/train/OSCC/OSCC_100x_224...  OSCC
4944   /kaggle/input/dataset/train/OSCC/aug_36_9010.jpg  OSCC
4945  /kaggle/input/dataset/train/OSCC/aug_322_6065.jpg  OSCC

df.shape

(4946, 2)

df.columns

Index(['file_path', 'label'], dtype='object')

df.duplicated().sum()

df.isnull().sum()

file_path 0
label 0
dtype: int64

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4946 entries, 0 to 4945
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 file_path 4946 non-null object
1 label 4946 non-null object
dtypes: object(2)
memory usage: 77.4+ KB

df['label'].unique()

array(['Normal', 'OSCC'], dtype=object)

df['label'].value_counts()

label
OSCC 2511
Normal 2435
Name: count, dtype: int64

import seaborn as sns


import matplotlib.pyplot as plt
# Number of images per class; reused by the pie chart below.
category_counts = df['label'].value_counts()

# Bar chart of how many images each class has.
plt.figure(figsize=(8, 6))
sns.countplot(x='label', data=df, palette='viridis')
plt.title('Distribution of Images by Category', fontsize=16)
plt.xlabel('Category', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.show()

# Pie chart of the same counts, as percentages.
plt.figure(figsize=(8, 6))
category_counts.plot(
    kind='pie',
    autopct='%1.1f%%',
    startangle=90,
    colors=['skyblue', 'salmon'],
)
plt.title('Category Distribution', fontsize=16)
plt.ylabel('')  # drop the default series-name label next to the pie
plt.show()
import cv2

# Show the first `num_samples` images of every class, one class per row.
num_samples = 5
sample_categories = df['label'].unique()

# squeeze=False keeps `axes` two-dimensional even with a single class.
fig, axes = plt.subplots(len(sample_categories), num_samples,
                         figsize=(15, 6), squeeze=False)

for i, category in enumerate(sample_categories):
    category_images = df[df['label'] == category].head(num_samples)

    for j, image_path in enumerate(category_images['file_path']):
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # OpenCV loads BGR; matplotlib expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        axes[i, j].imshow(img)
        # Hide only the ticks: axis('off') would also hide the row label
        # that is set on the first column below.
        axes[i, j].set_xticks([])
        axes[i, j].set_yticks([])
        if j == 0:
            axes[i, j].set_ylabel(category, fontsize=14, rotation=90)

plt.tight_layout()
plt.show()
from sklearn.preprocessing import LabelEncoder

# Turn the string class names into integer codes stored in a new column.
label_encoder = LabelEncoder()
df['Encoded_Label'] = label_encoder.fit_transform(df['label'])

# Class name -> integer code, printed so the encoding is visible in the log.
label_mapping = {
    name: code
    for name, code in zip(
        label_encoder.classes_,
        label_encoder.transform(label_encoder.classes_),
    )
}
print("Label Encoding Mapping:")
print(label_mapping)

Label Encoding Mapping:


{'Normal': 0, 'OSCC': 1}

import time
import shutil
import pathlib
import itertools
from PIL import Image

import cv2
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Flatten, Dense, Activation,
    Dropout, BatchNormalization,
)
from tensorflow.keras import regularizers

import warnings
warnings.filterwarnings("ignore")

print ('check')

check

# The generators below are created with class_mode='binary', which needs
# string labels, so the integer codes are cast to str.
df['Encoded_Label'] = df['Encoded_Label'].astype(str)
df.columns

# Output:
# Index(['file_path', 'label', 'Encoded_Label'], dtype='object')

# Keep only the two columns the generators read.
df = df[['file_path', 'Encoded_Label']]

# Two-stage stratified split: 80% train, then the remaining 20% is halved
# into validation and test. Both stages use the same shuffle/seed settings.
split_options = dict(shuffle=True, random_state=42)

train_df_new, temp_df_new = train_test_split(
    df,
    train_size=0.8,
    stratify=df['Encoded_Label'],
    **split_options,
)

valid_df_new, test_df_new = train_test_split(
    temp_df_new,
    test_size=0.5,
    stratify=temp_df_new['Encoded_Label'],
    **split_options,
)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

batch_size = 16
img_size = (299, 299)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Both generators only rescale pixel values to [0, 1]; no augmentation.
tr_gen = ImageDataGenerator(rescale=1./255)
ts_gen = ImageDataGenerator(rescale=1./255)

# Train on the training split only. Building this generator from the full
# `df` would feed the validation and test images to the model during
# training and inflate every reported metric.
train_gen_new = tr_gen.flow_from_dataframe(
    train_df_new,
    x_col='file_path',
    y_col='Encoded_Label',
    target_size=img_size,
    class_mode='binary',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

valid_gen_new = ts_gen.flow_from_dataframe(
    valid_df_new,
    x_col='file_path',
    y_col='Encoded_Label',
    target_size=img_size,
    class_mode='binary',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

# shuffle=False keeps the test batches in dataframe order, so predictions
# can be compared against the generator's labels position by position.
test_gen_new = ts_gen.flow_from_dataframe(
    test_df_new,
    x_col='file_path',
    y_col='Encoded_Label',
    target_size=img_size,
    class_mode='binary',
    color_mode='rgb',
    shuffle=False,
    batch_size=batch_size
)
Found 4946 validated image filenames belonging to 2 classes.
Found 495 validated image filenames belonging to 2 classes.
Found 495 validated image filenames belonging to 2 classes.

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Report whether TensorFlow can see at least one GPU.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    print("Using GPU")
else:
    print("Using CPU")

Using GPU

# Callback shared by the fit calls below: watches validation loss with a
# patience of 5 epochs and is configured to restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

from tensorflow.keras.applications import Xception


from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
GlobalAveragePooling2D, Dense, Dropout,
BatchNormalization, GaussianNoise, Input,
MultiHeadAttention, Reshape
)
from tensorflow.keras.optimizers import Adam

def create_xception_model(input_shape, num_classes=2, learning_rate=0.0001):
    """Build and compile a binary classifier on a frozen Xception backbone.

    The ImageNet-pretrained Xception feature map is flattened into a
    sequence, passed through multi-head self-attention, reshaped back to a
    feature map, average-pooled, and fed to a small dense head ending in a
    single sigmoid unit.

    Args:
        input_shape: (height, width, channels) of the input images. The
            spatial dimensions must be fixed (not None) because the feature
            map size is used to build the Reshape layers.
        num_classes: Accepted for interface compatibility but not used; the
            head always has one sigmoid output, i.e. two classes.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled Keras model (binary cross-entropy loss, accuracy metric).
    """
    inputs = Input(shape=input_shape, name="Input_Layer")
    base_model = Xception(weights='imagenet', input_tensor=inputs,
                          include_top=False)
    base_model.trainable = False  # only the layers added below are trained
    x = base_model.output

    # Flatten the feature map into height*width tokens for self-attention.
    height, width, channels = x.shape[1], x.shape[2], x.shape[3]
    x = Reshape((height * width, channels), name="Reshape_to_Sequence")(x)
    attention_output = MultiHeadAttention(
        num_heads=8, key_dim=channels, name="Multi_Head_Attention"
    )(x, x)
    # Restore the spatial layout so 2D global pooling can be applied.
    attention_output = Reshape((height, width, channels),
                               name="Reshape_to_Spatial")(attention_output)

    x = GaussianNoise(0.25, name="Gaussian_Noise_1")(attention_output)
    x = GlobalAveragePooling2D(name="Global_Avg_Pooling")(x)
    x = Dense(512, activation='relu', name="FC_512")(x)
    x = BatchNormalization(name="Batch_Normalization")(x)
    x = GaussianNoise(0.25, name="Gaussian_Noise_2")(x)
    x = Dropout(0.25, name="Dropout")(x)
    outputs = Dense(1, activation='sigmoid', name="Output_Layer")(x)

    model = Model(inputs=inputs, outputs=outputs,
                  name="Xception_with_Attention")
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model
# Instantiate the Xception-based classifier for 299x299 RGB inputs.
input_shape = (299, 299, 3)
cnn_model = create_xception_model(
    input_shape,
    num_classes=2,
    learning_rate=0.0001,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
83683744/83683744 ━━━━━━━━━━━━━━━━━━━━ 3s 0us/step

# Train the Xception-based model for up to 5 epochs, validating after each
# epoch; `history` keeps the per-epoch metrics used by the plots below.
history = cnn_model.fit(
    x=train_gen_new,
    epochs=5,
    verbose=1,
    callbacks=[early_stopping],
    validation_data=valid_gen_new,
)

Epoch 1/5

WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1732441776.854039 106 service.cc:145] XLA service 0x7aa5f1096c20 initialized
for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1732441776.854099 106 service.cc:153] StreamExecutor device (0): Tesla T4,
Compute Capability 7.5
I0000 00:00:1732441776.854104 106 service.cc:153] StreamExecutor device (1): Tesla T4,
Compute Capability 7.5
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1732441786.817904 134 asm_compiler.cc:369] ptxas warning : Registers are
spilled to local memory in function 'triton_gemm_dot_75', 228 bytes spill stores, 228 bytes
spill loads

I0000 00:00:1732441787.388886 136 asm_compiler.cc:369] ptxas warning : Registers are


spilled to local memory in function 'triton_gemm_dot_1', 1048 bytes spill stores, 1044 bytes
spill loads

I0000 00:00:1732441809.444929 106 device_compiler.h:188] Compiled cluster using XLA!


This line is logged at most once for the lifetime of the process.

143/310 ━━━━━━━━━━━━━━━━━━━━ 1:05 390ms/step - accuracy:


0.7248 - loss: 0.6269

I0000 00:00:1732441875.353624 172 asm_compiler.cc:369] ptxas warning : Registers are


spilled to local memory in function 'triton_gemm_dot_75', 228 bytes spill stores, 228 bytes
spill loads

I0000 00:00:1732441875.648364 170 asm_compiler.cc:369] ptxas warning : Registers are


spilled to local memory in function 'triton_gemm_dot_1', 1048 bytes spill stores, 1044 bytes
spill loads

310/310 ━━━━━━━━━━━━━━━━━━━━ 0s 470ms/step - accuracy: 0.7519 -


loss: 0.5595

I0000 00:00:1732441971.309592 206 asm_compiler.cc:369] ptxas warning : Registers are


spilled to local memory in function 'triton_gemm_dot_1', 1048 bytes spill stores, 1044 bytes
spill loads
I0000 00:00:1732441973.039498 207 asm_compiler.cc:369] ptxas warning : Registers
are spilled to local memory in function 'triton_gemm_dot_7', 228 bytes spill stores, 228
bytes spill loads
310/310 ━━━━━━━━━━━━━━━━━━━━ 220s 577ms/step - accuracy:
0.7520 - loss: 0.5592 - val_accuracy: 0.8061 - val_loss: 0.4536
Epoch 2/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 137s 438ms/step - accuracy:
0.8478 - loss: 0.3550 - val_accuracy: 0.8990 - val_loss: 0.2516
Epoch 3/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 137s 435ms/step - accuracy:
0.8802 - loss: 0.2863 - val_accuracy: 0.9212 - val_loss: 0.1860
Epoch 4/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 137s 434ms/step - accuracy:
0.9030 - loss: 0.2368 - val_accuracy: 0.9556 - val_loss: 0.1198
Epoch 5/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 136s 432ms/step - accuracy:
0.8988 - loss: 0.2392 - val_accuracy: 0.9535 - val_loss: 0.1314

# One figure per metric: training curve first, then validation curve.
curve_specs = (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)
for train_key, val_key, plot_title, y_label in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Score the Xception-based model on the held-out test split. The test
# generator is unshuffled, so its `classes` line up with the predictions.
test_gen_new.reset()
predicted_probs = cnn_model.predict(test_gen_new, verbose=0)
# The model has one sigmoid output; 0.5 is the class-1 decision threshold.
predicted_classes = (predicted_probs.ravel() > 0.5).astype(int)
test_labels = test_gen_new.classes

report = classification_report(test_labels, predicted_classes,
                               target_names=list(test_gen_new.class_indices.keys()))
print(report)

precision recall f1-score support

0 0.93 0.95 0.94 244


1 0.96 0.93 0.94 251

accuracy 0.94 495


macro avg 0.94 0.94 0.94 495
weighted avg 0.94 0.94 0.94 495

# Confusion matrix of true vs. predicted test labels, drawn as a heatmap
# whose ticks carry the generator's class names.
conf_matrix = confusion_matrix(test_labels, predicted_classes)
tick_names = list(test_gen_new.class_indices.keys())

plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=tick_names,
    yticklabels=list(tick_names),
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
GlobalAveragePooling2D, Dense, Dropout,
BatchNormalization, GaussianNoise, Input,
MultiHeadAttention, Reshape
)
from tensorflow.keras.optimizers import Adam

def create_mobilenet_model(input_shape, num_classes=2, learning_rate=0.0001):
    """Build and compile a binary classifier on a frozen MobileNet backbone.

    The ImageNet-pretrained MobileNet feature map is flattened into a
    sequence, passed through multi-head self-attention, reshaped back to a
    feature map, average-pooled, and fed to a small dense head ending in a
    single sigmoid unit.

    Args:
        input_shape: (height, width, channels) of the input images. The
            spatial dimensions must be fixed (not None) because the feature
            map size is used to build the Reshape layers.
        num_classes: Accepted for interface compatibility but not used; the
            head always has one sigmoid output, i.e. two classes.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled Keras model (binary cross-entropy loss, accuracy metric).
    """
    inputs = Input(shape=input_shape, name="Input_Layer")
    base_model = MobileNet(weights='imagenet', input_tensor=inputs,
                           include_top=False)
    base_model.trainable = False  # only the layers added below are trained
    x = base_model.output

    # Flatten the feature map into height*width tokens for self-attention.
    height, width, channels = x.shape[1], x.shape[2], x.shape[3]
    x = Reshape((height * width, channels), name="Reshape_to_Sequence")(x)
    attention_output = MultiHeadAttention(
        num_heads=8, key_dim=channels, name="Multi_Head_Attention"
    )(x, x)
    # Restore the spatial layout so 2D global pooling can be applied.
    attention_output = Reshape((height, width, channels),
                               name="Reshape_to_Spatial")(attention_output)

    x = GaussianNoise(0.25, name="Gaussian_Noise_1")(attention_output)
    x = GlobalAveragePooling2D(name="Global_Avg_Pooling")(x)
    x = Dense(512, activation='relu', name="FC_512")(x)
    x = BatchNormalization(name="Batch_Normalization")(x)
    x = GaussianNoise(0.25, name="Gaussian_Noise_2")(x)
    x = Dropout(0.25, name="Dropout")(x)
    outputs = Dense(1, activation='sigmoid', name="Output_Layer")(x)

    model = Model(inputs=inputs, outputs=outputs,
                  name="MobileNet_with_Attention")
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

# The generators were created with target_size=img_size, so the model input
# must use the same shape (`img_shape`); a 224x224 input layer would reject
# their batches.
input_shape = img_shape
mobilenet_model = create_mobilenet_model(input_shape, num_classes=2,
                                         learning_rate=0.0001)

# Output:
# Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5
# 17225924/17225924 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step

# Train the MobileNet model itself. Calling cnn_model.fit here would only
# continue training the Xception model and leave MobileNet untrained.
history = mobilenet_model.fit(
    train_gen_new,
    validation_data=valid_gen_new,
    epochs=5,
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 138s 438ms/step - accuracy:
0.9235 - loss: 0.1907 - val_accuracy: 0.7556 - val_loss: 0.8188
Epoch 2/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 136s 436ms/step - accuracy:
0.9229 - loss: 0.1967 - val_accuracy: 0.9616 - val_loss: 0.1058
Epoch 3/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 135s 431ms/step - accuracy:
0.9403 - loss: 0.1481 - val_accuracy: 0.8808 - val_loss: 0.3841
Epoch 4/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 136s 434ms/step - accuracy:
0.9528 - loss: 0.1146 - val_accuracy: 0.9717 - val_loss: 0.0748
Epoch 5/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 136s 434ms/step - accuracy:
0.9659 - loss: 0.0909 - val_accuracy: 0.9737 - val_loss: 0.0659

# One figure per metric for the most recent fit: training curve first,
# then validation curve.
curve_specs = (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)
for train_key, val_key, plot_title, y_label in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Recompute predictions for this section instead of reusing earlier ones.
# `history.model` is the model that the preceding fit call trained, so the
# report always describes the most recently trained model.
test_gen_new.reset()
predicted_probs = history.model.predict(test_gen_new, verbose=0)
# The model has one sigmoid output; 0.5 is the class-1 decision threshold.
predicted_classes = (predicted_probs.ravel() > 0.5).astype(int)
# The test generator is unshuffled, so `classes` aligns with predictions.
test_labels = test_gen_new.classes

report = classification_report(test_labels, predicted_classes,
                               target_names=list(test_gen_new.class_indices.keys()))
print(report)

precision recall f1-score support

0 1.00 0.95 0.97 244


1 0.95 1.00 0.98 251

accuracy 0.98 495


macro avg 0.98 0.98 0.98 495
weighted avg 0.98 0.98 0.98 495

# Confusion matrix of true vs. predicted test labels, drawn as a heatmap
# whose ticks carry the generator's class names.
conf_matrix = confusion_matrix(test_labels, predicted_classes)
tick_names = list(test_gen_new.class_indices.keys())

plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=tick_names,
    yticklabels=list(tick_names),
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

You might also like