Oral Cancer
Oral Cancer
DONE BY,
MIRUTHUNJAYA J
MONIKA S M
AMAL SUNFIYA M K
ORAL CANCER DETECTION
import numpy as np
import pandas as pd
# Root directory of the training images (Kaggle input path).
dataset_path = "/kaggle/input/dataset/train"
# The two class labels; presumably these match the sub-folder names under
# dataset_path — TODO confirm against the loading code.
categories = ["Normal", "OSCC"]
# Accumulator list; presumably filled with (file_path, label) rows that later
# become the DataFrame `df` — the filling loop is not visible here, confirm.
data = []
# Quick look at the last rows of `df` and its (rows, columns) shape.
df.tail()
df.shape
(4946, 2)
# Sanity checks: column names, count of fully duplicated rows, and the number
# of missing values per column.
df.columns
df.duplicated().sum()
df.isnull().sum()
file_path 0
label 0
dtype: int64
# Print the column dtypes, non-null counts and memory usage of `df`.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4946 entries, 0 to 4945
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 file_path 4946 non-null object
1 label 4946 non-null object
dtypes: object(2)
memory usage: 77.4+ KB
# Distinct label values and how many rows carry each one (class balance).
df['label'].unique()
df['label'].value_counts()
label
OSCC 2511
Normal 2435
Name: count, dtype: int64
# Bar chart of the number of rows in `df` per label.
plt.figure(figsize=(8, 6))
sns.countplot(x='label', data=df, palette='viridis')
plt.title('Distribution of Images by Category', fontsize=16)
# The title and x-label calls must be separate statements; fused onto a single
# line they are a SyntaxError.
plt.xlabel('Category', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.show()
# Pie chart of the per-category counts, labelled with percentages.
# `category_counts` is defined outside this excerpt (presumably
# df['label'].value_counts() — confirm).
plt.figure(figsize=(8, 6))
category_counts.plot(
    kind='pie',
    autopct='%1.1f%%',  # percentage with one decimal place
    startangle=90,
    colors=['skyblue', 'salmon'],
)
plt.title('Category Distribution', fontsize=16)
plt.ylabel('')  # blank out the y-axis label
plt.show()
import cv2
# Number of example images per category (presumably the number of grid
# columns — confirm against the missing subplot setup).
num_samples = 5
# NOTE(review): the code that creates `axes` and the loops that define `i`,
# `j`, `img` and `category` are not present in this export; the statements
# below are the body of those loops.
axes[i, j].imshow(img)
axes[i, j].axis('off')
if j == 0:
    # Label the first column with the category name. The body of this `if`
    # must be indented; left at column 0 it is an IndentationError.
    # NOTE(review): axis('off') above also hides axis labels in matplotlib,
    # so this label is probably not drawn — confirm and hide only the ticks
    # if the label is wanted.
    axes[i, j].set_ylabel(category, fontsize=14, rotation=90)
plt.tight_layout()
plt.show()
from sklearn.preprocessing import LabelEncoder
# Turn the string labels into integer codes, stored in a new column.
label_encoder = LabelEncoder()
df['Encoded_Label'] = label_encoder.fit_transform(df['label'])

# Record which original label each integer code stands for.
label_mapping = {
    class_name: code
    for class_name, code in zip(
        label_encoder.classes_,
        label_encoder.transform(label_encoder.classes_),
    )
}
print("Label Encoding Mapping:", label_mapping, sep="\n")
import time
import shutil
import pathlib
import itertools
from PIL import Image
import cv2
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")
# Marker printed once the import cell has run without errors.
print('check')
check
'Encoded_Label']]
# Number of images per batch, passed to every generator below.
batch_size = 16
# Size images are resized to (passed as `target_size` to the generators).
img_size = (299, 299)
# Three colour channels, matching color_mode='rgb' in the generators.
channels = 3
# Full per-image shape: img_size followed by the channel count.
img_shape = (*img_size, channels)
# Both generators only multiply pixel values by 1/255; no augmentation is
# configured.
tr_gen = ImageDataGenerator(rescale=1./255)
ts_gen = ImageDataGenerator(rescale=1./255)

# Train only on rows that are NOT in the validation or test frames. Feeding
# the full `df` to the training generator lets the model see the held-out
# images during training and inflates the validation/test metrics.
held_out_paths = pd.concat(
    [valid_df_new['file_path'], test_df_new['file_path']]
)
train_only_df = df[~df['file_path'].isin(held_out_paths)]

# Training batches: shuffled so every epoch sees the images in a new order.
train_gen_new = tr_gen.flow_from_dataframe(
    train_only_df,
    x_col='file_path',
    y_col='Encoded_Label',
    target_size=img_size,
    class_mode='binary',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

# Validation batches, evaluated after each epoch during fitting.
valid_gen_new = ts_gen.flow_from_dataframe(
    valid_df_new,
    x_col='file_path',
    y_col='Encoded_Label',
    target_size=img_size,
    class_mode='binary',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

# Test batches: shuffle=False keeps the batch order aligned with the rows of
# test_df_new, so predictions can be matched to their true labels.
test_gen_new = ts_gen.flow_from_dataframe(
    test_df_new,
    x_col='file_path',
    y_col='Encoded_Label',
    target_size=img_size,
    class_mode='binary',
    color_mode='rgb',
    shuffle=False,
    batch_size=batch_size
)
Found 4946 validated image filenames belonging to 2 classes.
Found 495 validated image filenames belonging to 2 classes.
Found 495 validated image filenames belonging to 2 classes.
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
Using GPU
# Train `cnn_model` for at most 5 epochs on the training generator, evaluating
# on the validation generator after each epoch. `cnn_model` and
# `early_stopping` are defined outside this excerpt.
# The returned History object is what the accuracy/loss plots read from.
history = cnn_model.fit(
train_gen_new,
validation_data=valid_gen_new,
epochs=5,
callbacks=[early_stopping],
verbose=1
)
Epoch 1/5
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1732441776.854039 106 service.cc:145] XLA service 0x7aa5f1096c20 initialized
for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1732441776.854099 106 service.cc:153] StreamExecutor device (0): Tesla T4,
Compute Capability 7.5
I0000 00:00:1732441776.854104 106 service.cc:153] StreamExecutor device (1): Tesla T4,
Compute Capability 7.5
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1732441786.817904 134 asm_compiler.cc:369] ptxas warning : Registers are
spilled to local memory in function 'triton_gemm_dot_75', 228 bytes spill stores, 228 bytes
spill loads
# Draw one figure per metric, each with the training and validation curves
# recorded in `history`.
curve_specs = (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)
for train_key, val_key, chart_title, y_label in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Class names in the order the test generator indexes them; used for the
# report rows and for both heatmap axes.
class_names = list(test_gen_new.class_indices.keys())

# Per-class precision/recall/F1 on the test set. `test_labels` and
# `predicted_classes` are computed outside this excerpt.
report = classification_report(
    test_labels,
    predicted_classes,
    target_names=class_names,
)
print(report)

# Confusion matrix as an annotated heatmap (`conf_matrix` is computed outside
# this excerpt).
plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
GlobalAveragePooling2D, Dense, Dropout,
BatchNormalization, GaussianNoise, Input,
MultiHeadAttention, Reshape
)
from tensorflow.keras.optimizers import Adam
# Second training run: again at most 5 epochs with validation after each one.
# NOTE(review): this still fits `cnn_model`; if a MobileNet-based model was
# meant to be trained here, its definition is not visible in this export —
# confirm which model this call should use.
history = cnn_model.fit(
train_gen_new,
validation_data=valid_gen_new,
epochs=5,
callbacks=[early_stopping],
verbose=1
)
Epoch 1/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 138s 438ms/step - accuracy: 0.9235 - loss: 0.1907 - val_accuracy: 0.7556 - val_loss: 0.8188
Epoch 2/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 136s 436ms/step - accuracy: 0.9229 - loss: 0.1967 - val_accuracy: 0.9616 - val_loss: 0.1058
Epoch 3/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 135s 431ms/step - accuracy: 0.9403 - loss: 0.1481 - val_accuracy: 0.8808 - val_loss: 0.3841
Epoch 4/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 136s 434ms/step - accuracy: 0.9528 - loss: 0.1146 - val_accuracy: 0.9717 - val_loss: 0.0748
Epoch 5/5
310/310 ━━━━━━━━━━━━━━━━━━━━ 136s 434ms/step - accuracy: 0.9659 - loss: 0.0909 - val_accuracy: 0.9737 - val_loss: 0.0659
# Draw one figure per metric for the second training run, each with the
# training and validation curves recorded in `history`.
curve_specs = (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)
for train_key, val_key, chart_title, y_label in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
# Class names in the order the test generator indexes them; used for the
# report rows and for both heatmap axes.
class_names = list(test_gen_new.class_indices.keys())

# Per-class precision/recall/F1 on the test set. `test_labels` and
# `predicted_classes` are computed outside this excerpt.
report = classification_report(
    test_labels,
    predicted_classes,
    target_names=class_names,
)
print(report)

# Confusion matrix as an annotated heatmap (`conf_matrix` is computed outside
# this excerpt).
plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()