I am trying to train a 3D CNN to classify cancer stages on a dataset of head-and-neck CT image series, split into 5 classes corresponding to the stages of cancer. Within each stage there is a folder per patient, each containing a CT series of 120 frames. I want to feed an image cube into the model so that it accounts for both the in-plane spatial resolution and the resolution in depth, and classifies the cube into one of the five classes.
The cancer is present in approximately 10 of the 120 frames per patient.
I passed the entire set of images (120 per patient) as a 3D image cube, after normalizing the data, into a 3D convolutional model (structure given below):
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (BatchNormalization, Conv3D, Dense,
                                     Dropout, Flatten, MaxPooling3D)

num_classes = 5

model = Sequential([
    tf.keras.Input(shape=(255, 255, 120, 1)),
    # Convolutional layer 1
    Conv3D(16, (3, 3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling3D((2, 2, 1), strides=(2, 2, 1), padding="same"),
    # Convolutional layer 2
    Conv3D(32, (3, 3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding="same"),
    # Convolutional layer 3
    Conv3D(32, (3, 3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding="same"),
    # Convolutional layer 4
    Conv3D(64, (3, 3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding="same"),
    # Convolutional layer 5
    Conv3D(128, (3, 3, 3), activation='relu'),
    BatchNormalization(),
    # Convolutional layer 6
    Conv3D(128, (3, 3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding="same"),
    # Dropout(0.25),
    # Flatten layer
    Flatten(),
    # Dense layer 1
    Dense(256, activation='relu', kernel_initializer='glorot_uniform',
          kernel_regularizer=tf.keras.regularizers.L2(0.01)),
    BatchNormalization(),
    # Dropout(0.35),
    # Dense layer 2
    Dense(128, activation='relu', kernel_initializer='glorot_uniform',
          kernel_regularizer=tf.keras.regularizers.L2(0.01)),
    BatchNormalization(),
    # Dropout(0.25),
    # Output layer
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.000001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
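As a sanity check that the network accepts the intended input shape and outputs five class probabilities, a dummy volume can be run through it (minimal sketch; all-zeros input, batch size 1):

import numpy as np

# One all-zeros volume matching the model's (255, 255, 120, 1) input shape
dummy = np.zeros((1, 255, 255, 120, 1), dtype=np.float32)
probs = model.predict(dummy, verbose=0)
print(probs.shape)  # expected: (1, 5)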
This is how I loaded the DICOM images into the input (X) and output (y) labels:
import os

import cv2
import pydicom

def load_dicom_images(folder_path):
    images = []
    for file in sorted(os.listdir(folder_path)):
        ds = pydicom.dcmread(os.path.join(folder_path, file))
        # Read the single-channel pixel data and resize to 255x255
        image = ds.pixel_array
        image = cv2.resize(image, (255, 255))
        # Normalize each slice to zero mean and unit variance
        normalized_image = (image.astype(np.float32) - image.mean()) / image.std()
        images.append(normalized_image)
    # Stack the 120 slices into a single (120, 255, 255) array
    images = np.array(images)
    return images
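For a single patient folder this returns the slices stacked depth-first; for illustration (the path below is a placeholder):

# Hypothetical path, for illustration only
volume = load_dicom_images('dataset/Stage I/patient_001')
print(volume.shape)  # (120, 255, 255): depth first, then height and width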
from tensorflow.keras.utils import to_categorical

def load_data(stage_folder):  # stage_folder contains the five class folders
    X = []
    y = []
    # Map stage name to integer label
    stage_to_label = {'Stage I': 0, 'Stage II': 1, 'Stage III': 2,
                      'Stage IVA': 3, 'Stage IVB': 4}
    for stage in os.listdir(stage_folder):
        stage_path = os.path.join(stage_folder, stage)
        label = stage_to_label[stage]
        for patient_id in os.listdir(stage_path):
            patient_folder = os.path.join(stage_path, patient_id)
            selected_images = load_dicom_images(patient_folder)
            X.append(selected_images)
            y.append(label)
    X = np.array(X)
    y = np.array(y)
    # Convert y to categorical (one-hot encoding)
    y = to_categorical(y, num_classes=5)
    return X, y
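The X_train/X_test arrays used below come from a hold-out split along these lines (minimal sketch, assuming scikit-learn; 'dataset' is a placeholder path, and the split is stratified on the integer labels so every stage appears in both sets):

from sklearn.model_selection import train_test_split

X, y = load_data('dataset')  # placeholder path to the stage folders
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y.argmax(axis=1))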
history = model.fit(X_train, y_train, batch_size=1, epochs=50,
                    validation_data=(X_test, y_test), verbose=True,
                    callbacks=callbacks)
This gives a low training accuracy (~21%, essentially chance level for five classes), a similarly low validation accuracy, and a validation loss that increases with each epoch. I have reshaped the data to match the input shape of my CNN.
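Concretely, the reshape (applied before calling fit) moves the depth axis to the end and adds a channel dimension, roughly like this (sketch; it assumes the loaders return volumes as (patients, depth, height, width)):

# (N, 120, 255, 255) -> (N, 255, 255, 120) -> (N, 255, 255, 120, 1)
X_train = np.transpose(X_train, (0, 2, 3, 1))[..., np.newaxis]
X_test = np.transpose(X_test, (0, 2, 3, 1))[..., np.newaxis]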
Do I need to process the data further and keep only the cancerous frames, filtering out the rest, or should I keep the whole volume to preserve the resolution in depth and look for a different approach to increase the accuracy?