diff --git a/AllCode.ipynb b/AllCode.ipynb deleted file mode 100644 index 8fa09a8..0000000 --- a/AllCode.ipynb +++ /dev/null @@ -1,1372 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70c99e47", - "metadata": {}, - "source": [ - "# Importing Libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04b8ba2b", - "metadata": {}, - "outputs": [], - "source": [ - "# Importing libraries essential for the project\n", - "import os\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import skimage.io\n", - "import skimage.exposure\n", - "from skimage.transform import resize\n", - "from imgaug import augmenters as iaa\n", - "from tqdm import tqdm\n", - "import PIL\n", - "from PIL import Image, ImageOps\n", - "import cv2\n", - "import random\n", - "from keras.models import Sequential, Model, load_model\n", - "from keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D, Input\n", - "from keras.applications.densenet import DenseNet121\n", - "import warnings\n", - "import seaborn as sns\n", - "from sklearn.utils import class_weight, shuffle\n", - "from keras.losses import binary_crossentropy\n", - "from keras.preprocessing.image import ImageDataGenerator\n", - "from keras.applications.resnet import preprocess_input\n", - "import keras.backend as K\n", - "import tensorflow as tf\n", - "from sklearn.metrics import f1_score, fbeta_score\n", - "from keras.utils import Sequence, to_categorical\n", - "from sklearn.model_selection import train_test_split, KFold\n", - "from keras import optimizers, applications\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.metrics import confusion_matrix\n", - "from itertools import product\n", - "from keras.optimizers import Adam\n", - "from keras.callbacks import EarlyStopping, ReduceLROnPlateau\n", - "from tensorflow.keras.applications.xception import Xception\n", - "from keras.applications import EfficientNetB6\n", - "warnings.filterwarnings('ignore')" - ] - }, - { - "cell_type": "markdown", - "id": "8872b6ea", - "metadata": {}, - "source": [ - "# EDA & Pre-Processing" - ] - }, - { - "cell_type": "markdown", - "id": "5f7e3851", - "metadata": {}, - "source": [ - "### Get the train and test data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfb3cc77", - "metadata": {}, - "outputs": [], - "source": [ - "# Load the training and test data CSV files into Pandas DataFrames\n", - "df_train = pd.read_csv('train.csv')\n", - "df_test = pd.read_csv('test.csv')\n", - "\n", - "# Extract the 'id_code' and 'diagnosis' columns from the training data\n", - "x = df_train['id_code']\n", - "y = df_train['diagnosis']\n", - "\n", - "# Randomly shuffle the rows of 'id_code' and 'diagnosis' in unison, using a fixed seed for reproducibility\n", - "x, y = shuffle(x, y, random_state=123)\n" - ] - },
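For quick reference, the five `diagnosis` grades used throughout the notebook are the standard APTOS severity labels, which reappear verbatim in the confusion-matrix section at the end. A small lookup table makes the mapping explicit (a hypothetical convenience helper, not part of the original code):

```python
# Hypothetical helper: the severity names are taken from the labels listed
# later in the confusion-matrix cells.
DIAGNOSIS_LABELS = {
    0: 'No DR',
    1: 'Mild',
    2: 'Moderate',
    3: 'Severe',
    4: 'Proliferative DR',
}
```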
- { - "cell_type": "code", - "execution_count": null, - "id": "ca0dbe6e", - "metadata": {}, - "outputs": [], - "source": [ - "# Define a function to plot the count of each category in a given set of data\n", - "def plot_category_counts(x, y):\n", - "    # Calculate the number of images in each category\n", - "    counts = y.value_counts()\n", - "    # Set a list of colors for the bars in the bar chart\n", - "    colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple']\n", - "\n", - "    # Plot a bar chart of the category counts using the Matplotlib library\n", - "    plt.bar(counts.index, counts.values, color=colors)\n", - "    # Add a title to the plot\n", - "    plt.title(\"Number of Images in Each Category\", fontsize=13, fontweight='bold')\n", - "    # Add a label to the x-axis\n", - "    plt.xlabel(\"Category\", fontsize=11)\n", - "    # Add a label to the y-axis\n", - "    plt.ylabel(\"Count\", fontsize=11)\n", - "    # Display the plot\n", - "    plt.show()\n", - "\n", - "# Call the plot_category_counts function with the input x and y variables to plot the category counts\n", - "plot_category_counts(x, y)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c61a8183", - "metadata": {}, - "outputs": [], - "source": [ - "# Define a function to check for duplicate image labels and filenames\n", - "def check_duplicates(train_path, test_path, train_img_dir, test_img_dir):\n", - "    # Load the train and test data CSV files into Pandas DataFrames\n", - "    df_train = pd.read_csv(train_path)\n", - "    df_test = pd.read_csv(test_path)\n", - "    \n", - "    # Check for duplicate rows (image label entries) in the train and test data\n", - "    duplicates_train = df_train.duplicated()\n", - "    print(\"Number of duplicate train image labels:\", duplicates_train.sum())\n", - "\n", - "    duplicates_test = df_test.duplicated()\n", - "    print(\"Number of duplicate test image labels:\", duplicates_test.sum())\n", - "\n", - "    # Check for duplicate image filenames in the train and test image directories\n", - "    train_img_files = os.listdir(train_img_dir)\n", - "    train_duplicates = len(train_img_files) != len(set(train_img_files))\n", - "\n", - "    test_img_files = os.listdir(test_img_dir)\n", - "    test_duplicates = len(test_img_files) != len(set(test_img_files))\n", - "\n", - "    # If there are duplicate image filenames, print a warning message\n", - "    if train_duplicates or test_duplicates:\n", - "        print(\"There are duplicate image names in the directory.\")\n", - "    # If there are no duplicate image filenames, print a confirmation message\n", - "    else:\n", - "        print(\"\\nThere are no duplicate image names in the directory.\")\n", - "\n", - "# Call the check_duplicates function with the specified input variables\n", - "check_duplicates('train.csv',\n", - "                 'test.csv',\n", - "                 'train_images/',\n", - "                 'test_images/')\n" - ] - },
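The imports above pull in sklearn's `class_weight`, although the notebook never calls it; the Under-Sampling section that follows balances the classes by discarding images instead. For completeness, this is roughly how balanced class weights could be computed as a non-destructive alternative (a sketch, not something the original runs):

```python
# Sketch only: weights inversely proportional to class frequency. These could be
# passed to model.fit(..., class_weight=class_weights) instead of undersampling.
weights = class_weight.compute_class_weight(class_weight='balanced',
                                            classes=np.unique(y), y=y)
class_weights = dict(zip(np.unique(y), weights))
```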
- { - "cell_type": "markdown", - "id": "2b907254", - "metadata": {}, - "source": [ - "### Under-Sampling" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8a679ebe", - "metadata": {}, - "outputs": [], - "source": [ - "# Select a random sample of 193 indices from each of the diagnosis categories 0, 1, 2 and 4,\n", - "# reducing every class to the size of the smallest class (diagnosis 3)\n", - "idx_0 = df_train[df_train['diagnosis'] == 0].index.tolist()\n", - "idx_0_selected = np.random.choice(idx_0, size=193, replace=False)\n", - "\n", - "idx_1 = df_train[df_train['diagnosis'] == 1].index.tolist()\n", - "idx_1_selected = np.random.choice(idx_1, size=193, replace=False)\n", - "\n", - "idx_2 = df_train[df_train['diagnosis'] == 2].index.tolist()\n", - "idx_2_selected = np.random.choice(idx_2, size=193, replace=False)\n", - "\n", - "idx_4 = df_train[df_train['diagnosis'] == 4].index.tolist()\n", - "idx_4_selected = np.random.choice(idx_4, size=193, replace=False)\n", - "\n", - "# Combine the selected indices for each category into a single list\n", - "idx_selected = list(idx_0_selected) + list(idx_1_selected) + list(idx_2_selected) + list(idx_4_selected)\n", - "\n", - "# Append all indices for the remaining category (diagnosis 3) to the list of selected indices\n", - "idx_selected += df_train[(df_train['diagnosis'] != 0) & (df_train['diagnosis'] != 1) & (df_train['diagnosis'] != 2) & (df_train['diagnosis'] != 4)].index.tolist()\n", - "\n", - "# Subset the DataFrame using the selected indices and shuffle the rows\n", - "df_train = df_train.iloc[idx_selected]\n", - "df_train = shuffle(df_train, random_state=123)\n", - "\n", - "# Extract the 'id_code' and 'diagnosis' columns from the training data\n", - "x = df_train['id_code']\n", - "y = df_train['diagnosis']\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a54a708b", - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate the count of each category in the 'y' variable\n", - "counts = y.value_counts()\n", - "\n", - "# Define a list of colors for the bars in the bar chart\n", - "colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple']\n", - "\n", - "# Plot a bar chart of the category counts using the Matplotlib library\n", - "plt.bar(counts.index, counts.values, color=colors)\n", - "# Add a title to the plot\n", - "plt.title(\"Number of Images in Each Category\")\n", - "# Add a label to the x-axis\n", - "plt.xlabel(\"Category\")\n", - "# Add a label to the y-axis\n", - "plt.ylabel(\"Count\")\n", - "# Display the plot\n", - "plt.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a4c3a9e5", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a figure with 1 row and 4 columns, and a size of 20 x 4 inches\n", - "fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(20, 4))\n", - "\n", - "# Define a list of image paths to display\n", - "img_paths = ['train_images/1df0a4c23c95.png', \n", - "             'train_images/0a1076183736.png',\n", - "             'train_images/0e3572b5884a.png', \n", - "             'train_images/698d6e422a80.png'\n", - "            ]\n", - "\n", - "# Loop over the image paths and display each image in a separate subplot\n", - "for i, img_path in enumerate(img_paths):\n", - "    # Read the image file using OpenCV and convert it from BGR to RGB color format\n", - "    img = cv2.imread(img_path)\n", - "    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n", - "    \n", - "    # Display the image in a subplot and turn off the axis labels\n", - "    ax[i].imshow(img)\n", - "    ax[i].axis('off')\n", - "\n", - "# Display the plot on the screen\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "57d61762", - "metadata": {}, - "source": [ - "### Cropping the Images" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "40dd129b", - "metadata": {}, - "outputs": [], - "source": [ - "# Define a function to crop an input image, removing the dark background border\n", - "# (rows and columns whose grayscale intensity never exceeds 'tol')\n", - "def crop_image_from_gray(img, tol=7):\n", - "    # Convert the input image to grayscale\n", - "    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", - "    # Create a mask of all pixels in the image with intensity greater than 'tol'\n", - "    mask = gray_img > tol\n", - "    # Find the indices of the rows and columns with non-zero values in the mask\n", - "    indices = np.ix_(mask.any(1), mask.any(0))\n", - "    # If there are no non-zero rows or columns in the mask, return the original image\n", - "    if indices[0].size == 0 or indices[1].size == 0:\n", - "        return img\n", - "    # Crop the image to the indices of the non-zero rows and columns in the mask\n", - "    cropped = img[indices]\n", - "    # Return the cropped image\n", - "    return cropped\n", - "\n", - "# Define a function to crop an input image to a circular shape\n", - "def circle_crop(img_path):\n", - "    # Read the input image file using OpenCV\n", - "    img = cv2.imread(img_path)\n", - "    # Crop the input image to remove the dark background border\n", - "    cropped = crop_image_from_gray(img)\n", - "    # Get the height, width, and number of channels in the cropped image\n", - "    height, width, _ = cropped.shape\n", - "    # Find the center point of the cropped image\n", - "    center = (width // 2, height // 2)\n", - "    # Find the radius of the circle to crop around (the smaller of half the width and half the height)\n", - "    radius = min(center)\n", - "    # Create a mask of all pixels in the image within the specified radius\n", - "    circle_mask = np.zeros((height, width), dtype=np.uint8)\n", - "    cv2.circle(circle_mask, center, radius, 1, thickness=-1)\n", - "    # Apply the mask to the cropped image to keep only the circular region\n", - "    masked = cv2.bitwise_and(cropped, cropped, mask=circle_mask)\n", - "    # Crop the circular image again to remove any remaining dark background\n", - "    cropped = crop_image_from_gray(masked)\n", - "    # Return the final cropped image\n", - "    return cropped\n" - ] - },
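Since `circle_crop` leans entirely on `crop_image_from_gray`, a quick way to convince yourself the threshold crop behaves as intended is to run it on a synthetic image. A minimal sanity check (an addition for illustration, not a cell from the original notebook):

```python
import numpy as np

# A bright 20x20 square on an otherwise black 100x100 canvas
canvas = np.zeros((100, 100, 3), dtype=np.uint8)
canvas[40:60, 40:60] = 200

# Rows and columns whose grayscale intensity never exceeds tol should be stripped
cropped = crop_image_from_gray(canvas, tol=7)
print(canvas.shape, '->', cropped.shape)  # expected: (100, 100, 3) -> (20, 20, 3)
```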
- { - "cell_type": "code", - "execution_count": null, - "id": "f6f49288", - "metadata": {}, - "outputs": [], - "source": [ - "# Define a function to display example images from each class in the input DataFrame\n", - "def display_images(df, img_dir):\n", - "    # Create a figure with 1 row and 5 columns, and a size of 20 x 4 inches\n", - "    fig, axs = plt.subplots(1, 5, figsize=(20,4))\n", - "    # Loop over the 5 classes of diabetic retinopathy\n", - "    for i in range(5):\n", - "        # Sample one image at random from the input DataFrame with a diagnosis of i\n", - "        class_image = df[df['diagnosis']==i].sample(1, random_state=123)['id_code'].values[0]\n", - "        # Construct the full file path for the sampled image\n", - "        img_path = os.path.join(img_dir, f\"{class_image}.png\")\n", - "        # Crop the image to a circular shape using the circle_crop function\n", - "        cropped_img = circle_crop(img_path)\n", - "        # Convert the cropped image from BGR to RGB color format\n", - "        cropped_img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB)\n", - "        # Display the cropped image in a subplot, add a title, and hide the axes\n", - "        axs[i].imshow(cropped_img)\n", - "        axs[i].set_title(f\"Class {i}\")\n", - "        axs[i].axis('off')\n", - "    # Display the plot on the screen\n", - "    plt.show()\n", - "\n", - "# Call the display_images function with the input train DataFrame and the directory containing the train images\n", - "display_images(df_train, 'train_images/')\n" - ] - }, - { - "cell_type": "markdown", - "id": "cd386844", - "metadata": {}, - "source": [ - "## Image Transformation & Feature Segmentation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3dfcefcf", - "metadata": {}, - "outputs": [], - "source": [ - "# Define a function to display example grayscale images from each class in the input DataFrame\n", - "def display_images(df, img_dir):\n", - "    # Create a figure with 1 row and 5 columns, and a size of 20 x 4 inches\n", - "    fig, axs = plt.subplots(1, 5, figsize=(20,4))\n", - "    # Loop over the 5 classes of diabetic retinopathy\n", - "    for i in range(5):\n", - "        # Sample one image at random from the input DataFrame with a diagnosis of i\n", - "        class_image = df[df['diagnosis']==i].sample(1, random_state=123)['id_code'].values[0]\n", - "        # Construct the full file path for the sampled image\n", - "        img_path = os.path.join(img_dir, 
f\"{class_image}.png\")\n", - " # Crop the image to a circular shape using the circle_crop function\n", - " cropped_img = circle_crop(img_path)\n", - " # Convert the cropped image from BGR to grayscale color format\n", - " cropped_img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)\n", - " # Display the cropped image in a subplot and add a title and axis labels\n", - " axs[i].imshow(cropped_img, cmap='gray')\n", - " axs[i].set_title(f\"Class {i}\")\n", - " axs[i].axis('off')\n", - " # Display the plot on the screen\n", - " plt.show()\n", - "\n", - "# Call the display_images function with the input train DataFrame and the directory containing the train images\n", - "display_images(df_train, 'train_images/')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3b4ffb2b", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Define a function to display example preprocessed grayscale images from each class in the input DataFrame\n", - "def display_images(df, img_dir):\n", - " # Create a figure with 1 row and 5 columns, and a size of 20 x 4 inches\n", - " fig, axs = plt.subplots(1, 5, figsize=(20,4))\n", - " # Loop over the 5 classes of diabetic retinopathy\n", - " for i in range(5):\n", - " # Sample one image at random from the input DataFrame with a diagnosis of i\n", - " class_image = df[df['diagnosis']==i].sample(1, random_state=123)['id_code'].values[0]\n", - " # Construct the full file path for the sampled image\n", - " img_path = os.path.join(img_dir, f\"{class_image}.png\")\n", - " # Crop the image to a circular shape using the circle_crop function\n", - " cropped_img = circle_crop(img_path)\n", - " # Convert the cropped image from BGR to grayscale color format\n", - " cropped_img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)\n", - " # Apply local histogram equalization to enhance contrast\n", - " clahe = cv2.createCLAHE(clipLimit=3.6, tileGridSize=(8,8))\n", - " cropped_img = clahe.apply(cropped_img)\n", - " # Rescale image intensity to enhance contrast\n", - " p2, p98 = np.percentile(cropped_img, (2, 98))\n", - " cropped_img = skimage.exposure.rescale_intensity(cropped_img, in_range=(p2, p98))\n", - " # Display the preprocessed grayscale image in a subplot and add a title and axis labels\n", - " axs[i].imshow(cropped_img, cmap='gray')\n", - " axs[i].set_title(f\"Class {i}\")\n", - " axs[i].axis('off')\n", - " # Display the plot on the screen\n", - " plt.show()\n", - "\n", - "# Call the display_images function with the input train DataFrame and the directory containing the train images\n", - "display_images(df_train, 'train_images/')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3d58447a", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Set the IMG_SIZE constant to 256\n", - "IMG_SIZE = 256\n", - "\n", - "# Check if the preprocessed_images directory exists and create it if it doesn't\n", - "if not os.path.exists('preprocessed_images'):\n", - " os.makedirs('preprocessed_images')\n", - "\n", - "# Loop over each row in the input train DataFrame using the tqdm function to display a progress bar\n", - "for index, row in tqdm(df_train.iterrows(), total=len(df_train)):\n", - " # Construct the output file path for the preprocessed image\n", - " output_path = f\"preprocessed_images/{row['id_code']}.png\"\n", - " \n", - " # Check if the preprocessed image already exists, and skip to the next row if it does\n", - " if os.path.exists(output_path):\n", - " continue\n", - " \n", - " # Preprocess 
the image by first reading it from the input train image directory\n", - "    path = f\"train_images/{row['id_code']}.png\"\n", - "    image = circle_crop(path)\n", - "    # Convert the image from BGR to grayscale color format using the cvtColor function from OpenCV\n", - "    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", - "    # Apply contrast-limited adaptive histogram equalization (CLAHE) from OpenCV to enhance local contrast\n", - "    clahe = cv2.createCLAHE(clipLimit=3.6, tileGridSize=(8,8))\n", - "    image = clahe.apply(image)\n", - "    # Rescale image intensity to enhance contrast using the rescale_intensity function from the scikit-image library\n", - "    p2, p98 = np.percentile(image, (2, 98))\n", - "    image = skimage.exposure.rescale_intensity(image, in_range=(p2, p98))\n", - "    # Resize the image to IMG_SIZE x IMG_SIZE and save the preprocessed image to disk\n", - "    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))\n", - "    cv2.imwrite(output_path, image)\n", - "\n", - "# Print a message to indicate that all of the training images have been successfully preprocessed\n", - "print(\"All of the training images have been successfully preprocessed!\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "26ea474e", - "metadata": {}, - "outputs": [], - "source": [ - "# create directory to store preprocessed images\n", - "if not os.path.exists('preprocessed_test_images'):\n", - "    os.makedirs('preprocessed_test_images')\n", - "\n", - "# loop through each image in the test dataset and preprocess it\n", - "for index, row in tqdm(df_test.iterrows(), total=len(df_test)):\n", - "    # construct output file path for preprocessed image\n", - "    output_path = f\"preprocessed_test_images/{row['id_code']}.png\"\n", - "    \n", - "    # check if preprocessed image already exists in directory, and skip if it does\n", - "    if os.path.exists(output_path):\n", - "        continue\n", - "    \n", - "    # load image from 'test_images' directory and apply circle cropping and grayscale conversion\n", - "    path = f\"test_images/{row['id_code']}.png\"\n", - "    image = circle_crop(path)\n", - "    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", - "\n", - "    # apply local histogram equalization (CLAHE) to enhance contrast\n", - "    clahe = cv2.createCLAHE(clipLimit=3.6, tileGridSize=(8,8))\n", - "    image = clahe.apply(image)\n", - "\n", - "    # rescale intensity of image\n", - "    p2, p98 = np.percentile(image, (2, 98))\n", - "    image = skimage.exposure.rescale_intensity(image, in_range=(p2, p98))\n", - "\n", - "    # resize and save preprocessed image to 'preprocessed_test_images' directory\n", - "    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))\n", - "    cv2.imwrite(output_path, image)\n", - "\n", - "# print message indicating that pre-processing is complete\n", - "print(\"All of the test images have been successfully preprocessed!\")\n" - ] - },
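The train and test loops above repeat the same five steps: circular crop, grayscale conversion, CLAHE, percentile-based intensity rescaling, and resizing. They could be collapsed into a single helper along these lines (a sketch with a hypothetical name, not a function the notebook defines):

```python
def preprocess_one(img_path, img_size=IMG_SIZE):
    # Hypothetical consolidation of the per-image preprocessing used above
    image = circle_crop(img_path)                    # circular crop
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # grayscale conversion
    clahe = cv2.createCLAHE(clipLimit=3.6, tileGridSize=(8, 8))
    image = clahe.apply(image)                       # local contrast enhancement
    p2, p98 = np.percentile(image, (2, 98))
    image = skimage.exposure.rescale_intensity(image, in_range=(p2, p98))
    return cv2.resize(image, (img_size, img_size))
```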
" # Randomly shifting the images horizontally by up to 20% of the image width\n", - " width_shift_range=0.2,\n", - " # Randomly shifting the images vertically by up to 20% of the image height\n", - " height_shift_range=0.2,\n", - " # Randomly applying shearing transformations to the images\n", - " shear_range=0.2,\n", - " # Randomly zooming in on the images by up to 20%\n", - " zoom_range=0.2,\n", - " # Flipping the images horizontally\n", - " horizontal_flip=True,\n", - " # Flipping the images vertically\n", - " vertical_flip=True\n", - ")\n", - "\n", - "# Defining a test data generator with only rescaling as an image transformation\n", - "test_datagen = ImageDataGenerator(\n", - " # Rescaling the pixel values of the images to be between 0 and 1\n", - " rescale=1./255\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "id": "c61c8b0d", - "metadata": {}, - "source": [ - "# Building Artifical Neural Network Models" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "50354324", - "metadata": {}, - "outputs": [], - "source": [ - "# Calculating the number of samples to include in the test set\n", - "n = round(df_train.shape[0] * 0.3)\n", - "\n", - "# Sampling n rows randomly from the test set using a fixed random state for reproducibility\n", - "df_test = df_test.sample(n=n, random_state=123)\n", - "\n", - "# Printing the number of samples in the train and test sets\n", - "print('Number of train samples: ', df_train.shape[0])\n", - "print('Number of test samples: ', df_test.shape[0])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "149e2df2", - "metadata": {}, - "outputs": [], - "source": [ - "# Appending the file extension '.png' to the 'id_code' column in the training and test datasets\n", - "df_train[\"id_code\"] = df_train[\"id_code\"].apply(lambda x: x + \".png\")\n", - "df_test[\"id_code\"] = df_test[\"id_code\"].apply(lambda x: x + \".png\")\n", - "\n", - "# Converting the 'diagnosis' column in the training dataset to a string data type\n", - "df_train['diagnosis'] = df_train['diagnosis'].astype('str')\n" - ] - }, - { - "cell_type": "markdown", - "id": "29d9f4d5", - "metadata": {}, - "source": [ - "## Data Generators" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4502f597", - "metadata": {}, - "outputs": [], - "source": [ - "# Defining the batch size for the training and validation data generators\n", - "BATCH_SIZE = 2\n", - "\n", - "# Defining the height and width of the images to be fed into the neural network\n", - "HEIGHT = 224\n", - "WIDTH = 224\n", - "\n", - "# Defining the number of color channels in the images (3 for RGB)\n", - "CANAL = 3\n", - "\n", - "# Determining the number of unique classes in the 'diagnosis' column of the training dataset\n", - "N_CLASSES = df_train['diagnosis'].nunique()\n", - "\n", - "# Defining the patience for early stopping in the model training process\n", - "ES_PATIENCE = 5\n", - "\n", - "# Defining the patience for reducing the learning rate on plateau in the model training process\n", - "RLROP_PATIENCE = 3\n", - "\n", - "# Defining the drop factor for reducing the learning rate in the model training process\n", - "DECAY_DROP = 0.5\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "09305028", - "metadata": {}, - "outputs": [], - "source": [ - "# Creating a training data generator that augments the training data in real time\n", - "train_generator=train_datagen.flow_from_dataframe(\n", - " dataframe=df_train, # The dataframe containing the image 
- { - "cell_type": "code", - "execution_count": null, - "id": "09305028", - "metadata": {}, - "outputs": [], - "source": [ - "# Creating a training data generator that augments the training data in real time\n", - "train_generator=train_datagen.flow_from_dataframe(\n", - "    dataframe=df_train, # The dataframe containing the image filenames and labels\n", - "    directory=\"C:/Users/CodeCyborg/Desktop/preprocessed_images/\", # The directory where the images are stored\n", - "    x_col=\"id_code\", # The name of the column containing the image filenames\n", - "    y_col=\"diagnosis\", # The name of the column containing the image labels\n", - "    batch_size=BATCH_SIZE, # The number of images to include in each batch\n", - "    class_mode=\"categorical\", # The type of labels to use (categorical for multiclass classification)\n", - "    target_size=(HEIGHT, WIDTH), # The dimensions to resize the images to\n", - "    subset='training' # Which subset of the data to use (in this case, the training split)\n", - ")\n", - "\n", - "# Creating a validation data generator (note: it reuses train_datagen, so the validation images receive the same random augmentations in real time)\n", - "valid_generator=train_datagen.flow_from_dataframe(\n", - "    dataframe=df_train, # The dataframe containing the image filenames and labels\n", - "    directory=\"C:/Users/CodeCyborg/Desktop/preprocessed_images/\", # The directory where the images are stored\n", - "    x_col=\"id_code\", # The name of the column containing the image filenames\n", - "    y_col=\"diagnosis\", # The name of the column containing the image labels\n", - "    batch_size=BATCH_SIZE, # The number of images to include in each batch\n", - "    class_mode=\"categorical\", # The type of labels to use (categorical for multiclass classification)\n", - "    target_size=(HEIGHT, WIDTH), # The dimensions to resize the images to\n", - "    subset='validation' # Which subset of the data to use (in this case, the validation split)\n", - ")\n", - "\n", - "# Creating a test data generator that does not augment the test data\n", - "test_generator = test_datagen.flow_from_dataframe( \n", - "    dataframe=df_test, # The dataframe containing the image filenames\n", - "    directory = \"C:/Users/CodeCyborg/Desktop/preprocessed_test_images/\", # The directory where the images are stored\n", - "    x_col=\"id_code\", # The name of the column containing the image filenames\n", - "    target_size=(HEIGHT, WIDTH), # The dimensions to resize the images to\n", - "    batch_size=BATCH_SIZE, # The number of images to include in each batch\n", - "    shuffle=False, # Whether to shuffle the images between epochs (not necessary for the test set)\n", - "    class_mode=None # No labels are provided for the test set\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "015f5903", - "metadata": {}, - "source": [ - "## Building Xception Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f5cae143", - "metadata": {}, - "outputs": [], - "source": [ - "# Creating an Xception convolutional neural network with the input shape of (224, 224, 3)\n", - "xception = Xception(include_top=False, input_shape = (224,224,3))\n", - "\n", - "# Freezing the weights of all layers except the last four\n", - "for layer in xception.layers[:-4]:\n", - "    layer.trainable = False\n", - "\n", - "# Building the neural network by adding a flatten layer, two dense layers with ReLU activation and dropout,\n", - "# and a final dense layer with softmax activation for multiclass classification\n", - "xception_model = Sequential()\n", - "xception_model.add(xception)\n", - "xception_model.add(Flatten())\n", - "xception_model.add(Dense(256, activation='relu'))\n", - "xception_model.add(Dropout(0.2))\n", - "xception_model.add(Dense(126, activation='relu'))\n", - "xception_model.add(Dropout(0.2))\n", - "xception_model.add(Dense(5, activation='softmax'))\n", - "\n", - "# Creating an early stopping and learning rate reduction callback list\n", - "es = EarlyStopping(monitor='val_loss', mode='min', patience=ES_PATIENCE, restore_best_weights=True, verbose=1)\n", - "rlrop = ReduceLROnPlateau(monitor='val_loss', mode='min', patience=RLROP_PATIENCE, factor=DECAY_DROP, min_lr=1e-6, verbose=1)\n", - "callback_list = [es, rlrop]\n" - ] - },
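One thing to note before training: the fit calls below hard-code steps_per_epoch=2 and validation_steps=2, presumably to keep runs short. For one full pass over each split per epoch, the conventional values would be derived from the generators themselves (a sketch, not what the notebook actually runs):

```python
import math

# One full pass over each split per epoch; the notebook uses 2 steps for speed
STEP_SIZE_TRAIN = math.ceil(train_generator.n / train_generator.batch_size)
STEP_SIZE_VALID = math.ceil(valid_generator.n / valid_generator.batch_size)
```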
- { - "cell_type": "code", - "execution_count": null, - "id": "497d3fd5", - "metadata": {}, - "outputs": [], - "source": [ - "# Defining a grid of hyperparameters to search over\n", - "learning_rates = [1e-4, 1e-6]\n", - "decay_factors = [0.1, 0.2]\n", - "param_grid = product(learning_rates, decay_factors)\n", - "\n", - "# Initializing an empty dictionary to store the training history for each set of hyperparameters\n", - "histories = {}\n", - "\n", - "# Looping over each set of hyperparameters and training the model with those hyperparameters\n", - "# (note: the same model instance is reused throughout, so weights learned under one setting carry over to the next)\n", - "for lr, decay_factor in param_grid:\n", - "    # Creating an Adam optimizer with the specified learning rate and decay factor\n", - "    optimizer = optimizers.Adam(lr=lr, decay=decay_factor)\n", - "    \n", - "    # Compiling the model with the Adam optimizer and categorical cross-entropy loss\n", - "    xception_model.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"accuracy\"])\n", - "    \n", - "    # Training the model with the specified hyperparameters and saving the training history to the 'histories' dictionary\n", - "    history_Xception = xception_model.fit_generator(generator=train_generator,\n", - "                                                    steps_per_epoch=2,\n", - "                                                    validation_data=valid_generator,\n", - "                                                    validation_steps=2,\n", - "                                                    epochs=2,\n", - "                                                    callbacks=callback_list,\n", - "                                                    verbose=1).history\n", - "    histories[(lr, decay_factor)] = history_Xception\n", - "\n", - "# Finding the best set of hyperparameters based on validation accuracy and loss\n", - "best_score = float('-inf')\n", - "for params, history in histories.items():\n", - "    val_acc = history['val_accuracy'][-1]\n", - "    val_loss = history['val_loss'][-1]\n", - "    score = val_acc - val_loss \n", - "    if score > best_score:\n", - "        best_score = score\n", - "        best_params = params\n", - "\n", - "# Printing the best hyperparameters and the corresponding metrics\n", - "print('Best hyperparameters:')\n", - "print('Learning rate:', best_params[0])\n", - "print('Decay factor:', best_params[1])\n", - "\n", - "print('Metrics:')\n", - "best_history_Xception = histories[best_params]\n", - "print('Training loss:', best_history_Xception['loss'][-1])\n", - "print('Training accuracy:', best_history_Xception['accuracy'][-1])\n", - "print('Validation loss:', best_history_Xception['val_loss'][-1])\n", - "print('Validation accuracy:', best_history_Xception['val_accuracy'][-1])\n" - ] - },
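The explicit best-score loop above can be written more compactly as a single max over the histories dict; a behavior-identical sketch:

```python
# Same selection criterion: last-epoch val_accuracy minus val_loss
best_params = max(histories, key=lambda p: histories[p]['val_accuracy'][-1]
                                           - histories[p]['val_loss'][-1])
best_history_Xception = histories[best_params]
```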
- { - "cell_type": "code", - "execution_count": null, - "id": "f309169f", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# Setting the number of folds for cross-validation\n", - "n_folds = 5\n", - "\n", - "# Setting the best hyperparameters found during hyperparameter search\n", - "best_lr, best_decay_factor = best_params\n", - "best_optimizer = optimizers.Adam(lr=best_lr, decay=best_decay_factor)\n", - "xception_model.compile(optimizer=best_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])\n", - "\n", - "# Initializing empty lists to store the validation accuracy and loss for each fold\n", - "val_accs = []\n", - "val_losses = []\n", - "\n", - "# Creating a KFold object to split the data into training and validation sets for cross-validation\n", - "kfold = KFold(n_splits=n_folds, shuffle=True, random_state=42)\n", - "\n", - "# Looping over each fold in the cross-validation and training the model\n", - "for fold, (train_idx, val_idx) in enumerate(kfold.split(df_train)):\n", - "    print(f\"Fold {fold+1}/{n_folds}\")\n", - "    \n", - "    # Split the data into training and validation sets for the current fold\n", - "    # (note: these fold-specific subsets are not wired into the generators below,\n", - "    # which keep using the original training/validation split)\n", - "    train_data = df_train.iloc[train_idx]\n", - "    val_data = df_train.iloc[val_idx]\n", - "    \n", - "    # Train the model for two epochs with early stopping and learning rate reduction callbacks\n", - "    history_finetunning_Xception = xception_model.fit_generator(generator=train_generator,\n", - "                                                                steps_per_epoch=2,\n", - "                                                                epochs=2,\n", - "                                                                callbacks=callback_list,\n", - "                                                                validation_data=valid_generator,\n", - "                                                                validation_steps=2,\n", - "                                                                verbose=1).history\n", - "    \n", - "    # Evaluate the model on the validation set and print the validation loss and accuracy\n", - "    val_loss, val_acc = xception_model.evaluate_generator(generator=valid_generator,\n", - "                                                          steps=2)\n", - "    print(f\"Validation loss: {val_loss:.4f}, validation accuracy: {val_acc:.4f}\")\n", - "    \n", - "    # Add the validation accuracy and loss for the current fold to the corresponding lists\n", - "    val_accs.append(val_acc)\n", - "    val_losses.append(val_loss)\n", - "    \n", - "# Compute the mean and standard deviation of the validation accuracy and loss over all folds\n", - "mean_val_acc = np.mean(val_accs)\n", - "mean_val_loss = np.mean(val_losses)\n", - "std_val_acc = np.std(val_accs)\n", - "std_val_loss = np.std(val_losses)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2318087c", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# Print the mean and standard deviation of the validation accuracy and loss over all folds\n", - "print(f\"Mean validation accuracy: {mean_val_acc:.4f} ± {std_val_acc:.4f}\")\n", - "print(f\"Mean validation loss: {mean_val_loss:.4f} ± {std_val_loss:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b8625cf4", - "metadata": {}, - "outputs": [], - "source": [ - "# Generate predictions for the test set using the trained model\n", - "predictions = xception_model.predict_generator(test_generator, verbose=1)" - ] - }, - { - "cell_type": "markdown", - "id": "4b5f7afb", - "metadata": {}, - "source": [ - "## Building DenseNet121 Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "933dc6e0", - "metadata": {}, - "outputs": [], - "source": [ - "# Define the input shape for the model\n", - "input_shape = (224, 224, 3)\n", - "\n", - "# Load the DenseNet121 model\n", - "densenet = DenseNet121(include_top=False, input_shape=input_shape)\n", - "\n", - "# Freeze all layers except the last four\n", - "for layer in densenet.layers[:-4]:\n", - "    layer.trainable = False\n", - "\n", - "# Define a sequential model that consists of the DenseNet121 model and dense layers\n", - "DenseNet121_model = Sequential()\n", - "DenseNet121_model.add(densenet)\n", - "DenseNet121_model.add(Flatten())\n", - "DenseNet121_model.add(Dense(256, activation='relu'))\n", - "DenseNet121_model.add(Dropout(0.2))\n", - "DenseNet121_model.add(Dense(126, activation='relu'))\n", - "DenseNet121_model.add(Dropout(0.2))\n", - "DenseNet121_model.add(Dense(5, activation='softmax'))\n", - "\n", - "# Save the initial weights of the model to a file\n", - "DenseNet121_model.save_weights('C:/Users/CodeCyborg/Desktop/saved_weights_DenseNet121.h5')\n", - "\n", - "# Define early stopping and learning rate reduction callbacks\n", - "es = 
EarlyStopping(monitor='val_loss', mode='min', patience=ES_PATIENCE, restore_best_weights=True, verbose=1)\n", - "rlrop = ReduceLROnPlateau(monitor='val_loss', mode='min', patience=RLROP_PATIENCE, factor=DECAY_DROP, min_lr=1e-6, verbose=1)\n", - "callback_list = [es, rlrop]\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a909bb94", - "metadata": {}, - "outputs": [], - "source": [ - "# Define a list of learning rates and decay factors to be used in grid search\n", - "learning_rates = [1e-4, 1e-6]\n", - "decay_factors = [0.1, 0.2]\n", - "\n", - "# Create a parameter grid by taking the Cartesian product of the learning rates and decay factors\n", - "param_grid = product(learning_rates, decay_factors)\n", - "\n", - "# Create an empty dictionary to store the training histories for each combination of hyperparameters\n", - "histories = {}\n", - "\n", - "# Loop over each combination of hyperparameters\n", - "for lr, decay_factor in param_grid:\n", - " # Create an instance of the Adam optimizer with the current learning rate and decay factor\n", - " optimizer = Adam(lr=lr, decay=decay_factor)\n", - " # Compile the DenseNet121 model with the current optimizer, loss function, and evaluation metric\n", - " DenseNet121_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])\n", - " # Train the model for a fixed number of epochs using the current hyperparameters and store the training history\n", - " history_DenseNet121 = DenseNet121_model.fit_generator(generator=train_generator,\n", - " steps_per_epoch=2,\n", - " validation_data=valid_generator,\n", - " validation_steps=2,\n", - " epochs=2,\n", - " callbacks=callback_list,\n", - " verbose=1).history\n", - " histories[(lr, decay_factor)] = history_DenseNet121\n", - "\n", - "# Find the hyperparameters with the highest validation score (accuracy - loss)\n", - "best_score = float('-inf')\n", - "for params, history in histories.items():\n", - " val_acc = history['val_accuracy'][-1]\n", - " val_loss = history['val_loss'][-1]\n", - " score = val_acc - val_loss \n", - " if score > best_score:\n", - " best_score = score\n", - " best_params = params\n", - "\n", - "# Print the best hyperparameters found in the grid search\n", - "print('Best hyperparameters:')\n", - "print('Learning rate:', best_params[0])\n", - "print('Decay factor:', best_params[1])\n", - "\n", - "# Print the training and validation metrics for the best model\n", - "print('Metrics:')\n", - "best_history_DenseNet121 = histories[best_params]\n", - "print('Training loss:', best_history_DenseNet121['loss'][-1])\n", - "print('Training accuracy:', best_history_DenseNet121['accuracy'][-1])\n", - "print('Validation loss:', best_history_DenseNet121['val_loss'][-1])\n", - "print('Validation accuracy:', best_history_DenseNet121['val_accuracy'][-1])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1ca79275", - "metadata": {}, - "outputs": [], - "source": [ - "# Define the number of folds for cross-validation\n", - "n_folds = 5\n", - "\n", - "# Initialize the best optimizer based on the best hyperparameters found during grid search\n", - "best_lr, best_decay_factor = best_params\n", - "best_optimizer = Adam(lr=best_lr, decay=best_decay_factor)\n", - "\n", - "# Compile the model with the best optimizer and loss function\n", - "DenseNet121_model.compile(optimizer=best_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])\n", - "# Initialize empty lists to store the validation accuracy and loss for each fold\n", - 
"val_accs = []\n", - "val_losses = []\n", - "\n", - "# Perform cross-validation\n", - "kfold = KFold(n_splits=n_folds, shuffle=True, random_state=42)\n", - "for fold, (train_idx, val_idx) in enumerate(kfold.split(df_train)):\n", - " print(f\"Fold {fold+1}/{n_folds}\")\n", - " \n", - " # Split the data into training and validation sets for the current fold\n", - " train_data = df_train.iloc[train_idx]\n", - " val_data = df_train.iloc[val_idx]\n", - " \n", - " # Train the model on the training data for the current fold\n", - " history_finetuning_DenseNet121 = DenseNet121_model.fit_generator(generator=train_generator,\n", - " steps_per_epoch=2,\n", - " epochs=2,\n", - " callbacks=callback_list,\n", - " validation_data=valid_generator,\n", - " validation_steps=2,\n", - " verbose=1).history\n", - " \n", - " # Evaluate the model on the test set\n", - " test_loss, test_acc = DenseNet121_model.evaluate_generator(test_generator, steps=test_generator.n, verbose=1)\n", - " print('Test loss:', test_loss)\n", - " print('Test accuracy:', test_acc)\n", - " \n", - " \n", - " # Evaluate the model on the validation data for the current fold\n", - " val_loss, val_acc = DenseNet121_model.evaluate_generator(generator=valid_generator,\n", - " steps=2)\n", - " print(f\"Validation loss: {val_loss:.4f}, validation accuracy: {val_acc:.4f}\")\n", - " \n", - " # Append the validation accuracy and loss for the current fold to the lists\n", - " val_accs.append(val_acc)\n", - " val_losses.append(val_loss)\n", - "\n", - "# Compute the mean and standard deviation of the validation accuracy and loss across all folds\n", - "mean_val_acc = np.mean(val_accs)\n", - "mean_val_loss = np.mean(val_losses)\n", - "std_val_acc = np.std(val_accs)\n", - "std_val_loss = np.std(val_losses)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fd44dcd3", - "metadata": {}, - "outputs": [], - "source": [ - "# Print the mean and standard deviation of the validation accuracy and loss\n", - "print(f\"Mean validation accuracy: {mean_val_acc:.4f} ± {std_val_acc:.4f}\")\n", - "print(f\"Mean validation loss: {mean_val_loss:.4f} ± {std_val_loss:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "73fefe54", - "metadata": {}, - "outputs": [], - "source": [ - "# Make predictions on the test data using the trained model\n", - "predictions = DenseNet121_model.predict_generator(test_generator, verbose=1)" - ] - }, - { - "cell_type": "markdown", - "id": "cb30c68b", - "metadata": {}, - "source": [ - "## Building EfficientNetB6 Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2049f871", - "metadata": {}, - "outputs": [], - "source": [ - "# Load the EfficientNetB6 model\n", - "effnet = EfficientNetB6(include_top=False, input_shape=(528, 528, 3))\n", - "\n", - "# Define a sequential model that consists of th EfficientNetB6 model and dense layers\n", - "EfficientNetB6_model = Sequential()\n", - "EfficientNetB6_model.add(effnet)\n", - "EfficientNetB6_model.add(GlobalAveragePooling2D())\n", - "EfficientNetB6_model.add(Dense(256, activation='relu'))\n", - "EfficientNetB6_model.add(Dropout(0.2))\n", - "EfficientNetB6_model.add(Dense(126, activation='relu'))\n", - "EfficientNetB6_model.add(Dropout(0.2))\n", - "EfficientNetB6_model.add(Dense(5, activation='softmax'))\n", - "\n", - "# Save the initial weights of the model to a file\n", - "EfficientNetB6_model.save_weights('C:/Users/CodeCyborg/Desktop/saved_weights_EfficientNetB6.h5')\n", - "\n", - "# Define early stopping and learning 
rate reduction callbacks\n", - "es = EarlyStopping(monitor='val_loss', mode='min', patience=ES_PATIENCE, restore_best_weights=True, verbose=1)\n", - "rlrop = ReduceLROnPlateau(monitor='val_loss', mode='min', patience=RLROP_PATIENCE, factor=DECAY_DROP, min_lr=1e-6, verbose=1)\n", - "callback_list = [es, rlrop]\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6000329f", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Define a grid of hyperparameters to try\n", - "learning_rates = [1e-4, 1e-6]\n", - "decay_factors = [0.1, 0.2]\n", - "param_grid = product(learning_rates, decay_factors)\n", - "\n", - "# Train the model with each set of hyperparameters and store the histories\n", - "histories = {}\n", - "for lr, decay_factor in param_grid:\n", - " optimizer = optimizers.Adam(lr=lr, decay=decay_factor)\n", - " EfficientNetB6_model.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"accuracy\"])\n", - " history_EfficientNetB6 = EfficientNetB6_model.fit_generator(generator=train_generator,\n", - " steps_per_epoch=2,\n", - " validation_data=valid_generator,\n", - " validation_steps=2,\n", - " epochs=2,\n", - " callbacks=callback_list,\n", - " verbose=1).history\n", - " histories[(lr, decay_factor)] = history_EfficientNetB6\n", - "\n", - "# Find the set of hyperparameters that produced the best score\n", - "best_score = float('-inf')\n", - "for params, history in histories.items():\n", - " val_acc = history['val_accuracy'][-1]\n", - " val_loss = history['val_loss'][-1]\n", - " score = val_acc - val_loss \n", - " if score > best_score:\n", - " best_score = score\n", - " best_params = params\n", - "\n", - "# Print the best hyperparameters and metrics\n", - "print('Best hyperparameters:')\n", - "print('Learning rate:', best_params[0])\n", - "print('Decay factor:', best_params[1])\n", - "\n", - "print('Metrics:')\n", - "best_history_EfficientNetB6 = histories[best_params]\n", - "print('Training loss:', best_history_EfficientNetB6['loss'][-1])\n", - "print('Training accuracy:', best_history_EfficientNetB6['accuracy'][-1])\n", - "print('Validation loss:', best_history_EfficientNetB6['val_loss'][-1])\n", - "print('Validation accuracy:', best_history_EfficientNetB6['val_accuracy'][-1])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "776c03d0", - "metadata": {}, - "outputs": [], - "source": [ - "# Define the number of folds for cross-validation\n", - "n_folds = 5\n", - "\n", - "best_lr, best_decay_factor = best_params\n", - "best_optimizer = optimizers.Adam(lr=best_lr, decay=best_decay_factor)\n", - "EfficientNetB6_model.compile(optimizer=best_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])\n", - "\n", - "# Initialize an empty list to store the validation accuracy and loss for each fold\n", - "val_accs = []\n", - "val_losses = []\n", - "\n", - "# Perform cross-validation\n", - "kfold = KFold(n_splits=n_folds, shuffle=True, random_state=42)\n", - "for fold, (train_idx, val_idx) in enumerate(kfold.split(df_train)):\n", - " print(f\"Fold {fold+1}/{n_folds}\")\n", - " \n", - " # Split the data into training and validation sets for the current fold\n", - " train_data = df_train.iloc[train_idx]\n", - " val_data = df_train.iloc[val_idx]\n", - " \n", - " # Train the model on the training data for the current fold\n", - " history_finetuning_EfficientNetB6 = EfficientNetB6_model.fit_generator(generator=train_generator,\n", - " steps_per_epoch=2,\n", - " epochs=2,\n", - " callbacks=callback_list,\n", - 
" validation_data=valid_generator,\n", - " validation_steps=2,\n", - " verbose=1).history\n", - " \n", - " # Evaluate the model on the validation data for the current fold\n", - " val_loss, val_acc = EfficientNetB6_model.evaluate_generator(generator=valid_generator,\n", - " steps=2)\n", - " print(f\"Validation loss: {val_loss:.4f}, validation accuracy: {val_acc:.4f}\")\n", - " \n", - " # Append the validation accuracy and loss for the current fold to the list\n", - " val_accs.append(val_acc)\n", - " val_losses.append(val_loss)\n", - " \n", - "# Compute the mean and standard deviation of the validation accuracy and loss across all folds\n", - "mean_val_acc = np.mean(val_accs)\n", - "mean_val_loss = np.mean(val_losses)\n", - "std_val_acc = np.std(val_accs)\n", - "std_val_loss = np.std(val_losses)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fe7b66cf", - "metadata": {}, - "outputs": [], - "source": [ - "# Print the mean validation accuracy and standard deviation of validation accuracy\n", - "print(f\"Mean validation accuracy: {mean_val_acc:.4f} ± {std_val_acc:.4f}\")\n", - "\n", - "# Print the mean validation loss and standard deviation of validation loss\n", - "print(f\"Mean validation loss: {mean_val_loss:.4f} ± {std_val_loss:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "00c7ccad", - "metadata": {}, - "outputs": [], - "source": [ - "# Make predictions on the test data using the trained model\n", - "predictions = EfficientNetB6_model.predict_generator(test_generator, verbose=1)" - ] - }, - { - "cell_type": "markdown", - "id": "15cfd267", - "metadata": {}, - "source": [ - "# Model Evaluation" - ] - }, - { - "cell_type": "markdown", - "id": "f5fc1fa0", - "metadata": {}, - "source": [ - "## Loss and Accuracy for each Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4136e1a", - "metadata": {}, - "outputs": [], - "source": [ - "# Define a function to plot the training and validation accuracy and loss\n", - "def plot_accuracy_loss(history, title, ax1, ax2):\n", - " ax1.plot(history['accuracy'])\n", - " ax1.plot(history['val_accuracy'])\n", - " ax1.set_title(title + ' Accuracy')\n", - " ax1.set_ylabel('Accuracy')\n", - " ax1.legend(['train', 'validation'], loc='upper left')\n", - "\n", - " ax2.plot(history['loss'])\n", - " ax2.plot(history['val_loss'])\n", - " ax2.set_title(title + ' Loss')\n", - " ax2.set_ylabel('Loss')\n", - " ax2.set_xlabel('Epoch')\n", - " ax2.legend(['train', 'validation'], loc='upper left')\n", - "\n", - "# Create a figure with three rows and two columns of subplots\n", - "fig, axs = plt.subplots(3, 2, figsize=(10, 15))\n", - "\n", - "# Plot the accuracy and loss of the Xception model\n", - "plot_accuracy_loss(best_history_Xception, 'Xception', axs[0, 0], axs[0, 1])\n", - "\n", - "# Plot the accuracy and loss of the DenseNet121 model\n", - "plot_accuracy_loss(best_history_DenseNet121, 'DenseNet121', axs[1, 0], axs[1, 1])\n", - "\n", - "# Plot the accuracy and loss of the EfficientNetB6 model\n", - "plot_accuracy_loss(best_history_EfficientNetB6, 'EfficientNetB6', axs[2, 0], axs[2, 1])\n", - "\n", - "# Adjust the layout of the subplots and show the plot\n", - "plt.tight_layout()\n", - "plt.subplots_adjust(wspace=0.3, hspace=0.3)\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "97d9dc07", - "metadata": {}, - "source": [ - "## Classification Report & Confusion Matrix for Models" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c4090c9d", - 
"metadata": {}, - "outputs": [], - "source": [ - "# Define a function to generate a classification report for a given model and generator\n", - "def generate_classification_report(model, generator, class_names):\n", - " valid_pred = model.predict(generator)\n", - " valid_labels = generator.classes\n", - " valid_pred_classes = np.argmax(valid_pred, axis=1)\n", - " print(\"Model Classification Report:\")\n", - " print(classification_report(valid_labels, valid_pred_classes, target_names=class_names))\n", - " return valid_labels, valid_pred_classes\n", - "\n", - "# Define a function to plot a confusion matrix for a given model and predicted labels\n", - "def plot_confusion_matrix(model_name, valid_labels, valid_pred_classes):\n", - " cm = confusion_matrix(valid_labels, valid_pred_classes)\n", - " labels = ['0 - No DR', '1 - Mild', '2 - Moderate', '3 - Severe', '4 - Proliferative DR']\n", - " df_cm = pd.DataFrame(cm, index=labels, columns=labels)\n", - " plt.figure(figsize=(10, 7))\n", - " sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', \n", - " xticklabels=labels, yticklabels=labels) \n", - " plt.title('Confusion Matrix - ' + model_name + ' Model', fontdict={'fontsize': 18, 'fontweight': 'bold'})\n", - " plt.xlabel('Predicted labels', fontdict={'fontsize': 16, 'fontweight': 'bold'})\n", - " plt.ylabel('True labels', fontdict={'fontsize': 16, 'fontweight': 'bold'})\n", - " plt.show()\n", - "\n", - "# Get the class names from the valid generator\n", - "class_names = list(valid_generator.class_indices.keys())\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d4a54900", - "metadata": {}, - "outputs": [], - "source": [ - "# Generate classification report for Xception model\n", - "valid_labels_xception, valid_pred_classes_xception = generate_classification_report(xception_model, valid_generator, class_names)\n", - "\n", - "# Generate classification report for DenseNet121 model\n", - "valid_labels_DenseNet121, valid_pred_classes_DenseNet121 = generate_classification_report(DenseNet121_model, valid_generator, class_names)\n", - "\n", - "# Generate classification report for EfficientNetB6 model\n", - "valid_labels_EfficientNetB6, valid_pred_classes_EfficientNetB6 = generate_classification_report(EfficientNetB6_model, valid_generator, class_names)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2360725", - "metadata": {}, - "outputs": [], - "source": [ - "# Generate confusion matrix for Xception model\n", - "plot_confusion_matrix('Xception', valid_labels_xception, valid_pred_classes_xception)\n", - "\n", - "# Generate confusion matrix for DenseNet121 model\n", - "plot_confusion_matrix('DenseNet121', valid_labels_DenseNet121, valid_pred_classes_DenseNet121)\n", - "\n", - "# Generate confusion matrix for EfficientNetB6 model\n", - "plot_confusion_matrix('EfficientNetB6', valid_labels_EfficientNetB6, valid_pred_classes_EfficientNetB6)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "350afa17", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}