{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "25dceb5e", "metadata": { "execution": { "iopub.execute_input": "2025-09-30T07:52:30.891679Z", "iopub.status.busy": "2025-09-30T07:52:30.891481Z", "iopub.status.idle": "2025-09-30T07:52:53.502688Z", "shell.execute_reply": "2025-09-30T07:52:53.501755Z" }, "papermill": { "duration": 22.615314, "end_time": "2025-09-30T07:52:53.503953", "exception": false, "start_time": "2025-09-30T07:52:30.888639", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2025-09-30 07:52:34.084452: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1759218754.451692 19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1759218754.555147 19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "TensorFlow Version: 2.18.0\n" ] } ], "source": [ "# ==============================================================================\n", "# SCRIPT -- AlexNet with a Memory-Safe Data Pipeline\n", "# ==============================================================================\n", "# This script contains all the corrected code to:\n", "# 1. Load CIFAR-10 and create a memory-safe tf.data pipeline for resizing.\n", "# 2. Build the Keras model of the AlexNet architecture.\n", "# 3. Train the model using the efficient pipeline.\n", "# 4. 
Evaluate and plot the results.\n", "#\n", "\n", "import tensorflow as tf\n", "from tensorflow.keras.datasets import cifar10\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization\n", "from tensorflow.keras.utils import to_categorical\n", "from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping\n", "from tensorflow.keras.regularizers import l2\n", "import matplotlib.pyplot as plt\n", "\n", "print(\"TensorFlow Version:\", tf.__version__)\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "53d7e29b", "metadata": { "execution": { "iopub.execute_input": "2025-09-30T07:52:53.508760Z", "iopub.status.busy": "2025-09-30T07:52:53.508300Z", "iopub.status.idle": "2025-09-30T07:52:58.246938Z", "shell.execute_reply": "2025-09-30T07:52:58.246289Z" }, "papermill": { "duration": 4.742368, "end_time": "2025-09-30T07:52:58.248379", "exception": false, "start_time": "2025-09-30T07:52:53.506011", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n", "\u001b[1m170498071/170498071\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 0us/step\n" ] } ], "source": [ "## --------------------------------------\n", "## 1. 
## --------------------------------------
## 1. DATA LOADING AND PARAMETERS
## --------------------------------------
# Parameters are declared before they are used so the preprocessing below can
# refer to them instead of repeating literals.
NUM_CLASSES = 10     # number of one-hot label columns produced below
IMG_SIZE = 224       # target side length (pixels) for tf.image.resize
BATCH_SIZE = 128     # value passed to Dataset.batch()
L2_LAMBDA = 0.0005   # Weight decay
SEED = 42            # seed for the Python, NumPy and TensorFlow RNGs

# Seed every RNG once, up front, so shuffling, augmentation and weight
# initialisation are repeatable across "Restart Kernel -> Run All".
tf.keras.utils.set_random_seed(SEED)

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize pixel values to [0, 1] and one-hot encode the labels
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
y_train = to_categorical(y_train, NUM_CLASSES)
y_test = to_categorical(y_test, NUM_CLASSES)
## --------------------------------------
## 2. IMPROVED DATA PIPELINE WITH AUGMENTATION
## --------------------------------------
# Random augmentations applied to training images only.
data_augmentation = Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
])


def resize_image(image, label):
    """Resize one image to (IMG_SIZE, IMG_SIZE) and pass the label through.

    Args:
        image: a single image tensor (one element of the unbatched dataset).
        label: the label paired with ``image``; returned unchanged.

    Returns:
        Tuple ``(resized_image, label)``.
    """
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label


def augment_and_resize(image, label):
    """Resize one training image, then apply the random augmentations.

    Resizing is delegated to ``resize_image`` so the training and test
    pipelines cannot drift apart.

    Args:
        image: a single image tensor (one element of the unbatched dataset).
        label: the label paired with ``image``; returned unchanged.

    Returns:
        Tuple ``(augmented_image, label)``.
    """
    image, label = resize_image(image, label)
    # The augmentation model is called on a batch of one: add a leading axis
    # for the call and strip it again afterwards.
    image = data_augmentation(tf.expand_dims(image, 0), training=True)[0]
    return image, label


# Create efficient tf.data pipelines
print("Building memory-safe data pipelines with augmentation...")
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))

# Apply shuffling and augmentation to the training set
train_dataset = train_dataset.shuffle(buffer_size=10000)
train_dataset = train_dataset.map(augment_and_resize, num_parallel_calls=tf.data.AUTOTUNE)

# Only apply resizing to the test set
test_dataset = test_dataset.map(resize_image, num_parallel_calls=tf.data.AUTOTUNE)

# Batch and prefetch both datasets for performance
# NOTE(review): the saved training log in this notebook reports
# `accuracy: nan - loss: nan` for every epoch while the validation metrics stay
# finite. The cause is not established here. One thing worth checking is
# whether the smaller final training batch interacts badly with
# MirroredStrategy (try `batch(BATCH_SIZE, drop_remainder=True)`) -- TODO confirm.
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)
print("Data pipelines created successfully.")
"papermill": { "duration": 1.828828, "end_time": "2025-09-30T07:53:04.172934", "exception": false, "start_time": "2025-09-30T07:53:02.344106", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of devices: 2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.11/dist-packages/keras/src/layers/convolutional/base_conv.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", " super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n" ] }, { "data": { "text/html": [ "
Model: \"sequential_1\"\n",
"\n"
],
"text/plain": [
"\u001b[1mModel: \"sequential_1\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ conv2d (Conv2D) │ (None, 54, 54, 96) │ 34,944 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization │ (None, 54, 54, 96) │ 384 │\n",
"│ (BatchNormalization) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d (MaxPooling2D) │ (None, 26, 26, 96) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_1 (Conv2D) │ (None, 26, 26, 256) │ 614,656 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization_1 │ (None, 26, 26, 256) │ 1,024 │\n",
"│ (BatchNormalization) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d_1 (MaxPooling2D) │ (None, 12, 12, 256) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_2 (Conv2D) │ (None, 12, 12, 384) │ 885,120 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_3 (Conv2D) │ (None, 12, 12, 384) │ 1,327,488 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_4 (Conv2D) │ (None, 12, 12, 256) │ 884,992 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d_2 (MaxPooling2D) │ (None, 5, 5, 256) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ flatten (Flatten) │ (None, 6400) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense (Dense) │ (None, 4096) │ 26,218,496 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout (Dropout) │ (None, 4096) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_1 (Dense) │ (None, 4096) │ 16,781,312 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_1 (Dropout) │ (None, 4096) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_2 (Dense) │ (None, 10) │ 40,970 │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
"\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ conv2d (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m34,944\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m384\u001b[0m │\n",
"│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_1 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m614,656\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m1,024\u001b[0m │\n",
"│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d_1 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m, \u001b[38;5;34m12\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_2 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m, \u001b[38;5;34m12\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m885,120\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_3 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m, \u001b[38;5;34m12\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m1,327,488\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_4 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m, \u001b[38;5;34m12\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m884,992\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d_2 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m5\u001b[0m, \u001b[38;5;34m5\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ flatten (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m6400\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m26,218,496\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m16,781,312\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m) │ \u001b[38;5;34m40,970\u001b[0m │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Total params: 46,789,386 (178.49 MB)\n", "\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m46,789,386\u001b[0m (178.49 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Trainable params: 46,788,682 (178.48 MB)\n", "\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m46,788,682\u001b[0m (178.48 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
## --------------------------------------
## 3. MULTI-GPU SETUP, MODEL DEFINITION AND COMPILATION
## --------------------------------------


def build_alexnet(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=NUM_CLASSES, l2_lambda=L2_LAMBDA):
    """Build the (uncompiled) AlexNet-style Sequential model.

    Args:
        input_shape: shape of one input image, without the batch axis.
        num_classes: number of units in the final softmax layer.
        l2_lambda: L2 penalty applied to the kernels of every convolution and
            of the two 4096-unit dense layers.

    Returns:
        A ``Sequential`` model: five ReLU convolutions (the first two followed
        by batch normalisation), three max-pooling stages, two dense+dropout
        blocks and a softmax output.
    """
    def conv(filters, kernel_size, **kwargs):
        # Every convolution shares the ReLU activation and L2 kernel penalty.
        return Conv2D(filters=filters, kernel_size=kernel_size, activation='relu',
                      kernel_regularizer=l2(l2_lambda), **kwargs)

    def pool():
        # Overlapping pooling: 3x3 window moved with stride 2.
        return MaxPooling2D(pool_size=(3, 3), strides=(2, 2))

    def dense_block():
        # Fully connected layer followed by 50% dropout.
        return [Dense(4096, activation='relu', kernel_regularizer=l2(l2_lambda)),
                Dropout(0.5)]

    return Sequential([
        # An explicit Input object replaces `input_shape=` on the first layer,
        # which Keras flags with a UserWarning.
        tf.keras.Input(shape=input_shape),
        conv(96, (11, 11), strides=(4, 4)),
        BatchNormalization(),
        pool(),

        conv(256, (5, 5), padding='same'),
        BatchNormalization(),
        pool(),

        conv(384, (3, 3), padding='same'),
        conv(384, (3, 3), padding='same'),
        conv(256, (3, 3), padding='same'),
        pool(),

        Flatten(),
        *dense_block(),
        *dense_block(),
        Dense(num_classes, activation='softmax'),
    ])


# 1. Create a MirroredStrategy to use all available GPUs
strategy = tf.distribute.MirroredStrategy()
print(f'Number of devices: {strategy.num_replicas_in_sync}')

# 2. Define AND compile the model inside the strategy's scope so its variables
#    are created under the strategy.
with strategy.scope():
    # --- Model Definition ---
    model = build_alexnet()

    # --- Model Compilation with stable learning rate ---
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

model.summary()
\u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.6390 - val_loss: 2.4970 - learning_rate: 1.0000e-04\n", "Epoch 6/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 558ms/step - accuracy: nan - loss: nan - val_accuracy: 0.7129 - val_loss: 2.0626 - learning_rate: 1.0000e-04\n", "Epoch 7/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 560ms/step - accuracy: nan - loss: nan - val_accuracy: 0.7493 - val_loss: 1.7856 - learning_rate: 1.0000e-04\n", "Epoch 8/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8115 - val_loss: 1.4895 - learning_rate: 1.0000e-04\n", "Epoch 9/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 558ms/step - accuracy: nan - loss: nan - val_accuracy: 0.7852 - val_loss: 1.4268 - learning_rate: 1.0000e-04\n", "Epoch 10/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m218s\u001b[0m 556ms/step - accuracy: nan - loss: nan - val_accuracy: 0.7644 - val_loss: 1.4600 - learning_rate: 1.0000e-04\n", "Epoch 11/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 557ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8105 - val_loss: 1.2222 - learning_rate: 1.0000e-04\n", "Epoch 12/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 558ms/step - accuracy: nan - loss: nan - val_accuracy: 0.7586 - val_loss: 1.3615 - learning_rate: 1.0000e-04\n", "Epoch 13/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 
558ms/step - accuracy: nan - loss: nan - val_accuracy: 0.7883 - val_loss: 1.2408 - learning_rate: 1.0000e-04\n", "Epoch 14/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8324 - val_loss: 1.1125 - learning_rate: 1.0000e-04\n", "Epoch 15/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 558ms/step - accuracy: nan - loss: nan - val_accuracy: 0.7904 - val_loss: 1.2024 - learning_rate: 1.0000e-04\n", "Epoch 16/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 558ms/step - accuracy: nan - loss: nan - val_accuracy: 0.7932 - val_loss: 1.0645 - learning_rate: 1.0000e-04\n", "Epoch 17/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8227 - val_loss: 1.0313 - learning_rate: 1.0000e-04\n", "Epoch 18/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.7926 - val_loss: 1.0981 - learning_rate: 1.0000e-04\n", "Epoch 19/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8308 - val_loss: 1.0348 - learning_rate: 1.0000e-04\n", "Epoch 20/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8567 - val_loss: 0.9426 - learning_rate: 1.0000e-04\n", "Epoch 21/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8218 - val_loss: 
0.9783 - learning_rate: 1.0000e-04\n", "Epoch 22/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 564ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8464 - val_loss: 0.9939 - learning_rate: 1.0000e-04\n", "Epoch 23/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m223s\u001b[0m 569ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8412 - val_loss: 0.9400 - learning_rate: 1.0000e-04\n", "Epoch 24/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8084 - val_loss: 1.0187 - learning_rate: 1.0000e-04\n", "Epoch 25/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8466 - val_loss: 0.8996 - learning_rate: 1.0000e-04\n", "Epoch 26/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8726 - val_loss: 0.8967 - learning_rate: 1.0000e-04\n", "Epoch 27/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8615 - val_loss: 0.9093 - learning_rate: 1.0000e-04\n", "Epoch 28/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8410 - val_loss: 0.9364 - learning_rate: 1.0000e-04\n", "Epoch 29/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 559ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8419 - val_loss: 0.9368 - learning_rate: 1.0000e-04\n", "Epoch 30/100\n", 
"\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m219s\u001b[0m 558ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8276 - val_loss: 0.9096 - learning_rate: 1.0000e-04\n", "Epoch 31/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 546ms/step - accuracy: nan - loss: nan\n", "Epoch 31: ReduceLROnPlateau reducing learning rate to 1.9999999494757503e-05.\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 560ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8550 - val_loss: 0.9027 - learning_rate: 1.0000e-04\n", "Epoch 32/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 561ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8930 - val_loss: 0.7861 - learning_rate: 2.0000e-05\n", "Epoch 33/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8812 - val_loss: 0.8005 - learning_rate: 2.0000e-05\n", "Epoch 34/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8729 - val_loss: 0.8099 - learning_rate: 2.0000e-05\n", "Epoch 35/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8778 - val_loss: 0.8201 - learning_rate: 2.0000e-05\n", "Epoch 36/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9026 - val_loss: 0.7591 - learning_rate: 2.0000e-05\n", "Epoch 37/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8860 - val_loss: 0.7798 - learning_rate: 2.0000e-05\n", "Epoch 38/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 561ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9032 - val_loss: 0.7319 - learning_rate: 2.0000e-05\n", "Epoch 39/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 561ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8908 - val_loss: 0.7586 - learning_rate: 2.0000e-05\n", "Epoch 40/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m224s\u001b[0m 570ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8902 - val_loss: 0.7364 - learning_rate: 2.0000e-05\n", "Epoch 41/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 560ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8834 - val_loss: 0.7415 - learning_rate: 2.0000e-05\n", "Epoch 42/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 561ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8871 - val_loss: 0.7632 - learning_rate: 2.0000e-05\n", "Epoch 43/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 547ms/step - accuracy: nan - loss: nan\n", "Epoch 43: ReduceLROnPlateau reducing learning rate to 3.999999898951501e-06.\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 561ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8909 - val_loss: 0.7529 - learning_rate: 2.0000e-05\n", "Epoch 44/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9057 - 
val_loss: 0.7150 - learning_rate: 4.0000e-06\n", "Epoch 45/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 561ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9038 - val_loss: 0.7067 - learning_rate: 4.0000e-06\n", "Epoch 46/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 560ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9002 - val_loss: 0.7188 - learning_rate: 4.0000e-06\n", "Epoch 47/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8998 - val_loss: 0.7029 - learning_rate: 4.0000e-06\n", "Epoch 48/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9077 - val_loss: 0.6989 - learning_rate: 4.0000e-06\n", "Epoch 49/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9058 - val_loss: 0.7022 - learning_rate: 4.0000e-06\n", "Epoch 50/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8998 - val_loss: 0.7106 - learning_rate: 4.0000e-06\n", "Epoch 51/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9068 - val_loss: 0.7060 - learning_rate: 4.0000e-06\n", "Epoch 52/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9024 - val_loss: 0.7031 - learning_rate: 4.0000e-06\n", "Epoch 53/100\n", 
"\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 548ms/step - accuracy: nan - loss: nan\n", "Epoch 53: ReduceLROnPlateau reducing learning rate to 1e-06.\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9057 - val_loss: 0.7074 - learning_rate: 4.0000e-06\n", "Epoch 54/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9044 - val_loss: 0.6993 - learning_rate: 1.0000e-06\n", "Epoch 55/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m220s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.8986 - val_loss: 0.7076 - learning_rate: 1.0000e-06\n", "Epoch 56/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9074 - val_loss: 0.6977 - learning_rate: 1.0000e-06\n", "Epoch 57/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9043 - val_loss: 0.6974 - learning_rate: 1.0000e-06\n", "Epoch 58/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9035 - val_loss: 0.6992 - learning_rate: 1.0000e-06\n", "Epoch 59/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9076 - val_loss: 0.6973 - learning_rate: 1.0000e-06\n", "Epoch 60/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m221s\u001b[0m 562ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9107 - val_loss: 0.6969 - learning_rate: 1.0000e-06\n", "Epoch 61/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 561ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9077 - val_loss: 0.6976 - learning_rate: 1.0000e-06\n", "Epoch 62/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9115 - val_loss: 0.6982 - learning_rate: 1.0000e-06\n", "Epoch 63/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 564ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9083 - val_loss: 0.6997 - learning_rate: 1.0000e-06\n", "Epoch 64/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 566ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9050 - val_loss: 0.6962 - learning_rate: 1.0000e-06\n", "Epoch 65/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9093 - val_loss: 0.6982 - learning_rate: 1.0000e-06\n", "Epoch 66/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 564ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9117 - val_loss: 0.6984 - learning_rate: 1.0000e-06\n", "Epoch 67/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9094 - val_loss: 0.6960 - learning_rate: 1.0000e-06\n", "Epoch 68/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 563ms/step - accuracy: nan - loss: nan - val_accuracy: 
0.9064 - val_loss: 0.7004 - learning_rate: 1.0000e-06\n", "Epoch 69/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9092 - val_loss: 0.6996 - learning_rate: 1.0000e-06\n", "Epoch 70/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 564ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9073 - val_loss: 0.6971 - learning_rate: 1.0000e-06\n", "Epoch 71/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 564ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9078 - val_loss: 0.6979 - learning_rate: 1.0000e-06\n", "Epoch 72/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9112 - val_loss: 0.6930 - learning_rate: 1.0000e-06\n", "Epoch 73/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m223s\u001b[0m 568ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9092 - val_loss: 0.6939 - learning_rate: 1.0000e-06\n", "Epoch 74/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m223s\u001b[0m 567ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9083 - val_loss: 0.6927 - learning_rate: 1.0000e-06\n", "Epoch 75/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 567ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9085 - val_loss: 0.6897 - learning_rate: 1.0000e-06\n", "Epoch 76/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9066 - val_loss: 0.6947 - learning_rate: 1.0000e-06\n", "Epoch 77/100\n", 
"\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9082 - val_loss: 0.6913 - learning_rate: 1.0000e-06\n", "Epoch 78/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 566ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9057 - val_loss: 0.6872 - learning_rate: 1.0000e-06\n", "Epoch 79/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9105 - val_loss: 0.6914 - learning_rate: 1.0000e-06\n", "Epoch 80/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 564ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9113 - val_loss: 0.6885 - learning_rate: 1.0000e-06\n", "Epoch 81/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 564ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9089 - val_loss: 0.6907 - learning_rate: 1.0000e-06\n", "Epoch 82/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9112 - val_loss: 0.6883 - learning_rate: 1.0000e-06\n", "Epoch 83/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 566ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9112 - val_loss: 0.6911 - learning_rate: 1.0000e-06\n", "Epoch 84/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9075 - val_loss: 0.6958 - learning_rate: 1.0000e-06\n", "Epoch 85/100\n", "\u001b[1m391/391\u001b[0m 
\u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 566ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9110 - val_loss: 0.6898 - learning_rate: 1.0000e-06\n", "Epoch 86/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 566ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9073 - val_loss: 0.6906 - learning_rate: 1.0000e-06\n", "Epoch 87/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m221s\u001b[0m 564ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9092 - val_loss: 0.6934 - learning_rate: 1.0000e-06\n", "Epoch 88/100\n", "\u001b[1m391/391\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m222s\u001b[0m 565ms/step - accuracy: nan - loss: nan - val_accuracy: 0.9077 - val_loss: 0.6919 - learning_rate: 1.0000e-06\n", "Epoch 88: early stopping\n", "Restoring model weights from the end of the best epoch: 78.\n", "Model training complete.\n" ] } ], "source": [ "## --------------------------------------\n", "## 4. 
TRAINING THE MODEL\n", "## --------------------------------------\n", "# Callbacks to stop training when performance worsens and to reduce the learning rate\n", "early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)\n", "reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=1)\n", "\n", "EPOCHS = 100\n", "\n", "print(\"\\nStarting model training on multiple GPUs...\")\n", "history = model.fit(\n", " train_dataset,\n", " epochs=EPOCHS,\n", " validation_data=test_dataset,\n", " callbacks=[early_stopping, reduce_lr]\n", ")\n", "print(\"Model training complete.\")\n", "\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "4ae116ca", "metadata": { "execution": { "iopub.execute_input": "2025-09-30T13:17:05.224872Z", "iopub.status.busy": "2025-09-30T13:17:05.224574Z", "iopub.status.idle": "2025-09-30T13:17:10.487097Z", "shell.execute_reply": "2025-09-30T13:17:10.486383Z" }, "papermill": { "duration": 6.809009, "end_time": "2025-09-30T13:17:10.488230", "exception": false, "start_time": "2025-09-30T13:17:03.679221", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Evaluating model on the test set...\n", "79/79 - 4s - 52ms/step - accuracy: 0.9057 - loss: 0.6873\n", "\n", "Test Loss: 0.6873\n", "Test Accuracy: 90.57%\n", "\n", "Plotting training history...\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "