From 19f870527919157f47e18f894c095b566cc16246 Mon Sep 17 00:00:00 2001
From: "michael.divia" <michael.divia@etu.hesge.ch>
Date: Tue, 1 Apr 2025 18:01:46 +0200
Subject: [PATCH] 2 training models

---
 pokedex.py                 | 113 -----------------------------------
 pokedex_EfficientNetV2M.py | 118 +++++++++++++++++++++++++++++++++++++
 pokedex_ResNet50.py        | 118 +++++++++++++++++++++++++++++++++++++
 test.py                    |  66 ---------------------
 4 files changed, 236 insertions(+), 179 deletions(-)
 delete mode 100644 pokedex.py
 create mode 100644 pokedex_EfficientNetV2M.py
 create mode 100644 pokedex_ResNet50.py
 delete mode 100644 test.py

diff --git a/pokedex.py b/pokedex.py
deleted file mode 100644
index d60dcaf..0000000
--- a/pokedex.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import os
-import numpy as np
-import keras
-from keras import layers
-import matplotlib.pyplot as plt
-from tensorflow import data as tf_data
-import random
-
-#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-
-# Load training and validation dataset
-train_ds, val_ds = keras.utils.image_dataset_from_directory(
-    "Combined_Dataset",
-    labels="inferred",
-    label_mode="int",
-    image_size=(256, 256),
-    batch_size=20,
-    shuffle=True,
-    validation_split=0.2,
-    subset="both",
-    seed=random.randint(0,8000)
-)
-
-# Get class (Pokémon) names
-class_names = train_ds.class_names
-
-# Introduce artificial sample diversity
-data_augmentation_layers = [
-    layers.RandomFlip("horizontal"),
-    layers.RandomRotation(0.1),
-]
-
-def data_augmentation(images):
-    for layer in data_augmentation_layers:
-        images = layer(images)
-    return images
-
-# Apply `data_augmentation` to the training images.
-train_ds = train_ds.map(
-    lambda img, label: (data_augmentation(img), label),
-    num_parallel_calls=tf_data.AUTOTUNE,
-)
-
-# Prefetching samples in GPU memory helps maximize GPU utilization.
-train_ds = train_ds.prefetch(tf_data.AUTOTUNE)
-val_ds = val_ds.prefetch(tf_data.AUTOTUNE)
-
-
-# MODEL
-def simple_xception_netowkr(input_shape, num_classes):
-    inputs = keras.Input(shape=input_shape)
-
-    # Entry block
-    x = layers.Rescaling(1.0 / 255)(inputs)
-    x = layers.Conv2D(128, 3, strides=2, padding="same")(x)
-    x = layers.BatchNormalization()(x)
-    x = layers.Activation("relu")(x)
-
-    previous_block_activation = x  # Set aside residual
-
-    for size in [256, 512, 728]:
-        x = layers.Activation("relu")(x)
-        x = layers.SeparableConv2D(size, 3, padding="same")(x)
-        x = layers.BatchNormalization()(x)
-
-        x = layers.Activation("relu")(x)
-        x = layers.SeparableConv2D(size, 3, padding="same")(x)
-        x = layers.BatchNormalization()(x)
-
-        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)
-
-        # Project residual
-        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
-            previous_block_activation
-        )
-        x = layers.add([x, residual])  # Add back residual
-        previous_block_activation = x  # Set aside next residual
-
-    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
-    x = layers.BatchNormalization()(x)
-    x = layers.Activation("relu")(x)
-
-    x = layers.GlobalAveragePooling2D()(x)
-    if num_classes == 2:
-        units = 1
-    else:
-        units = num_classes
-
-    x = layers.Dropout(0.25)(x)
-    # We specify activation=None so as to return logits
-    outputs = layers.Dense(units, activation=None)(x)
-    return keras.Model(inputs, outputs)
-
-
-model = simple_xception_netowkr(input_shape=(256, 256) + (3,), num_classes=152)
-
-# Train
-epochs = 25
-
-callbacks = [
-    keras.callbacks.ModelCheckpoint("save_at_{epoch}.keras"),
-]
-model.compile(
-    optimizer=keras.optimizers.Adam(3e-4),
-    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-    metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
-)
-model.fit(
-    train_ds,
-    epochs=epochs,
-    callbacks=callbacks,
-    validation_data=val_ds,
-)
\ No newline at end of file
diff --git a/pokedex_EfficientNetV2M.py b/pokedex_EfficientNetV2M.py
new file mode 100644
index 0000000..b9fd017
--- /dev/null
+++ b/pokedex_EfficientNetV2M.py
@@ -0,0 +1,118 @@
+import os
+import gc
+import keras
+import tensorflow as tf
+from keras import layers
+from tensorflow import data as tf_data
+
+# --- GPU Strategy ---
+strategy = tf.distribute.MirroredStrategy()
+print("Number of GPUs:", strategy.num_replicas_in_sync)
+
+# --- Parameters ---
+data_dir = "/home/users/d/divia/scratch/Combined_Dataset"
+image_size = (240, 240)
+num_classes = 151
+base_batch_size = 32
+base_lr = 1e-3
+
+# --- Auto-Tune Batch Size ---
+def find_max_batch_size(data_dir, image_size, candidate_sizes=[256, 128, 64, 32, 16]):
+    print("Tuning batch size...")
+    for bs in candidate_sizes:
+        try:
+            print(f"Trying global batch size {bs}...")
+            ds = keras.utils.image_dataset_from_directory(
+                data_dir,
+                labels="inferred",
+                label_mode="int",
+                image_size=image_size,
+                batch_size=bs,
+                shuffle=True
+            )
+            for batch in ds.take(1):  # Try to load one batch
+                tf.print("✓ Batch size", bs, "works")
+            del ds
+            gc.collect()
+            tf.keras.backend.clear_session()
+            return bs
+        except tf.errors.ResourceExhaustedError:
+            print(f"✗ Batch size {bs} too large.")
+    raise RuntimeError("No suitable batch size found.")
+
+global_batch_size = find_max_batch_size(data_dir, image_size)
+scaled_lr = base_lr * (global_batch_size / base_batch_size)
+
+# --- Load Dataset ---
+full_ds = keras.utils.image_dataset_from_directory(
+    data_dir,
+    labels="inferred",
+    label_mode="int",
+    image_size=image_size,
+    batch_size=global_batch_size,
+    shuffle=True,
+    seed=1234
+)
+
+# --- Train/Val Split ---
+total_batches = tf.data.experimental.cardinality(full_ds).numpy()
+train_size = int(0.8 * total_batches)
+train_ds = full_ds.take(train_size)
+val_ds = full_ds.skip(train_size)
+
+# --- Data Augmentation ---
+data_augmentation_layers = keras.Sequential([
+    layers.RandomFlip("horizontal"),
+    layers.RandomRotation(0.1),
+])
+
+def preprocess_train(img, label):
+    img = data_augmentation_layers(img)
+    label = tf.one_hot(label, num_classes)
+    return img, label
+
+def preprocess_val(img, label):
+    label = tf.one_hot(label, num_classes)
+    return img, label
+
+train_ds = train_ds.map(preprocess_train, num_parallel_calls=tf_data.AUTOTUNE)
+val_ds = val_ds.map(preprocess_val, num_parallel_calls=tf_data.AUTOTUNE)
+
+train_ds = train_ds.prefetch(buffer_size=tf_data.AUTOTUNE)
+val_ds = val_ds.prefetch(buffer_size=tf_data.AUTOTUNE)
+
+# --- Build & Compile Model ---
+with strategy.scope():
+    base_model = tf.keras.applications.EfficientNetV2M(
+        include_top=False,
+        weights='imagenet',
+        input_shape=(240, 240, 3)
+    )
+
+    model = keras.Sequential([
+        base_model,
+        layers.GlobalAveragePooling2D(),
+        layers.Dense(256, activation='relu'),
+        layers.Dropout(0.5),
+        layers.Dense(num_classes, activation='softmax')
+    ])
+
+    optimizer = tf.keras.optimizers.Adam(learning_rate=scaled_lr)
+
+    model.compile(
+        optimizer=optimizer,
+        loss='categorical_crossentropy',
+        metrics=['accuracy']
+    )
+
+# --- Train ---
+callbacks = [
+    keras.callbacks.ModelCheckpoint("EfficientNetV2M/save_at_{epoch}.keras")
+]
+
+model.fit(
+    train_ds,
+    validation_data=val_ds,
+    epochs=10,
+    callbacks=callbacks
+)
\ No newline at end of file
diff --git a/pokedex_ResNet50.py b/pokedex_ResNet50.py
new file mode 100644
index 0000000..9c16244
--- /dev/null
+++ b/pokedex_ResNet50.py
@@ -0,0 +1,118 @@
+import os
+import gc
+import keras
+import tensorflow as tf
+from keras import layers
+from tensorflow import data as tf_data
+
+# --- GPU Strategy ---
+strategy = tf.distribute.MirroredStrategy()
+print("Number of GPUs:", strategy.num_replicas_in_sync)
+
+# --- Parameters ---
+data_dir = "/home/users/d/divia/scratch/Combined_Dataset"
+image_size = (224, 224)
+num_classes = 151
+base_batch_size = 32
+base_lr = 1e-3
+
+# --- Auto-Tune Batch Size ---
+def find_max_batch_size(data_dir, image_size, candidate_sizes=[256, 128, 64, 32, 16]):
+    print("Tuning batch size...")
+    for bs in candidate_sizes:
+        try:
+            print(f"Trying global batch size {bs}...")
+            ds = keras.utils.image_dataset_from_directory(
+                data_dir,
+                labels="inferred",
+                label_mode="int",
+                image_size=image_size,
+                batch_size=bs,
+                shuffle=True
+            )
+            for batch in ds.take(1):  # Try to load one batch
+                tf.print("✓ Batch size", bs, "works")
+            del ds
+            gc.collect()
+            tf.keras.backend.clear_session()
+            return bs
+        except tf.errors.ResourceExhaustedError:
+            print(f"✗ Batch size {bs} too large.")
+    raise RuntimeError("No suitable batch size found.")
+
+global_batch_size = find_max_batch_size(data_dir, image_size)
+scaled_lr = base_lr * (global_batch_size / base_batch_size)
+
+# --- Load Dataset ---
+full_ds = keras.utils.image_dataset_from_directory(
+    data_dir,
+    labels="inferred",
+    label_mode="int",
+    image_size=image_size,
+    batch_size=global_batch_size,
+    shuffle=True,
+    seed=1234
+)
+
+# --- Train/Val Split ---
+total_batches = tf.data.experimental.cardinality(full_ds).numpy()
+train_size = int(0.8 * total_batches)
+train_ds = full_ds.take(train_size)
+val_ds = full_ds.skip(train_size)
+
+# --- Data Augmentation ---
+data_augmentation_layers = keras.Sequential([
+    layers.RandomFlip("horizontal"),
+    layers.RandomRotation(0.1),
+])
+
+def preprocess_train(img, label):
+    img = data_augmentation_layers(img)
+    label = tf.one_hot(label, num_classes)
+    return img, label
+
+def preprocess_val(img, label):
+    label = tf.one_hot(label, num_classes)
+    return img, label
+
+train_ds = train_ds.map(preprocess_train, num_parallel_calls=tf_data.AUTOTUNE)
+val_ds = val_ds.map(preprocess_val, num_parallel_calls=tf_data.AUTOTUNE)
+
+train_ds = train_ds.prefetch(buffer_size=tf_data.AUTOTUNE)
+val_ds = val_ds.prefetch(buffer_size=tf_data.AUTOTUNE)
+
+# --- Build & Compile Model ---
+with strategy.scope():
+    base_model = tf.keras.applications.ResNet50(
+        include_top=False,
+        weights='imagenet',
+        input_shape=(224, 224, 3)
+    )
+
+    model = keras.Sequential([
+        base_model,
+        layers.GlobalAveragePooling2D(),
+        layers.Dense(256, activation='relu'),
+        layers.Dropout(0.5),
+        layers.Dense(num_classes, activation='softmax')
+    ])
+
+    optimizer = tf.keras.optimizers.Adam(learning_rate=scaled_lr)
+
+    model.compile(
+        optimizer=optimizer,
+        loss='categorical_crossentropy',
+        metrics=['accuracy']
+    )
+
+# --- Train ---
+callbacks = [
+    keras.callbacks.ModelCheckpoint("ResNet50/save_at_{epoch}.keras")
+]
+
+model.fit(
+    train_ds,
+    validation_data=val_ds,
+    epochs=10,
+    callbacks=callbacks
+)
\ No newline at end of file
diff --git a/test.py b/test.py
deleted file mode 100644
index 1579836..0000000
--- a/test.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import keras
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import random
-
-#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-
-# Get class names from directory structure
-temp_ds = keras.utils.image_dataset_from_directory(
-    "Combined_Dataset",
-    labels="inferred",
-    label_mode="int",
-    image_size=(256, 256),
-    batch_size=1,
-    shuffle=False
-)
-class_names = temp_ds.class_names
-
-# Load model
-model = keras.models.load_model("simple_xception.keras")
-
-# Base path
-base_path = "Combined_Dataset"
-
-# Prepare 2x2 plot
-plt.figure(figsize=(10, 10))
-
-for i in range(4):
-    # Pick random class and image
-    random_class = random.choice(class_names)
-    class_folder = os.path.join(base_path, random_class)
-    random_image = random.choice([
-        f for f in os.listdir(class_folder)
-        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
-    ])
-    img_path = os.path.join(class_folder, random_image)
-
-    # Load and preprocess
-    img = keras.utils.load_img(img_path, target_size=(256, 256))
-    img_array = keras.utils.img_to_array(img)
-    img_array = keras.ops.expand_dims(img_array, 0)
-
-    # Predict
-    predictions = model.predict(img_array, verbose=0)
-    probabilities = keras.ops.softmax(predictions[0])
-    predicted_class_index = np.argmax(probabilities)
-    predicted_label = class_names[predicted_class_index]
-    confidence = 100 * probabilities[predicted_class_index]
-
-    # Compare with actual
-    is_correct = predicted_label == random_class
-
-    # Plot
-    ax = plt.subplot(2, 2, i + 1)
-    plt.imshow(img)
-    plt.axis("off")
-    plt.title(
-        f"Pred: {predicted_label}\n"
-        f"True: {random_class}\n"
-        f"{'Yes' if is_correct else 'No'} | {confidence:.1f}%",
-        fontsize=10
-    )
-
-plt.tight_layout()
-plt.show()
--
GitLab
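
Since this patch removes test.py, the quick visual check it provided could be adapted to the new checkpoints roughly along the lines below. This is only a sketch: the checkpoint file name, test image path, and dataset location are assumptions, not part of the patch, and because the new heads end in a softmax layer the predictions are already probabilities (no extra softmax as in the old logits-based model).

    # Minimal inference sketch for the new ResNet50 checkpoints (assumed paths).
    import numpy as np
    import keras

    data_dir = "/home/users/d/divia/scratch/Combined_Dataset"  # assumed, as in the training scripts

    # Recover class names from the directory structure, mirroring the removed test.py.
    class_names = keras.utils.image_dataset_from_directory(
        data_dir,
        labels="inferred",
        label_mode="int",
        image_size=(224, 224),
        batch_size=1,
        shuffle=False,
    ).class_names

    # Hypothetical checkpoint name; the training script writes one file per epoch.
    model = keras.models.load_model("ResNet50/save_at_10.keras")

    # Hypothetical test image, resized to the ResNet50 training input size.
    img = keras.utils.load_img("pikachu.jpg", target_size=(224, 224))
    x = np.expand_dims(keras.utils.img_to_array(img), axis=0)

    probs = model.predict(x, verbose=0)[0]  # softmax output over the 151 classes
    print(class_names[int(np.argmax(probs))], f"{100 * float(np.max(probs)):.1f}%")

The same sketch would apply to the EfficientNetV2M checkpoints with a 240x240 target size and the "EfficientNetV2M/" checkpoint directory.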