Patch for python/pokedex_rpi.py: rename `size` to `input_shape` and
preprocess the captured frame before Hailo inference.

Summary of changes in this patch:
  * Moves `import os` from the third import line to after `import cv2`.
  * Renames the per-model `size` tuple to `input_shape` and updates its
    uses (the startup print and the `main={"size": ...}` camera setting).
  * Rewords three section comments and drops the "*raw* capture" comment.
  * Adds a preprocessing step (float32 cast, per-channel constant
    subtraction, HWC -> CHW transpose, leading batch axis) and passes the
    result to `hailo.run()` instead of the captured frame.

Review notes:
  * NOTE(review): this patch was recovered from a copy whose line breaks
    and indentation had been collapsed. Blank context lines were placed
    to satisfy the hunk line counts and indentation follows the Python
    block structure; verify against the target file before applying.
  * NOTE(review): the subtracted constants are ordered 123.68, 116.779,
    103.939. Confirm this matches the channel order that picamera2
    actually delivers for the "RGB888" format.
  * NOTE(review): confirm the compiled HEF expects a float32 array with
    shape (1, C, H, W). Before this patch the frame from
    `capture_array()` was passed to `hailo.run()` unchanged.
  * NOTE(review): moving `import os` below the third-party imports
    departs from the PEP 8 stdlib-first grouping; consider dropping that
    part of hunk 1.

diff --git a/python/pokedex_rpi.py b/python/pokedex_rpi.py
index 3585d56654284f6693be6cc1975f1a5af0893b71..7454e9a44026b2d6c5cd12d4ac7ec55a256b6f62 100644
--- a/python/pokedex_rpi.py
+++ b/python/pokedex_rpi.py
@@ -1,25 +1,25 @@
 import json
 import argparse
-import os
 import numpy as np
 from picamera2 import Picamera2
 from picamera2.devices.hailo import Hailo
 import cv2
+import os
 
 # --- Argparse ---
 parser = argparse.ArgumentParser(description="Pokémon Classifier Inference with Hailo-8")
 parser.add_argument("--model", choices=["1", "2"], required=True, help="1 = ResNet50, 2 = Xception")
 args = parser.parse_args()
 
-# --- Paths ---
+# --- Paths & Input Sizes ---
 if args.model == "1":
     hef_path = "../models/ResNet50/pokedex_ResNet50.hef"
     json_path = "../models/ResNet50/class_names.json"
-    size=(224, 224)
+    input_shape = (224, 224)
 elif args.model == "2":
     hef_path = "../models/Xception/pokedex_Xception.hef"
     json_path = "../models/Xception/class_names.json"
-    size = (256, 256)
+    input_shape = (256, 256)
 else:
     raise ValueError("Invalid model selection")
 
@@ -27,15 +27,13 @@ else:
 with open(json_path, "r") as f:
     class_names = json.load(f)
 
-# --- Inference ---
+# --- Hailo Inference with PiCamera2 ---
 with Hailo(hef_path) as hailo:
-    print(f"-- Hailo model input size: {size}")
+    print(f"-- Model input shape: {input_shape}")
 
     picam2 = Picamera2()
-
-    # Use *raw* capture configuration with correct size
     config = picam2.create_still_configuration(
-        main={"size": size, "format": "RGB888"},
+        main={"size": input_shape, "format": "RGB888"},
         lores=None,
         display=None
     )
@@ -46,7 +44,7 @@ with Hailo(hef_path) as hailo:
     frame = picam2.capture_array()
     print(f"-- Captured frame shape: {frame.shape}")
 
-    # Optional: display image
+    # Optional: show image
     try:
         cv2.imshow("Captured Image", frame)
         print("-- Press any key to continue...")
@@ -58,8 +56,14 @@ with Hailo(hef_path) as hailo:
         cv2.imwrite(output_path, frame)
         os.system(f"feh --fullscreen {output_path}")
 
+    # --- Preprocess (normalize + layout + batch) ---
+    image = frame.astype(np.float32)  # Convert to float32
+    image -= [123.68, 116.779, 103.939]  # Subtract ImageNet mean
+    image = np.transpose(image, (2, 0, 1))  # HWC → CHW
+    image = np.expand_dims(image, axis=0)  # Add batch dimension
+
     print("-- Running inference...")
-    inference_results = hailo.run(frame)
+    inference_results = hailo.run(image)
     predicted_idx = int(np.argmax(inference_results))
     predicted_name = class_names[predicted_idx]
 