diff --git a/python/pokedex_rpi.py b/python/pokedex_rpi.py
index 258de2ed92d93d56edf62a1076cfb7345ac4ab8a..52a17af90e4a0f4184cc086e6a2bda91d2c0f760 100644
--- a/python/pokedex_rpi.py
+++ b/python/pokedex_rpi.py
@@ -1,60 +1,70 @@
 import cv2
 import numpy as np
 import json
-from hailo_platform.pyhailort import HailoRT
 import argparse
+from hailo_platform.pyhailort import HailoRT
 
-# --- WHAT ? ---
-parser = argparse.ArgumentParser(description="WHAT ?!")
+# --- Argparse ---
+parser = argparse.ArgumentParser(description="Pokémon Classifier Inference with Hailo-8")
 parser.add_argument("--model", choices=["1", "2"], required=True, help="1 = ResNet50, 2 = Xception")
 args = parser.parse_args()
 
-# Paths
+# --- Paths ---
 if args.model == "1":
     hef_path = "../models/ResNet50/pokedex_ResNet50.hef"
     json_path = "../models/ResNet50/class_names.json"
-    size=(224,224)
+    input_shape = (224, 224)
 elif args.model == "2":
     hef_path = "../models/Xception/pokedex_Xception.hef"
     json_path = "../models/Xception/class_names.json"
-    size=(256,256)
+    input_shape = (256, 256)
+else:
+    raise ValueError("Invalid model selection")
 
-# Load class names
+# --- Load class names ---
 with open(json_path, "r") as f:
     class_names = json.load(f)
 
+# --- Setup device and network ---
 device = HailoRT.Device()
 hef = HailoRT.Hef(hef_path)
-configured_network_group = device.create_hef_group(hef)
-input_vstream_info = configured_network_group.get_input_vstream_infos()[0]
-output_vstream_info = configured_network_group.get_output_vstream_infos()[0]
+network_group = device.create_hef_group(hef)
+
+input_info = network_group.get_input_vstream_infos()[0]
+output_info = network_group.get_output_vstream_infos()[0]
 
-# --- Open webcam ---
+# --- Open webcam and capture image ---
 cap = cv2.VideoCapture(0)
 if not cap.isOpened():
     print("-- Unable to open webcam")
-    exit()
+    exit(1)
 
-print("-- Taking picture...")
+print("-- Capturing image...")
 ret, frame = cap.read()
 cap.release()
 
 if not ret:
     print("-- Failed to capture image")
-    exit()
+    exit(1)
 
 # --- Preprocess image ---
-image = cv2.resize(frame, size)
-image = image.astype(np.float32) / 255.0 # Normalize to [0, 1]
-image = np.expand_dims(image, axis=0) # Add batch dimension
-image = np.transpose(image, (0, 3, 1, 2)) # NHWC ? NCHW if required (check your model)
+image = cv2.resize(frame, input_shape)
+image = image.astype(np.float32)
+
+# Standard Hailo normalization (if your model expects ImageNet style)
+image -= [123.68, 116.779, 103.939]
+# image /= 255.0 # only if your model was trained with [0,1] normalization
+
+# NHWC → NCHW
+image = np.transpose(image, (2, 0, 1)) # (H, W, C) → (C, H, W)
+image = np.expand_dims(image, axis=0) # Add batch dimension → (1, C, H, W)
 
 # --- Inference ---
-with HailoRT.VirtualStreams(input_vstream_info, output_vstream_info, configured_network_group) as (input_vstreams, output_vstreams):
+with HailoRT.VirtualStreams(input_info, output_info, network_group) as (input_vstreams, output_vstreams):
     input_vstreams[0].send(image)
     output_data = output_vstreams[0].recv()
 
 # --- Postprocess ---
 predicted_idx = int(np.argmax(output_data))
 predicted_name = class_names[predicted_idx]
-print(f"-- Predicted Pokémon: {predicted_name}")
+print(f"🎯 Predicted Pokémon: {predicted_name}")