diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..ff86f24
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
diff --git a/data/COLLABORA_02_RGB.png b/data/COLLABORA_02_RGB.png
new file mode 100644
index 0000000..46cd1f1
Binary files /dev/null and b/data/COLLABORA_02_RGB.png differ
diff --git a/data/Chinedu-Obasi_2684938.jpg b/data/Chinedu-Obasi_2684938.jpg
new file mode 100644
index 0000000..f60a259
Binary files /dev/null and b/data/Chinedu-Obasi_2684938.jpg differ
diff --git a/demo/football/README.md b/demo/football/README.md
new file mode 100644
index 0000000..7a33266
--- /dev/null
+++ b/demo/football/README.md
@@ -0,0 +1,41 @@
+# Football demo
+
+Real-time football broadcast overlay: **detection → tracking → overlay**
+(`pyml_yolo`/`pyml_objectdetector` -> `pyml_tracker` -> `pyml_football_overlay`).
+
+The overlay draws a foot ellipse per player coloured by team (red/blue, voted
+from jersey hue), a gold ellipse for referees, motion trails (off by default),
+and a focal-player HUD with headshot, ball contacts, and distance travelled.
+Players whose team isn't decided yet (and unclassifiable kits, e.g. the
+goalkeeper) are left unmarked rather than drawn in a placeholder colour. The
+ball is tracked for contact counting but its marker is off by default.
+
+## Run
+
+```bash
+# file -> annotated MP4
+demo/football/run.sh
+demo/football/run.sh 08fd33_4.mp4 demo/football/out.mp4 1280x720
+
+# file -> live on-screen
+demo/football/run.sh display
+demo/football/run.sh display 08fd33_4.mp4 1280x720
+
+# live camera -> on-screen
+demo/football/run.sh camera /dev/video0
+```
+
+## Environment knobs
+
+| Var        | Default | Meaning |
+|------------|---------|---------|
+| `BACKEND`  | `pt`    | `pt` = PyTorch `pyml_yolo`; `fp16` = ONNX FP16 via `pyml_objectdetector` (CUDA). |
+| `INTERVAL` | `3`     | Run detection every Nth frame; the tracker/overlay still update every frame, so it stays smooth at ~N× less inference cost. The main real-time lever. |
+
+```bash
+BACKEND=fp16 demo/football/run.sh display     # faster inference path
+INTERVAL=5   demo/football/run.sh display     # detect every 5th frame
+INTERVAL=1   demo/football/run.sh             # detect every frame (max accuracy)
+```
+
+
diff --git a/demo/football/onnx_loop.py b/demo/football/onnx_loop.py
new file mode 100644
index 0000000..bbe9540
--- /dev/null
+++ b/demo/football/onnx_loop.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+# Run a video through the ONNX (fp16) football pipeline.
+#
+#   detector (onnx) -> pyml_tracker -> pyml_football_overlay
+#
+# Usage:
+#   python demo/football/onnx_loop.py INPUT.mp4             # live display, looping
+#   python demo/football/onnx_loop.py INPUT.mp4 OUTPUT.mp4  # write annotated mp4
+# (self-contained: finds the repo venv + plugins and re-execs into them)
+import os
+import sys
+import glob
+
+REPO = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+VENV = os.path.join(REPO, ".venv")
+MODEL = os.path.join(REPO, "models/football/football_fp16.onnx")
+os.environ["GST_PLUGIN_PATH"] = (
+    os.path.join(REPO, "plugins") + os.pathsep + os.environ.get("GST_PLUGIN_PATH", "")
+)
+if not os.environ.get("_ONNX_LOOP_REEXEC") and os.path.isdir(VENV):
+    os.environ["VIRTUAL_ENV"] = VENV
+    os.environ["PATH"] = (
+        os.path.join(VENV, "bin") + os.pathsep + os.environ.get("PATH", "")
+    )
+    libs = sorted(
+        set(
+            glob.glob(
+                os.path.join(
+                    VENV, "lib", "python*", "site-packages", "nvidia", "*", "lib"
+                )
+            )
+        )
+    )
+    if libs:
+        os.environ["LD_LIBRARY_PATH"] = os.pathsep.join(
+            [*libs, os.environ.get("LD_LIBRARY_PATH", "")]
+        )
+    os.environ["_ONNX_LOOP_REEXEC"] = "1"
+    pybin = os.path.join(VENV, "bin", "python")
+    exe = pybin if os.path.exists(pybin) else sys.executable
+    os.execv(exe, [exe, *sys.argv])
+
+import gi  # noqa: E402
+
+gi.require_version("Gst", "1.0")
+from gi.repository import Gst, GLib  # noqa: E402
+
+Gst.init(None)
+
+
+def on_message(bus, message, loop, pipeline, do_loop):
+    t = message.type
+
+    if t == Gst.MessageType.EOS:
+        if do_loop:
+            # Display mode: seek back to the start to loop the clip.
+            print("Looping...")
+            if not pipeline.seek_simple(
+                Gst.Format.TIME, Gst.SeekFlags.FLUSH | Gst.SeekFlags.KEY_UNIT, 0
+            ):
+                print("Failed to seek back to start", file=sys.stderr)
+                loop.quit()
+        else:
+            # mp4 mode: end of file, the muxer has finalized the file.
+            loop.quit()
+
+    elif t == Gst.MessageType.ERROR:
+        err, debug = message.parse_error()
+        print(f"ERROR: {err}", file=sys.stderr)
+        if debug:
+            print(f"DEBUG: {debug}", file=sys.stderr)
+        loop.quit()
+
+
+def main():
+    if len(sys.argv) < 2:
+        print(f"usage: {sys.argv[0]} INPUT.mp4 [OUTPUT.mp4]", file=sys.stderr)
+        print(
+            "  no OUTPUT -> live display (looping); OUTPUT -> write annotated mp4",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    video = os.path.abspath(sys.argv[1])
+    out = os.path.abspath(sys.argv[2]) if len(sys.argv) > 2 else None
+
+    # Shared detection + overlay chain. Feed the ORIGINAL resolution:
+    # pyml_objectdetector letterboxes to the model's 640 internally for
+    # inference and maps boxes back, so the overlay stays full-res.
+    chain = (
+        f"filesrc location={video} ! "
+        "decodebin ! videoconvert ! video/x-raw,format=RGB ! "
+        "queue max-size-buffers=8 max-size-time=0 max-size-bytes=0 ! "
+        "pyml_objectdetector engine-name=onnx "
+        f"  model-name={MODEL} device=cuda:0 "
+        "  input-format=nchw post-process=anchor_free interval=1 "
+        "  confidence=0.1 nms-iou=0.7 ! "
+        "queue max-size-buffers=8 max-size-time=0 max-size-bytes=0 ! "
+        "pyml_tracker tracker-type=bytetrack new-track-confidence=0.25 ! "
+        "videoconvert ! video/x-raw,format=RGBA ! "
+        "queue max-size-buffers=8 max-size-time=0 max-size-bytes=0 ! "
+        "pyml_football_overlay class-names=ball,goalkeeper,player,referee "
+        "  team-colors=true trails=false show-ids=false show-labels=false "
+        "  draw-from-detections=true min-confidence=0 merge-iou=0.5 "
+        "  position-smoothing=0.7 highlight-focal=false ! "
+    )
+    if out:
+        pipeline_description = (
+            chain + "queue max-size-buffers=8 max-size-time=0 max-size-bytes=0 ! "
+            "videoconvert ! openh264enc ! h264parse ! mp4mux ! "
+            f"filesink location={out}"
+        )
+        do_loop = False
+    else:
+        # Pre-roll buffer absorbs inference jitter for smooth real-time display.
+        pipeline_description = (
+            chain + "queue max-size-buffers=600 max-size-time=0 max-size-bytes=0 "
+            "  min-threshold-buffers=30 ! "
+            "videoconvert ! autovideosink sync=true"
+        )
+        do_loop = True
+
+    print(pipeline_description)
+    print(f"writing -> {out}" if out else "live display (looping)")
+
+    try:
+        pipeline = Gst.parse_launch(pipeline_description)
+    except GLib.Error as e:
+        print(f"Failed to create pipeline: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    loop = GLib.MainLoop()
+
+    bus = pipeline.get_bus()
+    bus.add_signal_watch()
+    bus.connect("message", on_message, loop, pipeline, do_loop)
+
+    pipeline.set_state(Gst.State.PLAYING)
+
+    try:
+        loop.run()
+    except KeyboardInterrupt:
+        if out:
+            # Finalize the mp4 on Ctrl-C: send EOS and wait for the muxer to
+            # flush its trailer, otherwise the file is left unplayable.
+            pipeline.send_event(Gst.Event.new_eos())
+            bus.timed_pop_filtered(
+                5 * Gst.SECOND, Gst.MessageType.EOS | Gst.MessageType.ERROR
+            )
+    finally:
+        pipeline.set_state(Gst.State.NULL)
+    if out:
+        print(f"Done: {out}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/demo/football/run.sh b/demo/football/run.sh
new file mode 100755
index 0000000..0be9a15
--- /dev/null
+++ b/demo/football/run.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+# Football broadcast-overlay demo.
+#
+# Pipeline:
+#   detector  ->  pyml_tracker (ByteTrack)  ->  pyml_football_overlay
+#
+# Usage:
+#   demo/football/run.sh [INPUT.mp4] [OUTPUT.mp4] [WxH]      # file -> annotated mp4
+#   demo/football/run.sh display [INPUT.mp4] [WxH]           # file -> live on-screen
+#   demo/football/run.sh camera [/dev/videoN] [WxH]          # live camera -> on-screen
+set -euo pipefail
+
+REPO="$(cd "$(dirname "$0")/../.." && pwd)"
+cd "$REPO"
+source .venv/bin/activate
+export GST_PLUGIN_PATH="$REPO/plugins:${GST_PLUGIN_PATH:-}"
+
+BACKEND="${BACKEND:-pt}"
+INTERVAL="${INTERVAL:-3}"   # run detection every Nth frame; tracker/overlay stay per-frame
+CONF="${CONF:-0.1}"        # detector confidence threshold (low = more detections)
+IOU="${IOU:-0.7}"          # NMS IoU (ultralytics/football_analyzer default)
+NEWTRACK="${NEWTRACK:-0.25}" # min confidence to START a new track (ByteTrack gate; kills ghosts)
+DRAWCONF="${DRAWCONF:-0}"  # min confidence to DRAW a detection (0 = draw all; raise to trim weak boxes)
+MERGE="${MERGE:-0.5}"      # collapse overlapping boxes (lower=merge more; 0 disables) so one player=one circle
+SMOOTH="${SMOOTH:-0.6}"    # temporal EMA on circle positions (0=off, higher=smoother but more lag)
+CLASSES="ball,goalkeeper,player,referee"
+TRACK="pyml_tracker tracker-type=bytetrack new-track-confidence=$NEWTRACK"
+# Detection-based overlay: circles sit on the raw per-frame detections (no
+# tracking drift/phantoms/doubles); merge collapses overlaps and
+# position-smoothing low-passes the positions. DRAWCONF defaults 0 so no
+# detection is hidden; the tracker still runs so the HUD keeps its stats.
+OVERLAY="pyml_football_overlay class-names=$CLASSES team-colors=true trails=false show-ids=false show-labels=false draw-from-detections=true min-confidence=$DRAWCONF merge-iou=$MERGE position-smoothing=$SMOOTH highlight-focal=false"
+
+if [[ "$BACKEND" == "fp16" ]]; then  # CONF/IOU apply to both backends; no empty LD_LIBRARY_PATH entry
+  export LD_LIBRARY_PATH="$(python -c "import os,nvidia,glob;b=os.path.dirname(nvidia.__file__);print(':'.join(sorted(set(glob.glob(b+'/*/lib')))))")${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+  DETECT="pyml_objectdetector engine-name=onnx model-name=models/football/football_fp16.onnx device=cuda:0 input-format=nchw post-process=anchor_free interval=$INTERVAL confidence=$CONF nms-iou=$IOU"
+  IN_FMT="RGB"; FORCE_SQUARE=1
+else
+  DETECT="pyml_yolo model-name=models/football/football device=cuda:0 interval=$INTERVAL confidence=$CONF nms-iou=$IOU"
+  IN_FMT="RGBA"; FORCE_SQUARE=0
+fi
+
+POST_DETECT="$TRACK"
+[[ "$IN_FMT" == "RGB" ]] && POST_DETECT="$TRACK ! videoconvert ! video/x-raw,format=RGBA"
+
+# A queue at each stage boundary turns the serial chain into a threaded
+# pipeline: while inference runs on frame N, the sink renders N-1 and the
+# decoder reads N+1. Nothing is dropped (leaky=no, the default).
+Q="queue max-size-buffers=8 max-size-time=0 max-size-bytes=0"
+# Pre-roll buffer before the display sink: build a head start of processed
+# frames so real-time playback (sync=true) rides out per-frame inference
+# jitter without stuttering. Smooths jitter, not a sustained throughput
+# deficit -- if inference can't keep up on average, playback just lags
+# (still no drops). Raise INTERVAL or the head start if it falls behind.
+PREROLL="queue max-size-buffers=600 max-size-time=0 max-size-bytes=0 min-threshold-buffers=30"
+
+# detector -> tracker -> overlay, with a thread boundary at each hop.
+CHAIN="$Q ! $DETECT ! $Q ! $POST_DETECT ! $Q ! $OVERLAY"
+
+MODE="${1:-file}"
+if [[ "$MODE" == "camera" ]]; then
+  DEV="${2:-/dev/video0}"; SIZE="${3:-1280x720}"
+  [[ "$FORCE_SQUARE" == "1" ]] && SIZE="640x640"
+  W="${SIZE%x*}"; H="${SIZE#*x}"
+  echo "[$BACKEND] live camera $DEV @ ${W}x${H} -> autovideosink (needs a display)"
+  exec gst-launch-1.0 -e \
+    v4l2src device="$DEV" ! videoconvert ! videoscale \
+    ! "video/x-raw,width=${W},height=${H},format=${IN_FMT}" \
+    ! $CHAIN \
+    ! $Q ! videoconvert ! autovideosink sync=false
+elif [[ "$MODE" == "display" ]]; then
+  IN="${2:-data/soccer_tracking.mp4}"
+  SIZE="${3:-1280x720}"
+  [[ "$FORCE_SQUARE" == "1" ]] && SIZE="640x640"
+  W="${SIZE%x*}"; H="${SIZE#*x}"
+  [[ -f "$IN" ]] || { echo "input not found: $IN" >&2; exit 1; }
+  echo "[$BACKEND] '$IN' @ ${W}x${H} -> live display (real-time, sync=true)"
+  exec gst-launch-1.0 -e \
+    filesrc location="$IN" ! decodebin ! videoconvert ! videoscale \
+    ! "video/x-raw,width=${W},height=${H},format=${IN_FMT}" \
+    ! $CHAIN \
+    ! $PREROLL ! videoconvert ! autovideosink sync=true
+else
+  IN="${1:-data/soccer_tracking.mp4}"
+  OUT="${2:-demo/football/out.mp4}"
+  SIZE="${3:-1280x720}"
+  [[ "$FORCE_SQUARE" == "1" ]] && SIZE="640x640"
+  W="${SIZE%x*}"; H="${SIZE#*x}"
+  [[ -f "$IN" ]] || { echo "input not found: $IN" >&2; exit 1; }
+  echo "[$BACKEND] '$IN' @ ${W}x${H} -> '$OUT'"
+  gst-launch-1.0 -e \
+    filesrc location="$IN" ! decodebin ! videoconvert ! videoscale \
+    ! "video/x-raw,width=${W},height=${H},format=${IN_FMT}" \
+    ! $CHAIN \
+    ! $Q ! videoconvert ! openh264enc ! h264parse ! mp4mux ! filesink location="$OUT"
+  echo "Done: $OUT"
+fi
diff --git a/models/football/football.onnx b/models/football/football.onnx
new file mode 100644
index 0000000..0d742a2
--- /dev/null
+++ b/models/football/football.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80c093f8f67e866232f3e31e71809071cf4f6c97914ab7c0cd82cbb8d6e30dfb
+size 101508645
diff --git a/models/football/football.pt b/models/football/football.pt
new file mode 100644
index 0000000..e1fa8fb
--- /dev/null
+++ b/models/football/football.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffd531b4739e544b075479d6a41118931e82f8362d576258218e8fab2e4bdfa9
+size 51178706
diff --git a/models/football/football_fp16.onnx b/models/football/football_fp16.onnx
new file mode 100644
index 0000000..6f15606
--- /dev/null
+++ b/models/football/football_fp16.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e6589a1567088115f8e84564e938e09938f152b4b42902e525359bef601e350
+size 50859790
diff --git a/models/football/football_int8.onnx b/models/football/football_int8.onnx
new file mode 100644
index 0000000..4dc0e6f
--- /dev/null
+++ b/models/football/football_int8.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af48014db347b8821efd7d107819cec2959aeb99cbb99c272e6ea9e4bd938519
+size 30817706
diff --git a/plugins/python/base_objectdetector.py b/plugins/python/base_objectdetector.py
index 84f2f2d..5b14f02 100644
--- a/plugins/python/base_objectdetector.py
+++ b/plugins/python/base_objectdetector.py
@@ -46,6 +46,11 @@ def __init__(self):
         self.metadata = Metadata("si")
         self.logger.info("Initialized BaseObjectDetector")
         self.__track = False
+        self.__interval = 1
+        self._det_counter = 0
+        self._cached_results = None
+        self._cached_num_sources = 1
+        self._cached_id = None
 
     @GObject.Property(type=bool, default=False)
     def track(self):
@@ -60,6 +65,17 @@ def track(self, value):
         if self.engine:
             self.engine.track = value
 
+    @GObject.Property(type=int, default=1, minimum=1, maximum=10000)
+    def interval(self):
+        "Run detection every Nth frame and re-attach the previous detections on "
+        "the frames in between (N=1 runs detection every frame). Lets downstream "
+        "tracking/overlay stay per-frame while detection runs at a lower rate."
+        return self.__interval
+
+    @interval.setter
+    def interval(self, value):
+        self.__interval = max(1, int(value))
+
     def do_forward(self, frames):
         self.logger.info(
             f"Forward called with frames shape: {frames.shape if frames is not None else 'None'}"
@@ -77,47 +93,39 @@ def do_transform_ip(self, buf):
         """
         self.logger.info(f"Transforming buffer: {hex(id(buf))}")
         try:
-            # Use MuxedBufferProcessor to extract frames and metadata
-            muxed_processor = MuxedBufferProcessor(
-                self.logger,
-                self.width,
-                self.height,
-                self.framerate_num,
-                self.framerate_denom,
-            )
-            frames, id_str, num_sources, format = muxed_processor.extract_frames(
-                buf, self.sinkpad
-            )
-            if frames is None:
-                self.logger.error("Failed to extract frames")
-                return Gst.FlowReturn.ERROR
-
-            # Process frames (single or batch)
-            results = self.do_forward(frames)
-            if results is None:
-                self.logger.error("Inference returned None")
-                return Gst.FlowReturn.ERROR
-
-            # Handle single-frame case
-            if num_sources == 1:
-                self.do_decode(buf, results, stream_idx=0)
-            # Handle batch case
-            else:
-                self.logger.info(
-                    f"Processing batch with ID={id_str}, num_sources={num_sources}"
+            run_detect = (self._det_counter % self.__interval) == 0
+            self._det_counter += 1
+
+            if run_detect:
+                # Use MuxedBufferProcessor to extract frames and metadata
+                muxed_processor = MuxedBufferProcessor(
+                    self.logger,
+                    self.width,
+                    self.height,
+                    self.framerate_num,
+                    self.framerate_denom,
+                )
+                frames, id_str, num_sources, format = muxed_processor.extract_frames(
+                    buf, self.sinkpad
                 )
-                results_list = results if isinstance(results, list) else [results]
-                if len(results_list) != num_sources:
-                    self.logger.error(
-                        f"Expected {num_sources} results, got {len(results_list)}"
-                    )
+                if frames is None:
+                    self.logger.error("Failed to extract frames")
                     return Gst.FlowReturn.ERROR
 
-                for idx, result in enumerate(results_list):
-                    if result is None:
-                        self.logger.warning(f"Frame {idx} result is None")
-                        continue
-                    self.do_decode(buf, result, stream_idx=idx)
+                results = self.do_forward(frames)
+                if results is None:
+                    self.logger.error("Inference returned None")
+                    return Gst.FlowReturn.ERROR
+
+                self._cached_results = results
+                self._cached_num_sources = num_sources
+                self._decode_results(buf, results, num_sources)
+            elif self._cached_results is not None:
+                # Skip inference on this frame and re-attach the previous
+                # detections so downstream tracking/overlay stay per-frame.
+                self._decode_results(
+                    buf, self._cached_results, self._cached_num_sources
+                )
 
             attached_meta = GstAnalytics.buffer_get_analytics_relation_meta(buf)
             if attached_meta:
@@ -132,6 +140,22 @@ def do_transform_ip(self, buf):
             self.logger.error(f"Transform error: {e}\n{traceback.format_exc()}")
             return Gst.FlowReturn.ERROR
 
+    def _decode_results(self, buf, results, num_sources):
+        if num_sources == 1:
+            self.do_decode(buf, results, stream_idx=0)
+        else:
+            results_list = results if isinstance(results, list) else [results]
+            if len(results_list) != num_sources:
+                self.logger.error(
+                    f"Expected {num_sources} results, got {len(results_list)}"
+                )
+                return
+            for idx, result in enumerate(results_list):
+                if result is None:
+                    self.logger.warning(f"Frame {idx} result is None")
+                    continue
+                self.do_decode(buf, result, stream_idx=idx)
+
     def do_decode(self, buf, output, stream_idx=0):
         self.logger.info(
             f"Decoding for stream {stream_idx}: {output} (type: {type(output)})"
diff --git a/plugins/python/engine/drpai_engine.py b/plugins/python/engine/drpai_engine.py
new file mode 100644
index 0000000..20c0ca9
--- /dev/null
+++ b/plugins/python/engine/drpai_engine.py
@@ -0,0 +1,145 @@
+# DRPAIEngine
+# Copyright (C) 2024-2026 Collabora Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+
+import os
+import numpy as np
+
+from .ml_engine import MLEngine
+
+
+def _anchor_count(imgsz):
+    """Total anchors a YOLO model emits for a square input at strides 8/16/32."""
+    return sum((imgsz // s) ** 2 for s in (8, 16, 32))
+
+
+class DRPAIEngine(MLEngine):
+    """DRP-AI TVM runtime engine for Renesas RZ/V boards (RZ/V2H).
+
+    Runs a model compiled with the Renesas DRP-AI TVM compiler on the DRP-AI
+    NPU. `model_name` is the path to the compiled deploy directory containing
+    ``deploy.so`` / ``deploy.json`` / ``deploy.params``.
+
+    Inference goes through the ``drpai_runtime`` pybind11 module (built from
+    ``rzv2h/`` against the board's DRP-AI TVM runtime.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.runtime = None
+        self.model_name = None
+        self.kwargs = None
+        self.imgsz = 640
+
+        self.input_format = "nchw"
+        self.post_process = "anchor_free"
+
+    def do_load_model(self, model_name, **kwargs):
+        self.model_name = model_name
+        self.kwargs = kwargs
+        imgsz = kwargs.get("imgsz")
+        if imgsz:
+            try:
+                self.imgsz = int(imgsz)
+            except (TypeError, ValueError):
+                pass
+
+        try:
+            import drpai_runtime
+        except ImportError as e:
+            self.logger.error(
+                "drpai_runtime module not found. Build the pybind11 binding in "
+                "rzv2h/ inside the RZ/V2H DRP-AI TVM SDK and put it on PYTHONPATH "
+                f"(see rzv2h/README.md). Import error: {e}"
+            )
+            return False
+
+        if not os.path.isdir(model_name):
+            self.logger.error(
+                f"DRP-AI model directory not found: {model_name!r} "
+                "(expected a folder with deploy.so/json/params)"
+            )
+            return False
+
+        try:
+            self.runtime = drpai_runtime.Runtime()
+            if not self.runtime.load(model_name):
+                self.logger.error(f"DRP-AI failed to load model from {model_name}")
+                self.runtime = None
+                return False
+            self.logger.info(
+                f"DRP-AI model loaded from {model_name} (imgsz={self.imgsz})"
+            )
+            return True
+        except Exception as e:
+            self.logger.error(f"DRP-AI load error: {e}")
+            self.runtime = None
+            return False
+
+    def do_set_device(self, device):
+        self.device = device
+        self.logger.info(f"DRP-AI engine device set to {device}")
+
+    def do_generate(self, input_text, max_length=1000, system_prompt=None):
+        raise NotImplementedError(
+            "DRP-AI engine is a vision-inference engine; text generation is not "
+            "supported."
+        )
+
+    def _preprocess(self, frame_hwc):
+        """HWC uint8 RGB(A) frame -> contiguous (1, 3, H, W) float32 in [0, 1]."""
+        x = np.asarray(frame_hwc, dtype=np.float32)
+        if x.shape[-1] > 3:
+            x = x[..., :3]
+        x = x / 255.0
+        x = np.transpose(x, (2, 0, 1))
+        x = np.expand_dims(x, 0)
+        return np.ascontiguousarray(x, dtype=np.float32)
+
+    def _gather_output(self):
+        """Read output 0 and reshape the flat buffer to (1, 4+nc, anchors)."""
+        out = np.asarray(self.runtime.get_output(0), dtype=np.float32).reshape(-1)
+        anchors = _anchor_count(self.imgsz)
+        if anchors and out.size % anchors == 0:
+            channels = out.size // anchors
+            return out.reshape(1, channels, anchors)
+        self.logger.warning(
+            f"DRP-AI output size {out.size} not divisible by {anchors} anchors; "
+            "passing raw to post-process"
+        )
+        return out
+
+    def do_forward(self, frames):
+        if self.runtime is None:
+            self.logger.error("DRP-AI runtime not loaded")
+            return None
+
+        is_batch = isinstance(frames, np.ndarray) and frames.ndim == 4
+        batch = frames if is_batch else frames[np.newaxis, ...]
+
+        results = []
+        for img in batch:
+            try:
+                self.runtime.set_input(0, self._preprocess(img))
+                self.runtime.run()
+                raw = self._gather_output()
+                results.append(self._apply_post_process(raw, is_batch=False))
+            except Exception as e:
+                self.logger.error(f"DRP-AI inference error: {e}")
+                results.append(None)
+
+        return results if is_batch else results[0]
diff --git a/plugins/python/engine/engine_factory.py b/plugins/python/engine/engine_factory.py
index 2a0e5fb..d361bdf 100644
--- a/plugins/python/engine/engine_factory.py
+++ b/plugins/python/engine/engine_factory.py
@@ -44,6 +44,7 @@ class EngineFactory:
     MIGRAPHX_ENGINE = "migraphx"
     IREE_ENGINE = "iree"
     NCNN_ENGINE = "ncnn"
+    DRPAI_ENGINE = "drpai"
 
     _builtins_registered: bool = False  # Class-level flag for singleton-like lazy init
 
@@ -154,6 +155,13 @@ def _register_builtins(cls) -> None:
         except ImportError:
             pass
 
+        try:
+            from .drpai_engine import DRPAIEngine
+
+            _try_register(cls.DRPAI_ENGINE, DRPAIEngine)
+        except ImportError:
+            pass
+
     @staticmethod
     def register(engine_type: str, engine_class: Type) -> None:
         _engine_registry[engine_type] = engine_class
diff --git a/plugins/python/engine/ml_engine.py b/plugins/python/engine/ml_engine.py
index 3d5a74c..58f2e9d 100644
--- a/plugins/python/engine/ml_engine.py
+++ b/plugins/python/engine/ml_engine.py
@@ -96,7 +96,12 @@ def _apply_post_process(self, raw, is_batch):
         if pp != "none" and not isinstance(raw, list):
             from utils.detection_decoder import decode
 
-            results = decode(raw, pp)
+            results = decode(
+                raw,
+                pp,
+                conf_threshold=getattr(self, "conf", 0.25),
+                iou_threshold=getattr(self, "iou", 0.45),
+            )
             return results[0] if not is_batch else results
         return raw
 
diff --git a/plugins/python/engine/onnx_engine.py b/plugins/python/engine/onnx_engine.py
index ba85e84..ad3dce7 100644
--- a/plugins/python/engine/onnx_engine.py
+++ b/plugins/python/engine/onnx_engine.py
@@ -49,6 +49,62 @@ def _input_is_nchw(self):
         shape = self.session.get_inputs()[0].shape
         return len(shape) == 4 and shape[1] in (1, 3, 4)
 
+    def _model_input_hw(self):
+        """(H, W) the model's input expects, or None if dynamic/unknown."""
+        if self.session is None:
+            return None
+        shape = self.session.get_inputs()[0].shape
+        if len(shape) != 4:
+            return None
+        h, w = shape[2], shape[3]
+        if isinstance(h, int) and isinstance(w, int) and h > 0 and w > 0:
+            return (h, w)
+        return None
+
+    def _letterbox(self, frames, is_batch):
+        """Resize frame(s) to the model input size, preserving aspect ratio with
+        grey padding (YOLO-style). Returns (processed, transform); transform =
+        (ratio, pad_x, pad_y, orig_w, orig_h) maps model coords back to the
+        original frame. Returns (frames, None) when no resize is needed (already
+        model-sized, or dynamic input) -- so pre-sized callers are unaffected."""
+        import numpy as np
+        import cv2
+        # Frames are indexed channels-last below: (N, H, W, C) or (H, W, C).
+        mhw = self._model_input_hw()
+        if mhw is None:
+            return frames, None
+        mh, mw = mhw
+        imgs = frames if is_batch else frames[None]
+        h, w = int(imgs.shape[1]), int(imgs.shape[2])
+        if (h, w) == (mh, mw):
+            return frames, None
+        r = min(mh / h, mw / w)  # largest uniform scale that still fits
+        nh, nw = int(round(h * r)), int(round(w * r))
+        pad_x, pad_y = (mw - nw) // 2, (mh - nh) // 2  # centre the resized image
+        out = np.full((imgs.shape[0], mh, mw, imgs.shape[3]), 114, dtype=imgs.dtype)
+        for i in range(imgs.shape[0]):
+            out[i, pad_y : pad_y + nh, pad_x : pad_x + nw] = cv2.resize(
+                imgs[i], (nw, nh), interpolation=cv2.INTER_LINEAR
+            )
+        proc = out if is_batch else out[0]
+        return proc, (r, float(pad_x), float(pad_y), w, h)
+
+    def _unletterbox(self, results, transform):
+        """Map each result's boxes from model to original-frame coords, in place."""
+        import numpy as np
+
+        r, pad_x, pad_y, ow, oh = transform
+        for res in results if isinstance(results, list) else [results]:
+            if not isinstance(res, dict):  # only dict results are rewritten
+                continue
+            b = res.get("boxes")
+            if b is None or len(b) == 0:
+                continue
+            b = np.asarray(b, dtype=np.float32).copy()  # never edit the input array
+            b[:, [0, 2]] = ((b[:, [0, 2]] - pad_x) / r).clip(0, ow)  # x columns
+            b[:, [1, 3]] = ((b[:, [1, 3]] - pad_y) / r).clip(0, oh)  # y columns
+            res["boxes"] = b
+
     def do_load_model(self, model_name, **kwargs):
         """Load a pre-trained model by name from TorchVision, Transformers (via Optimum ONNX), or a local ONNX path."""
         processor_name = kwargs.get("processor_name")
@@ -369,10 +425,21 @@ def do_forward(self, frames):
             fmt = self.input_format
             if fmt == "auto" and self._input_is_nchw():
                 self.input_format = "nchw"
-            img = self._apply_input_format(frames.astype(np.float32) / 255.0, is_batch)
+            # Letterbox to the model's fixed input size for inference, keeping
+            # the transform so boxes map back to the original frame -- lets the
+            # caller feed full-res frames and overlay on them.
+            proc, transform = self._letterbox(frames, is_batch)
+            img = self._apply_input_format(proc.astype(np.float32) / 255.0, is_batch)
+            if "float16" in self.session.get_inputs()[0].type:
+                img = img.astype(np.float16)
             outputs = self.session.run(self.output_names, {self.input_names[0]: img})
             raw = outputs if len(outputs) > 1 else outputs[0]
-            return self._apply_post_process(raw, is_batch)
+            if isinstance(raw, np.ndarray) and raw.dtype != np.float32:
+                raw = raw.astype(np.float32)
+            results = self._apply_post_process(raw, is_batch)
+            if transform is not None:
+                self._unletterbox(results, transform)
+            return results
 
         else:
             raise ValueError("Unsupported model type.")
diff --git a/plugins/python/football_analyzer.py b/plugins/python/football_analyzer.py
new file mode 100644
index 0000000..545be61
--- /dev/null
+++ b/plugins/python/football_analyzer.py
@@ -0,0 +1,906 @@
+# FootballAnalyzer
+# Copyright (C) 2024-2026 Collabora Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+
+import os
+import pickle
+
+from log.global_logger import GlobalLogger
+
+CAN_REGISTER_ELEMENT = True
+try:
+    import gi
+
+    gi.require_version("Gst", "1.0")
+    gi.require_version("GstBase", "1.0")
+    gi.require_version("GstVideo", "1.0")
+    from gi.repository import Gst, GstBase, GstVideo, GObject  # noqa: E402
+
+    # Define caps before the optional heavy imports so the element's pad
+    # templates still resolve when an optional dep (e.g. supervision) is missing;
+    # only registration is then skipped (CAN_REGISTER_ELEMENT=False).
+    VIDEO_CAPS = Gst.Caps.from_string("video/x-raw, format=BGR")
+
+    import cv2
+    import numpy as np
+    import supervision as sv
+    from ultralytics import YOLO
+
+    from log.logger_factory import LoggerFactory  # noqa: E402
+
+except ImportError as e:
+    CAN_REGISTER_ELEMENT = False
+    GlobalLogger().warning(
+        f"The 'pyml_football_analyzer' element will not be available. Error: {e}"
+    )
+
+
+def get_center_of_bbox(bbox):
+    """Return the int-truncated (x, y) midpoint of an (x1, y1, x2, y2) box."""
+    return int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)
+
+
+def get_bbox_width(bbox):
+    return bbox[2] - bbox[0]  # x2 - x1 of an (x1, y1, x2, y2) box
+
+
+class Tracker:
+    """Detect, track and annotate players, referees and the ball across a clip."""
+
+    def __init__(self, model_path):
+        self.model = YOLO(model_path)  # ultralytics detector loaded from model_path
+        self.tracker = sv.ByteTrack()  # supervision tracker fed by get_object_tracks
+        self.sift = cv2.SIFT_create()  # feature extractor used by get_camera_motion
+        self.matcher = cv2.BFMatcher(cv2.NORM_L2)  # brute-force descriptor matcher
+
+    def _foreground_mask(self, shape, frame_tracks, dilation=15):
+        """Return a uint8 mask: 255 on background, 0 over every tracked box.
+
+        Player, referee and ball boxes are grown by ``dilation`` px and clamped.
+        """
+        height, width = shape[:2]
+        mask = np.full((height, width), 255, dtype=np.uint8)
+        for group in ("players", "referees", "ball"):
+            for entry in frame_tracks.get(group, {}).values():
+                left, top, right, bottom = (int(v) for v in entry["bbox"][:4])
+                rows = slice(max(0, top - dilation), min(height, bottom + dilation))
+                cols = slice(max(0, left - dilation), min(width, right + dilation))
+                mask[rows, cols] = 0
+        return mask
+
+    def get_camera_motion(
+        self,
+        frames,
+        tracks,
+        read_from_stub=False,
+        stub_path=None,
+        ratio=0.75,  # ratio-test threshold applied to the two nearest matches
+        ransac_thresh=3.0,  # reprojection threshold handed to cv2.findHomography
+        min_matches=8,  # with fewer surviving matches the step stays identity
+    ):
+        if read_from_stub and stub_path is not None and os.path.exists(stub_path):
+            with open(stub_path, "rb") as f:
+                return pickle.load(f)  # NOTE(review): unpickling runs code; trusted stubs only
+
+        cumulative = [np.eye(3, dtype=np.float64)]  # frame 0 is the reference frame
+        prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
+        prev_mask = self._foreground_mask(
+            frames[0].shape, {k: tracks[k][0] for k in tracks}
+        )
+        prev_kp, prev_desc = self.sift.detectAndCompute(prev_gray, prev_mask)
+
+        for i in range(1, len(frames)):
+            curr_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
+            curr_mask = self._foreground_mask(
+                frames[i].shape, {k: tracks[k][i] for k in tracks}
+            )
+            curr_kp, curr_desc = self.sift.detectAndCompute(curr_gray, curr_mask)
+
+            H_step = np.eye(3, dtype=np.float64)  # fallback: no camera motion this step
+            if (
+                prev_desc is not None
+                and curr_desc is not None
+                and len(prev_desc) >= 2
+                and len(curr_desc) >= 2
+            ):
+                knn = self.matcher.knnMatch(prev_desc, curr_desc, k=2)
+                good = [
+                    m
+                    for pair in knn
+                    if len(pair) == 2
+                    for m, n in [pair]
+                    if m.distance < ratio * n.distance  # best match must clearly beat runner-up
+                ]
+                if len(good) >= min_matches:
+                    pts_prev = np.float32(
+                        [prev_kp[m.queryIdx].pt for m in good]
+                    ).reshape(-1, 1, 2)
+                    pts_curr = np.float32(
+                        [curr_kp[m.trainIdx].pt for m in good]
+                    ).reshape(-1, 1, 2)
+                    H, _ = cv2.findHomography(
+                        pts_prev, pts_curr, cv2.RANSAC, ransac_thresh
+                    )
+                    if H is not None:
+                        H_step = H
+
+            cumulative.append(H_step @ cumulative[-1])  # chain steps: frame 0 -> frame i
+            prev_kp, prev_desc = curr_kp, curr_desc  # only keypoints/descriptors carry over
+
+        if stub_path is not None:
+            with open(stub_path, "wb") as f:
+                pickle.dump(cumulative, f)  # cache one 3x3 matrix per frame
+        return cumulative
+
+    def detect_frames(self, frames):
+        """Run the YOLO model over ``frames`` in batches of 20 at conf=0.1."""
+        step = 20
+        detections = []
+        for i in range(0, len(frames), step):
+            detections.extend(self.model.predict(frames[i : i + step], conf=0.1))
+        return detections
+
+    def get_object_tracks(self, frames, read_from_stub=False, stub_path=None):
+        if read_from_stub and stub_path is not None and os.path.exists(stub_path):
+            with open(stub_path, "rb") as f:
+                tracks = pickle.load(f)  # NOTE(review): unpickling runs code; trusted stubs only
+            return tracks
+
+        detections = self.detect_frames(frames)
+        tracks = {"players": [], "referees": [], "ball": []}  # one dict per frame in each list
+
+        per_frame = []  # first pass: tracker output kept so classes can be voted clip-wide
+        class_votes = {}  # track id -> {"player": n, "referee": n}
+        for detection in detections:
+            cls_names = detection.names
+            cls_names_inv = {v: k for k, v in cls_names.items()}
+
+            detection_supervision = sv.Detections.from_ultralytics(detection)
+
+            for object_ind, class_id in enumerate(detection_supervision.class_id):
+                if cls_names[class_id] == "goalkeeper":  # goalkeepers are tracked as players
+                    detection_supervision.class_id[object_ind] = cls_names_inv["player"]
+
+            tracked = self.tracker.update_with_detections(detection_supervision)
+            per_frame.append((tracked, detection_supervision, cls_names, cls_names_inv))
+
+            for fd in tracked:
+                cls_name = cls_names[fd[3]]  # presumably fd[3] is class_id -- confirm vs supervision
+                track_id = fd[4]  # presumably fd[4] is tracker_id -- confirm vs supervision
+                if cls_name in ("player", "referee"):
+                    v = class_votes.setdefault(track_id, {"player": 0, "referee": 0})
+                    v[cls_name] += 1
+
+        track_class = {
+            tid: ("player" if v["player"] >= v["referee"] else "referee")  # ties -> player
+            for tid, v in class_votes.items()
+        }
+
+        for frame_num, (tracked, raw_detections, cls_names, cls_names_inv) in enumerate(
+            per_frame
+        ):
+            tracks["players"].append({})
+            tracks["referees"].append({})
+            tracks["ball"].append({})
+
+            for fd in tracked:
+                bbox = fd[0].tolist()
+                track_id = fd[4]
+                stable_cls = track_class.get(track_id)  # None for tracks that never got a vote
+                if stable_cls == "player":
+                    tracks["players"][frame_num][track_id] = {"bbox": bbox}
+                elif stable_cls == "referee":
+                    tracks["referees"][frame_num][track_id] = {"bbox": bbox}
+
+            for fd in raw_detections:  # the ball comes from raw detections, not the tracker
+                if fd[3] == cls_names_inv["ball"]:
+                    tracks["ball"][frame_num][1] = {"bbox": fd[0].tolist()}  # fixed key 1; last wins
+
+        if stub_path is not None:
+            with open(stub_path, "wb") as f:
+                pickle.dump(tracks, f)
+
+        return tracks
+
+    def draw_ellipse(self, frame, bbox, color, track_id=None):
+        y2 = int(bbox[3])  # bottom edge of the box: the ellipse sits at the feet
+        x_center, _ = get_center_of_bbox(bbox)
+        width = get_bbox_width(bbox)
+
+        cv2.ellipse(
+            frame,
+            center=(x_center, y2),
+            axes=(int(width), int(0.35 * width)),  # half-axes scale with the box width
+            angle=0.0,
+            startAngle=-45,
+            endAngle=235,  # partial arc (-45..235 degrees), not a closed ellipse
+            color=color,
+            thickness=2,
+            lineType=cv2.LINE_4,
+        )
+
+        rectangle_width = 40  # fixed-size label plate, only drawn when track_id is given
+        rectangle_height = 20
+        x1_rect = x_center - rectangle_width // 2
+        x2_rect = x_center + rectangle_width // 2
+        y1_rect = (y2 - rectangle_height // 2) + 15  # plate is shifted 15 px below the feet
+        y2_rect = (y2 + rectangle_height // 2) + 15
+
+        if track_id is not None:
+            cv2.rectangle(
+                frame,
+                (int(x1_rect), int(y1_rect)),
+                (int(x2_rect), int(y2_rect)),
+                color,
+                cv2.FILLED,
+            )
+            x1_text = x1_rect + 12
+            if track_id > 99:  # three-digit ids start further left to stay on the plate
+                x1_text -= 10
+            cv2.putText(
+                frame,
+                f"{track_id}",
+                (int(x1_text), int(y1_rect + 15)),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.6,
+                (0, 0, 0),
+                2,
+            )
+        return frame  # same array that was passed in; drawing happens in place
+
+    def draw_traingle(self, frame, bbox, color):  # NOTE: "traingle" spelling is the name callers use
+        y = int(bbox[1])  # top edge of the box: the marker points down at it
+        x, _ = get_center_of_bbox(bbox)
+        triangle_points = np.array(
+            [
+                [x, y],  # tip, touching the top of the box
+                [x - 10, y - 20],
+                [x + 10, y - 20],
+            ]
+        )
+        cv2.drawContours(frame, [triangle_points], 0, color, cv2.FILLED)  # fill
+        cv2.drawContours(frame, [triangle_points], 0, (0, 0, 0), 2)  # black outline
+        return frame
+
+    def classify_jersey(self, frame, bbox):
+        x1, y1, x2, y2 = [int(v) for v in bbox]
+        h_box, w_box = y2 - y1, x2 - x1
+        if h_box <= 0 or w_box <= 0:
+            return None  # degenerate box: no vote
+        jy1 = y1 + int(0.15 * h_box)  # sample rows 15%..55% of the box height
+        jy2 = y1 + int(0.55 * h_box)
+        jx1 = x1 + int(0.25 * w_box)  # and the middle half of the box width
+        jx2 = x1 + int(0.75 * w_box)
+        H, W = frame.shape[:2]
+        jy1, jy2 = max(0, jy1), min(H, jy2)  # clamp the patch to the frame
+        jx1, jx2 = max(0, jx1), min(W, jx2)
+        if jy2 - jy1 < 3 or jx2 - jx1 < 3:
+            return None  # patch too small to judge
+        patch = frame[jy1:jy2, jx1:jx2]
+        hsv = cv2.cvtColor(patch, cv2.COLOR_BGR2HSV)  # assumes a uint8 BGR frame -- confirm
+        s_v = (hsv[..., 1] > 80) & (hsv[..., 2] > 50)  # keep saturated, non-dark pixels only
+        h = hsv[..., 0]
+        red = (((h <= 10) | (h >= 170)) & s_v).sum()  # hue at both ends of the scale
+        blue = ((h >= 100) & (h <= 130) & s_v).sum()
+        min_pixels = max(20, int(0.02 * patch.shape[0] * patch.shape[1]))  # >= 20 px or 2%
+        if red < min_pixels and blue < min_pixels:
+            return None  # neither colour is present enough to vote
+        return "red" if red >= blue else "blue"  # ties go to red
+
+    def _ref_bottom_center(self, bbox, H_inv):
+        xc, _ = get_center_of_bbox(bbox)
+        yb = int(bbox[3])  # bottom-centre of the box is the point that gets transformed
+        pt = cv2.perspectiveTransform(np.array([[[xc, yb]]], dtype=np.float32), H_inv)[
+            0
+        ][0]
+        return float(pt[0]), float(pt[1])  # plain floats, in the space H_inv maps into
+
+    def _minimap_extent(self, tracks, camera_motion):
+        xs, ys = [], []
+        n = len(tracks["players"])
+        for i in range(n):
+            H_inv = (
+                np.linalg.inv(camera_motion[i])  # undo the cumulative motion of frame i
+                if camera_motion is not None
+                else np.eye(3)
+            )
+            for key in ("players", "referees"):  # the ball does not affect the extent
+                for p in tracks[key][i].values():
+                    x, y = self._ref_bottom_center(p["bbox"], H_inv)
+                    xs.append(x)
+                    ys.append(y)
+        if not xs:
+            return None  # nothing tracked anywhere in the clip
+        min_x, max_x = min(xs), max(xs)
+        min_y, max_y = min(ys), max(ys)
+        pad_x = 0.05 * max(1.0, max_x - min_x)  # 5% margin, never less than 0.05
+        pad_y = 0.05 * max(1.0, max_y - min_y)
+        return min_x - pad_x, min_y - pad_y, max_x + pad_x, max_y + pad_y
+
+    def _make_minimap_bg(self, mm_w, mm_h):
+        bg = np.full((mm_h, mm_w, 3), (40, 110, 40), dtype=np.uint8)  # solid green canvas
+        cv2.rectangle(bg, (2, 2), (mm_w - 3, mm_h - 3), (240, 240, 240), 2)  # border
+        cv2.line(bg, (mm_w // 2, 2), (mm_w // 2, mm_h - 3), (240, 240, 240), 1)  # midline
+        cv2.circle(bg, (mm_w // 2, mm_h // 2), max(10, mm_h // 8), (240, 240, 240), 1)
+        return bg  # callers copy this before drawing dots on it
+
+    def _project_to_minimap(self, extent, mm_w, mm_h, x, y):
+        """Map point (x, y) inside ``extent`` to minimap pixels, scaled and centred."""
+        min_x, min_y, max_x, max_y = extent
+        span_x, span_y = max(1e-6, max_x - min_x), max(1e-6, max_y - min_y)
+        scale = min((mm_w - 10) / span_x, (mm_h - 10) / span_y)
+        col = (mm_w - scale * span_x) / 2.0 + (x - min_x) * scale
+        row = (mm_h - scale * span_y) / 2.0 + (y - min_y) * scale
+        return int(col), int(row)
+
+    def _smooth_points(self, pts, window):
+        """Return a centred moving average of ``pts`` (window truncated at the ends).
+
+        ``pts`` is returned unchanged when ``window <= 1`` or it has under 2 points.
+        """
+        if window <= 1 or len(pts) < 2:
+            return pts
+        arr = np.asarray(pts, dtype=np.float32)
+        reach, out = window // 2, np.empty_like(arr)
+        for idx in range(len(arr)):
+            out[idx] = arr[max(0, idx - reach) : idx + reach + 1].mean(axis=0)
+        return out
+
+    def draw_trail(self, frame, points, color):
+        if len(points) < 2:
+            return frame  # a polyline needs at least two points
+        pts = np.array(points, dtype=np.int32).reshape(-1, 1, 2)  # int32 (N, 1, 2) for polylines
+        cv2.polylines(
+            frame, [pts], isClosed=False, color=color, thickness=2, lineType=cv2.LINE_AA
+        )
+        return frame  # drawn in place on the array that was passed in
+
+    def _point_to_bbox_distance(self, px, py, bbox):
+        """Euclidean distance from (px, py) to the box; 0.0 when the point is inside."""
+        left, top, right, bottom = bbox
+        gap_x, gap_y = max(left - px, 0.0, px - right), max(top - py, 0.0, py - bottom)
+        return float(np.hypot(gap_x, gap_y))
+
+    def _ball_contact(self, player_dict, ball_bbox, contact_pad_ratio):
+        """Return the id of the player nearest the ball centre, or None when the gap
+        exceeds ``contact_pad_ratio`` times that box's longer side (or no players)."""
+        cx, cy = get_center_of_bbox(ball_bbox)
+        nearest = (None, float("inf"), None)  # (track id, distance, bbox)
+        for tid, player in player_dict.items():
+            gap = self._point_to_bbox_distance(cx, cy, player["bbox"])
+            if gap < nearest[1]:
+                nearest = (tid, gap, player["bbox"])
+        tid, gap, box = nearest
+        if box is None:
+            return None
+        reach = contact_pad_ratio * max(box[2] - box[0], box[3] - box[1])
+        return None if gap > reach else tid
+
+    def _count_total_contacts(self, tracks, contact_gap_frames, contact_pad_ratio):
+        """Count ball-contact episodes per player track over the whole clip.
+
+        A contact opens a new episode only when more than ``contact_gap_frames``
+        frames have passed since that player's previous contact frame.
+        """
+        totals, previous = {}, {}
+        for idx, (players, balls) in enumerate(zip(tracks["players"], tracks["ball"])):
+            ball = balls.get(1)
+            if ball is not None and players:
+                tid = self._ball_contact(players, ball["bbox"], contact_pad_ratio)
+                if tid is not None:
+                    seen = previous.get(tid)
+                    if seen is None or idx - seen > contact_gap_frames:
+                        totals[tid] = totals.get(tid, 0) + 1
+                    previous[tid] = idx
+        return totals
+
+    def draw_player_hud(
+        self, frame, player_id, contacts, distance_m, color, headshot=None
+    ):
+        x, y = 10, 10  # panel is anchored near the top-left corner
+        bg_color = (131, 41, 92)
+        text_color = (47, 186, 64)
+        if headshot is not None:
+            hh, hw = headshot.shape[:2]
+            w, h = hw + 280, max(110, hh + 20)  # widen the panel to fit the image
+            text_x = x + hw + 20  # text starts to the right of the headshot
+        else:
+            w, h = 320, 100
+            text_x = x + 12
+        cv2.rectangle(frame, (x, y), (x + w, y + h), bg_color, cv2.FILLED)  # panel fill
+        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)  # panel border
+        if headshot is not None:
+            hy, hx = y + 10, x + 10
+            frame[hy : hy + headshot.shape[0], hx : hx + headshot.shape[1]] = headshot
+            cv2.rectangle(
+                frame,
+                (hx, hy),
+                (hx + headshot.shape[1], hy + headshot.shape[0]),
+                color,
+                2,
+            )
+        cv2.putText(
+            frame,
+            "Player #8",  # NOTE(review): hard-coded label; player_id is never used -- confirm intent
+            (text_x, y + 28),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.7,
+            text_color,
+            2,
+        )
+        cv2.putText(
+            frame,
+            f"Ball contacts: {contacts}",
+            (text_x, y + 58),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.6,
+            text_color,
+            1,
+        )
+        cv2.putText(
+            frame,
+            f"Distance: {distance_m:.1f} m",
+            (text_x, y + 85),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.6,
+            text_color,
+            1,
+        )
+        return frame  # drawn in place on the array that was passed in
+
+    def draw_annotations(
+        self,
+        video_frames,
+        tracks,
+        camera_motion=None,  # per-frame cumulative homographies; None means identity
+        trail_length=30,  # max trail points kept per player
+        contact_gap_frames=5,
+        contact_pad_ratio=0.25,
+        player_height_m=1.8,  # assumed real height used to turn pixels into metres
+        headshot_path=None,
+        headshot_size=90,
+        logo_path=None,
+        logo_height=80,
+        logo_margin=15,
+        trail_smooth_window=11,
+        show_minimap=True,
+        minimap_size=(320, 200),  # (width, height) in pixels
+        minimap_margin=15,
+    ):
+        output_video_frames = []
+        player_trails = {}  # track id -> recent bottom-centre points in reference coords
+        team_votes = {}  # track id -> {"red": n, "blue": n} jersey votes so far
+        team_bgr = {"red": (0, 0, 255), "blue": (255, 0, 0)}
+        default_color = (200, 200, 200)  # used until a track has at least one jersey vote
+
+        frames_count = {}  # track id -> number of frames the player appears in
+        for frame_players in tracks["players"]:
+            for tid in frame_players:
+                frames_count[tid] = frames_count.get(tid, 0) + 1
+        total_contacts = self._count_total_contacts(
+            tracks, contact_gap_frames, contact_pad_ratio
+        )
+
+        heights = [
+            p["bbox"][3] - p["bbox"][1]
+            for frame_players in tracks["players"]
+            for p in frame_players.values()
+            if p["bbox"][3] > p["bbox"][1]
+        ]
+        px_per_meter = float(np.median(heights)) / player_height_m if heights else 1.0
+
+        headshot = None
+        if headshot_path is not None and os.path.exists(headshot_path):
+            img = cv2.imread(headshot_path)
+            if img is not None:
+                headshot = cv2.resize(
+                    img, (headshot_size, headshot_size), interpolation=cv2.INTER_AREA
+                )
+
+        logo_bgr, logo_alpha = None, None
+        if logo_path is not None and os.path.exists(logo_path):
+            img = cv2.imread(logo_path, cv2.IMREAD_UNCHANGED)  # keep alpha if the file has one
+            if img is not None:
+                scale = logo_height / img.shape[0]
+                new_w = max(1, int(round(img.shape[1] * scale)))
+                img = cv2.resize(
+                    img, (new_w, logo_height), interpolation=cv2.INTER_LANCZOS4
+                )
+                if img.ndim == 3 and img.shape[2] == 4:
+                    logo_bgr = img[..., :3]
+                    logo_alpha = (img[..., 3:4].astype(np.float32)) / 255.0  # (h, w, 1) in 0..1
+                else:
+                    logo_bgr = (
+                        img if img.ndim == 3 else cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+                    )
+
+        minimap_bg, minimap_extent = None, None
+        if show_minimap:
+            minimap_extent = self._minimap_extent(tracks, camera_motion)
+            if minimap_extent is not None:
+                minimap_bg = self._make_minimap_bg(minimap_size[0], minimap_size[1])
+
+        if total_contacts:  # focal player: most contacts, ties broken by frames seen
+            focal_tid = max(
+                total_contacts,
+                key=lambda t: (total_contacts[t], frames_count.get(t, 0)),
+            )
+        elif frames_count:  # no contacts at all: fall back to the longest-seen player
+            focal_tid = max(frames_count, key=frames_count.get)
+        else:
+            focal_tid = None
+
+        last_ref_pt = {}  # track id -> previous reference-space point, for distance
+        player_distance = {}  # track id -> accumulated distance in reference pixels
+        player_contacts = {}  # running contact count up to the current frame
+        last_contact_frame = {}
+        for frame_num, frame in enumerate(video_frames):
+            frame = frame.copy()  # input frames are left untouched
+
+            player_dict = tracks["players"][frame_num]
+            ball_dict = tracks["ball"][frame_num]
+            referee_dict = tracks["referees"][frame_num]
+
+            H_cum = camera_motion[frame_num] if camera_motion is not None else np.eye(3)
+            H_inv = np.linalg.inv(H_cum)  # current frame -> reference coordinates
+
+            active_ids = set(player_dict.keys())
+            for track_id, player in player_dict.items():
+                x_center, _ = get_center_of_bbox(player["bbox"])
+                y_bottom = int(player["bbox"][3])
+                ref_pt = cv2.perspectiveTransform(
+                    np.array([[[x_center, y_bottom]]], dtype=np.float32), H_inv
+                )[0][0]
+                ref_tuple = (float(ref_pt[0]), float(ref_pt[1]))
+                player_trails.setdefault(track_id, []).append(ref_tuple)
+                if len(player_trails[track_id]) > trail_length:
+                    player_trails[track_id] = player_trails[track_id][-trail_length:]
+
+                if track_id in last_ref_pt:
+                    dx = ref_tuple[0] - last_ref_pt[track_id][0]
+                    dy = ref_tuple[1] - last_ref_pt[track_id][1]
+                    player_distance[track_id] = player_distance.get(
+                        track_id, 0.0
+                    ) + float(np.hypot(dx, dy))
+                last_ref_pt[track_id] = ref_tuple
+
+                vote = self.classify_jersey(frame, player["bbox"])
+                if vote is not None:
+                    counts = team_votes.setdefault(track_id, {"red": 0, "blue": 0})
+                    counts[vote] += 1
+            for track_id in list(player_trails.keys()):
+                if track_id not in active_ids:  # player missing this frame: restart its trail
+                    del player_trails[track_id]
+                    last_ref_pt.pop(track_id, None)
+
+            ball = ball_dict.get(1)
+            if ball is not None and player_dict:
+                tid = self._ball_contact(player_dict, ball["bbox"], contact_pad_ratio)
+                if tid is not None:
+                    last = last_contact_frame.get(tid)
+                    if last is None or (frame_num - last) > contact_gap_frames:
+                        player_contacts[tid] = player_contacts.get(tid, 0) + 1
+                    last_contact_frame[tid] = frame_num
+
+            focal_color = (131, 41, 92)
+
+            def color_for(track_id):  # reads team_votes as of the current frame
+                if track_id == focal_tid:
+                    return focal_color
+                counts = team_votes.get(track_id)
+                if not counts or (counts["red"] == 0 and counts["blue"] == 0):
+                    return default_color
+                return (
+                    team_bgr["red"]
+                    if counts["red"] >= counts["blue"]
+                    else team_bgr["blue"]
+                )
+
+            for track_id, ref_points in player_trails.items():
+                smoothed_ref = self._smooth_points(ref_points, trail_smooth_window)
+                pts = cv2.perspectiveTransform(
+                    np.asarray(smoothed_ref, dtype=np.float32).reshape(-1, 1, 2), H_cum
+                ).reshape(-1, 2)  # reference coordinates -> current frame
+                frame = self.draw_trail(frame, pts.tolist(), color_for(track_id))
+
+            for track_id, player in player_dict.items():
+                frame = self.draw_ellipse(frame, player["bbox"], color_for(track_id))
+
+            for _, referee in referee_dict.items():
+                frame = self.draw_ellipse(frame, referee["bbox"], (0, 255, 255))
+
+            for track_id, ball in ball_dict.items():
+                frame = self.draw_traingle(frame, ball["bbox"], (0, 255, 0))
+
+            if focal_tid is not None:
+                frame = self.draw_player_hud(
+                    frame,
+                    focal_tid,
+                    player_contacts.get(focal_tid, 0),
+                    player_distance.get(focal_tid, 0.0) / px_per_meter,  # pixels -> metres
+                    color_for(focal_tid),
+                    headshot=headshot,
+                )
+
+            if logo_bgr is not None:
+                lh, lw = logo_bgr.shape[:2]
+                fh, fw = frame.shape[:2]
+                x0 = max(0, fw - lw - logo_margin)  # top-right corner placement
+                y0 = logo_margin
+                x1, y1 = x0 + lw, y0 + lh
+                if logo_alpha is not None:
+                    roi = frame[y0:y1, x0:x1].astype(np.float32)  # NOTE(review): frame must fit logo
+                    blended = (
+                        roi * (1.0 - logo_alpha)
+                        + logo_bgr.astype(np.float32) * logo_alpha
+                    )
+                    frame[y0:y1, x0:x1] = blended.astype(np.uint8)
+                else:
+                    frame[y0:y1, x0:x1] = logo_bgr
+
+            if minimap_bg is not None and minimap_extent is not None:
+                mm = minimap_bg.copy()  # fresh canvas per frame
+                mm_w, mm_h = minimap_size
+                for tid, player in player_dict.items():
+                    rx, ry = self._ref_bottom_center(player["bbox"], H_inv)
+                    mx, my = self._project_to_minimap(
+                        minimap_extent, mm_w, mm_h, rx, ry
+                    )
+                    dot_color = color_for(tid)
+                    radius = 6 if tid == focal_tid else 4  # focal player gets a larger dot
+                    cv2.circle(mm, (mx, my), radius, dot_color, cv2.FILLED)
+                    cv2.circle(mm, (mx, my), radius, (0, 0, 0), 1)
+                for referee in referee_dict.values():
+                    rx, ry = self._ref_bottom_center(referee["bbox"], H_inv)
+                    mx, my = self._project_to_minimap(
+                        minimap_extent, mm_w, mm_h, rx, ry
+                    )
+                    cv2.circle(mm, (mx, my), 3, (0, 255, 255), cv2.FILLED)
+                    cv2.circle(mm, (mx, my), 3, (0, 0, 0), 1)
+                ball = ball_dict.get(1)
+                if ball is not None:
+                    bx, by = get_center_of_bbox(ball["bbox"])  # box centre, not bottom, for the ball
+                    bref = cv2.perspectiveTransform(
+                        np.array([[[bx, by]]], dtype=np.float32), H_inv
+                    )[0][0]
+                    mx, my = self._project_to_minimap(
+                        minimap_extent, mm_w, mm_h, float(bref[0]), float(bref[1])
+                    )
+                    cv2.circle(mm, (mx, my), 4, (0, 255, 0), cv2.FILLED)
+                    cv2.circle(mm, (mx, my), 4, (0, 0, 0), 1)
+                fh, fw = frame.shape[:2]
+                x0 = max(0, fw - mm_w - minimap_margin)  # bottom-right corner placement
+                y0 = max(0, fh - mm_h - minimap_margin)
+                frame[y0 : y0 + mm_h, x0 : x0 + mm_w] = mm  # NOTE(review): frame must fit minimap
+
+            output_video_frames.append(frame)
+
+        return output_video_frames  # one annotated copy per input frame
+
+
+class FootballAnalyzer(GstBase.BaseTransform):
+    """
+    Buffers every incoming video frame, then on EOS runs the full batch
+    pipeline (YOLO detection, ByteTrack with whole-clip class voting,
+    SIFT/RANSAC camera motion, annotated drawing with trails / HUD /
+    logo / minimap) and pushes the annotated frames downstream before
+    forwarding EOS.
+    """
+
+    __gstmetadata__ = (
+        "Football Analyzer",
+        "Filter/Effect/Video",
+        "Runs football_analysis (YOLO + ByteTrack + SIFT camera motion + "
+        "annotated drawing) on the full clip and emits annotated frames on EOS",
+        "Marcus Edel <marcus@urgs.org>",
+    )
+
+    src_template = Gst.PadTemplate.new(
+        "src",
+        Gst.PadDirection.SRC,
+        Gst.PadPresence.ALWAYS,
+        VIDEO_CAPS.copy(),
+    )
+    sink_template = Gst.PadTemplate.new(
+        "sink",
+        Gst.PadDirection.SINK,
+        Gst.PadPresence.ALWAYS,
+        VIDEO_CAPS.copy(),
+    )
+    __gsttemplates__ = (src_template, sink_template)
+
+    model_path = GObject.Property(
+        type=str,
+        default="",
+        nick="Model Path",
+        blurb="Path to the YOLO weights (must be set before processing)",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    headshot_path = GObject.Property(
+        type=str,
+        default="",
+        nick="Headshot Path",
+        blurb="Optional headshot image for the focal-player HUD",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    logo_path = GObject.Property(
+        type=str,
+        default="",
+        nick="Logo Path",
+        blurb="Optional top-right logo overlay",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    tracks_stub_path = GObject.Property(
+        type=str,
+        default="",
+        nick="Tracks Stub Path",
+        blurb="Optional pickle path for cached object tracks (read & written)",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    camera_motion_stub_path = GObject.Property(
+        type=str,
+        default="",
+        nick="Camera Motion Stub Path",
+        blurb="Optional pickle path for cached camera-motion homographies (read & written)",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    show_minimap = GObject.Property(
+        type=bool,
+        default=True,
+        nick="Show Minimap",
+        blurb="Render the bottom-right minimap overlay",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    def __init__(self):
+        super().__init__()
+        self.logger = LoggerFactory.get(LoggerFactory.LOGGER_TYPE_GST)
+        self._frames = []  # copies of every frame received since the last EOS run
+        self._pts = []  # buf.pts of each buffered frame, same order as _frames
+        self._duration = []  # buf.duration of each buffered frame
+        self._width = 0  # set from the negotiated caps in do_set_caps
+        self._height = 0
+        self._tracker = None  # created on first use by _ensure_tracker
+
+    def _ensure_tracker(self):
+        if self._tracker is not None:
+            return self._tracker  # already loaded; the model is only loaded once
+        if not self.model_path or not os.path.exists(self.model_path):
+            raise FileNotFoundError(f"YOLO model not found: {self.model_path!r}")
+        self.logger.info(f"Loading FootballAnalyzer Tracker from {self.model_path}")
+        self._tracker = Tracker(self.model_path)
+        return self._tracker
+
+    def do_set_caps(self, incaps, outcaps):
+        info = GstVideo.VideoInfo.new_from_caps(incaps)
+        self._width = info.width  # cached for reshaping mapped buffers in do_transform_ip
+        self._height = info.height  # NOTE(review): stride is not read; rows assumed unpadded
+        return True
+
+    def do_transform_ip(self, buf):
+        """Copy the incoming frame into the clip buffer and hold it back until EOS.
+
+        Returns CUSTOM_SUCCESS (GST_BASE_TRANSFORM_FLOW_DROPPED) so the raw frame
+        is not forwarded; its annotated copy is pushed with the same PTS on EOS.
+        """
+        try:
+            ok, mapinfo = buf.map(Gst.MapFlags.READ)
+            if not ok:
+                self.logger.error("Failed to map incoming buffer for read")
+                return Gst.FlowReturn.ERROR
+            try:
+                raw = np.frombuffer(mapinfo.data, dtype=np.uint8)
+                frame = raw.reshape(self._height, self._width, 3).copy()
+            finally:
+                buf.unmap(mapinfo)
+            self._frames.append(frame)
+            self._pts.append(buf.pts)
+            self._duration.append(buf.duration)
+            return Gst.FlowReturn.CUSTOM_SUCCESS
+        except Exception as e:
+            self.logger.error(f"FootballAnalyzer chain error: {e}")
+            return Gst.FlowReturn.ERROR
+
+    def do_sink_event(self, event):
+        if event.type == Gst.EventType.EOS:  # end of clip: analyse everything buffered
+            try:
+                self._run_pipeline_and_push()
+            except Exception as e:
+                self.logger.error(f"FootballAnalyzer EOS processing failed: {e}")
+                # Forward EOS regardless so the pipeline shuts down cleanly.
+        return GstBase.BaseTransform.do_sink_event(self, event)  # default handling, all events
+
+    def _run_pipeline_and_push(self):
+        """Analyse the buffered clip and push the annotated frames downstream.
+
+        The frame, PTS and duration buffers are always released on exit, even when
+        a stage raises, so a failed run cannot leak the clip or replay stale frames.
+        """
+        if not self._frames:
+            self.logger.info("FootballAnalyzer: no frames buffered, skipping")
+            return
+
+        try:
+            tracker = self._ensure_tracker()
+            n = len(self._frames)
+            self.logger.info(f"FootballAnalyzer: running pipeline on {n} frames")
+
+            # Empty property strings mean "not set"; the tracker itself checks
+            # that a stub path is set and exists before reading from it.
+            tracks_stub = self.tracks_stub_path or None
+            cam_stub = self.camera_motion_stub_path or None
+            tracks = tracker.get_object_tracks(
+                self._frames, read_from_stub=True, stub_path=tracks_stub
+            )
+            camera_motion = tracker.get_camera_motion(
+                self._frames, tracks, read_from_stub=True, stub_path=cam_stub
+            )
+            annotated = tracker.draw_annotations(
+                self._frames,
+                tracks,
+                camera_motion=camera_motion,
+                headshot_path=self.headshot_path or None,
+                logo_path=self.logo_path or None,
+                show_minimap=self.show_minimap,
+            )
+
+            if len(annotated) != n:
+                self.logger.warning(
+                    f"draw_annotations returned {len(annotated)} frames for {n} "
+                    "inputs; padding/truncating to match"
+                )
+                if len(annotated) < n:
+                    annotated = list(annotated) + [annotated[-1]] * (n - len(annotated))
+                else:
+                    annotated = annotated[:n]
+
+            for i, out in enumerate(annotated):
+                data = np.ascontiguousarray(out, dtype=np.uint8).tobytes()
+                outbuf = Gst.Buffer.new_allocate(None, len(data), None)
+                outbuf.fill(0, data)
+                outbuf.pts = self._pts[i]
+                outbuf.duration = self._duration[i]
+                ret = self.srcpad.push(outbuf)
+                if ret != Gst.FlowReturn.OK:
+                    self.logger.error(
+                        f"Pushing annotated frame {i} failed with {ret}; aborting"
+                    )
+                    break
+        finally:
+            # Release the clip whether or not the run succeeded.
+            self._frames.clear()
+            self._pts.clear()
+            self._duration.clear()
+
+
+if CAN_REGISTER_ELEMENT:  # False when any import in the guarded block above failed
+    GObject.type_register(FootballAnalyzer)
+    __gstelementfactory__ = (
+        "pyml_football_analyzer",  # element name used in pipeline descriptions
+        Gst.Rank.NONE,
+        FootballAnalyzer,
+    )
+else:
+    GlobalLogger().warning(
+        "The 'pyml_football_analyzer' element will not be registered because "
+        "required modules are missing."
+    )
diff --git a/plugins/python/football_overlay.py b/plugins/python/football_overlay.py
new file mode 100644
index 0000000..9dc4f42
--- /dev/null
+++ b/plugins/python/football_overlay.py
@@ -0,0 +1,1135 @@
+# FootballOverlay
+# Copyright (C) 2024-2026 Collabora Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+
+import os
+
+from log.global_logger import GlobalLogger
+
+CAN_REGISTER_ELEMENT = True  # cleared below when the bindings cannot be loaded
+try:
+    import re
+    import gi
+
+    gi.require_version("Gst", "1.0")  # raises ValueError if a typelib is missing
+    gi.require_version("GstBase", "1.0")
+    gi.require_version("GstVideo", "1.0")
+    gi.require_version("GstAnalytics", "1.0")
+    gi.require_version("GLib", "2.0")
+    from gi.repository import (
+        Gst,
+        GstBase,
+        GstVideo,
+        GstAnalytics,
+        GObject,
+        GLib,
+    )  # noqa: E402
+
+    from log.logger_factory import LoggerFactory  # noqa: E402
+
+    OVERLAY_CAPS = Gst.Caps.from_string(  # pad caps: the four 8-bit RGB+alpha layouts
+        "video/x-raw, format=(string){ RGBA, ARGB, BGRA, ABGR }"
+    )
+
+except (ImportError, ValueError) as e:  # ValueError: a require_version() call failed
+    CAN_REGISTER_ELEMENT = False  # NOTE(review): class below still needs Gst/GstBase
+    GlobalLogger().warning(
+        f"The 'pyml_football_overlay' element will not be available. Error: {e}"
+    )
+
+
+_FORMAT_ORDER = {  # caps format -> logical RGBA index stored in each buffer channel
+    "RGBA": (0, 1, 2, 3),
+    "ARGB": (3, 0, 1, 2),
+    "BGRA": (2, 1, 0, 3),
+    "ABGR": (3, 2, 1, 0),
+}
+
+_PALETTE = [  # NOTE(review): no use of this list is visible in this part of the file
+    (239, 71, 111, 255),
+    (255, 209, 102, 255),
+    (6, 214, 160, 255),
+    (17, 138, 178, 255),
+    (255, 107, 107, 255),
+    (78, 205, 196, 255),
+    (199, 125, 255, 255),
+    (255, 159, 28, 255),
+    (46, 196, 182, 255),
+    (118, 200, 247, 255),
+]
+
+_REFEREE_RGBA = (255, 215, 0, 255)  # gold; all colours here are logical RGBA (see _c)
+_BALL_RGBA = (0, 230, 0, 255)  # green, returned by _color_for for ball labels
+_PLAYER_RGBA = (0, 200, 255, 255)  # every non-referee when team_colors is off
+_RED_TEAM_RGBA = (255, 40, 40, 255)  # "red" jersey vote
+_BLUE_TEAM_RGBA = (40, 90, 255, 255)  # "blue" jersey vote
+_DEFAULT_RGBA = (235, 235, 235, 255)  # NOTE(review): no use visible in this chunk
+_BLACK_RGBA = (0, 0, 0, 255)  # id-badge text and the ball-triangle outline
+_HUD_BG_RGBA = (92, 41, 131, 255)  # presumably the HUD panel fill -- use not shown here
+_HUD_TEXT_RGBA = (64, 186, 47, 255)  # presumably the HUD text -- use not shown here
+_HIGHLIGHT_RGBA = (255, 255, 255, 255)  # presumably the focal marker -- use not shown
+
+
+def _is_ball(label):  # case-sensitive substring test: any label containing "ball"
+    return "ball" in label
+
+
+def _is_referee(label):  # "referee" anywhere in the label, or the exact label "ref"
+    return "referee" in label or label == "ref"
+
+
+class FootballOverlay(GstBase.BaseTransform):
+    """
+    Metadata-driven broadcast overlay (football_analysis style), streaming.
+
+    Reads upstream GstAnalytics detection/tracking metadata and draws: an
+    ellipse + optional id badge per subject, a gold ellipse for referees, a
+    green triangle on the ball, fading motion trails, and a focal-player HUD
+    with a headshot, accumulated ball contacts, and distance travelled.
+    """
+
+    __gstmetadata__ = (
+        "Football Overlay",
+        "Filter/Effect/Video",
+        "Broadcast-style detection/tracking overlay (ellipses, ball triangle, "
+        "trails, headshot HUD with ball contacts + distance) from GstAnalytics",
+        "Marcus Edel <marcus.edel@collabora.com>",
+    )
+
+    src_template = Gst.PadTemplate.new(
+        "src", Gst.PadDirection.SRC, Gst.PadPresence.ALWAYS, OVERLAY_CAPS.copy()
+    )
+    sink_template = Gst.PadTemplate.new(
+        "sink", Gst.PadDirection.SINK, Gst.PadPresence.ALWAYS, OVERLAY_CAPS.copy()
+    )
+    __gsttemplates__ = (src_template, sink_template)
+
+    show_labels = GObject.Property(
+        type=bool,
+        default=True,
+        nick="Show Labels",
+        blurb="Draw the class name above each object",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    show_ids = GObject.Property(
+        type=bool,
+        default=True,
+        nick="Show Track IDs",
+        blurb="Draw the track-id badge under each tracked object",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    trails = GObject.Property(
+        type=bool,
+        default=True,
+        nick="Show Trails",
+        blurb="Draw a fading motion trail behind each tracked object",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    trail_length = GObject.Property(
+        type=int,
+        default=30,
+        minimum=2,
+        maximum=300,
+        nick="Trail Length",
+        blurb="Number of recent positions kept in each motion trail",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    show_ball = GObject.Property(
+        type=bool,
+        default=False,
+        nick="Show Ball",
+        blurb="Draw the marker on the ball (the ball is still tracked for "
+        "contact counting either way)",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    show_hud = GObject.Property(
+        type=bool,
+        default=True,
+        nick="Show HUD",
+        blurb="Draw the focal-player HUD (headshot, label, contacts, distance)",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    headshot_path = GObject.Property(
+        type=str,
+        default="data/Chinedu-Obasi_2684938.jpg",
+        nick="Headshot Path",
+        blurb="Image shown in the HUD (empty to disable)",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    headshot_size = GObject.Property(
+        type=int,
+        default=90,
+        minimum=16,
+        maximum=512,
+        nick="Headshot Size",
+        blurb="Headshot square size in pixels",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    player_label = GObject.Property(
+        type=str,
+        default="Player #8",
+        nick="Player Label",
+        blurb="Static label drawn in the HUD",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    contact_pad_ratio = GObject.Property(
+        type=float,
+        default=0.25,
+        minimum=0.0,
+        maximum=5.0,
+        nick="Contact Pad Ratio",
+        blurb="Ball counts as a contact within this fraction of the player box size",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    contact_gap_frames = GObject.Property(
+        type=int,
+        default=5,
+        minimum=0,
+        maximum=1000,
+        nick="Contact Gap Frames",
+        blurb="Min frames between counted contacts for the same player",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    player_height = GObject.Property(
+        type=float,
+        default=1.8,
+        minimum=0.1,
+        maximum=10.0,
+        nick="Player Height (m)",
+        blurb="Assumed real-world height used to convert pixels to metres",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    min_confidence = GObject.Property(
+        type=float,
+        default=0.0,
+        minimum=0.0,
+        maximum=1.0,
+        nick="Min Confidence",
+        blurb="Skip detections whose confidence is below this threshold",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    class_names = GObject.Property(
+        type=str,
+        default="",
+        nick="Class Names",
+        blurb="Comma-separated names to map numeric labels (label_N) from the "
+        "onnx/objectdetector path, e.g. 'ball,goalkeeper,player,referee'",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    team_colors = GObject.Property(
+        type=bool,
+        default=True,
+        nick="Team Colors",
+        blurb="Colour players by jersey team (red/blue, per-track majority vote); "
+        "off draws all players one colour",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    draw_from_detections = GObject.Property(
+        type=bool,
+        default=False,
+        nick="Draw From Detections",
+        blurb="Draw ellipses on the raw per-frame detection boxes instead of the "
+        "tracker's boxes -- no Kalman drift, coasted phantoms or track-split "
+        "doubles. Team colour is then classified per frame; the HUD still uses "
+        "tracker metadata if present",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    merge_iou = GObject.Property(
+        type=float,
+        default=0.5,
+        minimum=0.0,
+        maximum=1.0,
+        nick="Merge IoU",
+        blurb="Collapse overlapping boxes (across classes) into one before "
+        "drawing, so one player isn't circled twice; a box is merged when its "
+        "IoU or containment with a kept box exceeds this (0 disables)",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    position_smoothing = GObject.Property(
+        type=float,
+        default=0.5,
+        minimum=0.0,
+        maximum=0.95,
+        nick="Position Smoothing",
+        blurb="Temporal EMA on drawn box positions (0=off, higher=smoother but "
+        "more lag). Boxes are associated frame-to-frame by proximity, so this "
+        "damps detection jitter and the steps from a detection interval > 1",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    highlight_focal = GObject.Property(
+        type=bool,
+        default=True,
+        nick="Highlight Focal Player",
+        blurb="Mark the focal player (the one shown in the HUD) on the pitch "
+        "with a chevron above their head and a bolder ellipse",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    focal_track_id = GObject.Property(
+        type=int,
+        default=-1,
+        minimum=-1,
+        maximum=100000,
+        nick="Focal Track ID",
+        blurb="Pin the focal/highlighted player to this track id; -1 = auto "
+        "(the player tracked the most, with hysteresis so it stays stable)",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    def __init__(self):
+        super().__init__()
+        self.logger = LoggerFactory.get(LoggerFactory.LOGGER_TYPE_GST)
+        self.set_in_place(True)
+        self.width = 0  # frame size; filled in by do_set_caps
+        self.height = 0
+        self._order = _FORMAT_ORDER["RGBA"]  # replaced in do_set_caps
+        # per-track state, accumulated across frames
+        self._trail = {}  # track_id -> list of recent foot points (x, y)
+        self._last_pt = {}  # track_id -> previous foot point
+        self._distance_px = {}  # track_id -> summed foot-point travel (px)
+        self._heights = []  # recent box heights, all tracks, capped at 600
+        self._widths = []  # recent box widths, all tracks, capped at 600
+        self._ell_w = {}  # track_id -> smoothed ellipse half-width (px)
+        self._contacts = {}  # track_id -> counted ball contacts
+        self._last_contact_frame = {}  # track_id -> frame index of the latest contact
+        self._frames_seen = {}  # track_id -> number of frames the track appeared in
+        self._track_label = {}  # track_id -> latest majority-voted label
+        self._class_votes = {}  # track_id -> {label: count}, for stable class
+        self._team_votes = {}  # track_id -> {"red"/"blue"/"ref": n}, jersey votes
+        self._frame = 0  # incremented once per _update_tracks call
+        self._focal = None  # current focal track id (sticky, for hysteresis)
+        self._headshot = None  # cached HUD image, already in buffer channel order
+        self._headshot_loaded = False  # True once a load has been attempted
+        self._inv_order = [0, 1, 2, 3]  # buffer-channel -> logical RGBA index
+        # Position-smoothing slots: {"box": np[x1,y1,x2,y2]} kept across frames
+        # and matched by proximity, so the drawn ellipse can be low-passed.
+        self._smooth_slots = []
+
+    def do_set_caps(self, incaps, outcaps):  # caches size + channel order; returns True
+        info = GstVideo.VideoInfo.new_from_caps(incaps)
+        self.width = info.width
+        self.height = info.height
+        fmt = info.finfo.name if info.finfo else "RGBA"  # no format info: assume RGBA
+        self._order = _FORMAT_ORDER.get(fmt, _FORMAT_ORDER["RGBA"])  # unknown: RGBA too
+        # buffer channel j holds logical[self._order[j]]; invert so we can pull
+        # logical R,G,B out of the buffer for jersey colour classification.
+        self._inv_order = [self._order.index(c) for c in range(4)]
+        self._headshot_loaded = False  # re-load in the new channel order
+        self.logger.info(f"FootballOverlay caps: {fmt} {self.width}x{self.height}")
+        return True
+
+    def _map_label(self, label):  # "label_N" -> Nth class_names entry; else unchanged
+        if self.class_names:
+            m = re.match(r"label_(\d+)$", label)
+            if m:
+                names = [s.strip() for s in self.class_names.split(",") if s.strip()]
+                i = int(m.group(1))  # indexes the list after empty items were dropped
+                if 0 <= i < len(names):
+                    return names[i]
+        return label  # no class_names, not "label_N", or N out of range
+
+    def _parse_label(self, full_label):  # -> (class label, track id or None)
+        core = full_label
+        m = re.match(r"stream_\d+_(.*)$", full_label)  # optional "stream_<n>_" prefix
+        if m:
+            core = m.group(1)
+        m = re.match(r"(.+)_id_(\d+)$", core)  # "<class>_id_<n>": tracked, class known
+        if m:
+            return self._map_label(m.group(1)), int(m.group(2))
+        m = re.match(r"id_(\d+)$", core)  # bare "id_<n>": tracked, class unknown
+        if m:
+            return "object", int(m.group(1))
+        return self._map_label(core or "object"), None  # no id suffix: untracked
+
+    def _read_metadata(self, buf):  # -> list of dicts: label, track_id, confidence, box
+        entries = []
+        meta = GstAnalytics.buffer_get_analytics_relation_meta(buf)
+        if not meta:
+            return entries  # buffer carries no analytics metadata
+        for index in range(GstAnalytics.relation_get_length(meta)):
+            ret, od_mtd = meta.get_od_mtd(index)
+            if not ret or od_mtd is None:
+                continue  # this relation entry is not an object detection
+            full_label = GLib.quark_to_string(od_mtd.get_obj_type()) or ""  # None-safe
+            presence, x, y, w, h, score = od_mtd.get_location()
+            if not presence:
+                continue
+            label, track_id = self._parse_label(full_label)  # "" parses as "object"
+            entries.append(
+                {
+                    "label": label.lower(),  # lower-cased for _is_ball / _is_referee
+                    "track_id": track_id,
+                    "confidence": score,
+                    "box": (x, y, x + w, y + h),  # (x1, y1, x2, y2) corners
+                }
+            )
+        return entries
+
+    @staticmethod
+    def _point_to_bbox_distance(px, py, box):
+        # Distance from (px, py) to the nearest point of `box`; 0 when inside.
+        left, top, right, bottom = box
+        gap_x, gap_y = max(left - px, 0.0, px - right), max(top - py, 0.0, py - bottom)
+        return (gap_x * gap_x + gap_y * gap_y) ** 0.5
+
+    def _ball_contact(self, players, ball_box):
+        """Track id of the player box nearest the ball centre, or None if too far."""
+        bx = (ball_box[0] + ball_box[2]) / 2.0  # ball centre
+        by = (ball_box[1] + ball_box[3]) / 2.0
+        best_tid, best_d, best_box = None, float("inf"), None
+        for tid, box in players.items():  # players: track_id -> (x1, y1, x2, y2)
+            d = self._point_to_bbox_distance(bx, by, box)
+            if d < best_d:  # strict: the first of several equally close players wins
+                best_tid, best_d, best_box = tid, d, box
+        if best_box is None:
+            return None
+        w = best_box[2] - best_box[0]
+        h = best_box[3] - best_box[1]
+        if best_d > self.contact_pad_ratio * max(w, h):  # pad scales with larger side
+            return None
+        return best_tid
+
+    def _update_tracks(self, entries, det_ball_box=None):  # -> set of active player ids
+        self._frame += 1
+        active = set()
+        players = {}  # track_id -> box, non-ball tracks seen this frame
+        ball_box = None
+        # Accumulate per-track class votes first so the stable label below
+        # already reflects this frame.
+        for e in entries:
+            tid = e["track_id"]
+            if tid is None:
+                continue
+            v = self._class_votes.setdefault(tid, {})
+            v[e["label"]] = v.get(e["label"], 0) + 1
+        for e in entries:
+            tid = e["track_id"]
+            if tid is None:
+                continue  # untracked detections carry no per-track state
+            label = self._stable_label(tid, e["label"])
+            if _is_ball(label):
+                ball_box = e["box"]  # with several ball tracks the last one wins
+                continue
+            active.add(tid)
+            players[tid] = e["box"]
+            self._track_label[tid] = label
+            self._frames_seen[tid] = self._frames_seen.get(tid, 0) + 1
+            x1, y1, x2, y2 = e["box"]
+            foot = (int((x1 + x2) / 2), int(y2))  # bottom-centre of the box
+            if y2 - y1 > 0:
+                self._heights.append(y2 - y1)
+                if len(self._heights) > 600:
+                    self._heights = self._heights[-600:]
+            self._update_ellipse_width(tid, x2 - x1)
+            prev = self._last_pt.get(tid)
+            if prev is not None:
+                self._distance_px[tid] = (  # add this frame's foot-point displacement
+                    self._distance_px.get(tid, 0.0)
+                    + ((foot[0] - prev[0]) ** 2 + (foot[1] - prev[1]) ** 2) ** 0.5
+                )
+            self._last_pt[tid] = foot
+            trail = self._trail.setdefault(tid, [])
+            trail.append(foot)
+            if len(trail) > self.trail_length:
+                del trail[: -self.trail_length]  # keep only the newest trail_length points
+
+        # Fall back to the detected ball if no tracked ball this frame.
+        if ball_box is None:
+            ball_box = det_ball_box
+
+        # Ball contacts (debounced per player), like football_analyzer.
+        if ball_box is not None and players:
+            tid = self._ball_contact(players, ball_box)
+            if tid is not None:
+                last = self._last_contact_frame.get(tid)
+                if last is None or (self._frame - last) > self.contact_gap_frames:
+                    self._contacts[tid] = self._contacts.get(tid, 0) + 1
+                self._last_contact_frame[tid] = self._frame  # refreshed on every touch
+
+        for tid in list(self._trail.keys()):  # forget drawing state of absent tracks
+            if tid not in active:
+                del self._trail[tid]
+                self._last_pt.pop(tid, None)  # so a returning track adds no jump
+                self._ell_w.pop(tid, None)
+        return active
+
+    def _update_ellipse_width(self, track_id, raw_w):
+        # Maintain the per-track ellipse width kept in self._ell_w. Widths that
+        # are not positive are ignored. Every accepted width also joins a
+        # rolling window (latest 600 samples, shared by all tracks) whose upper
+        # median bounds the new sample to [0.5x, 1.8x] of it, so one oversized
+        # box -- two players merged, or a drifting keep-alive prediction --
+        # can't balloon the circle for a frame.
+        if raw_w <= 0:
+            return
+        window = self._widths
+        window.append(raw_w)
+        if len(window) > 600:
+            window = self._widths = window[-600:]
+        median = sorted(window)[len(window) // 2]
+        bounded = min(max(raw_w, 0.5 * median), 1.8 * median)
+        # EMA (25% new, 75% old): steady frame-to-frame, yet still follows
+        # real perspective changes; a track's first sample is taken as-is.
+        previous = self._ell_w.get(track_id)
+        if previous is None:
+            self._ell_w[track_id] = bounded
+        else:
+            self._ell_w[track_id] = 0.25 * bounded + 0.75 * previous
+
+    def _px_per_meter(self):  # median recent box height / player_height, or None
+        if not self._heights:
+            return None  # no box heights recorded yet
+        import numpy as np
+
+        return float(np.median(self._heights)) / max(0.1, self.player_height)
+
+    def _focal_track(self):  # -> focal track id, or None when nothing qualifies
+        # Pin to an explicit track id if requested.
+        if self.focal_track_id >= 0:
+            return (
+                self.focal_track_id
+                if self.focal_track_id in self._frames_seen
+                else self._focal  # pinned id never seen: last auto pick (may be None)
+            )
+        keys = set(self._frames_seen)
+        if not keys:
+            return None
+
+        # Only consider *sustained* tracks. Otherwise a track that flickered for
+        # a few frames -- common when detection/tracking churns -- can win on a
+        # single ball contact and then show ~0 distance (it was barely tracked).
+        # The floor scales with elapsed frames, with a small absolute minimum.
+        floor = max(10, int(0.2 * self._frame))
+        candidates = [t for t in keys if self._frames_seen.get(t, 0) >= floor] or list(
+            keys  # fallback: no track has reached the floor yet
+        )
+
+        # Rank by ball contacts (the player most involved with the ball), with
+        # frames-seen as a tiebreak / pre-contact fallback (before anyone has
+        # touched the ball, the most-tracked player is shown).
+        def score(t):
+            return (self._contacts.get(t, 0), self._frames_seen.get(t, 0))
+
+        best = max(candidates, key=score)
+        # Stability: keep the current focal unless a challenger has *strictly
+        # more* contacts, so the highlight/HUD don't flip on ties or noise.
+        cur = self._focal
+        if (
+            cur is not None
+            and cur in candidates
+            and self._contacts.get(best, 0) <= self._contacts.get(cur, 0)
+        ):
+            best = cur
+        self._focal = best  # remembered for the next call's hysteresis check
+        return best
+
+    def _stable_label(self, track_id, fallback=""):
+        # Most-voted class for this track so far (the first-seen label wins a
+        # tie); `fallback` is returned while the track has no votes. Voting over
+        # the history keeps one-frame mislabels from flipping the marker.
+        tally = self._class_votes.get(track_id)
+        if tally:
+            return max(tally, key=tally.get)
+        return fallback
+
+    def _c(self, rgba):  # logical (R, G, B, A) -> tuple in the buffer's channel order
+        return tuple(rgba[i] for i in self._order)
+
+    def _team_color(self, track_id):  # -> logical RGBA tuple, or None if undecided
+        # Confident kit colour from a track's accumulated jersey votes, else
+        # None. Red/blue -> team; "ref" (distinctive non-team kit) -> gold.
+        # Requires a minimum number of votes AND a clear majority, so a few
+        # noisy frames can't decide the colour.
+        if track_id is None:
+            return None
+        c = self._team_votes.get(track_id)
+        if not c:
+            return None
+        red, blue, ref = c.get("red", 0), c.get("blue", 0), c.get("ref", 0)
+        total = red + blue + ref
+        if total < 4:  # fewer than four votes: stay undecided
+            return None
+        colors = {_RED_TEAM_RGBA: red, _BLUE_TEAM_RGBA: blue, _REFEREE_RGBA: ref}
+        color, n = max(colors.items(), key=lambda kv: kv[1])
+        return color if n >= 0.6 * total else None  # winner needs >= 60% of the votes
+
+    def _is_referee_track(self, track_id, fallback_label):
+        # Referee only when referee labels clearly dominate the track's class
+        # votes: at least 3 of them and at least 60% of all votes. A track with
+        # no id or no votes is judged by `fallback_label` alone.
+        tally = None if track_id is None else self._class_votes.get(track_id)
+        if not tally:
+            return _is_referee(fallback_label)
+        n_all = sum(tally.values())
+        n_ref = sum(n for name, n in tally.items() if _is_referee(name))
+        return n_all > 0 and n_ref >= 3 and n_ref >= 0.6 * n_all
+
+    def _color_for(self, label, track_id):  # -> logical RGBA tuple, or None (no marker)
+        # Colour by the track's *accumulated* jersey team (robust to per-frame
+        # noise). Referee/player only decides the fallback when the team is
+        # undecided: a referee keeps gold (stays visible), a player isn't drawn.
+        if _is_ball(label):
+            return _BALL_RGBA
+        if self.team_colors:
+            team = self._team_color(track_id)
+            if team is not None:
+                return team
+            return _REFEREE_RGBA if self._is_referee_track(track_id, label) else None
+        return _REFEREE_RGBA if _is_referee(label) else _PLAYER_RGBA  # team_colors off
+
+    @staticmethod
+    def _overlap(a, b):  # a, b: (x1, y1, x2, y2); 0.0 when they do not intersect
+        # max(IoU, intersection-over-smaller-area): catches both heavy overlap
+        # and a small duplicate box sitting inside a larger one.
+        ax1, ay1, ax2, ay2 = a
+        bx1, by1, bx2, by2 = b
+        iw = max(0.0, min(ax2, bx2) - max(ax1, bx1))  # intersection width
+        ih = max(0.0, min(ay2, by2) - max(ay1, by1))  # intersection height
+        inter = iw * ih
+        if inter <= 0.0:
+            return 0.0
+        area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1)
+        area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1)
+        union = area_a + area_b - inter
+        iou = inter / union if union > 0.0 else 0.0
+        smaller = min(area_a, area_b)
+        contain = inter / smaller if smaller > 0.0 else 0.0  # 1.0 = fully contained
+        return max(iou, contain)
+
+    @staticmethod
+    def _feet_close(a, b):  # a, b: (x1, y1, x2, y2) boxes -> bool
+        # True when two boxes' foot points (bottom-centre, where the ellipse is
+        # drawn) are within ~0.4 of the smaller box width. The ellipse is ~2x the
+        # box width, so near-coincident feet = one player circled twice even when
+        # the boxes' IoU is low. Genuinely adjacent players are ~a full width
+        # apart at the feet, so they're not merged.
+        fax, fay = (a[0] + a[2]) / 2.0, a[3]
+        fbx, fby = (b[0] + b[2]) / 2.0, b[3]
+        ref = max(1.0, min(a[2] - a[0], b[2] - b[0]))  # smaller width, at least 1 px
+        return ((fax - fbx) ** 2 + (fay - fby) ** 2) ** 0.5 < 0.4 * ref
+
+    def _merge_overlaps(self, entries):  # -> surviving entries, most confident first
+        # Class-agnostic greedy suppression: keep the most confident box, drop
+        # any later box that overlaps it past merge_iou OR sits at the same feet.
+        # Collapses a player circled twice (e.g. player+goalkeeper on one person,
+        # or two offset boxes) into one. The ball is never merged against players.
+        if self.merge_iou <= 0.0 or len(entries) < 2:
+            return entries  # disabled or nothing to merge: input returned unchanged
+        ordered = sorted(entries, key=lambda e: e["confidence"], reverse=True)
+        kept = []
+        for e in ordered:
+            if _is_ball(e["label"]):
+                kept.append(e)  # ball entries are always kept
+                continue
+            if any(
+                not _is_ball(k["label"])
+                and (
+                    self._overlap(e["box"], k["box"]) >= self.merge_iou
+                    or self._feet_close(e["box"], k["box"])
+                )
+                for k in kept
+            ):
+                continue  # duplicate of an already-kept, more confident box
+            kept.append(e)
+        return kept
+
+    def _assign_track_ids(self, draw_entries, track_entries):
+        # Give each drawn box a stable track id: track-mode boxes already carry
+        # one; detection-mode boxes borrow the id of the best-overlapping track
+        # (greedy, each track used once; ball and id-less track entries are
+        # skipped) so the badge and accumulated colour can use the tracker's id.
+        ids = [e["track_id"] for e in draw_entries]  # parallel list; None = no id
+        if not track_entries:
+            return ids
+        pairs = []  # (overlap, draw index, track id) candidates
+        for di, e in enumerate(draw_entries):
+            if e["track_id"] is not None or _is_ball(e["label"]):
+                continue
+            for t in track_entries:
+                if _is_ball(t["label"]) or t["track_id"] is None:
+                    continue  # an id-less entry has nothing to lend
+                ov = self._overlap(e["box"], t["box"])
+                if ov >= 0.3:  # minimum overlap for a box to borrow a track's id
+                    pairs.append((ov, di, t["track_id"]))
+        pairs.sort(key=lambda p: p[0], reverse=True)  # best overlaps claim ids first
+        used_draw, used_track = set(), set()
+        for _ov, di, tid in pairs:
+            if di in used_draw or tid in used_track:
+                continue
+            ids[di] = tid
+            used_draw.add(di)
+            used_track.add(tid)
+        return ids
+
+    def _smooth_boxes(self, np, entries):  # np: the numpy module, passed by the caller
+        # Temporal EMA on the boxes we're about to draw. Each box is matched to
+        # the nearest slot from last frame (by centre, within a size-relative
+        # gate) and pulled toward the new detection; slots not matched this
+        # frame are dropped (no phantoms). Damps jitter and interval steps. The
+        # ball is passed through unsmoothed so it never lags.
+        a = float(self.position_smoothing)  # weight of the previous (smoothed) box
+        if a <= 0.0 or not entries:
+            return entries  # smoothing off or nothing to draw: input returned as-is
+        used = set()  # slot indices claimed this frame
+        out = []
+        for e in entries:
+            if _is_ball(e["label"]):
+                out.append(e)
+                continue
+            box = np.array(e["box"], dtype=np.float64)
+            cx, cy = (box[0] + box[2]) / 2.0, (box[1] + box[3]) / 2.0
+            # Generous gate so a coherent interval-step jump still associates
+            # (and glides) without grabbing a different nearby player.
+            gate = 1.5 * max(box[2] - box[0], box[3] - box[1], 1.0)
+            best, best_d = None, gate
+            for idx, slot in enumerate(self._smooth_slots):
+                if idx in used:
+                    continue  # each slot serves at most one box per frame
+                sb = slot["box"]
+                d = (
+                    ((sb[0] + sb[2]) / 2.0 - cx) ** 2
+                    + ((sb[1] + sb[3]) / 2.0 - cy) ** 2
+                ) ** 0.5
+                if d < best_d:
+                    best, best_d = idx, d
+            if best is None:  # no slot inside the gate: start a new one, unsmoothed
+                self._smooth_slots.append({"box": box.copy()})
+                used.add(len(self._smooth_slots) - 1)
+                smoothed = box
+            else:
+                used.add(best)
+                slot = self._smooth_slots[best]
+                slot["box"] = a * slot["box"] + (1.0 - a) * box
+                smoothed = slot["box"]
+            ne = dict(e)  # shallow copy, so the caller's entry keeps its raw box
+            ne["box"] = (
+                float(smoothed[0]),
+                float(smoothed[1]),
+                float(smoothed[2]),
+                float(smoothed[3]),
+            )
+            out.append(ne)
+        self._smooth_slots = [s for i, s in enumerate(self._smooth_slots) if i in used]
+        return out
+
+    def _detection_color(self, cv2, np, frame, label, box):  # -> RGBA tuple or None
+        # Colour a raw detection box (no track id) by its jersey team, classified
+        # from this frame -- referees included. When the jersey isn't clearly a
+        # team colour, a referee falls back to gold (so real refs stay visible)
+        # and a player isn't drawn (matching the track-mode behaviour).
+        ref = _is_referee(label)
+        if not self.team_colors:
+            return _REFEREE_RGBA if ref else _PLAYER_RGBA
+        vote = self._classify_jersey(cv2, np, frame, box)  # "red"/"blue"/"ref"/None
+        if vote == "red":
+            return _RED_TEAM_RGBA
+        if vote == "blue":
+            return _BLUE_TEAM_RGBA
+        if vote == "ref":
+            return _REFEREE_RGBA
+        return _REFEREE_RGBA if ref else None  # jersey undecided: fall back on the label
+
+    def _classify_jersey(self, cv2, np, frame, box):  # cv2/np: modules from the caller
+        # Dominant jersey colour in the torso patch -> "red"/"blue"/"ref"/None
+        # (HSV). "ref" is a distinctive non-team kit colour (yellow/orange or
+        # pink/magenta) -- chosen to avoid grass-green and the red/blue teams --
+        # so the referee is identified by its kit colour, not the class label.
+        x1, y1, x2, y2 = (int(v) for v in box)
+        h_box, w_box = y2 - y1, x2 - x1
+        if h_box <= 0 or w_box <= 0:
+            return None
+        jy1, jy2 = y1 + int(0.15 * h_box), y1 + int(0.55 * h_box)  # 15%-55% of height
+        jx1, jx2 = x1 + int(0.25 * w_box), x1 + int(0.75 * w_box)  # middle half of width
+        H, W = frame.shape[:2]
+        jy1, jy2 = max(0, jy1), min(H, jy2)  # clip the patch to the frame
+        jx1, jx2 = max(0, jx1), min(W, jx2)
+        if jy2 - jy1 < 3 or jx2 - jx1 < 3:
+            return None  # patch under 3 px on a side: too small to classify
+        # logical RGB from the buffer's channel order, then HSV
+        rgb = np.ascontiguousarray(frame[jy1:jy2, jx1:jx2][:, :, self._inv_order[:3]])
+        hsv = cv2.cvtColor(rgb, cv2.COLOR_RGB2HSV)  # assumes 8-bit frame (hue 0-179)
+        s_v = (hsv[..., 1] > 80) & (hsv[..., 2] > 50)  # saturated and bright enough
+        h = hsv[..., 0]
+        red = int((((h <= 10) | (h >= 170)) & s_v).sum())  # red wraps around hue 0
+        blue = int(((h >= 100) & (h <= 130) & s_v).sum())
+        # Referee kit: yellow/orange (~18-34) or pink/magenta (~145-165). These
+        # bands skip grass-green (~40-90) and the red/blue team bands.
+        ref = int(((((h >= 18) & (h <= 34)) | ((h >= 145) & (h <= 165))) & s_v).sum())
+        min_pixels = max(20, int(0.02 * rgb.shape[0] * rgb.shape[1]))  # >= 2% of patch
+        counts = {"red": red, "blue": blue, "ref": ref}
+        best = max(counts, key=counts.get)  # ties resolve in the order red, blue, ref
+        if counts[best] < min_pixels:
+            return None
+        return best
+
+    def _load_headshot(self, cv2, np):  # -> cached (sz, sz, 4) uint8 image, or None
+        if self._headshot_loaded:
+            return self._headshot
+        self._headshot_loaded = True  # set first: a failed load is not retried until reset
+        self._headshot = None
+        path = self.headshot_path
+        if not path or not os.path.exists(path):
+            if path:  # an empty path means "disabled", so only a set path warns
+                self.logger.warning(f"headshot not found: {path}")
+            return None
+        img = cv2.imread(path)  # BGR
+        if img is None:  # file exists but could not be decoded; no warning is logged
+            return None
+        sz = int(self.headshot_size)
+        img = cv2.resize(img, (sz, sz), interpolation=cv2.INTER_AREA)
+        rgb = img[:, :, ::-1]  # BGR -> RGB
+        alpha = np.full((sz, sz, 1), 255, dtype=np.uint8)  # fully opaque alpha plane
+        rgba = np.concatenate([rgb, alpha], axis=2).astype(np.uint8)  # logical RGBA
+
+        self._headshot = np.ascontiguousarray(rgba[:, :, list(self._order)])
+        return self._headshot
+
+    def _draw_trail(self, cv2, np, frame, points, rgba):  # open polyline through points
+        if len(points) < 2:
+            return  # a line needs at least two points
+        pts = np.array(points, dtype=np.int32).reshape(-1, 1, 2)
+        cv2.polylines(frame, [pts], False, self._c(rgba), 2, cv2.LINE_AA)  # solid, no fade
+
+    def _draw_ellipse(self, cv2, frame, box, rgba, track_id):  # foot arc + id badge
+        x1, y1, x2, y2 = box
+        y_bottom = int(y2)
+        x_center = int((x1 + x2) / 2)
+        # Prefer the per-track smoothed width so the ellipse stays stable even
+        # when a single detection box is momentarily oversized.
+        smoothed = self._ell_w.get(track_id)
+        width = max(1, int(smoothed if smoothed is not None else x2 - x1))
+        color = self._c(rgba)
+        cv2.ellipse(
+            frame,
+            (x_center, y_bottom),  # centred on the foot point (bottom-centre of the box)
+            (width, max(1, int(0.35 * width))),  # axes argument: (width, 0.35 * width)
+            0.0,
+            -45,  # start angle
+            235,  # end angle: an arc, not a closed ellipse
+            color,
+            2,
+            cv2.LINE_AA,
+        )
+        if self.show_ids and track_id is not None:
+            rect_w, rect_h = 40, 18  # fixed-size badge, centred 15 px below the foot
+            x1r = x_center - rect_w // 2
+            x2r = x_center + rect_w // 2
+            y1r = y_bottom - rect_h // 2 + 15
+            y2r = y_bottom + rect_h // 2 + 15
+            cv2.rectangle(frame, (x1r, y1r), (x2r, y2r), color, cv2.FILLED)
+            tx = x1r + 12 - (10 if track_id > 99 else 0)  # shift left for 3+ digit ids
+            cv2.putText(
+                frame,
+                str(track_id),
+                (tx, y1r + 14),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.5,
+                self._c(_BLACK_RGBA),
+                2,
+                cv2.LINE_AA,
+            )
+
+    def _draw_triangle(self, cv2, np, frame, box, rgba):
+        """Draw a filled, black-outlined triangle pointing down at the box top."""
+        left, top, right, _bottom = box
+        tx, ty = int((left + right) / 2), int(top)
+        tri = [np.array([[tx, ty], [tx - 10, ty - 20], [tx + 10, ty - 20]], np.int32)]
+        for ink, thickness in ((rgba, cv2.FILLED), (_BLACK_RGBA, 2)):
+            cv2.drawContours(frame, tri, 0, self._c(ink), thickness)
+
+    def _draw_focal_marker(self, cv2, np, frame, box):
+        """Flag the focal (HUD) player: a chevron above the box, a bold foot ring.
+
+        Args:
+            cv2: The OpenCV module.
+            np: The NumPy module.
+            frame: Image array drawn on in place.
+            box: (x1, y1, x2, y2) corners of the focal player's drawn box.
+        """
+        left, top, right, bottom = box
+        mid_x = int((left + right) / 2)
+        # Downward-pointing chevron: tip 10 px above the box, 16 px half-width,
+        # 24 px tall, filled in the highlight colour with a black outline.
+        apex_y = int(top) - 10
+        half = 16
+        base_y = apex_y - int(half * 1.5)
+        chevron = [
+            np.array(
+                [[mid_x, apex_y], [mid_x - half, base_y], [mid_x + half, base_y]],
+                dtype=np.int32,
+            )
+        ]
+        cv2.drawContours(frame, chevron, 0, self._c(_HIGHLIGHT_RGBA), cv2.FILLED)
+        cv2.drawContours(frame, chevron, 0, self._c(_BLACK_RGBA), 2)
+        # Thicker (4 px) highlight ring at the feet, sized from the raw box
+        # width and drawn as an open arc from -45 to 235 degrees.
+        foot = (mid_x, int(bottom))
+        ring_w = max(1, int(right - left))
+        ring_axes = (ring_w, max(1, int(0.35 * ring_w)))
+        ring_ink = self._c(_HIGHLIGHT_RGBA)
+        cv2.ellipse(frame, foot, ring_axes, 0.0, -45, 235, ring_ink, 4, cv2.LINE_AA)
+
+    def _draw_label(self, cv2, frame, box, label, rgba):
+        """Write ``label`` just above the box's top-left corner.
+
+        Args:
+            box: (x1, y1, x2, y2); only the top-left corner is used.
+            rgba: Logical text colour, converted via ``self._c``.
+        """
+        left, top, _right, _bottom = box
+        # Baseline sits 6 px above the box top, clamped to y >= 12.
+        origin = (int(left), max(12, int(top) - 6))
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        cv2.putText(frame, label, origin, font, 0.5, self._c(rgba), 1, cv2.LINE_AA)
+
+    def _draw_hud(self, cv2, frame, contacts, distance_m, rgba, headshot):
+        """Draw the focal-player HUD panel at the frame's top-left corner.
+
+        The headshot paste is clipped at the frame's bottom and right edges.
+
+        Args:
+            cv2: The OpenCV module.
+            frame: Image array drawn on in place.
+            contacts: Ball-contact count shown on the second text line.
+            distance_m: Distance shown with one decimal on the third line.
+            rgba: Logical colour of the panel and headshot borders.
+            headshot: Optional image pasted at the panel's left, or None.
+        """
+        ox, oy = 10, 10
+        font, aa = cv2.FONT_HERSHEY_SIMPLEX, cv2.LINE_AA
+        has_face = headshot is not None
+        # The panel widens to fit a headshot; text starts to the right of it.
+        if has_face:
+            face_h, face_w = headshot.shape[:2]
+            panel = (face_w + 280, max(110, face_h + 20))
+            text_x = ox + face_w + 20
+        else:
+            panel = (320, 100)
+            text_x = ox + 12
+        near, far = (ox, oy), (ox + panel[0], oy + panel[1])
+        # Filled background first, then a 2 px border in the caller's colour.
+        cv2.rectangle(frame, near, far, self._c(_HUD_BG_RGBA), cv2.FILLED)
+        cv2.rectangle(frame, near, far, self._c(rgba), 2)
+        if has_face:
+            # Paste the headshot 10 px inside the panel, clipped to the frame.
+            top, left = oy + 10, ox + 10
+            frame_h, frame_w = frame.shape[:2]
+            ph, pw = min(face_h, frame_h - top), min(face_w, frame_w - left)
+            if ph > 0 and pw > 0:
+                frame[top : top + ph, left : left + pw] = headshot[:ph, :pw]
+                edge = (left + pw, top + ph)
+                cv2.rectangle(frame, (left, top), edge, self._c(rgba), 2)
+        ink = self._c(_HUD_TEXT_RGBA)
+        # Three text rows: player label (larger, bolder), contacts, distance.
+        cv2.putText(frame, self.player_label, (text_x, oy + 28), font, 0.7, ink, 2, aa)
+        line2 = f"Ball contacts: {contacts}"
+        cv2.putText(frame, line2, (text_x, oy + 58), font, 0.6, ink, 1, aa)
+        line3 = f"Distance: {distance_m:.1f} m"
+        cv2.putText(frame, line3, (text_x, oy + 85), font, 0.6, ink, 1, aa)
+
+    def do_transform_ip(self, buf):
+        """Read buf's metadata, update track state and draw the overlay in place."""
+        try:
+            import numpy as np
+
+            all_entries = self._read_metadata(buf)
+            # The buffer carries both the detector's boxes (track_id None) and
+            # the tracker's boxes (track_id set). Tracking state/HUD always use
+            # the tracked entries; what we *draw* depends on draw_from_detections.
+            track_entries = [e for e in all_entries if e["track_id"] is not None]
+            det_entries = [e for e in all_entries if e["track_id"] is None]
+
+            # Ball position for contact counting: prefer a tracked ball, else
+            # fall back to the strongest ball *detection* (the ball is small and
+            # fast, so it often isn't tracked) -- so contacts still get counted.
+            det_ball_box = None
+            best_ball = -1.0
+            for e in det_entries:
+                if _is_ball(e["label"]) and e["confidence"] > best_ball:
+                    best_ball, det_ball_box = e["confidence"], e["box"]
+
+            # Per-track state (votes, contacts, distance, focal) from the tracker.
+            active = self._update_tracks(
+                track_entries if track_entries else all_entries, det_ball_box
+            )
+
+            if self.draw_from_detections:
+                draw_entries = list(det_entries)
+                # Bridge missed detections: when the detector drops a player for
+                # a frame the circle would flicker, so also draw every non-ball
+                # track that no non-ball detection overlaps (>= 0.3). Detections
+                # still drive everything they cover; tracks only fill the gaps.
+                if track_entries:
+                    covered = set()
+                    for d in det_entries:
+                        if _is_ball(d["label"]):
+                            continue
+                        for t in track_entries:
+                            if t["track_id"] in covered or _is_ball(t["label"]):
+                                continue
+                            if self._overlap(d["box"], t["box"]) >= 0.3:
+                                covered.add(t["track_id"])
+                    draw_entries += [
+                        t
+                        for t in track_entries
+                        if not _is_ball(t["label"]) and t["track_id"] not in covered
+                    ]
+            else:
+                draw_entries = track_entries if track_entries else det_entries
+            # min-confidence gates only what we *draw* (tracks carry conf 1.0, so
+            # they're unaffected); the contact math above used the raw detections.
+            if self.min_confidence > 0.0:
+                draw_entries = [
+                    e for e in draw_entries if e["confidence"] >= self.min_confidence
+                ]
+            # Collapse overlapping boxes so one player isn't circled twice,
+            # then low-pass the positions so the circle glides.
+            draw_entries = self._merge_overlaps(draw_entries)
+            draw_entries = self._smooth_boxes(np, draw_entries)
+            # No metadata at all this frame: return without mapping the buffer.
+            if not all_entries:
+                return Gst.FlowReturn.OK
+
+            import cv2
+
+            ok, mapinfo = buf.map(Gst.MapFlags.WRITE)
+            if not ok:
+                self.logger.error("Failed to map buffer for writing")
+                return Gst.FlowReturn.ERROR
+            try:
+                frame = np.frombuffer(
+                    mapinfo.data, dtype=np.uint8, count=self.height * self.width * 4
+                ).reshape(self.height, self.width, 4)  # (H, W, 4) view of the buffer
+
+                # Jersey team voting first, so trails/ellipses use this frame's
+                # vote (track mode; detection mode classifies per box at draw).
+                # Referees are voted on too -- their colour comes from the jersey
+                # (gold only as the fallback), not the class label.
+                if self.team_colors:
+                    for e in track_entries:
+                        tid = e["track_id"]
+                        lab = self._stable_label(tid, e["label"])
+                        if _is_ball(lab):
+                            continue
+                        vote = self._classify_jersey(cv2, np, frame, e["box"])
+                        if vote:
+                            tv = self._team_votes.setdefault(
+                                tid, {"red": 0, "blue": 0, "ref": 0}
+                            )
+                            tv[vote] = tv.get(vote, 0) + 1
+
+                if self.trails:
+                    for tid in active:
+                        rgba = self._color_for(self._track_label.get(tid, ""), tid)
+                        if rgba is None:
+                            continue
+                        self._draw_trail(cv2, np, frame, self._trail.get(tid, []), rgba)
+
+                # Which drawn box is the focal (HUD) player? Match the focal
+                # track's box to the nearest drawn box so we can highlight it
+                # even when drawing from detections (no track id on the box).
+                focal_idx = None
+                if self.highlight_focal:
+                    focal_tid = self._focal_track()
+                    focal_box = None
+                    if focal_tid is not None:
+                        for t in track_entries:
+                            if t["track_id"] == focal_tid:
+                                focal_box = t["box"]
+                                break
+                    if focal_box is not None:
+                        best = 0.0
+                        for i, e in enumerate(draw_entries):
+                            if _is_ball(e["label"]):
+                                continue
+                            ov = self._overlap(e["box"], focal_box)
+                            if ov > best:
+                                best, focal_idx = ov, i
+
+                # Stable track id per drawn box (detection boxes borrow the id of
+                # the track they overlap) -- used for the id badge and to look up
+                # the track's accumulated colour.
+                draw_ids = self._assign_track_ids(draw_entries, track_entries)
+
+                for i, e in enumerate(draw_entries):
+                    box = e["box"]
+                    badge_id = draw_ids[i]
+                    # Colour comes from the track's stable identity (class plus
+                    # accumulated team votes) whenever the box maps to a track
+                    # (its own id, or the matched id in detection mode), which
+                    # damps per-frame noise; unmatched detections use this frame.
+                    color_tid = e["track_id"] if e["track_id"] is not None else badge_id
+                    if color_tid is not None:
+                        label = self._stable_label(color_tid, e["label"])
+                    else:
+                        label = e["label"]
+                    if _is_ball(label):
+                        if self.show_ball:
+                            self._draw_triangle(cv2, np, frame, box, _BALL_RGBA)
+                        continue
+                    if color_tid is not None:
+                        rgba = self._color_for(label, color_tid)
+                    else:
+                        rgba = self._detection_color(cv2, np, frame, label, box)
+                    if rgba is None:
+                        continue
+                    self._draw_ellipse(cv2, frame, box, rgba, badge_id)
+                    if i == focal_idx:
+                        self._draw_focal_marker(cv2, np, frame, box)
+                    if self.show_labels:
+                        self._draw_label(cv2, frame, box, label, rgba)
+
+                if self.show_hud:
+                    focal = self._focal_track()
+                    if focal is not None:
+                        ppm = self._px_per_meter()
+                        dist_m = (
+                            (self._distance_px.get(focal, 0.0) / ppm) if ppm else 0.0
+                        )
+                        hud_rgba = (
+                            self._color_for(self._track_label.get(focal, ""), focal)
+                            or _DEFAULT_RGBA
+                        )
+                        self._draw_hud(
+                            cv2,
+                            frame,
+                            self._contacts.get(focal, 0),
+                            dist_m,
+                            hud_rgba,
+                            self._load_headshot(cv2, np),
+                        )
+            finally:
+                buf.unmap(mapinfo)
+
+            return Gst.FlowReturn.OK
+
+        except Exception as e:  # any failure is logged and returned as a flow error
+            self.logger.error(f"FootballOverlay transform error: {e}")
+            return Gst.FlowReturn.ERROR
+
+
+if not CAN_REGISTER_ELEMENT:
+    # Required modules are missing: warn and leave the element unregistered.
+    GlobalLogger().warning(
+        "The 'pyml_football_overlay' element will not be registered because "
+        "required modules are missing."
+    )
+else:
+    # Register the GObject type, then publish the factory description as a
+    # (name, rank, class) tuple in the module-level __gstelementfactory__
+    # (presumably what the GStreamer Python plugin loader looks up).
+    GObject.type_register(FootballOverlay)
+    __gstelementfactory__ = ("pyml_football_overlay", Gst.Rank.NONE, FootballOverlay)
diff --git a/plugins/python/objectdetector.py b/plugins/python/objectdetector.py
index a429eb5..9272295 100644
--- a/plugins/python/objectdetector.py
+++ b/plugins/python/objectdetector.py
@@ -46,12 +46,40 @@ class ObjectDetector(BaseObjectDetector):
         "Aaron Boxer <aaron.boxer@collabora.com>",
     )
 
+    confidence = GObject.Property(
+        type=float,
+        default=0.25,
+        minimum=0.0,
+        maximum=1.0,
+        nick="Confidence Threshold",
+        blurb="Minimum detection confidence for the decoder post-process "
+        "(anchor_free); lower = more (and weaker) detections",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    nms_iou = GObject.Property(
+        type=float,
+        default=0.45,
+        minimum=0.0,
+        maximum=1.0,
+        nick="NMS IoU",
+        blurb="NMS IoU threshold for the decoder post-process; higher keeps "
+        "more overlapping boxes",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
     def __init__(self):
         super().__init__()
         self.logger.info(
             "ObjectDetector created without a model. Please set the 'model-name' property."
         )
 
+    def do_forward(self, frames):
+        """Sync the confidence/NMS thresholds onto the engine, then forward."""
+        engine = self.engine
+        if engine:
+            engine.conf, engine.iou = self.confidence, self.nms_iou
+        return super().do_forward(frames)
+
 
 if CAN_REGISTER_ELEMENT:
     GObject.type_register(ObjectDetector)
diff --git a/plugins/python/tracker.py b/plugins/python/tracker.py
index ad0db4d..7ff3520 100644
--- a/plugins/python/tracker.py
+++ b/plugins/python/tracker.py
@@ -130,12 +130,137 @@ def iou_batch(bb_det, bb_trk):
 class SortTracker:
     """SORT/ByteTrack multi-object tracker using IoU + Kalman filtering."""
 
-    def __init__(self, max_age=30, min_hits=3, iou_threshold=0.3):
+    def __init__(
+        self,
+        max_age=30,
+        min_hits=3,
+        iou_threshold=0.3,
+        keep_alive=2,
+        new_track_conf=0.25,
+        camera_motion=True,
+        dup_iou=0.8,
+    ):
         self.max_age = max_age
         self.min_hits = min_hits
         self.iou_threshold = iou_threshold
+        # ByteTrack-style activation gate: update() only starts a new track from
+        # a detection scoring at least new_track_conf. Weaker boxes can still
+        # *continue* an existing track via matching, but never spawn a new one.
+        self.new_track_conf = new_track_conf
+        # Keep emitting a confirmed track (with its Kalman-predicted box) for up
+        # to keep_alive frames after a missed detection — bridges flicker so the
+        # overlay doesn't blink when the detector drops a box for a frame or two.
+        self.keep_alive = keep_alive
+        # Camera-motion compensation: estimate a global image transform (shift,
+        # zoom, rotation) from the tracks that matched, then re-try matching the
+        # leftovers with their predictions warped by it. Re-attaches players in
+        # a camera move instead of leaving a track behind and spawning a duplicate.
+        self.camera_motion = camera_motion
+        # Two live tracks overlapping by more than this IoU are duplicates; the
+        # weaker one is dropped (cf. ByteTrack's remove_duplicate_stracks).
+        self.dup_iou = dup_iou
         self.trackers = []
 
+    @staticmethod
+    def _center(bbox):  # bbox is treated as [x, y, w, h]: origin plus half the size
+        return (bbox[0] + 0.5 * bbox[2], bbox[1] + 0.5 * bbox[3])
+
+    def _estimate_motion(self, matches, predicted, det_bboxes):
+        """Estimate this frame's global camera motion from the matched tracks.
+
+        Fits a 2D similarity transform (translation, uniform scale, rotation)
+        from each matched track's predicted centre to its detection's centre,
+        using OpenCV RANSAC so players moving against the consensus are outliers.
+        Without a usable fit (scale in [0.5, 2.0]) a median translation is used.
+
+        Returns:
+            A callable mapping a box to its warped box, or None when there are
+            fewer than 3 matches or the fallback shift is under 1 px per axis.
+        """
+        import numpy as np
+
+        if len(matches) < 3:
+            return None
+        centre = self._center
+        src = np.array([centre(predicted[ti]) for _, ti in matches], np.float32)
+        dst = np.array([centre(det_bboxes[di]) for di, _ in matches], np.float32)
+        try:
+            import cv2
+
+            affine, _inliers = cv2.estimateAffinePartial2D(
+                src, dst, method=cv2.RANSAC, ransacReprojThreshold=5.0
+            )
+        except Exception:  # no OpenCV, or the fit raised: use the fallback below
+            affine = None
+        if affine is not None:
+            scale = float(np.hypot(affine[0, 0], affine[0, 1]))
+            # Reject implausible fits (e.g. too few or noisy correspondences).
+            if 0.5 <= scale <= 2.0:
+
+                def warp(box):
+                    (a, b, tx), (c, d, ty) = affine
+                    cx, cy = centre(box)
+                    w, h = box[2] * scale, box[3] * scale
+                    x0 = a * cx + b * cy + tx - w / 2.0
+                    y0 = c * cx + d * cy + ty - h / 2.0
+                    return np.array([x0, y0, w, h])
+
+                return warp
+
+        # Fallback: robust per-axis median translation (pan/tilt only).
+        shift = np.median(dst - src, axis=0)
+        tx, ty = float(shift[0]), float(shift[1])
+        if abs(tx) < 1.0 and abs(ty) < 1.0:
+            return None
+        return lambda box: np.array([box[0] + tx, box[1] + ty, box[2], box[3]])
+
+    def _associate(self, det_bboxes, det_idxs, trk_idxs, trk_boxes, detections):
+        """Match detection and tracker subsets by IoU (optimal assignment); pairs
+        reaching self.iou_threshold update their tracker. Returns (det_i, trk_i)s."""
+        from scipy.optimize import linear_sum_assignment
+
+        if not (det_idxs and trk_idxs):
+            return []
+        overlap = iou_batch(
+            [det_bboxes[d] for d in det_idxs], [trk_boxes[t] for t in trk_idxs]
+        )
+        if overlap.size == 0:
+            return []
+        accepted = []
+        for row, col in zip(*linear_sum_assignment(1.0 - overlap)):
+            if not overlap[row, col] >= self.iou_threshold:
+                continue
+            det_i, trk_i = det_idxs[row], trk_idxs[col]
+            tracker, detection = self.trackers[trk_i], detections[det_i]
+            tracker.update(detection[:4])
+            tracker.label_quark = detection[5]
+            accepted.append((det_i, trk_i))
+        return accepted
+
+    def _suppress_duplicates(self):
+        """Drop the weaker of any two live tracks overlapping by > dup_iou."""
+        from itertools import combinations
+
+        if len(self.trackers) < 2:
+            return
+        boxes = [t.get_bbox() for t in self.trackers]
+        iou_matrix = iou_batch(boxes, boxes)
+        remove = set()
+        for i, j in combinations(range(len(boxes)), 2):
+            # Skip any pair with an already-dropped member: once a track has
+            # been dropped it must not go on suppressing other tracks, which
+            # could otherwise delete a track whose only rival is itself gone.
+            if i in remove or j in remove or not iou_matrix[i, j] > self.dup_iou:
+                continue
+            ti, tj = self.trackers[i], self.trackers[j]
+            # Keep the better track: matched more recently, then more hits;
+            # on a tie the lower-index track is kept.
+            ki = (ti.time_since_update, -ti.hits)
+            kj = (tj.time_since_update, -tj.hits)
+            remove.add(j if ki <= kj else i)
+        if remove:
+            self.trackers = [t for k, t in enumerate(self.trackers) if k not in remove]
+
     def update(self, detections):
         """
         Update tracks with new detections.
@@ -147,54 +272,66 @@ def update(self, detections):
             list of (track_id, bbox, label_quark) for confirmed tracks
         """
         import numpy as np
-        from scipy.optimize import linear_sum_assignment
 
         # Predict new locations for existing tracks
-        predicted = []
         to_remove = []
         for i, trk in enumerate(self.trackers):
-            pred = trk.predict()
-            if np.any(np.isnan(pred)):
+            if np.any(np.isnan(trk.predict())):
                 to_remove.append(i)
-            else:
-                predicted.append(pred)
         for i in reversed(to_remove):
             self.trackers.pop(i)
 
-        # Build cost matrix using IoU
         det_bboxes = [d[:4] for d in detections] if len(detections) > 0 else []
-        iou_matrix = iou_batch(det_bboxes, predicted)
-        cost_matrix = 1.0 - iou_matrix
-
-        # Hungarian assignment
-        matched_det = set()
-        matched_trk = set()
-        if cost_matrix.size > 0:
-            row_ind, col_ind = linear_sum_assignment(cost_matrix)
-            for r, c in zip(row_ind, col_ind):
-                if iou_matrix[r, c] >= self.iou_threshold:
-                    matched_det.add(r)
-                    matched_trk.add(c)
-                    self.trackers[c].update(detections[r][:4])
-                    # Store latest label quark on tracker
-                    self.trackers[c].label_quark = detections[r][5]
-
-        # Create new tracks for unmatched detections
-        for d_idx in range(len(detections)):
+        n_det = len(det_bboxes)
+        n_trk = len(self.trackers)
+        # Predicted box per tracker, captured before any update this frame.
+        predicted = [self.trackers[i].get_bbox() for i in range(n_trk)]
+
+        # 1) First association on the raw predictions.
+        matches = self._associate(
+            det_bboxes, list(range(n_det)), list(range(n_trk)), predicted, detections
+        )
+        matched_det = {di for di, _ in matches}
+        matched_trk = {ti for _, ti in matches}
+
+        # 2) Camera-motion compensation: fit a global image transform (pan, zoom
+        # and rotation) from the tracks that matched, apply it to the leftover
+        # predictions, and re-match. This recovers tracks during camera moves
+        # instead of leaving them behind and spawning duplicates.
+        if self.camera_motion:
+            warp = self._estimate_motion(matches, predicted, det_bboxes)
+            if warp is not None:
+                rem_trk = [i for i in range(n_trk) if i not in matched_trk]
+                rem_det = [d for d in range(n_det) if d not in matched_det]
+                if rem_trk and rem_det:
+                    shifted = {i: warp(predicted[i]) for i in rem_trk}
+                    m2 = self._associate(
+                        det_bboxes, rem_det, rem_trk, shifted, detections
+                    )
+                    matched_det.update(di for di, _ in m2)
+
+        # Create new tracks for unmatched detections, but only from confident
+        # ones (ByteTrack activation gate) so weak/ghost boxes don't start a
+        # phantom track that gets drawn as a stray circle.
+        for d_idx in range(n_det):
             if d_idx not in matched_det:
+                if detections[d_idx][4] < self.new_track_conf:
+                    continue
                 trk = KalmanBoxTracker(detections[d_idx][:4])
                 trk.label_quark = detections[d_idx][5]
                 self.trackers.append(trk)
 
-        # Remove dead tracks
+        # Remove dead tracks, then drop duplicate tracks sitting on one object.
         self.trackers = [
             t for t in self.trackers if t.time_since_update <= self.max_age
         ]
+        self._suppress_duplicates()
 
-        # Return confirmed tracks
+        # Return confirmed tracks, including ones that missed a detection this
+        # frame (predicted box) for up to keep_alive frames — prevents flicker.
         results = []
         for trk in self.trackers:
-            if trk.hits >= self.min_hits and trk.time_since_update == 0:
+            if trk.hits >= self.min_hits and trk.time_since_update <= self.keep_alive:
                 results.append((trk.id, trk.get_bbox(), trk.label_quark))
         return results
 
@@ -268,6 +405,50 @@ class TrackerTransform(GstBase.BaseTransform):
         flags=GObject.ParamFlags.READWRITE,
     )
 
+    keep_alive = GObject.Property(
+        type=int,
+        default=2,
+        minimum=0,
+        maximum=1000,
+        nick="Keep Alive",
+        blurb="Frames to keep emitting a confirmed track (Kalman-predicted box) "
+        "after a missed detection; bridges flicker (0 = only matched frames)",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    new_track_confidence = GObject.Property(
+        type=float,
+        default=0.25,
+        minimum=0.0,
+        maximum=1.0,
+        nick="New Track Confidence",
+        blurb="Minimum detection confidence to START a new track (ByteTrack "
+        "activation gate); weak boxes still continue existing tracks but "
+        "won't spawn phantom/duplicate circles",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    camera_motion = GObject.Property(
+        type=bool,
+        default=True,
+        nick="Camera Motion Compensation",
+        blurb="Estimate the global image shift from matched tracks and re-match "
+        "leftovers shifted by it, so a panning camera re-attaches players "
+        "instead of leaving the old track behind and spawning a duplicate",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
+    duplicate_iou = GObject.Property(
+        type=float,
+        default=0.8,
+        minimum=0.0,
+        maximum=1.0,
+        nick="Duplicate IoU",
+        blurb="Two confirmed tracks overlapping more than this are treated as "
+        "duplicates and the weaker one is dropped",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
     def __init__(self):
         super().__init__()
         self.logger = LoggerFactory.get(LoggerFactory.LOGGER_TYPE_GST)
@@ -281,6 +462,10 @@ def _ensure_tracker(self):
                 max_age=self.max_age,
                 min_hits=self.min_hits,
                 iou_threshold=self.iou_threshold,
+                keep_alive=self.keep_alive,
+                new_track_conf=self.new_track_confidence,
+                camera_motion=self.camera_motion,
+                dup_iou=self.duplicate_iou,
             )
         return self._tracker
 
@@ -351,6 +536,14 @@ def do_get_property(self, prop):
             return self.min_hits
         elif prop.name == "iou-threshold":
             return self.iou_threshold
+        elif prop.name == "keep-alive":
+            return self.keep_alive
+        elif prop.name == "new-track-confidence":
+            return self.new_track_confidence
+        elif prop.name == "camera-motion":
+            return self.camera_motion
+        elif prop.name == "duplicate-iou":
+            return self.duplicate_iou
         else:
             raise AttributeError(f"Unknown property {prop.name}")
 
@@ -367,6 +560,18 @@ def do_set_property(self, prop, value):
         elif prop.name == "iou-threshold":
             self.iou_threshold = value
             self._tracker = None
+        elif prop.name == "keep-alive":
+            self.keep_alive = value
+            self._tracker = None
+        elif prop.name == "new-track-confidence":
+            self.new_track_confidence = value
+            self._tracker = None
+        elif prop.name == "camera-motion":
+            self.camera_motion = value
+            self._tracker = None
+        elif prop.name == "duplicate-iou":
+            self.duplicate_iou = value
+            self._tracker = None
         else:
             raise AttributeError(f"Unknown property {prop.name}")
 
diff --git a/plugins/python/yolo.py b/plugins/python/yolo.py
index a34deb6..2140c74 100644
--- a/plugins/python/yolo.py
+++ b/plugins/python/yolo.py
@@ -160,6 +160,9 @@ def do_forward(self, frames):
             )
             end_pre = time.time()
 
+            conf = getattr(self, "conf", 0.25)
+            iou = getattr(self, "iou", 0.5)
+            agnostic = getattr(self, "agnostic_nms", True)
             if self.track:
                 # Ensure tracker persists across batches
                 results = self.execute_with_stream(
@@ -167,14 +170,23 @@ def do_forward(self, frames):
                         source=img_list,
                         persist=True,
                         imgsz=640,
-                        conf=0.1,
+                        conf=conf,
+                        iou=iou,
+                        agnostic_nms=agnostic,
                         verbose=True,
                         tracker="botsort.yaml",
                     )
                 )
             else:
                 results = self.execute_with_stream(
-                    lambda: model(img_list, imgsz=640, conf=0.1, verbose=True)
+                    lambda: model(
+                        img_list,
+                        imgsz=640,
+                        conf=conf,
+                        iou=iou,
+                        agnostic_nms=agnostic,
+                        verbose=True,
+                    )
                 )
             end_inf = time.time()
 
@@ -205,6 +217,36 @@ class YOLOTransform(BaseObjectDetector):
         "Aaron Boxer <aaron.boxer@collabora.com>",
     )
 
+    confidence = GObject.Property(
+        type=float,
+        default=0.1,
+        minimum=0.0,
+        maximum=1.0,
+        nick="Confidence Threshold",
+        blurb="Minimum detection confidence (matches football_analyzer); kept "
+        "low on purpose so the tracker can use weak boxes to continue tracks "
+        "-- the tracker's new-track-confidence gates phantom tracks",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    nms_iou = GObject.Property(
+        type=float,
+        default=0.7,
+        minimum=0.0,
+        maximum=1.0,
+        nick="NMS IoU",
+        blurb="NMS IoU threshold (matches football_analyzer's default); lower "
+        "suppresses more overlap but can also drop genuinely close players",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+    agnostic_nms = GObject.Property(
+        type=bool,
+        default=False,
+        nick="Class-Agnostic NMS",
+        blurb="Suppress overlapping boxes across classes too; off by default "
+        "(like football_analyzer) so two close players aren't merged",
+        flags=GObject.ParamFlags.READWRITE,
+    )
+
     def __init__(self):
         super().__init__()
         self.mgr.engine_name = "pyml_yolo_engine"
@@ -222,6 +264,14 @@ def engine_name(self, value):
             "The 'engine_name' property cannot be set in this derived class."
         )
 
+    def do_forward(self, frames):
+        # Re-apply the confidence/NMS properties to the engine on every call, then defer to the base class.
+        if self.engine:  # nothing to configure while no engine is set
+            self.engine.conf = self.confidence
+            self.engine.iou = self.nms_iou
+            self.engine.agnostic_nms = self.agnostic_nms
+        return super().do_forward(frames)
+
     def do_decode(self, buf, result, stream_idx=0):
         self.logger.debug(
             f"Decoding YOLO result for buffer {hex(id(buf))}, stream {stream_idx}: {result}"
@@ -250,7 +300,9 @@ def do_decode(self, buf, result, stream_idx=0):
             score = boxes.conf[i]
             label = boxes.cls[i]
             label_num = label.item()
-            class_name = COCO_CLASSES.get(label_num, f"unknown_{label_num}")
+            # Prefer the model's own class names; fall back to COCO for plain yolo.
+            names = getattr(result, "names", None) or COCO_CLASSES
+            class_name = names.get(label_num, f"unknown_{label_num}")
 
             # Use class name for detection, track_id for tracking
             if self.engine.track and hasattr(boxes, "id") and boxes.id is not None:
diff --git a/rzv2h/CMakeLists.txt b/rzv2h/CMakeLists.txt
new file mode 100644
index 0000000..5f09e2f
--- /dev/null
+++ b/rzv2h/CMakeLists.txt
@@ -0,0 +1,54 @@
+# Build the `drpai_runtime` Python extension for RZ/V2H.
+#
+# This mirrors the SDK's apps/CMakeLists.txt (same TVM includes, same V2H
+# runtime libraries) but produces a Python module instead of an executable.
+# It MUST be configured with the SDK cross-toolchain and built inside the
+# RZ/V2H DRP-AI TVM SDK Docker. See README.md.
+#
+# Required env: TVM_ROOT  (root of rzv_drp-ai_tvm), SDK (Yocto cross SDK)
+# Required -D : PYBIND11_INCLUDE_DIR, PYTHON_INCLUDE_DIR (target aarch64 python)
+cmake_minimum_required(VERSION 3.16)
+project(drpai_runtime CXX)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+if(NOT DEFINED ENV{TVM_ROOT})
+  message(FATAL_ERROR "TVM_ROOT not set — source the DRP-AI TVM SDK env first")
+endif()
+set(TVM_ROOT "$ENV{TVM_ROOT}")
+set(LIBMERA_RT_PATH ${TVM_ROOT}/obj/build_runtime/v2h/lib)
+
+set(DRPAI_APPS "${TVM_ROOT}/apps" CACHE PATH "rzv_drp-ai_tvm/apps directory")
+set(PYBIND11_INCLUDE_DIR "" CACHE PATH "pybind11 include directory")
+set(PYTHON_INCLUDE_DIR "" CACHE PATH "target python3 include dir")
+
+add_library(drpai_runtime MODULE
+  drpai_runtime_pybind.cpp
+  ${DRPAI_APPS}/MeraDrpRuntimeWrapper.cpp
+)
+
+# Output is exactly drpai_runtime.so (no "lib" prefix), with the MERA runtime
+# library directory recorded as rpath / rpath-link.
+set_target_properties(drpai_runtime PROPERTIES
+  PREFIX "" SUFFIX ".so"
+  LINK_FLAGS "-Wl,-rpath,${LIBMERA_RT_PATH} -Wl,-rpath-link,${LIBMERA_RT_PATH}"
+)
+
+target_include_directories(drpai_runtime PRIVATE
+  ${DRPAI_APPS}
+  ${TVM_ROOT}/tvm/include
+  ${TVM_ROOT}/setup/include
+  ${TVM_ROOT}/tvm/3rdparty/dlpack/include
+  ${TVM_ROOT}/tvm/3rdparty/dmlc-core/include
+  ${TVM_ROOT}/tvm/3rdparty/compiler-rt
+  ${PYBIND11_INCLUDE_DIR}
+  ${PYTHON_INCLUDE_DIR}
+)
+
+target_compile_definitions(drpai_runtime PRIVATE MERA_DRP_RUNTIME PUBLIC KDLDRPAI)
+target_compile_options(drpai_runtime PRIVATE -O3 -mtune=cortex-a55 -Wall -fvisibility=hidden)
+
+target_link_directories(drpai_runtime PRIVATE ${LIBMERA_RT_PATH})
+target_link_libraries(drpai_runtime PRIVATE
+  mera2_runtime mera2_plan_io drp_tvm_rt pthread
+)
diff --git a/rzv2h/README.md b/rzv2h/README.md
new file mode 100644
index 0000000..de8d0bc
--- /dev/null
+++ b/rzv2h/README.md
@@ -0,0 +1,80 @@
+# Object detection on Renesas RZ/V2H (DRP-AI NPU)
+
+This runs `pyml_objectdetector` on the **RZ/V2H** DRP-AI NPU, using a YOLO11
+model compiled with the **DRP-AI TVM** compiler (powered by EdgeCortix MERA).
+
+It is the decomposed, metadata-passing pipeline used elsewhere in this repo —
+detector -> (tracker) -> overlay, but the detector's inference runs on the NPU:
+
+```
+... ! pyml_objectdetector engine-name=drpai model-name=<deploy_dir> device=drpai
+      input-format=nchw post-process=anchor_free
+    ! pyml_tracker ! pyml_overlay ! ...
+```
+## Prerequisites
+
+- RZ/V2H EVK with the **RZ/V2H AI SDK v6.00** Yocto image (provides the DRP-AI
+  driver, `/dev/drpai0`, GStreamer, and Python 3).
+- The **DRP-AI TVM** package (`rzv_drp-ai_tvm`) and its SDK Docker, with the
+  environment sourced so `TVM_ROOT`, `SDK` (cross SDK), and the DRP-AI
+  translator are set. (`PRODUCT=V2H`.)
+- `pybind11` headers available to the cross build.
+
+## 1 — Convert the model (in the SDK Docker)
+
+```bash
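+./convert_yolo11_v2h.sh /work/models/yolo11m/yolo11m.onnx /work/rzv2h/yolo11m_drpai_v2h /work/rzv2h/calib 640
+```
+
+This takes an already-exported YOLO11 ONNX (input node `images`, `1x3x640x640`) plus a directory of calibration
+images (arguments: `[MODEL.onnx] [OUT_DIR] [CALIB_DIR] [IMGSZ]`) and runs the V2H DRP-AI TVM INT8 compile. See the script for the exact commands.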
+
+## 2 — Build the Python binding (in the SDK Docker)
+
+Source the SDK env first (so `TVM_ROOT`/`SDK` are set and CXX is the aarch64
+cross compiler), then:
+
+```bash
+cd rzv2h
+cmake -B build \
+  -DCMAKE_TOOLCHAIN_FILE="$TVM_ROOT/apps/toolchain/runtime.cmake" \
+  -DPYBIND11_INCLUDE_DIR="$(python3 -c 'import pybind11; print(pybind11.get_include())')" \
+  -DPYTHON_INCLUDE_DIR="$SDK/sysroots/aarch64-poky-linux/usr/include/python3.12"
+cmake --build build -j
+```
+
+Adjust `python3.12` to the AI SDK image's Python version, and point
+`PYBIND11_INCLUDE_DIR` at a real pybind11 headers dir if the one-liner doesn't
+resolve in the container.
+
+## 3 — Deploy to the board
+
+Copy onto the RZ/V2H (e.g. under `/home/weston`):
+
+- this repo's `plugins/` (the gst-python-ml elements),
+- `build/drpai_runtime.so`,
+- the compiled `yolo11m_drpai_v2h/` deploy dir,
+- a COCO label file if you overlay class names.
+
+```bash
+export GST_PLUGIN_PATH=/home/weston/gst-python-ml/plugins:$GST_PLUGIN_PATH
+export PYTHONPATH=/home/weston/rzv2h/build:$PYTHONPATH
+gst-inspect-1.0 pyml_objectdetector
+```
+
+## 4 — Run on the board
+
+File -> annotated file (run as a user that can open `/dev/drpai0`, often root):
+
+```bash
+gst-launch-1.0 filesrc location=clip.mp4 ! decodebin ! videoconvert ! videoscale \
+  ! "video/x-raw,format=RGB,width=640,height=640" \
+  ! pyml_objectdetector engine-name=drpai model-name=yolo11m_drpai_v2h device=drpai \
+        input-format=nchw post-process=anchor_free \
+  ! pyml_tracker tracker-type=bytetrack \
+  ! videoconvert ! "video/x-raw,format=RGBA" ! pyml_overlay \
+  ! videoconvert ! autovideosink
+```
+
+Live camera (MIPI/USB): swap `filesrc ! decodebin` for the camera source
+(`v4l2src` / the EVK's ISP source), keeping the `640x640` caps into the detector.
diff --git a/rzv2h/convert_yolo11_v2h.sh b/rzv2h/convert_yolo11_v2h.sh
new file mode 100755
index 0000000..06d8412
--- /dev/null
+++ b/rzv2h/convert_yolo11_v2h.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Compile YOLO11 (ONNX) -> RZ/V2H DRP-AI (INT8) deploy dir, using the REAL
+# mera2 + DRP-AI Translator i8 + DRP-AI Quantizer flow.
+#
+# RUN INSIDE the drpai-tvm-v2h container (built via rzv2h/sdk_eval/build_image.sh),
+# with this repo mounted at /work. RZ/V2H uses the DRP-AI INT8 accelerator, so
+# quantization is MANDATORY and calibration images are required — this is why
+# the plain FP compile_onnx_model.py does NOT work for V2H.
+#
+# Usage (inside container):
+#   ./rzv2h/convert_yolo11_v2h.sh [MODEL.onnx] [OUT_DIR] [CALIB_DIR] [IMGSZ]
+# Defaults assume the repo is at /work and the ONNX is exported already
+# (e.g. `yolo export model=models/yolo11m/yolo11m.pt format=onnx imgsz=640` on a
+# host with ultralytics — the container has no ultralytics).
+set -euo pipefail
+# Make the path arguments absolute up front: the compile step below runs after a `cd` into the SDK tutorials dir.
+ONNX="$(realpath -m -- "${1:-/work/models/yolo11m/yolo11m.onnx}")"
+OUT="$(realpath -m -- "${2:-/work/rzv2h/yolo11m_drpai_v2h}")"
+CALIB="$(realpath -m -- "${3:-/work/rzv2h/calib}")"
+IMGSZ="${4:-640}"
+
+: "${TVM_ROOT:?run inside the drpai-tvm-v2h container (TVM_ROOT unset)}"
+export PRODUCT=V2H
+export SDK="$(find /opt/ -name sysroots -type d | head -1)/../"
+export TRANSLATOR="$(find /opt/ -name python_api -type d | head -1)/../../"
+: "${QUANTIZER:?QUANTIZER env not set (expected from the image)}"
+export PATH="$TVM_ROOT/tutorials:$PATH"          # so run_drp_compiler.sh resolves
+chmod +x "$TVM_ROOT"/tutorials/*.sh 2>/dev/null || true   # SDK ships them non-+x
+
+[[ -f "$ONNX" ]] || { echo "ONNX not found: $ONNX (export it first)"; exit 1; }
+[[ -d "$CALIB" ]] || { echo "calibration image dir not found: $CALIB"; exit 1; }
+
+# The stock quant script preprocesses calibration images as ImageNet (224 + mean/std) — wrong for YOLO
+# (needs IMGSZ, /255, RGB, CHW). Patch that one line; abort if it is neither patchable nor already patched for IMGSZ.
+python3 - "$TVM_ROOT/tutorials/compile_onnx_model_quant.py" "$IMGSZ" <<'PYEOF'
+import sys
+p, sz = sys.argv[1], int(sys.argv[2])
+s = open(p).read()
+old = "input_data = pre_process_imagenet_pytorch(image, mean, stdev, need_transpose=True)"
+new = ("input_data = (cv2.resize(image,(%d,%d))[:,:,::-1]"
+       ".astype('float32')/255.0).transpose(2,0,1)" % (sz, sz))
+if old in s:
+    open(p, "w").write(s.replace(old, new)); print("[patch] calibration preprocessing ->", sz)
+elif new not in s:  # neither the stock line nor our patch for this size: calibration would be wrong
+    sys.exit("[patch] calibration line not found in %s (SDK changed, or patched for another IMGSZ)" % p)
+PYEOF
+
+rm -rf "$OUT"
+cd "$TVM_ROOT/tutorials"
+python3 compile_onnx_model_quant.py "$ONNX" \
+  -o "$OUT" -i images -s "1,3,${IMGSZ},${IMGSZ}" \
+  -t "$SDK" -d "$TRANSLATOR" -c "$QUANTIZER" --images "$CALIB"
+
+echo
+echo "Done. RZ/V2H DRP-AI (INT8) deploy dir: $OUT"
+echo "  sub_0000__CPU_DRP_TVM/{deploy.so,deploy.json,deploy.params}  (aarch64 + DRP-AI)"
+echo "  preprocess/   (DRP-AI pre-processing runtime objects)"
+echo "Copy $OUT to the board; load sub_0000__CPU_DRP_TVM with the MERA runtime."
diff --git a/rzv2h/drpai_runtime_pybind.cpp b/rzv2h/drpai_runtime_pybind.cpp
new file mode 100644
index 0000000..5d2d0b9
--- /dev/null
+++ b/rzv2h/drpai_runtime_pybind.cpp
@@ -0,0 +1,149 @@
+// drpai_runtime_pybind.cpp
+// Copyright (C) 2024-2026 Collabora Ltd. — LGPL (see COPYING).
+//
+// pybind11 binding around the Renesas DRP-AI TVM runtime
+// (MeraDrpRuntimeWrapper, powered by EdgeCortix MERA(TM)) for RZ/V2H.
+//
+// Exposes a minimal `drpai_runtime.Runtime` class to Python so the pure-Python
+// `drpai_engine.py` can drive the DRP-AI NPU:
+//
+//     import drpai_runtime
+//     rt = drpai_runtime.Runtime()
+//     rt.load("/path/to/deploy_dir")     # deploy.so/json/params
+//     rt.set_input(0, nchw_float32_numpy)
+//     rt.run()
+//     out0 = rt.get_output(0)            # numpy (float32, fp16 upcast)
+//
+// Build with CMake against the board's DRP-AI TVM runtime — see CMakeLists.txt
+// and README.md. This compiles only inside the RZ/V2H DRP-AI TVM SDK and runs
+// only on the board (it talks to /dev/drpai0).
+
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <pybind11/stl.h>
+
+#include <cstdint>
+#include <cstring>
+#include <stdexcept>
+#include <string>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/drpai.h>
+
+#include "MeraDrpRuntimeWrapper.h"
+
+namespace py = pybind11;
+
+static float fp16_to_fp32(uint16_t h) {  // widen one IEEE-754 half-precision bit pattern to float
+  uint32_t sign = static_cast<uint32_t>(h & 0x8000) << 16;  // sign bit moved from bit 15 to bit 31
+  uint32_t exp = (h >> 10) & 0x1F;                           // 5-bit biased exponent
+  uint32_t mant = h & 0x3FF;                                 // 10-bit fraction
+  uint32_t f;
+  if (exp == 0) {  // zero or subnormal
+    if (mant == 0) {
+      f = sign;  // signed zero
+    } else {
+      exp = 127 - 15 + 1;  // one above the smallest rebased exponent; the loop below runs at least once
+      while ((mant & 0x400) == 0) {  // normalize: shift until the leading 1 reaches bit 10
+        mant <<= 1;
+        exp--;
+      }
+      mant &= 0x3FF;  // drop the leading 1, which is implicit in the float32 encoding
+      f = sign | (exp << 23) | (mant << 13);
+    }
+  } else if (exp == 0x1F) {
+    f = sign | 0x7F800000 | (mant << 13);  // Inf / NaN
+  } else {
+    f = sign | ((exp - 15 + 127) << 23) | (mant << 13);  // rebias 15 -> 127, widen fraction 10 -> 23 bits
+  }
+  float out;
+  std::memcpy(&out, &f, sizeof(out));  // copy the bit pattern into a float
+  return out;
+}
+
+static uint64_t get_drpai_start_addr() {  // returns the `address` field reported by DRPAI_GET_DRPAI_AREA
+  int fd = open("/dev/drpai0", O_RDWR);
+  if (fd < 0) {
+    throw std::runtime_error("Failed to open /dev/drpai0 (run on the board, as root?)");
+  }
+  drpai_data_t drpai_data;
+  int ret = ioctl(fd, DRPAI_GET_DRPAI_AREA, &drpai_data);
+  close(fd);  // closed before inspecting ret, so the fd is released on both the success and error paths
+  if (ret == -1) {
+    throw std::runtime_error("ioctl(DRPAI_GET_DRPAI_AREA) failed");
+  }
+  return drpai_data.address;
+}
+
+class Runtime {  // Python-facing wrapper around one MeraDrpRuntimeWrapper instance
+ public:
+  Runtime() : rt_() {}
+
+  bool load(const std::string& model_dir) {
+    model_dir_ = model_dir;  // remembered because num_input() passes it to GetNumInput()
+    return rt_.LoadModel(model_dir, get_drpai_start_addr());
+  }
+
+  void set_input(int index,
+                 py::array_t<float, py::array::c_style | py::array::forcecast> data) {  // cast to C-contiguous float by pybind11 if needed
+    rt_.SetInput(index, static_cast<const float*>(data.data()));  // NOTE(review): element count is not validated here — confirm callers pass a full input tensor
+  }
+
+  void run() { rt_.Run(); }
+
+  int num_input() { return rt_.GetNumInput(model_dir_); }
+  int num_output() { return rt_.GetNumOutput(); }
+
+  py::array get_output(int index) {  // always returns a new 1-D numpy array (a copy of the runtime buffer)
+    auto out = rt_.GetOutput(index);
+    InOutDataType dtype = std::get<0>(out);
+    const void* ptr = std::get<1>(out);
+    int64_t size = std::get<2>(out);  // used below as an element count, not a byte count
+
+    switch (dtype) {
+      case InOutDataType::FLOAT16: {  // no memcpy here: each half is widened to float32 individually
+        const uint16_t* src = reinterpret_cast<const uint16_t*>(ptr);
+        py::array_t<float> result(size);
+        float* dst = static_cast<float*>(result.request().ptr);
+        for (int64_t i = 0; i < size; ++i) dst[i] = fp16_to_fp32(src[i]);
+        return result;
+      }
+      case InOutDataType::FLOAT32: {
+        py::array_t<float> result(size);
+        std::memcpy(result.request().ptr, ptr, size * sizeof(float));
+        return result;
+      }
+      case InOutDataType::INT32: {
+        py::array_t<int32_t> result(size);
+        std::memcpy(result.request().ptr, ptr, size * sizeof(int32_t));
+        return result;
+      }
+      case InOutDataType::INT64: {
+        py::array_t<int64_t> result(size);
+        std::memcpy(result.request().ptr, ptr, size * sizeof(int64_t));
+        return result;
+      }
+      default:  // any other InOutDataType value is rejected rather than reinterpreted
+        throw std::runtime_error("Unsupported DRP-AI output data type");
+    }
+  }
+
+ private:
+  MeraDrpRuntimeWrapper rt_;
+  std::string model_dir_;  // directory given to the last load() call
+};
+
+PYBIND11_MODULE(drpai_runtime, m) {  // module name matches the drpai_runtime.so the CMake build emits
+  m.doc() = "pybind11 binding for the Renesas DRP-AI TVM runtime (RZ/V2H)";
+  py::class_<Runtime>(m, "Runtime")  // exposes the C++ Runtime class above under the same Python name
+      .def(py::init<>())
+      .def("load", &Runtime::load, py::arg("model_dir"),
+           "Load a DRP-AI TVM deploy directory (deploy.so/json/params).")
+      .def("set_input", &Runtime::set_input, py::arg("index"), py::arg("data"))
+      .def("run", &Runtime::run)
+      .def("num_input", &Runtime::num_input)
+      .def("num_output", &Runtime::num_output)
+      .def("get_output", &Runtime::get_output, py::arg("index"));
+}
diff --git a/rzv2h/emulation/drpai_runtime.py b/rzv2h/emulation/drpai_runtime.py
new file mode 100644
index 0000000..6fdbe25
--- /dev/null
+++ b/rzv2h/emulation/drpai_runtime.py
@@ -0,0 +1,123 @@
+# drpai_runtime.py  —  off-board stand-in for the native pybind `drpai_runtime`.
+# Copyright (C) 2024-2026 Collabora Ltd. — LGPL (see COPYING).
+#
+# Same interface as the C++ binding (Runtime.load / set_input / run /
+# num_output / get_output), with two backends auto-selected by what's in the
+# model directory and what's importable:
+#
+#   1. MERA / TVM graph_executor  — if the dir has deploy.so/json/params AND a
+#      `tvm` runtime is importable (i.e. inside the Renesas DRP-AI TVM SDK
+#      container, or on the board). This runs the REAL MERA/TVM runtime — the
+#      faithful "test through the TVM runtime". On the board the deploy.so runs
+#      on the DRP-AI NPU / Arm CPU; in the SDK container it runs on whatever the
+#      module was compiled for (aarch64 needs QEMU; an x86-target build runs
+#      natively for functional check).
+#
+#   2. ONNX Runtime (CPU)         — fallback look-alike for plain x86 dev boxes
+#      with no SDK: runs the same yolo11m.onnx that feeds the DRP-AI compiler so
+#      the engine's preprocess/reshape/decode path is exercised. Validates our
+#      code, NOT the DRP-AI/MERA runtime.
+#
+# get_output() always returns a FLAT array, matching the C++ GetOutput buffer,
+# so the engine's reshape-to-(1, 4+nc, anchors) path is genuinely tested.
+
+import glob
+import os
+
+import numpy as np
+
+
+class Runtime:
+    def __init__(self):
+        self._backend = None
+        # tvm backend
+        self._mod = None
+        self._dev = None
+        self._input_name = os.getenv("DRPAI_INPUT_NAME", "images")
+        # onnx backend
+        self._sess = None
+        self._ort_input = None
+        self._feed = None
+        self._outputs = None
+
+    def load(self, model_dir):
+        deploy_so = os.path.join(model_dir, "deploy.so")
+        if os.path.isfile(deploy_so) and self._try_load_tvm(model_dir, deploy_so):
+            return True
+        return self._try_load_onnx(model_dir)
+
+    # ---- backend 1: real MERA / TVM graph_executor ----
+    def _try_load_tvm(self, model_dir, deploy_so):
+        try:
+            import tvm
+            from tvm.contrib import graph_executor
+        except ImportError:
+            return False
+        try:
+            lib = tvm.runtime.load_module(deploy_so)
+            with open(os.path.join(model_dir, "deploy.json")) as f:
+                graph = f.read()
+            self._dev = tvm.cpu(0)
+            self._mod = graph_executor.create(graph, lib, self._dev)
+            with open(os.path.join(model_dir, "deploy.params"), "rb") as f:
+                self._mod.load_params(bytearray(f.read()))
+            self._backend = "tvm"
+            print(
+                f"[drpai_runtime] MERA/TVM graph_executor backend "
+                f"(deploy.so, input='{self._input_name}') — real runtime"
+            )
+            return True
+        except Exception as e:
+            print(f"[drpai_runtime] TVM backend load failed ({e}); trying ONNX")
+            return False
+
+    # ---- backend 2: ONNX Runtime look-alike ----
+    def _try_load_onnx(self, model_dir):
+        try:
+            import onnxruntime as ort
+        except ImportError:
+            print("[drpai_runtime] no TVM and no onnxruntime — cannot load")
+            return False
+        onnx_files = sorted(glob.glob(os.path.join(model_dir, "*.onnx")))
+        if not onnx_files:
+            print(f"[drpai_runtime] no deploy.so and no .onnx in {model_dir!r}")
+            return False
+        self._sess = ort.InferenceSession(
+            onnx_files[0], providers=["CPUExecutionProvider"]
+        )
+        self._ort_input = self._sess.get_inputs()[0].name
+        self._backend = "onnx"
+        print(
+            f"[drpai_runtime] ONNX Runtime EMULATION backend ({onnx_files[0]}, "
+            f"input='{self._ort_input}') — NOT the NPU/MERA runtime"
+        )
+        return True
+
+    def set_input(self, index, data):
+        arr = np.ascontiguousarray(data, dtype=np.float32)
+        if self._backend == "tvm":
+            import tvm
+
+            self._mod.set_input(self._input_name, tvm.nd.array(arr, self._dev))
+        else:
+            self._feed = arr
+
+    def run(self):
+        if self._backend == "tvm":
+            self._mod.run()
+        else:
+            self._outputs = self._sess.run(None, {self._ort_input: self._feed})
+
+    def num_input(self):
+        return 1
+
+    def num_output(self):
+        if self._backend == "tvm":
+            return self._mod.get_num_outputs()
+        return len(self._outputs) if self._outputs is not None else 0
+
+    def get_output(self, index):
+        # Flat buffer, like the C++ GetOutput; the engine reshapes it.
+        if self._backend == "tvm":
+            return self._mod.get_output(index).numpy().reshape(-1).astype(np.float32)
+        return np.asarray(self._outputs[index], dtype=np.float32).reshape(-1)
diff --git a/rzv2h/emulation/run_emulated.sh b/rzv2h/emulation/run_emulated.sh
new file mode 100755
index 0000000..527acf8
--- /dev/null
+++ b/rzv2h/emulation/run_emulated.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Run the DRP-AI object-detection pipeline on the DEV BOX using the emulated
+# drpai_runtime (CPU/ONNX Runtime stand-in) — same engine code as the board,
+# but no NPU. For validating the integration before deploying to RZ/V2H.
+#
+# Usage: ./run_emulated.sh [INPUT.mp4] [OUTPUT.mp4]
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"
+REPO="$(cd "$HERE/../.." && pwd)"
+IN="$(realpath -m -- "${1:-$REPO/08fd33_4.mp4}")"    # resolved against the caller's cwd, i.e. before the cd below
+OUT="$(realpath -m -- "${2:-${IN%.*}_drpai_emu.mp4}")"   # default: next to the input
+cd "$REPO"
+
+source .venv/bin/activate
+export GST_PLUGIN_PATH="$REPO/plugins:${GST_PLUGIN_PATH:-}"
+export PYTHONPATH="$HERE:${PYTHONPATH:-}"          # resolves `import drpai_runtime` to the fake
+
+DEPLOY="$HERE/yolo11m_drpai_v2h_emu"               # dir containing yolo11m.onnx
+
+if [[ ! -f "$DEPLOY/yolo11m.onnx" ]]; then
+  echo "Missing $DEPLOY/yolo11m.onnx — export it first:" >&2
+  echo "  yolo export model=yolo11m.pt format=onnx imgsz=640 opset=12 simplify=True" >&2
+  echo "  mkdir -p $DEPLOY && cp yolo11m.onnx $DEPLOY/" >&2
+  exit 1
+fi
+
+echo "EMULATED DRP-AI run: '$IN' -> '$OUT' (CPU/ONNX, not the NPU)"
+gst-launch-1.0 -e \
+  filesrc location="$IN" ! decodebin ! videoconvert ! videoscale \
+  ! "video/x-raw,format=RGB,width=640,height=640" \
+  ! pyml_objectdetector engine-name=drpai model-name="$DEPLOY" device=drpai \
+        input-format=nchw post-process=anchor_free \
+  ! pyml_tracker tracker-type=bytetrack \
+  ! videoconvert ! "video/x-raw,format=RGBA" \
+  ! pyml_football_overlay show-ids=false show-labels=false \
+  ! videoconvert ! openh264enc ! h264parse ! mp4mux ! filesink location="$OUT"
+echo "Done: $OUT"
diff --git a/rzv2h/sdk_eval/README.md b/rzv2h/sdk_eval/README.md
new file mode 100644
index 0000000..13fa30f
--- /dev/null
+++ b/rzv2h/sdk_eval/README.md
@@ -0,0 +1,113 @@
+# Faithful DRP-AI TVM eval (real mera2 / MERA runtime)
+
+This is the most faithful test short of running on hardware: the **real**
+`mera2` compile and the **real** MERA/TVM runtime, instead of the ONNX-RT
+look-alike in [../emulation](../emulation). It composes with the same
+`engine-name=drpai` + `drpai_runtime` shim we use everywhere else.
+
+## Read this first — what's gated, and the aarch64 catch
+
+Two things make this unable to run on a plain x86 box out of the box:
+
+1. **License-gated downloads (Renesas account required).** The stack build needs
+   the **DRP-AI Translator i8** and the **RZ/V2H AI SDK** (`RTK0EF0180F06000SJ.zip`).
+   There is **no public prebuilt image**; you download these and build Renesas'
+   `Dockerfile`. `build_image.sh` cannot download them for you.
+2. **The compile targets aarch64, not x86.** Even `compile_cpu_only_onnx_model.py`
+   uses `target = "llvm ... -mtriple=aarch64-linux-gnu"` and the SDK's aarch64
+   cross-g++. So `deploy.so` runs on the board's Arm CPU / NPU — to execute it
+   off-board you either run on the **board**, under **QEMU-aarch64**, or compile
+   with an **x86 `llvm` target** for a pure functional check (see below).
+
+If you don't have the downloads, the ONNX-RT emulation in `../emulation`
+already validates all of *our* code (engine preprocess/reshape/decode +
+pipeline). What's left to validate here is mera2-compile success and runtime
+numerics — both inherently need Renesas assets or hardware.
+
+## Steps
+
+### 1. Build the SDK image (host, needs the two downloads)
+
+```bash
+mkdir -p rzv2h/sdk_eval/assets
+# put both Renesas downloads in rzv2h/sdk_eval/assets/ :
+#   DRP-AI_Translator_i8-*-Linux-x86_64-Install   and   RTK0EF0180F06000SJ.zip
+cd rzv2h/sdk_eval && ./build_image.sh
+```
+
+`build_image.sh` fetches the repo `Dockerfile`, assembles a clean build context
+(Dockerfile + the toolchain `.sh` it unzips from the AI SDK zip + the Translator
+installer), and runs `docker build --build-arg PRODUCT=V2H -t drpai-tvm-v2h`.
+The Dockerfile (`FROM ubuntu:22.04`) defaults `PRODUCT=V2H` and builds the TVM
+fork itself, so the build takes a while.
+
+To fetch just the Dockerfile by hand:
+`wget https://raw.githubusercontent.com/renesas-rz/rzv_drp-ai_tvm/main/Dockerfile`
+
+### 2. Compile YOLO11 with the real mera2 (inside the container)
+
+```bash
+docker run -it --rm -v "$PWD":/workspace/gst-python-ml drpai-tvm-v2h bash
+# inside:
+cd /workspace/gst-python-ml
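+./rzv2h/convert_yolo11_v2h.sh "$PWD/models/yolo11m/yolo11m.onnx" "$PWD/rzv2h/yolo11m_drpai_v2h" "$PWD/rzv2h/calib" 640   # real mera2.from_onnx + mera2.drp.build
+# -> rzv2h/yolo11m_drpai_v2h/sub_0000__CPU_DRP_TVM/{deploy.so,deploy.json,deploy.params}   (aarch64)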
+```
+
+For a **host x86 functional check** instead of the board artifact, compile with a
+native target (edit a copy of `tutorials/compile_onnx_model.py` to
+`target = "llvm"` and drop the aarch64 cross-compiler), producing an x86
+`deploy.so` the MERA/TVM `graph_executor` can run natively.
+
+### 3. Run through the real MERA/TVM runtime
+
+The [../emulation/drpai_runtime.py](../emulation/drpai_runtime.py) shim
+auto-selects the **MERA/TVM `graph_executor`** backend as soon as the model dir
+has `deploy.so/json/params` and `tvm` is importable (true inside this
+container). The engine code is unchanged.
+
+```bash
+export GST_PLUGIN_PATH=/workspace/gst-python-ml/plugins:$GST_PLUGIN_PATH
+export PYTHONPATH=/workspace/gst-python-ml/rzv2h/emulation:$PYTHONPATH
+# (x86 deploy.so) run natively; (aarch64 deploy.so) run under qemu-aarch64
+gst-launch-1.0 filesrc location=08fd33_4.mp4 ! decodebin ! videoconvert ! videoscale \
+  ! "video/x-raw,format=RGB,width=640,height=640" \
+  ! pyml_objectdetector engine-name=drpai model-name=yolo11m_drpai_v2h device=drpai \
+        input-format=nchw post-process=anchor_free \
+  ! pyml_tracker ! videoconvert ! "video/x-raw,format=RGBA" \
+  ! pyml_football_overlay ! videoconvert ! autovideosink
+```
+
+The shim prints which backend it picked:
+`[drpai_runtime] MERA/TVM graph_executor backend ... — real runtime`.
+
+## On the actual board
+
+Two ways to run the same pipeline on the RZ/V2H:
+
+- **Python graph_executor** — copy the `deploy.so/json/params` + the emulation
+  shim; if the board image has the MERA/TVM python runtime, it Just Works (the
+  shim's TVM backend), NPU included.
+- **C++ pybind binding** — build [../drpai_runtime_pybind.cpp](../drpai_runtime_pybind.cpp)
+  per [../README.md](../README.md); the native `drpai_runtime.so` takes
+  precedence over this shim on `PYTHONPATH`.
+
+## Verified results (RZ/V2H AI SDK v6.00 + DRP-AI Translator i8 v1.11)
+
+Both paths were run end-to-end driving the `drpai-tvm-v2h` image on an x86 host:
+
+- **x86 MERA/TVM runtime test** — `compile_x86_cpu.py` compiled YOLO11 via the
+  MERA-fork TVM (native `llvm`), and `x86_runtime_check.py` ran it through the
+  real `graph_executor`: output matched ONNX to **max|Δ| = 6.2e-3**, **22 = 22
+  detections** (label `person`). Confirms compile + MERA/TVM runtime + the
+  `drpai_runtime` shim + our decoder, no NPU needed.
+- **Real INT8 NPU compile** — `../convert_yolo11_v2h.sh` (quantized flow)
+  produced the RZ/V2H deploy dir: `[Finish DRP-AI Translator for V2H]`,
+  `sub_0000__CPU_DRP_TVM/{deploy.so (65 MB),deploy.json,deploy.params}` +
+  `preprocess/` (DRP-AI pre-processing objects). aarch64 — runs on the board.
+
+SDK gotchas the scripts now handle automatically: `run_drp_compiler.sh` ships
+non-executable and off-PATH (`chmod +x` + add tutorials to PATH); the quant
+script preprocesses calibration as ImageNet-224 instead of 640 (patched). And
+V2H **requires** the INT8 quantized flow — the plain FP `compile_onnx_model.py`
+drives a legacy translator path the i8 v1.11 layout lacks.
diff --git a/rzv2h/sdk_eval/_probe_sysroot.sh b/rzv2h/sdk_eval/_probe_sysroot.sh
new file mode 100644
index 0000000..0344994
--- /dev/null
+++ b/rzv2h/sdk_eval/_probe_sysroot.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Probe the RZ/V2H board rootfs (via the cross-SDK aarch64 sysroot) for the
+# GStreamer + Python stack our pipeline needs. Run inside drpai-tvm-v2h.
+# Target rootfs sysroot (NOT the x86_64-pokysdk-linux cross-compiler dir).
+SR=$(ls -d /opt/*/*/sysroots/*-poky-linux 2>/dev/null | grep -v pokysdk | head -1)
+[ -d "$SR" ] || SR=$(ls -d /opt/*/sysroots/*-poky-linux 2>/dev/null | grep -v pokysdk | head -1)
+[ -d "$SR" ] || { echo "no target sysroot found under /opt (run inside drpai-tvm-v2h)" >&2; exit 1; }; echo "sysroot = $SR"
+echo "--- python3 ---"; ls -d "$SR"/usr/lib/python3* 2>/dev/null | head -1
+echo "--- gstreamer core ---"; ls "$SR"/usr/lib/libgstreamer-1.0.so.* 2>/dev/null
+grep -h "Version" "$SR"/usr/lib/pkgconfig/gstreamer-1.0.pc 2>/dev/null
+echo "--- gst-python loader (libgstpython) ---"; find "$SR" -name 'libgstpython*' 2>/dev/null | head
+echo "--- GstAnalytics (lib + typelib) ---"
+find "$SR" -iname '*gstanalytics*' 2>/dev/null | head
+ls "$SR"/usr/lib/girepository-1.0/ 2>/dev/null | grep -iE 'Analytics|GstApp|GstBase|^Gst-' | head
+echo "--- python modules on target: gi / numpy / cairo / cv2 ---"
+for m in gi numpy cairo cv2; do
+  hit=$(find "$SR" -maxdepth 7 -path '*python3*' -iname "${m}" 2>/dev/null | head -1)
+  echo "$m: ${hit:-MISSING}"
+done
+echo "--- tvm / mera python runtime on target? ---"
+find "$SR" -iname '*tvm*' -o -iname '*mera*' 2>/dev/null | grep -i python | head
+echo "--- gstreamer plugins present (count) ---"
+ls "$SR"/usr/lib/gstreamer-1.0/*.so 2>/dev/null | wc -l
diff --git a/rzv2h/sdk_eval/build_image.sh b/rzv2h/sdk_eval/build_image.sh
new file mode 100755
index 0000000..7519a5d
--- /dev/null
+++ b/rzv2h/sdk_eval/build_image.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+# Build the Renesas DRP-AI TVM (Mera2) Docker image for RZ/V2H.
+#
+# This is the *faithful* compile/runtime stack (real mera2 + MERA runtime).
+# It needs two downloads that require a Renesas account login — put them in
+# ./assets first (this script cannot download them for you). Both arrive as
+# ZIPs and can be dropped in as-is:
+#
+#   DRP-AI Translator i8  (ZIP, contains DRP-AI_Translator_i8-*-Linux-x86_64-Install)
+#     https://www.renesas.com/software-tool/drp-ai-translator-i8   (Downloads tab)
+#   RZ/V2H AI SDK         (RTK0EF0180F*SJ.zip)
+#     https://www.renesas.com/us/en/software-tool/rzv2h-ai-software-development-kit
+#
+# The repo Dockerfile COPYs every ./*.sh in the context and runs it, plus
+# ./DRP-AI_Translator*-Install. So we assemble a CLEAN context holding only:
+# Dockerfile + the SDK toolchain installer (.sh, from the AI SDK zip) + the
+# Translator installer (from the Translator zip).
+set -euo pipefail
+cd "$(dirname "$0")"   # every relative path below is relative to this script's directory
+ASSETS="${ASSETS:-./assets}"
+CTX="${CTX:-./context}"
+PRODUCT="${PRODUCT:-V2H}"
+TAG="${TAG:-drpai-tvm-v2h}"
+
+mkdir -p "$ASSETS"
+TMPS=()
+cleanup() { for d in "${TMPS[@]:-}"; do [[ -n "$d" ]] && rm -rf "$d"; done; }   # remove the unzip scratch dirs
+trap cleanup EXIT
+
+# --- DRP-AI Translator i8: accept an extracted *-Install or the downloaded zip ---
+TR=$(ls "$ASSETS"/DRP-AI_Translator*-Linux*-x86_64-Install 2>/dev/null | head -n1 || true)
+if [[ -z "$TR" ]]; then
+  TRZIP=$(ls "$ASSETS"/*[Tt]ranslator*i8*.zip "$ASSETS"/*DRP-AI_Translator*.zip 2>/dev/null | head -n1 || true)
+  if [[ -n "$TRZIP" ]]; then
+    t=$(mktemp -d); TMPS+=("$t")
+    unzip -o -q "$TRZIP" -d "$t"
+    TR=$(find "$t" -iname "DRP-AI_Translator*-Linux*-x86_64-Install" | head -n1 || true)
+  fi
+fi
+
+# --- RZ/V2H AI SDK zip (any v6.x build number) ---
+ZIP=$(ls "$ASSETS"/RTK0EF0180F*SJ.zip 2>/dev/null | head -n1 || true)
+
+if [[ -z "$TR" || -z "$ZIP" ]]; then
+  echo "Missing gated downloads in $ASSETS (Renesas login required):" >&2
+  [[ -z "$TR"  ]] && echo "  - DRP-AI Translator i8 (zip or extracted *-Install)" >&2
+  [[ -z "$ZIP" ]] && echo "  - RZ/V2H AI SDK  (RTK0EF0180F*SJ.zip)" >&2
+  exit 1
+fi
+
+# Clean build context.
+rm -rf "$CTX" && mkdir -p "$CTX"
+wget -nc https://raw.githubusercontent.com/renesas-rz/rzv_drp-ai_tvm/main/Dockerfile \
+  -O "$CTX/Dockerfile"
+cp "$TR" "$CTX/"
+
+# Unzip the AI SDK and extract its Yocto toolchain installer (.sh) into context.
+s=$(mktemp -d); TMPS+=("$s")
+unzip -o -q "$ZIP" -d "$s"
+# The Yocto toolchain installer is the big *toolchain*.sh (e.g.
+# ai_sdk_setup/rz-vlp-...-rzv2h-evk-toolchain-5.0.11.sh). Pick the largest
+# match so we don't grab a small board/flash helper script by mistake.
+SDK_SH=$(find "$s" -iname "*toolchain*.sh" -printf '%s\t%p\n' | sort -rn | head -n1 | cut -f2-)
+[[ -n "$SDK_SH" ]] || { echo "No toolchain .sh found inside $ZIP" >&2; exit 1; }
+cp "$SDK_SH" "$CTX/"
+
+echo "Build context ready in $CTX:"
+ls -1 "$CTX"
+echo
+echo "Building image '$TAG' (PRODUCT=$PRODUCT) — builds the TVM fork, takes a while..."
+docker build --build-arg PRODUCT="$PRODUCT" -t "$TAG" "$CTX"
+
+cat <<EOF
+
+Done. Start it with this repo mounted:
+  docker run -it --rm -v "$(cd ../.. && pwd)":/workspace/gst-python-ml $TAG bash
+Inside, compile + run per this folder's README.md.
+EOF
diff --git a/rzv2h/sdk_eval/compile_x86_cpu.py b/rzv2h/sdk_eval/compile_x86_cpu.py
new file mode 100644
index 0000000..73229c3
--- /dev/null
+++ b/rzv2h/sdk_eval/compile_x86_cpu.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# compile_x86_cpu.py — functional x86 (host CPU) compile of an ONNX model using
+# the DRP-AI TVM (MERA fork) relay stack, for testing the MERA/TVM
+# graph_executor runtime WITHOUT a board/NPU/QEMU.
+#
+# It mirrors the TVM-backend half of the SDK's compile_cpu_only_onnx_model.py
+# but retargets to native "llvm" (host x86, default g++) and skips the DRP-AI
+# pre-processing runtime (we preprocess in Python in drpai_engine). Output:
+# <out>/deploy.{so,json,params} — loadable by tvm.contrib.graph_executor, i.e.
+# by the drpai_runtime shim's TVM backend.
+#
+# Run inside the drpai-tvm-v2h container:
+#   python3 compile_x86_cpu.py <model.onnx> <out_dir> [input_name] [C,H,W]
+#
+# input_name defaults to "images" and C,H,W to 3,640,640; the batch dimension
+# is always 1.
+import os
+import sys
+
+import onnx
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import build as _build, bind_params_by_name
+from tvm.relay.param_dict import save_param_dict
+from tvm.ir.transform import Sequential, PassContext
+
+USAGE = "usage: compile_x86_cpu.py <model.onnx> <out_dir> [input_name] [C,H,W]"
+
+# Both positional arguments are required; exit with the usage line rather than
+# an IndexError traceback when they are missing.
+if len(sys.argv) < 3:
+    sys.exit(USAGE)
+
+model_file = sys.argv[1]
+out_dir = sys.argv[2]
+input_name = sys.argv[3] if len(sys.argv) > 3 else "images"
+chw_arg = sys.argv[4] if len(sys.argv) > 4 else "3,640,640"
+try:
+    chw = [int(dim) for dim in chw_arg.split(",")]
+except ValueError:
+    sys.exit(f"invalid shape {chw_arg!r}: expected comma-separated integers\n{USAGE}")
+input_shape = [1] + chw
+
+# Check the model before touching the filesystem so a mistyped path does not
+# leave an empty output directory behind.
+if not os.path.isfile(model_file):
+    sys.exit(f"model file not found: {model_file}")
+
+os.makedirs(out_dir, exist_ok=True)
+print(f"[x86 compile] {model_file} input {input_name}={input_shape} -> {out_dir}")
+
+onnx_model = onnx.load_model(model_file)
+mod, params = relay.frontend.from_onnx(onnx_model, {input_name: input_shape})
+if params:
+    # Bind the weights into the graph as constants so FoldConstant can use them.
+    mod["main"] = bind_params_by_name(mod["main"], params)
+
+# Graph-level simplification before codegen.
+with PassContext(opt_level=3):
+    mod = Sequential([
+        transform.SimplifyInference(),
+        transform.FoldConstant(),
+        transform.FoldExplicitPadding(),
+        transform.BackwardFoldScaleAxis(),
+        transform.ForwardFoldScaleAxis(),
+        transform.FoldConstant(),
+        transform.DynamicToStatic(),
+        transform.RemoveUnusedFunctions(),
+    ])(mod)
+
+target = "llvm"  # native host (x86), no aarch64 cross target
+with PassContext(opt_level=3):
+    graph, lib, all_params = _build(mod, target=target, target_host=target, params=params)
+
+lib.export_library(os.path.join(out_dir, "deploy.so"))  # default host compiler -> x86 .so
+with open(os.path.join(out_dir, "deploy.json"), "w") as f:
+    f.write(graph)
+with open(os.path.join(out_dir, "deploy.params"), "wb") as f:
+    f.write(save_param_dict(all_params))
+print(f"[x86 compile finished] -> {out_dir}/deploy.so,deploy.json,deploy.params")
diff --git a/rzv2h/sdk_eval/x86_runtime_check.py b/rzv2h/sdk_eval/x86_runtime_check.py
new file mode 100644
index 0000000..d1ee523
--- /dev/null
+++ b/rzv2h/sdk_eval/x86_runtime_check.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+# x86_runtime_check.py — run a real input through the MERA/TVM graph_executor
+# (via the drpai_runtime shim's TVM backend) and check parity against the
+# known-good ONNX output. Run INSIDE the drpai-tvm-v2h container; needs the
+# x86 deploy dir + the pre-saved input/onnx-reference .npy files.
+#
+# Prints PASS when the max absolute difference is below 1e-2 and both outputs
+# decode to the same number of detections; otherwise prints CHECK.
+import sys
+import numpy as np
+
+sys.path.insert(0, "/work/rzv2h/emulation")   # drpai_runtime shim (TVM backend)
+sys.path.insert(0, "/work/plugins/python")     # utils.detection_decoder (pure numpy)
+
+import drpai_runtime
+from utils.detection_decoder import decode
+
+DEPLOY = "/work/rzv2h/yolo11m_x86_cpu"
+x = np.load("/work/rzv2h/_x86test_input.npy").astype(np.float32)
+ref = np.load("/work/rzv2h/_x86test_onnxout.npy").astype(np.float32).reshape(-1)
+
+rt = drpai_runtime.Runtime()
+# Explicit check instead of `assert`: under `python -O` an assert statement is
+# stripped, which would skip the load() call itself, not just the check.
+if not rt.load(DEPLOY):
+    sys.exit("drpai_runtime.load failed")
+rt.set_input(0, x)
+rt.run()
+out = np.asarray(rt.get_output(0), dtype=np.float32).reshape(-1)
+
+# Compare only the overlapping prefix. An empty overlap yields nan, and
+# nan < 1e-2 is False, so that case can never be reported as PASS.
+n = min(out.size, ref.size)
+maxdiff = float(np.max(np.abs(out[:n] - ref[:n]))) if n else float("nan")
+print(f"TVM out size={out.size} ref size={ref.size} max|TVM-ONNX|={maxdiff:.3e}")
+
+# Both buffers are reshaped to (1, 84, 8400) and decoded as "anchor_free".
+tvm_det = decode(out.reshape(1, 84, 8400), "anchor_free")[0]
+onnx_det = decode(ref.reshape(1, 84, 8400), "anchor_free")[0]
+print(f"detections  TVM={len(tvm_det['boxes'])}  ONNX={len(onnx_det['boxes'])}")
+if len(tvm_det["boxes"]):
+    print("TVM labels:", sorted(set(int(c) for c in tvm_det["labels"])))
+print("PASS" if maxdiff < 1e-2 and len(tvm_det["boxes"]) == len(onnx_det["boxes"]) else "CHECK")
diff --git a/rzv2h/yocto/README.md b/rzv2h/yocto/README.md
new file mode 100644
index 0000000..ccfbdb4
--- /dev/null
+++ b/rzv2h/yocto/README.md
@@ -0,0 +1 @@
+# Custom RZ/V2H image for the gst-python-ml pipeline
diff --git a/rzv2h/yocto/meta-gst-python-ml/conf/include/gstreamer-1.24.inc b/rzv2h/yocto/meta-gst-python-ml/conf/include/gstreamer-1.24.inc
new file mode 100644
index 0000000..cfa0a3d
--- /dev/null
+++ b/rzv2h/yocto/meta-gst-python-ml/conf/include/gstreamer-1.24.inc
@@ -0,0 +1,25 @@
+# Pin GStreamer to 1.24 across the stack so GstAnalytics is available.
+#
+# scarthgap's oe-core ships GStreamer 1.22.x. The 1.24 recipes must be present
+# in the build (see README: copy the gstreamer1.0* recipes from oe-core
+# styhead/master into recipes-multimedia/gstreamer/ of this layer, or layer in
+# a newer meta-oe). These PREFERRED_VERSION lines then select them.
+#
+# require this from local.conf:
+#   require ${TOPDIR}/../layers/meta-gst-python-ml/conf/include/gstreamer-1.24.inc
+
+GST_124 ?= "1.24.%"
+
+PREFERRED_VERSION_gstreamer1.0 = "${GST_124}"
+PREFERRED_VERSION_gstreamer1.0-plugins-base = "${GST_124}"
+PREFERRED_VERSION_gstreamer1.0-plugins-good = "${GST_124}"
+PREFERRED_VERSION_gstreamer1.0-plugins-bad = "${GST_124}"
+PREFERRED_VERSION_gstreamer1.0-plugins-ugly = "${GST_124}"
+PREFERRED_VERSION_gstreamer1.0-libav = "${GST_124}"
+PREFERRED_VERSION_gstreamer1.0-python = "${GST_124}"
+PREFERRED_VERSION_gstreamer1.0-rtsp-server = "${GST_124}"
+PREFERRED_VERSION_gstreamer1.0-vaapi = "${GST_124}"
+
+# GstAnalytics + the object-detection / tracking metas live in -plugins-bad.
+# Make sure analytics isn't disabled by a PACKAGECONFIG override.
+PACKAGECONFIG:append:pn-gstreamer1.0-plugins-bad = " analytics"
diff --git a/rzv2h/yocto/meta-gst-python-ml/conf/layer.conf b/rzv2h/yocto/meta-gst-python-ml/conf/layer.conf
new file mode 100644
index 0000000..262f68a
--- /dev/null
+++ b/rzv2h/yocto/meta-gst-python-ml/conf/layer.conf
@@ -0,0 +1,15 @@
+# meta-gst-python-ml — adds the runtime stack gst-python-ml needs on RZ/V2H.
+#
+# The RZ/V2H AI SDK v6.00 image is Yocto scarthgap (5.0.11) with GStreamer
+# 1.22.x. gst-python-ml requires GStreamer >= 1.24 (for GstAnalytics, the
+# metadata type every pyml_* element uses), the gst-python plugin loader, and
+# numpy/pycairo/pygobject/opencv. This layer carries those additions.
+BBPATH .= ":${LAYERDIR}"
+BBFILES += "${LAYERDIR}/recipes-*/*/*.bb ${LAYERDIR}/recipes-*/*/*.bbappend"
+
+BBFILE_COLLECTIONS += "gst-python-ml"
+BBFILE_PATTERN_gst-python-ml = "^${LAYERDIR}/"
+BBFILE_PRIORITY_gst-python-ml = "20"
+
+LAYERDEPENDS_gst-python-ml = "core openembedded-layer"
+LAYERSERIES_COMPAT_gst-python-ml = "scarthgap styhead"
diff --git a/rzv2h/yocto/meta-gst-python-ml/recipes-core/packagegroups/packagegroup-gst-python-ml.bb b/rzv2h/yocto/meta-gst-python-ml/recipes-core/packagegroups/packagegroup-gst-python-ml.bb
new file mode 100644
index 0000000..4a9cef9
--- /dev/null
+++ b/rzv2h/yocto/meta-gst-python-ml/recipes-core/packagegroups/packagegroup-gst-python-ml.bb
@@ -0,0 +1,26 @@
+SUMMARY = "Runtime stack for gst-python-ml on RZ/V2H (GStreamer 1.24 + Python)"
+LICENSE = "MIT"
+
+inherit packagegroup
+
+RDEPENDS:${PN} = " \
+    gstreamer1.0 \
+    gstreamer1.0-plugins-base \
+    gstreamer1.0-plugins-good \
+    gstreamer1.0-plugins-bad \
+    gstreamer1.0-libav \
+    gstreamer1.0-python \
+    python3-core \
+    python3-pygobject \
+    python3-numpy \
+    python3-pycairo \
+    python3-opencv \
+"
+# Notes:
+# - gstreamer1.0-python provides the libgstpython.so plugin loader that runs
+#   the pyml_* .py elements. It is NOT in the stock AI SDK image.
+# - GstAnalytics (used by base_objectdetector / tracker / overlay) ships in
+#   gstreamer1.0-plugins-bad once GStreamer is >= 1.24 with the analytics
+#   PACKAGECONFIG enabled (see conf/include/gstreamer-1.24.inc).
+# - The DRP-AI MERA/TVM *Python* runtime is not a stock Yocto package; install
+#   it onto the image separately (see ../README.md "DRP-AI runtime on board").
diff --git a/rzv2h/yocto/meta-gst-python-ml/recipes-multimedia/gst-python-ml/gst-python-ml_git.bb b/rzv2h/yocto/meta-gst-python-ml/recipes-multimedia/gst-python-ml/gst-python-ml_git.bb
new file mode 100644
index 0000000..3b3245b
--- /dev/null
+++ b/rzv2h/yocto/meta-gst-python-ml/recipes-multimedia/gst-python-ml/gst-python-ml_git.bb
@@ -0,0 +1,35 @@
+SUMMARY = "gst-python-ml elements (pyml_*) + DRP-AI engine for RZ/V2H"
+DESCRIPTION = "Installs the pure-Python GStreamer elements and sets GST_PLUGIN_PATH/PYTHONPATH."
+LICENSE = "LGPL-2.1-or-later"
+LIC_FILES_CHKSUM = "file://COPYING;md5=<FILL_IN>"
+
+# Point this at your gst-python-ml source. Examples:
+#   SRC_URI = "git://github.com/collabora/gst-python-ml.git;branch=main;protocol=https"
+#   SRCREV  = "<commit>"
+# or a local checkout via:  SRC_URI = "file:///path/to/gst-python-ml"
+SRC_URI = "git://github.com/collabora/gst-python-ml.git;branch=master;protocol=https"
+SRCREV = "${AUTOREV}"
+S = "${WORKDIR}/git"
+
+# Pure-Python elements: nothing to compile.
+do_compile[noexec] = "1"
+
+PYML_DIR = "${datadir}/gst-python-ml"
+
+do_install() {
+    install -d ${D}${PYML_DIR}
+    cp -r ${S}/plugins ${D}${PYML_DIR}/plugins
+
+    # profile.d snippet so GStreamer finds the .py elements and Python finds the pkg; the dollar signs are backslash-escaped so the existing values are appended when the snippet runs, not while it is generated.
+    install -d ${D}${sysconfdir}/profile.d
+    cat > ${D}${sysconfdir}/profile.d/gst-python-ml.sh <<EOF
+export GST_PLUGIN_PATH="${PYML_DIR}/plugins:\$GST_PLUGIN_PATH"
+export PYTHONPATH="${PYML_DIR}/plugins/python:\$PYTHONPATH"
+EOF
+}
+
+FILES:${PN} = "${PYML_DIR} ${sysconfdir}/profile.d/gst-python-ml.sh"
+
+# Runtime deps: the GStreamer/Python stack (see packagegroup) + the DRP-AI
+# Python runtime (provided out-of-band — see ../README.md).
+RDEPENDS:${PN} = "packagegroup-gst-python-ml"