From 5d2bbb214c08998d9a402e0074cf6f90266cb8d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20F=C4=85ferek?= <michal.faferek@42dot.ai>
Date: Thu, 19 Feb 2026 21:42:52 +0100
Subject: [PATCH 1/3] feat: add fault storm script and debounce demo config

- fault_storm.py: injects 9 noise faults + 1 real (MOTOR_OVERHEAT)
- medkit_params_debounce.yaml: confirmation_threshold=-3, healing enabled
- docker-compose.article3.yml: compose override for debounce config
- run-demo-debounce.sh: convenience script for debounce mode

Closes #34
---
 .../config/medkit_params_debounce.yaml        |  93 ++++++++++
 .../docker-compose.article3.yml               |  21 +++
 .../run-demo-debounce.sh                      | 105 +++++++++++
 .../scripts/fault_storm.py                    | 167 ++++++++++++++++++
 4 files changed, 386 insertions(+)
 create mode 100644 demos/turtlebot3_integration/config/medkit_params_debounce.yaml
 create mode 100644 demos/turtlebot3_integration/docker-compose.article3.yml
 create mode 100755 demos/turtlebot3_integration/run-demo-debounce.sh
 create mode 100644 demos/turtlebot3_integration/scripts/fault_storm.py

diff --git a/demos/turtlebot3_integration/config/medkit_params_debounce.yaml b/demos/turtlebot3_integration/config/medkit_params_debounce.yaml
new file mode 100644
index 0000000..d931df3
--- /dev/null
+++ b/demos/turtlebot3_integration/config/medkit_params_debounce.yaml
@@ -0,0 +1,93 @@
+# ros2_medkit gateway configuration for TurtleBot3 demo
+# ARTICLE 3 - DEBOUNCE VERSION
+# Differences from default:
+#   confirmation_threshold: -3 (was 0) — requires 3 sustained FAILED events
+#   healing_enabled: true (was false) — auto-heal after PASSED events
+#
+# Node runs under /diagnostics namespace, so we need to match that here
+diagnostics:
+  ros2_medkit_gateway:
+    ros__parameters:
+      server:
+        # Bind to all interfaces for Docker networking
+        host: "0.0.0.0"
+        port: 8080
+
+      refresh_interval_ms: 10000  # 10 seconds (default), reduces log spam
+
+      cors:
+        allowed_origins: ["*"]
+        allowed_methods: ["GET", "PUT", "POST", "DELETE", "OPTIONS"]
+        allowed_headers: ["Content-Type", "Accept"]
+        allow_credentials: false
+        max_age_seconds: 86400
+
+      max_parallel_topic_samples: 10
+
+      # Discovery configuration
+      discovery_mode: "hybrid"  # runtime_only, manifest_only, or hybrid
+      manifest_path: ""  # Will be set via launch argument
+      manifest_strict_validation: true
+
+      discovery:
+        runtime:
+          create_synthetic_components: false  # Manifest defines components
+
+# Fault Manager configuration (runs in root namespace)
+fault_manager:
+  ros__parameters:
+    # Storage configuration
+    storage_type: "sqlite"
+    database_path: "/var/lib/ros2_medkit/faults.db"
+
+    # === DEBOUNCE CONFIGURATION (Article 3) ===
+    confirmation_threshold: -3   # Need 3 sustained FAILED events to confirm
+    healing_enabled: true        # Auto-heal when problem resolves
+    healing_threshold: 3         # Need 3 PASSED events to heal
+    auto_confirm_after_sec: 0.0  # Disabled
+
+    # Snapshot configuration (freeze frames)
+    snapshots:
+      enabled: true
+      background_capture: true  # Non-blocking capture
+      timeout_sec: 2.0
+      max_message_size: 131072  # 128KB max per message
+
+      # Topics to capture for all faults
+      default_topics:
+        - /odom
+        - /amcl_pose
+        - /scan
+        - /tf
+        - /navigate_to_pose/_action/status
+
+      # Rosbag recording configuration
+      rosbag:
+        enabled: true
+        duration_sec: 10.0  # Record 10 seconds before fault confirmation
+        duration_after_sec: 2.0  # Record 2 seconds after confirmation
+        lazy_start: false  # Always recording (ring buffer)
+        format: "mcap"  # MCAP format (recommended for cross-platform)
+        storage_path: "/var/lib/ros2_medkit/rosbags"
+        max_bag_size_mb: 100  # Max size per rosbag file
+        max_total_storage_mb: 1000  # 1GB total storage limit
+        auto_cleanup: true  # Cleanup rosbags on fault clear
+
+        # Topics to record (use 'config' or 'all')
+        topics: "config"  # Use include/exclude lists below
+        include_topics:
+          - /odom
+          - /amcl_pose
+          - /scan
+          - /cmd_vel
+          - /tf
+          - /tf_static
+          - /navigate_to_pose/_action/status
+          - /navigate_to_pose/_action/feedback
+          - /local_costmap/costmap
+          - /global_costmap/costmap
+          - /plan
+          - /diagnostics
+        exclude_topics:
+          - /rosout
+          - /parameter_events
diff --git a/demos/turtlebot3_integration/docker-compose.article3.yml b/demos/turtlebot3_integration/docker-compose.article3.yml
new file mode 100644
index 0000000..5ba2f56
--- /dev/null
+++ b/demos/turtlebot3_integration/docker-compose.article3.yml
@@ -0,0 +1,21 @@
+# Docker Compose override for Article 3 (Debounce Demo)
+#
+# Usage:
+#   STORM (no debounce, default config):
+#     docker compose --profile cpu up -d
+#
+#   DEBOUNCE (with filtering):
+#     docker compose --profile cpu -f docker-compose.yml -f docker-compose.article3.yml up -d
+#
+# The override mounts the debounce config over the default one inside the container.
+# colcon build --symlink-install means the installed config points to the source,
+# so mounting over the source path works.
+
+services:
+  turtlebot3-demo:
+    volumes:
+      - ./config/medkit_params_debounce.yaml:/root/demo_ws/src/turtlebot3_medkit_demo/config/medkit_params.yaml:ro
+
+  turtlebot3-demo-nvidia:
+    volumes:
+      - ./config/medkit_params_debounce.yaml:/root/demo_ws/src/turtlebot3_medkit_demo/config/medkit_params.yaml:ro
diff --git a/demos/turtlebot3_integration/run-demo-debounce.sh b/demos/turtlebot3_integration/run-demo-debounce.sh
new file mode 100755
index 0000000..3499d46
--- /dev/null
+++ b/demos/turtlebot3_integration/run-demo-debounce.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# TurtleBot3 + ros2_medkit Demo Runner — DEBOUNCE MODE (Article 3)
+#
+# Same as run-demo.sh but uses debounce config:
+#   confirmation_threshold: -3 (requires sustained failure)
+#   healing_enabled: true (auto-heal on recovery)
+#
+# Compare with:
+#   ./run-demo.sh              → STORM (no debounce, threshold 0)
+#   ./run-demo-debounce.sh     → CALM  (debounce, threshold -3)
+
+set -eu
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+echo "🛡️  TurtleBot3 + ros2_medkit — DEBOUNCE MODE"
+echo "=============================================="
+echo "  confirmation_threshold: -3 (need 3 sustained FAILED events)"
+echo "  healing_enabled: true (auto-heal after 3 PASSED events)"
+echo ""
+
+# Set TurtleBot3 environment variables (no empty leading entry in GAZEBO_MODEL_PATH when unset)
+export TURTLEBOT3_MODEL=${TURTLEBOT3_MODEL:-burger}
+export GAZEBO_MODEL_PATH="${GAZEBO_MODEL_PATH:+${GAZEBO_MODEL_PATH}:}/opt/ros/jazzy/share/turtlebot3_gazebo/models"
+
+# Fail fast when the docker CLI is not on PATH
+if ! command -v docker &> /dev/null; then
+    echo "Error: Docker is not installed"
+    exit 1
+fi
+
+# Setup X11 forwarding for GUI (Gazebo); non-fatal, a failure only prints a warning
+echo "Setting up X11 forwarding..."  # NOTE(review): runs before args are parsed, so also under --headless
+xhost +local:docker 2>/dev/null || {
+    echo "   Warning: xhost failed. GUI may not work."
+}
+
+# Exit hook: revoke the X11 access granted to local Docker clients above
+cleanup() {
+    echo ""
+    echo "Cleaning up..."
+    xhost -local:docker 2>/dev/null || true  # best effort; never fail the exit path
+    echo "Done!"
+}
+trap cleanup EXIT  # NOTE(review): in detached mode this fires right after 'up -d' returns; confirm the GUI keeps X access
+
+# Parse arguments (defaults: GUI enabled, detached, "cpu" compose profile)
+HEADLESS_MODE="false"
+DETACH_MODE="true"
+PROFILE="cpu"
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --nvidia)   PROFILE="nvidia" ;;  # use the "nvidia" compose profile instead of "cpu"
+        --headless) HEADLESS_MODE="true" ;;
+        --attached) DETACH_MODE="false" ;;  # keep compose in the foreground (no -d)
+        *)          echo "Unknown option: $1"; exit 1 ;;
+    esac
+    shift
+done
+
+export HEADLESS=$HEADLESS_MODE  # NOTE(review): presumably consumed by docker-compose.yml; confirm
+
+DETACH_FLAG=""  # stays empty with --attached, so `up` runs in the foreground
+if [[ "$DETACH_MODE" == "true" ]]; then
+    DETACH_FLAG="-d"
+fi
+
+echo "Building and starting demo (debounce mode)..."
+echo ""
+echo "🌐 REST API: http://localhost:8080/api/v1/"
+echo "🌐 Web UI:   http://localhost:3000/"
+echo ""
+
+# Use docker-compose override to mount debounce config
+if docker compose version &> /dev/null; then
+    docker compose --profile "$PROFILE" \
+        -f docker-compose.yml \
+        -f docker-compose.article3.yml \
+        build && \
+    docker compose --profile "$PROFILE" \
+        -f docker-compose.yml \
+        -f docker-compose.article3.yml \
+        up ${DETACH_FLAG}
+else
+    docker-compose --profile "$PROFILE" \
+        -f docker-compose.yml \
+        -f docker-compose.article3.yml \
+        build && \
+    docker-compose --profile "$PROFILE" \
+        -f docker-compose.yml \
+        -f docker-compose.article3.yml \
+        up ${DETACH_FLAG}
+fi
+
+if [[ "$DETACH_MODE" == "true" ]]; then
+    echo ""
+    echo "✅ Demo started in DEBOUNCE mode!"
+    echo ""
+    echo "Run the article 3 demo sequence:"
+    echo "   ./article3-demo.sh"
+    echo ""
+    echo "🛑 To stop: ./stop-demo.sh"
+fi
diff --git a/demos/turtlebot3_integration/scripts/fault_storm.py b/demos/turtlebot3_integration/scripts/fault_storm.py
new file mode 100644
index 0000000..2974c48
--- /dev/null
+++ b/demos/turtlebot3_integration/scripts/fault_storm.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+"""Fault storm injector for ros2_medkit debounce demo.
+
+Fires 9 noise faults (1 report each) + 1 real problem (5 sustained reports),
+interleaved so the real fault is buried in the noise.
+
+  Storm (threshold=0):   all 10 CONFIRMED - wall of warnings, real issue invisible
+  Debounce (threshold=-3): 9 PREFAILED + 1 CONFIRMED - real issue pops out
+
+Requires ros2_medkit built and sourced, with fault_manager running (/fault_manager/report_fault).
+
+Usage (standalone):
+    python3 fault_storm.py
+
+Usage (Docker demo):
+    docker exec turtlebot3_medkit_demo bash -c \
+        "source /opt/ros/jazzy/setup.bash && \
+         source /root/demo_ws/install/setup.bash && \
+         python3 /root/demo_ws/src/turtlebot3_medkit_demo/scripts/fault_storm.py"
+"""
+
+import rclpy
+from rclpy.node import Node
+from ros2_medkit_msgs.srv import ReportFault
+import time
+
+
+# === THE REAL PROBLEM (sustained, confirms even with debounce) ===
+REAL_FAULT = {
+    "fault_code": "MOTOR_OVERHEAT",
+    "severity": 1,  # WARN (same as noise — invisible in storm mode)
+    "description": "Drive motor temperature 92C (limit: 80C)",
+    "source_id": "/drive/thermal_monitor",
+    "burst": 5,  # number of FAILED reports to send (>= 3 needed to confirm at threshold -3)
+}
+
+# === NOISE: 9 faults across 3 categories → wall of similar alerts ===
+NOISE_FAULTS = [
+    # Sensor noise cluster (4 faults)
+    {
+        "fault_code": "SENSOR_NOISE_LIDAR",
+        "severity": 1,
+        "description": "LiDAR std dev: 0.12m (limit: 0.03m)",
+        "source_id": "/sensors/lidar",
+    },
+    {
+        "fault_code": "SENSOR_NOISE_IMU",
+        "severity": 1,
+        "description": "IMU gyro drift: 1.8 deg/s (limit: 0.5 deg/s)",
+        "source_id": "/sensors/imu",
+    },
+    {
+        "fault_code": "SENSOR_NOISE_CAMERA",
+        "severity": 1,
+        "description": "Depth camera: 3 frames dropped in 1s",
+        "source_id": "/sensors/camera",
+    },
+    {
+        "fault_code": "SENSOR_NOISE_ODOM",
+        "severity": 1,
+        "description": "Wheel odometry jitter: 0.12 m/s (limit: 0.05 m/s)",
+        "source_id": "/sensors/odom",
+    },
+    # Nav timeout cluster (4 faults)
+    {
+        "fault_code": "NAV_TIMEOUT_CONTROLLER",
+        "severity": 1,
+        "description": "Controller loop: 62ms (limit: 50ms)",
+        "source_id": "/nav/controller",
+    },
+    {
+        "fault_code": "NAV_TIMEOUT_PLANNER",
+        "severity": 1,
+        "description": "Planner response: 320ms (limit: 200ms)",
+        "source_id": "/nav/planner",
+    },
+    {
+        "fault_code": "NAV_TIMEOUT_COSTMAP",
+        "severity": 1,
+        "description": "Costmap update: 250ms (limit: 100ms)",
+        "source_id": "/nav/costmap",
+    },
+    {
+        "fault_code": "NAV_TIMEOUT_TF",
+        "severity": 1,
+        "description": "TF lookup: 85ms (limit: 50ms)",
+        "source_id": "/nav/tf_monitor",
+    },
+    # Comms cluster (1 fault)
+    {
+        "fault_code": "COMM_LATENCY_WIFI",
+        "severity": 1,
+        "description": "WiFi round-trip: 230ms (limit: 100ms)",
+        "source_id": "/network/wifi_monitor",
+    },
+]
+
+
+class FaultStormNode(Node):  # reports the demo faults to fault_manager
+    def __init__(self):
+        super().__init__("fault_storm_injector")
+        self.client = self.create_client(  # ReportFault service client
+            ReportFault, "/fault_manager/report_fault"
+        )
+        self.get_logger().info("Waiting for fault_manager...")
+        self.client.wait_for_service(timeout_sec=5.0)
+        self.get_logger().info("Connected")
+
+    def fire(self, fault_code, severity, description, source_id):
+        req = ReportFault.Request()
+        req.fault_code = fault_code
+        req.event_type = ReportFault.Request.EVENT_FAILED  # always a FAILED event
+        req.severity = severity
+        req.description = description
+        req.source_id = source_id
+        future = self.client.call_async(req)
+        rclpy.spin_until_future_complete(self, future, timeout_sec=2.0)  # wait at most 2 s
+        return future.result() and future.result().accepted  # falsy on no reply or rejection
+
+    def run_storm(self):
+        self.get_logger().info("=== STORM ===")
+        n = 0  # reports attempted so far (counted whether or not they were accepted)
+
+        # Interleave: noise, noise, REAL, noise, noise, REAL, ...
+        # MOTOR_OVERHEAT buried among SENSOR_NOISE, NAV_TIMEOUT, and COMM faults
+        real_sent = 0  # MOTOR_OVERHEAT reports sent so far
+        noise_idx = 0  # index of the next NOISE_FAULTS entry to send
+        sequence = [
+            "noise", "noise", "REAL",       # lidar, imu, MOTOR #1
+            "noise", "noise", "REAL",       # camera, odom, MOTOR #2
+            "noise", "noise", "REAL",       # controller, planner, MOTOR #3
+            "noise", "noise", "noise",      # costmap, tf, wifi
+            "REAL", "REAL",                 # MOTOR #4, #5
+        ]
+
+        for step in sequence:
+            if step == "REAL" and real_sent < REAL_FAULT["burst"]:
+                f = REAL_FAULT
+                real_sent += 1
+                label = f"MOTOR_OVERHEAT #{real_sent}"
+            elif step == "noise" and noise_idx < len(NOISE_FAULTS):
+                f = NOISE_FAULTS[noise_idx]
+                noise_idx += 1
+                label = f["fault_code"]
+            else:
+                continue  # slot has nothing left to send (burst or noise list used up)
+
+            ok = self.fire(f["fault_code"], f["severity"],
+                           f["description"], f["source_id"])
+            n += 1
+            self.get_logger().info(
+                f"  [{n:2d}] {label} -> {'OK' if ok else 'FAIL'}")
+            time.sleep(0.4)  # pace the reports 0.4 s apart
+
+        self.get_logger().info(f"=== DONE: {n} events ===")
+
+
+def main():  # entry point: init rclpy, fire one storm, tear down
+    rclpy.init()
+    node = FaultStormNode()
+    node.run_storm()
+    node.destroy_node()  # NOTE(review): skipped if anything above raises; consider try/finally
+    rclpy.shutdown()
+
+
+if __name__ == "__main__":
+    main()

From 57266189286f9e5db5317636c5587c4b8bf75bc4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20F=C4=85ferek?= <michal.faferek@42dot.ai>
Date: Thu, 19 Feb 2026 21:46:19 +0100
Subject: [PATCH 2/3] refactor: rename docker-compose.article3.yml to
 docker-compose.debounce.yml

Remove internal article references from compose override and
run-demo-debounce.sh.
---
 ...e.article3.yml => docker-compose.debounce.yml} |  4 ++--
 demos/turtlebot3_integration/run-demo-debounce.sh | 15 ++++++++-------
 2 files changed, 10 insertions(+), 9 deletions(-)
 rename demos/turtlebot3_integration/{docker-compose.article3.yml => docker-compose.debounce.yml} (89%)

diff --git a/demos/turtlebot3_integration/docker-compose.article3.yml b/demos/turtlebot3_integration/docker-compose.debounce.yml
similarity index 89%
rename from demos/turtlebot3_integration/docker-compose.article3.yml
rename to demos/turtlebot3_integration/docker-compose.debounce.yml
index 5ba2f56..9585713 100644
--- a/demos/turtlebot3_integration/docker-compose.article3.yml
+++ b/demos/turtlebot3_integration/docker-compose.debounce.yml
@@ -1,11 +1,11 @@
-# Docker Compose override for Article 3 (Debounce Demo)
+# Docker Compose override for debounce mode
 #
 # Usage:
 #   STORM (no debounce, default config):
 #     docker compose --profile cpu up -d
 #
 #   DEBOUNCE (with filtering):
-#     docker compose --profile cpu -f docker-compose.yml -f docker-compose.article3.yml up -d
+#     docker compose --profile cpu -f docker-compose.yml -f docker-compose.debounce.yml up -d
 #
 # The override mounts the debounce config over the default one inside the container.
 # colcon build --symlink-install means the installed config points to the source,
diff --git a/demos/turtlebot3_integration/run-demo-debounce.sh b/demos/turtlebot3_integration/run-demo-debounce.sh
index 3499d46..ff834d5 100755
--- a/demos/turtlebot3_integration/run-demo-debounce.sh
+++ b/demos/turtlebot3_integration/run-demo-debounce.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# TurtleBot3 + ros2_medkit Demo Runner — DEBOUNCE MODE (Article 3)
+# TurtleBot3 + ros2_medkit Demo Runner - DEBOUNCE MODE
 #
 # Same as run-demo.sh but uses debounce config:
 #   confirmation_threshold: -3 (requires sustained failure)
@@ -77,20 +77,20 @@ echo ""
 if docker compose version &> /dev/null; then
     docker compose --profile "$PROFILE" \
         -f docker-compose.yml \
-        -f docker-compose.article3.yml \
+        -f docker-compose.debounce.yml \
         build && \
     docker compose --profile "$PROFILE" \
         -f docker-compose.yml \
-        -f docker-compose.article3.yml \
+        -f docker-compose.debounce.yml \
         up ${DETACH_FLAG}
 else
     docker-compose --profile "$PROFILE" \
         -f docker-compose.yml \
-        -f docker-compose.article3.yml \
+        -f docker-compose.debounce.yml \
         build && \
     docker-compose --profile "$PROFILE" \
         -f docker-compose.yml \
-        -f docker-compose.article3.yml \
+        -f docker-compose.debounce.yml \
         up ${DETACH_FLAG}
 fi
 
@@ -98,8 +98,9 @@ if [[ "$DETACH_MODE" == "true" ]]; then
     echo ""
     echo "✅ Demo started in DEBOUNCE mode!"
     echo ""
-    echo "Run the article 3 demo sequence:"
-    echo "   ./article3-demo.sh"
+    echo "Fire fault storm to see debounce in action:"
+    echo "   docker exec turtlebot3_medkit_demo bash -c \\"
+    echo "     'source /opt/ros/jazzy/setup.bash && source /root/demo_ws/install/setup.bash && python3 /root/demo_ws/src/turtlebot3_medkit_demo/scripts/fault_storm.py'"
     echo ""
     echo "🛑 To stop: ./stop-demo.sh"
 fi

From 451ab8e84af906e2462b631202ae79b4184a5302 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20F=C4=85ferek?= <michal.faferek@42dot.ai>
Date: Thu, 19 Feb 2026 21:57:10 +0100
Subject: [PATCH 3/3] fix: address PR review comments

- Add Apache 2.0 license header to fault_storm.py
- Handle wait_for_service() timeout with error instead of silent continue
- Document nvidia container name variant in docstring
- Use correct container name based on profile in run-demo-debounce.sh
---
 .../run-demo-debounce.sh                      |  7 +++++-
 .../scripts/fault_storm.py                    | 23 ++++++++++++++++++-
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/demos/turtlebot3_integration/run-demo-debounce.sh b/demos/turtlebot3_integration/run-demo-debounce.sh
index ff834d5..eb541db 100755
--- a/demos/turtlebot3_integration/run-demo-debounce.sh
+++ b/demos/turtlebot3_integration/run-demo-debounce.sh
@@ -95,11 +95,16 @@ else
 fi
 
 if [[ "$DETACH_MODE" == "true" ]]; then
+    if [[ "$PROFILE" == "nvidia" ]]; then
+        CONTAINER="turtlebot3_medkit_demo_nvidia"
+    else
+        CONTAINER="turtlebot3_medkit_demo"
+    fi
     echo ""
     echo "✅ Demo started in DEBOUNCE mode!"
     echo ""
     echo "Fire fault storm to see debounce in action:"
-    echo "   docker exec turtlebot3_medkit_demo bash -c \\"
+    echo "   docker exec $CONTAINER bash -c \\"
     echo "     'source /opt/ros/jazzy/setup.bash && source /root/demo_ws/install/setup.bash && python3 /root/demo_ws/src/turtlebot3_medkit_demo/scripts/fault_storm.py'"
     echo ""
     echo "🛑 To stop: ./stop-demo.sh"
diff --git a/demos/turtlebot3_integration/scripts/fault_storm.py b/demos/turtlebot3_integration/scripts/fault_storm.py
index 2974c48..11bdd5e 100644
--- a/demos/turtlebot3_integration/scripts/fault_storm.py
+++ b/demos/turtlebot3_integration/scripts/fault_storm.py
@@ -1,4 +1,18 @@
 #!/usr/bin/env python3
+# Copyright 2026 selfpatch
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 """Fault storm injector for ros2_medkit debounce demo.
 
 Fires 9 noise faults (1 report each) + 1 real problem (5 sustained reports),
@@ -13,10 +27,13 @@
     python3 fault_storm.py
 
 Usage (Docker demo):
+    # CPU profile (default):
     docker exec turtlebot3_medkit_demo bash -c \
         "source /opt/ros/jazzy/setup.bash && \
          source /root/demo_ws/install/setup.bash && \
          python3 /root/demo_ws/src/turtlebot3_medkit_demo/scripts/fault_storm.py"
+    # NVIDIA profile:
+    docker exec turtlebot3_medkit_demo_nvidia bash -c ...
 """
 
 import rclpy
@@ -103,7 +120,11 @@ def __init__(self):
             ReportFault, "/fault_manager/report_fault"
         )
         self.get_logger().info("Waiting for fault_manager...")
-        self.client.wait_for_service(timeout_sec=5.0)
+        if not self.client.wait_for_service(timeout_sec=5.0):
+            self.get_logger().error(
+                "fault_manager service not available after 5s; aborting."
+            )
+            raise RuntimeError("fault_manager service not available")
         self.get_logger().info("Connected")
 
     def fire(self, fault_code, severity, description, source_id):