diff --git a/demos/sensor_diagnostics/Dockerfile b/demos/sensor_diagnostics/Dockerfile index 58a12e7..1295195 100644 --- a/demos/sensor_diagnostics/Dockerfile +++ b/demos/sensor_diagnostics/Dockerfile @@ -16,6 +16,7 @@ RUN apt-get update && apt-get install -y \ python3-requests \ nlohmann-json3-dev \ libcpp-httplib-dev \ + sqlite3 \ libsqlite3-dev \ git \ curl \ @@ -24,14 +25,13 @@ RUN apt-get update && apt-get install -y \ # Clone ros2_medkit from GitHub (gateway + dependencies) WORKDIR ${COLCON_WS}/src -RUN git clone --depth 1 --recurse-submodules https://github.com/selfpatch/ros2_medkit.git && \ +RUN git clone --depth 1 https://github.com/selfpatch/ros2_medkit.git && \ mv ros2_medkit/src/ros2_medkit_gateway . && \ mv ros2_medkit/src/ros2_medkit_serialization . && \ mv ros2_medkit/src/ros2_medkit_msgs . && \ mv ros2_medkit/src/ros2_medkit_fault_manager . && \ mv ros2_medkit/src/ros2_medkit_fault_reporter . && \ mv ros2_medkit/src/ros2_medkit_diagnostic_bridge . && \ - mv ros2_medkit/src/dynamic_message_introspection/dynmsg . && \ rm -rf ros2_medkit # Copy demo package @@ -55,5 +55,5 @@ RUN echo "source /opt/ros/jazzy/setup.bash" >> ~/.bashrc && \ # Expose gateway port EXPOSE 8080 -# Default command: launch the demo -CMD ["bash", "-c", "source /opt/ros/jazzy/setup.bash && source /root/demo_ws/install/setup.bash && ros2 launch sensor_diagnostics_demo demo.launch.py"] +# Default command: create storage dirs (volume mount hides build-time mkdir) and launch +CMD ["bash", "-c", "mkdir -p /var/lib/ros2_medkit/rosbags && source /opt/ros/jazzy/setup.bash && source /root/demo_ws/install/setup.bash && ros2 launch sensor_diagnostics_demo demo.launch.py"] diff --git a/demos/sensor_diagnostics/check-demo.sh b/demos/sensor_diagnostics/check-demo.sh index 8da9027..48cec64 100755 --- a/demos/sensor_diagnostics/check-demo.sh +++ b/demos/sensor_diagnostics/check-demo.sh @@ -88,7 +88,61 @@ echo "These parameters can be modified at runtime to inject faults..." 
curl -s "${API_BASE}/apps/lidar-sim/configurations" | jq '.items[] | {name: .name, value: .value, type: .type}' echo_step "9. Checking Current Faults" -curl -s "${API_BASE}/faults" | jq '.' +FAULTS_JSON=$(curl -s "${API_BASE}/faults") +echo "$FAULTS_JSON" | jq '.' + +# If there are faults, demonstrate snapshot / bulk-data endpoints +FAULT_COUNT=$(echo "$FAULTS_JSON" | jq '.items | length') +if [ "${FAULT_COUNT:-0}" -gt 0 ]; then # empty count (gateway down / non-JSON reply) is treated as zero faults + # Find the first fault that has both a non-null entity_id and code + FIRST_FAULT_ENTRY=$(echo "$FAULTS_JSON" | jq -r '.items[] | select(.entity_id != null and .code != null) | "\(.entity_type) \(.entity_id) \(.code)"' | head -n 1) + + if [ -z "$FIRST_FAULT_ENTRY" ]; then + echo "" + echo " Faults exist but none provide both 'entity_id' and 'code'." + echo " Skipping snapshot and bulk-data demonstration." + else + FIRST_ENTITY_TYPE=$(echo "$FIRST_FAULT_ENTRY" | awk '{print $1}') + FIRST_ENTITY=$(echo "$FIRST_FAULT_ENTRY" | awk '{print $2}') + FIRST_FAULT=$(echo "$FIRST_FAULT_ENTRY" | awk '{print $3}') + # Map entity_type to plural resource path (e.g., "app" -> "apps") + case "$FIRST_ENTITY_TYPE" in + app|apps) ENTITY_PATH="apps" ;; + component|components) ENTITY_PATH="components" ;; + area|areas) ENTITY_PATH="areas" ;; + *) ENTITY_PATH="apps" ;; + esac + + echo_step "10. Fault Detail with Environment Data (Snapshots)" + echo "Fetching fault ${FIRST_FAULT} on ${ENTITY_PATH}/${FIRST_ENTITY}..." + curl -s "${API_BASE}/${ENTITY_PATH}/${FIRST_ENTITY}/faults/${FIRST_FAULT}" | jq '{ + code: .item.code, + status: .item.status, + environment_data: { + extended_data_records: .environment_data.extended_data_records, + snapshot_count: (.environment_data.snapshots | length) + } + }' + + echo_step "11. Bulk-Data Categories (Rosbag Recordings)" + echo "Checking available bulk-data categories..." + curl -s "${API_BASE}/${ENTITY_PATH}/${FIRST_ENTITY}/bulk-data" | jq '.' + + echo_step "12. 
Bulk-Data Descriptors (Rosbag Files)" + echo "Listing available rosbag recordings..." + curl -s "${API_BASE}/${ENTITY_PATH}/${FIRST_ENTITY}/bulk-data/rosbags" | jq '.items[] | { + id: .id, + name: .name, + size: .size, + mimetype: .mimetype, + "x-medkit": ."x-medkit" + }' + fi +else + echo "" + echo " No active faults. Inject a fault first to see snapshot/bulk-data features:" + echo " ./inject-noise.sh && sleep 5 && bash $0" +fi echo "" echo_success "API demonstration complete!" @@ -100,9 +154,10 @@ echo " ./inject-nan.sh # Inject NaN values" echo " ./inject-drift.sh # Inject sensor drift" echo " ./restore-normal.sh # Restore normal operation" echo "" +echo "📸 After injecting a fault, check snapshots and rosbags:" +echo " curl ${API_BASE}/faults | jq # List faults" +echo " curl ${API_BASE}/components/lidar-unit/faults/<FAULT_CODE> | jq # Fault detail + snapshots" +echo " curl ${API_BASE}/components/lidar-unit/bulk-data/rosbags | jq # List rosbag recordings" +echo "" echo "🌐 Web UI: http://localhost:3000" echo "🌐 REST API: http://localhost:8080/api/v1/" -echo "" -echo "📖 More examples:" -echo " curl ${API_BASE}/apps/imu-sim/configurations | jq # IMU parameters" -echo " curl ${API_BASE}/apps/gps-sim/data/fix | jq # GPS data" diff --git a/demos/sensor_diagnostics/config/medkit_params.yaml b/demos/sensor_diagnostics/config/medkit_params.yaml index 4bffa95..17d9a78 100644 --- a/demos/sensor_diagnostics/config/medkit_params.yaml +++ b/demos/sensor_diagnostics/config/medkit_params.yaml @@ -26,3 +26,55 @@ diagnostics: discovery: runtime: create_synthetic_components: false # Manifest defines components + +# Fault Manager configuration (runs in root namespace) +fault_manager: + ros__parameters: + # Storage configuration + storage_type: "sqlite" + database_path: "/var/lib/ros2_medkit/faults.db" + + # Debounce configuration + confirmation_threshold: 0 # Immediate confirmation + healing_enabled: false + healing_threshold: 3 + auto_confirm_after_sec: 0.0 + + # Snapshot configuration (freeze 
frames) + snapshots: + enabled: true + background_capture: true # Non-blocking capture + timeout_sec: 2.0 + max_message_size: 131072 # 128KB max per message + + # Topics to capture for all faults + default_topics: + - /sensors/scan + - /sensors/imu + - /sensors/fix + - /sensors/image_raw + - /diagnostics + + # Rosbag recording configuration + rosbag: + enabled: true + duration_sec: 10.0 # Record 10 seconds before fault confirmation + duration_after_sec: 2.0 # Record 2 seconds after confirmation + lazy_start: false # Always recording (ring buffer) + format: "mcap" # MCAP format (recommended for cross-platform) + storage_path: "/var/lib/ros2_medkit/rosbags" + max_bag_size_mb: 100 # Max size per rosbag file + max_total_storage_mb: 1000 # 1GB total storage limit + auto_cleanup: true # Cleanup rosbags on fault clear + + # Topics to record (use 'config' or 'all') + topics: "config" # Use include/exclude lists below + include_topics: + - /sensors/scan + - /sensors/imu + - /sensors/fix + - /sensors/image_raw + - /diagnostics + exclude_topics: + - /rosout + - /parameter_events diff --git a/demos/sensor_diagnostics/docker-compose.yml b/demos/sensor_diagnostics/docker-compose.yml index 2bf4f8a..541d864 100644 --- a/demos/sensor_diagnostics/docker-compose.yml +++ b/demos/sensor_diagnostics/docker-compose.yml @@ -14,7 +14,8 @@ services: # Default command launches the full demo # Override with: docker compose run sensor-demo bash command: > - bash -c "source /opt/ros/jazzy/setup.bash && + bash -c "mkdir -p /var/lib/ros2_medkit/rosbags && + source /opt/ros/jazzy/setup.bash && source /root/demo_ws/install/setup.bash && ros2 launch sensor_diagnostics_demo demo.launch.py" @@ -39,7 +40,8 @@ services: ports: - "8080:8080" command: > - bash -c "source /opt/ros/jazzy/setup.bash && + bash -c "mkdir -p /var/lib/ros2_medkit/rosbags && + source /opt/ros/jazzy/setup.bash && source /root/demo_ws/install/setup.bash && ros2 launch sensor_diagnostics_demo demo.launch.py & sleep 10 && diff --git 
a/demos/sensor_diagnostics/launch/demo.launch.py b/demos/sensor_diagnostics/launch/demo.launch.py index 796ffd1..4a35590 100644 --- a/demos/sensor_diagnostics/launch/demo.launch.py +++ b/demos/sensor_diagnostics/launch/demo.launch.py @@ -125,13 +125,17 @@ def generate_launch_description(): # ===== Fault Manager (at root namespace) ===== # Services at /fault_manager/* (e.g., /fault_manager/report_fault) # Both paths report here: diagnostic_bridge (legacy) and anomaly_detector (modern) + # Also handles snapshot and rosbag capture when faults are confirmed Node( package="ros2_medkit_fault_manager", executable="fault_manager_node", name="fault_manager", namespace="", # Root namespace so services are at /fault_manager/* output="screen", - parameters=[{"use_sim_time": use_sim_time}], + parameters=[ + medkit_params_file, + {"use_sim_time": use_sim_time}, + ], ), ] ) diff --git a/demos/turtlebot3_integration/Dockerfile b/demos/turtlebot3_integration/Dockerfile index 52437ba..8988a49 100644 --- a/demos/turtlebot3_integration/Dockerfile +++ b/demos/turtlebot3_integration/Dockerfile @@ -45,14 +45,13 @@ RUN apt-get update && apt-get install -y \ # Clone ros2_medkit from GitHub WORKDIR ${COLCON_WS}/src -RUN git clone --depth 1 --recurse-submodules https://github.com/selfpatch/ros2_medkit.git && \ - mv ros2_medkit/src/ros2_medkit_gateway \ - ros2_medkit/src/ros2_medkit_msgs \ - ros2_medkit/src/ros2_medkit_serialization \ - ros2_medkit/src/ros2_medkit_fault_manager \ - ros2_medkit/src/ros2_medkit_fault_reporter \ - ros2_medkit/src/ros2_medkit_diagnostic_bridge \ - ros2_medkit/src/dynamic_message_introspection . && \ +RUN git clone --depth 1 https://github.com/selfpatch/ros2_medkit.git && \ + mv ros2_medkit/src/ros2_medkit_gateway . && \ + mv ros2_medkit/src/ros2_medkit_msgs . && \ + mv ros2_medkit/src/ros2_medkit_serialization . && \ + mv ros2_medkit/src/ros2_medkit_fault_manager . && \ + mv ros2_medkit/src/ros2_medkit_fault_reporter . 
&& \ + mv ros2_medkit/src/ros2_medkit_diagnostic_bridge . && \ rm -rf ros2_medkit # Copy demo package from local context (this repo) diff --git a/demos/turtlebot3_integration/README.md b/demos/turtlebot3_integration/README.md index 605e30b..65ff80b 100644 --- a/demos/turtlebot3_integration/README.md +++ b/demos/turtlebot3_integration/README.md @@ -15,6 +15,7 @@ This demo demonstrates: - Running Nav2 navigation stack (AMCL, planner, controller) - Running ros2_medkit gateway with **manifest-based discovery** - Fault management via **diagnostic_bridge** (legacy /diagnostics support) +- **Rosbag snapshot capture** when faults are confirmed (MCAP format) - Querying robot data via **REST API** - Entity hierarchy: Areas → Components → Apps → Functions - Controlling the robot via sovd_web_ui @@ -75,10 +76,12 @@ ros2 topic echo /odom ```bash ./stop-demo.sh # Stop containers -./stop-demo.sh --volumes # Stop and remove volumes ./stop-demo.sh --images # Stop and remove images ``` +**Note:** Fault data and rosbag recordings are ephemeral — they are stored +inside the container and cleared on restart. + ### 2. Access the Web UI The Web UI is automatically started by docker-compose and available at <http://localhost:3000>. 
@@ -211,10 +214,42 @@ curl http://localhost:8080/api/v1/faults | jq # Get faults for a specific area curl http://localhost:8080/api/v1/areas/robot/faults | jq +# Get fault details with environment data (includes snapshots) +curl http://localhost:8080/api/v1/faults/NAVIGATION_GOAL_ABORTED | jq + # Clear a specific fault curl -X DELETE http://localhost:8080/api/v1/apps/diagnostic-bridge/faults/TURTLEBOT3_NODE ``` +### Rosbag Snapshots (Bulk Data) + +When a fault is confirmed, the FaultManager automatically captures: +- **Freeze frame snapshots**: Latest messages from key topics (odometry, pose, scan) +- **Rosbag recording**: 10 seconds before + 2 seconds after fault confirmation + +```bash +# List bulk-data categories for an entity +curl http://localhost:8080/api/v1/apps/{entity_id}/bulk-data | jq + +# List rosbag files available for download +curl http://localhost:8080/api/v1/apps/{entity_id}/bulk-data/rosbags | jq + +# Download a rosbag file (returns MCAP format) +curl -O http://localhost:8080/api/v1/apps/{entity_id}/bulk-data/rosbags/{fault_code} + +# Get fault detail with snapshots (freeze frames) +curl http://localhost:8080/api/v1/apps/{entity_id}/faults/{fault_code} | jq +``` + +**Recorded Topics:** +- `/odom`, `/amcl_pose`, `/scan` - Robot state +- `/cmd_vel` - Velocity commands +- `/tf`, `/tf_static` - Transforms +- `/navigate_to_pose/_action/status`, `/navigate_to_pose/_action/feedback` - Navigation state +- `/local_costmap/costmap`, `/global_costmap/costmap` - Costmaps +- `/plan` - Navigation plan +- `/diagnostics` - System diagnostics + ### Operations (Service Calls) ```bash @@ -373,7 +408,7 @@ curl http://localhost:8080/api/v1/faults | jq curl http://localhost:8080/api/v1/areas/navigation/faults | jq # Clear specific fault -curl -X DELETE http://localhost:8080/api/v1/faults/{fault_id} +curl -X DELETE http://localhost:8080/api/v1/apps/{entity_id}/faults/{fault_code} # Clear all faults curl -X DELETE http://localhost:8080/api/v1/faults diff --git 
a/demos/turtlebot3_integration/config/medkit_params.yaml b/demos/turtlebot3_integration/config/medkit_params.yaml index 74a30b9..5b7a1f5 100644 --- a/demos/turtlebot3_integration/config/medkit_params.yaml +++ b/demos/turtlebot3_integration/config/medkit_params.yaml @@ -27,3 +27,62 @@ diagnostics: discovery: runtime: create_synthetic_components: false # Manifest defines components + +# Fault Manager configuration (runs in root namespace) +fault_manager: + ros__parameters: + # Storage configuration + storage_type: "sqlite" + database_path: "/var/lib/ros2_medkit/faults.db" + + # Debounce configuration + confirmation_threshold: 0 # Immediate confirmation + healing_enabled: false + healing_threshold: 3 + auto_confirm_after_sec: 0.0 + + # Snapshot configuration (freeze frames) + snapshots: + enabled: true + background_capture: true # Non-blocking capture + timeout_sec: 2.0 + max_message_size: 131072 # 128KB max per message + + # Topics to capture for all faults + default_topics: + - /odom + - /amcl_pose + - /scan + - /tf + - /navigate_to_pose/_action/status + + # Rosbag recording configuration + rosbag: + enabled: true + duration_sec: 10.0 # Record 10 seconds before fault confirmation + duration_after_sec: 2.0 # Record 2 seconds after confirmation + lazy_start: false # Always recording (ring buffer) + format: "mcap" # MCAP format (recommended for cross-platform) + storage_path: "/var/lib/ros2_medkit/rosbags" + max_bag_size_mb: 100 # Max size per rosbag file + max_total_storage_mb: 1000 # 1GB total storage limit + auto_cleanup: true # Cleanup rosbags on fault clear + + # Topics to record (use 'config' or 'all') + topics: "config" # Use include/exclude lists below + include_topics: + - /odom + - /amcl_pose + - /scan + - /cmd_vel + - /tf + - /tf_static + - /navigate_to_pose/_action/status + - /navigate_to_pose/_action/feedback + - /local_costmap/costmap + - /global_costmap/costmap + - /plan + - /diagnostics + exclude_topics: + - /rosout + - /parameter_events diff --git 
a/demos/turtlebot3_integration/docker-compose.yml b/demos/turtlebot3_integration/docker-compose.yml index 7b1c78f..e5817f4 100644 --- a/demos/turtlebot3_integration/docker-compose.yml +++ b/demos/turtlebot3_integration/docker-compose.yml @@ -19,7 +19,8 @@ services: stdin_open: true tty: true command: > - bash -c "source /opt/ros/jazzy/setup.bash && + bash -c "mkdir -p /var/lib/ros2_medkit/rosbags && + source /opt/ros/jazzy/setup.bash && source /root/demo_ws/install/setup.bash && export TURTLEBOT3_MODEL=burger && ros2 launch turtlebot3_medkit_demo demo.launch.py headless:=$${HEADLESS}" @@ -56,7 +57,8 @@ services: stdin_open: true tty: true command: > - bash -c "source /opt/ros/jazzy/setup.bash && + bash -c "mkdir -p /var/lib/ros2_medkit/rosbags && + source /opt/ros/jazzy/setup.bash && source /root/demo_ws/install/setup.bash && export TURTLEBOT3_MODEL=burger && ros2 launch turtlebot3_medkit_demo demo.launch.py headless:=$${HEADLESS}" diff --git a/demos/turtlebot3_integration/launch/demo.launch.py b/demos/turtlebot3_integration/launch/demo.launch.py index 254320b..1355f4f 100644 --- a/demos/turtlebot3_integration/launch/demo.launch.py +++ b/demos/turtlebot3_integration/launch/demo.launch.py @@ -131,13 +131,17 @@ def generate_launch_description(): ), # Launch ros2_medkit fault_manager in root namespace # Aggregates faults from all nodes via ReportFault service + # Also handles snapshot and rosbag capture when faults are confirmed Node( package="ros2_medkit_fault_manager", executable="fault_manager_node", name="fault_manager", namespace="", output="screen", - parameters=[{"use_sim_time": use_sim_time}], + parameters=[ + medkit_params_file, + {"use_sim_time": use_sim_time}, + ], ), # Launch diagnostic_bridge under /bridge namespace # Converts legacy /diagnostics topic to faults