diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 3488e94ea..a62c49a81 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -3,7 +3,7 @@ on: pull_request: push: branches: - - ros2 + - ros2-humble schedule: # Run every week at 20:00 on Sunday - cron: "0 20 * * 0" @@ -22,18 +22,30 @@ jobs: uncrustify, xmllint, ] - runs-on: ubuntu-latest + include: + - distro: humble + os: ubuntu-22.04 + runs-on: ${{ matrix.os }} env: AMENT_CPPCHECK_ALLOW_SLOW_VERSIONS: 1 steps: - uses: actions/checkout@v1 - uses: ros-tooling/setup-ros@master - - run: sudo pip install pydocstyle==6.1.1 # downgrade to fix https://github.com/ament/ament_lint/pull/428 + with: + required-ros-distributions: ${{ matrix.distro }} - uses: ros-tooling/action-ros-lint@master with: linter: ${{ matrix.linter }} package-name: | diagnostic_aggregator diagnostic_common_diagnostics + diagnostic_remote_logging diagnostic_updater self_test + + check_licenses: + name: Check licenses + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: boschresearch/ros_license_toolkit@2.0.1 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1e07c0adc..2e29e6b73 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -3,7 +3,7 @@ on: pull_request: push: branches: - - ros2 + - ros2-humble schedule: # Run every week at 20:00 on Sunday - cron: "0 20 * * 0" @@ -14,25 +14,22 @@ jobs: strategy: fail-fast: false matrix: - package: - [ + package: [ diagnostic_aggregator, diagnostic_common_diagnostics, + diagnostic_remote_logging, diagnostic_updater, self_test, ] - distro: [humble, iron, rolling] include: - distro: humble os: 22.04 - - distro: iron - os: 22.04 - - distro: rolling - os: 24.04 runs-on: ubuntu-latest container: ubuntu:${{ matrix.os }} steps: - uses: ros-tooling/setup-ros@master + with: + required-ros-distributions: ${{ matrix.distro }} - uses: ros-tooling/action-ros-ci@master with: target-ros2-distro: ${{ 
matrix.distro }} diff --git a/README.md b/README.md index 96c20ae59..4cce43b7a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Test diagnostics](https://img.shields.io/github/actions/workflow/status/ros/diagnostics/test.yaml?label=test&style=flat-square)](https://github.com/ros/diagnostics/actions/workflows/test.yaml) [![Lint diagnostics](https://img.shields.io/github/actions/workflow/status/ros/diagnostics/lint.yaml?label=lint&style=flat-square)](https://github.com/ros/diagnostics/actions/workflows/lint.yaml) [![ROS2 Humble](https://img.shields.io/ros/v/humble/diagnostics.svg?style=flat-square)](https://index.ros.org/r/diagnostics/#humble) [![ROS2 Iron](https://img.shields.io/ros/v/iron/diagnostics.svg?style=flat-square)](https://index.ros.org/r/diagnostics/#iron) [![ROS2 Rolling](https://img.shields.io/ros/v/rolling/diagnostics.svg?style=flat-square)](https://index.ros.org/r/diagnostics/#rolling) +[![Test diagnostics](https://img.shields.io/github/actions/workflow/status/ros/diagnostics/test.yaml?label=test&style=flat-square)](https://github.com/ros/diagnostics/actions/workflows/test.yaml) [![Lint diagnostics](https://img.shields.io/github/actions/workflow/status/ros/diagnostics/lint.yaml?label=lint&style=flat-square)](https://github.com/ros/diagnostics/actions/workflows/lint.yaml) [![ROS2 Humble](https://img.shields.io/ros/v/humble/diagnostics.svg?style=flat-square)](https://index.ros.org/r/diagnostics/#humble) [![ROS2 Iron](https://img.shields.io/ros/v/iron/diagnostics.svg?style=flat-square)](https://index.ros.org/r/diagnostics/#iron) [![ROS2 Jazzy](https://img.shields.io/ros/v/jazzy/diagnostics.svg?style=flat-square)](https://index.ros.org/r/diagnostics/#jazzy) [![ROS2 Rolling](https://img.shields.io/ros/v/rolling/diagnostics.svg?style=flat-square)](https://index.ros.org/r/diagnostics/#rolling) # Overview @@ -34,15 +34,80 @@ Diagnostics messages that are not aggregated can be visualized by [`rqt_runtime_ # Target Distribution -The [`ros2` 
branch](https://github.com/ros/diagnostics/tree/ros2) targets +- **Rolling Ridley** by the [`ros2` branch](https://github.com/ros/diagnostics/tree/ros2) +- **Humble Hawksbill** by the [`ros2-humble` branch](https://github.com/ros/diagnostics/tree/ros2-humble) +- **Jazzy Jalisco** by the [`ros2-jazzy` branch](https://github.com/ros/diagnostics/tree/ros2-jazzy) +- **Kilted Kaiju** by the [`ros2-kilted` branch](https://github.com/ros/diagnostics/tree/ros2-kilted) -- *Humble Hawksbill* -- *Iron Irwini* +## Workflow -The [`ros2-jazzy` branch](https://github.com/ros/diagnostics/tree/ros2-jazzy) targets +New features are to be developed in custom branches and then merged into the `ros2` branch. -- *Jazzy Jalisco* -- *Rolling Ridley* +From there, the changes are backported to the other branches. + +## Backport Tooling + +This tool has proven to be useful: [backport](https://www.npmjs.com/package/backport) + +Use this command to port a given PR of `PR_NUMBER` to the other branches: + +```bash +backport --pr PR_NUMBER -b ros2-humble ros2-jazzy ros2-kilted +``` + +## Versioning and Releases + +- (**X**.0.0) We use the major version number to indicate a breaking change. +- (0.**Y**.0) The minor version number is used to differentiate between different ROS distributions: + - x.**0**.z: Humble Hawksbill + - x.**1**.z: Iron Irwini + - x.**2**.z: Jazzy Jalisco + - x.**3**.z: Kilted Kaiju + - x.**4**.z: Rolling Ridley + - (Future releases will receive x.**4**.z and rolling will then be x.**5**.z) +- (0.0.**Z**) The patch version number is used for changes in the current ROS distribution that do not affect the API. 
+ + # License diff --git a/diagnostic_aggregator/CHANGELOG.rst b/diagnostic_aggregator/CHANGELOG.rst index efda16d09..0af70b128 100644 --- a/diagnostic_aggregator/CHANGELOG.rst +++ b/diagnostic_aggregator/CHANGELOG.rst @@ -2,6 +2,25 @@ Changelog for package diagnostic_aggregator ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +4.0.2 (2025-02-10) +------------------ +* Checking licenses in CI (`#431 <https://github.com/ros/diagnostics/issues/431>`_) (`#432 <https://github.com/ros/diagnostics/issues/432>`_) + * Checking licenses in ci +* Add Windows support (`#426 <https://github.com/ros/diagnostics/issues/426>`_) (`#428 <https://github.com/ros/diagnostics/issues/428>`_) + Co-authored-by: Silvio Traversaro +* Support custom `rclcpp::NodeOptions` (`#417 <https://github.com/ros/diagnostics/issues/417>`_) (`#422 <https://github.com/ros/diagnostics/issues/422>`_) + * Support custom `rclcpp::NodeOptions` This eases static composition of multiple ROS 2 nodes + Co-authored-by: Patrick Roncagliolo +* Skipping flaky tests (`#413 <https://github.com/ros/diagnostics/issues/413>`_) (`#414 <https://github.com/ros/diagnostics/issues/414>`_) + * skipping flaky ntp test +* Contributors: Christian Henkel + +3.2.1 (2024-06-27) +------------------ +* Add add_analyzer functionality (`#329 <https://github.com/ros/diagnostics/issues/329>`_) (`#359 <https://github.com/ros/diagnostics/issues/359>`_) +* Aggregator: publish diagnostics_toplevel_state immediately on every degradation (`#324 <https://github.com/ros/diagnostics/issues/324>`_) (`#355 <https://github.com/ros/diagnostics/issues/355>`_) +* Contributors: Christian Henkel + 3.2.0 (2024-03-22) ------------------ * Avoid rolling up an ERROR state when empty GenericAnalyzer blocks are marked discard_stale, or when all of their items are STALE. 
(`#315 `_) diff --git a/diagnostic_aggregator/CMakeLists.txt b/diagnostic_aggregator/CMakeLists.txt index 12aac3b1f..d0c81c63c 100644 --- a/diagnostic_aggregator/CMakeLists.txt +++ b/diagnostic_aggregator/CMakeLists.txt @@ -9,11 +9,13 @@ endif() if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic) endif() +set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) find_package(ament_cmake REQUIRED) find_package(diagnostic_msgs REQUIRED) find_package(pluginlib REQUIRED) find_package(rclcpp REQUIRED) +find_package(rcl_interfaces REQUIRED) find_package(std_msgs REQUIRED) add_library(${PROJECT_NAME} SHARED @@ -67,6 +69,10 @@ add_executable(aggregator_node src/aggregator_node.cpp) target_link_libraries(aggregator_node ${PROJECT_NAME}) +# Add analyzer +add_executable(add_analyzer src/add_analyzer.cpp) +ament_target_dependencies(add_analyzer rclcpp rcl_interfaces) + # Testing macro if(BUILD_TESTING) find_package(ament_lint_auto REQUIRED) @@ -77,6 +83,7 @@ if(BUILD_TESTING) find_package(launch_testing_ament_cmake REQUIRED) file(TO_CMAKE_PATH "${CMAKE_INSTALL_PREFIX}/lib/${PROJECT_NAME}/aggregator_node" AGGREGATOR_NODE) + file(TO_CMAKE_PATH "${CMAKE_INSTALL_PREFIX}/lib/${PROJECT_NAME}/add_analyzer" ADD_ANALYZER) file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/test/test_listener.py" TEST_LISTENER) set(create_analyzers_tests "primitive_analyzers" @@ -124,15 +131,38 @@ if(BUILD_TESTING) ) endforeach() - add_launch_test( - test/test_critical_pub.py - TIMEOUT 30 - ) + set(add_analyzers_tests + "all_analyzers") + + foreach(test_name ${add_analyzers_tests}) + file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/test/default.yaml" PARAMETER_FILE) + file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/test/${test_name}.yaml" ADD_PARAMETER_FILE) + file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/test/expected_output/add_${test_name}" EXPECTED_OUTPUT) - ament_add_pytest_test(test_discard_behavior - 
"${CMAKE_CURRENT_SOURCE_DIR}/test/test_discard_behavior.py" - TIMEOUT 60 - ) + configure_file( + "test/add_analyzers.launch.py.in" + "test_add_${test_name}.launch.py" + @ONLY + ) + add_launch_test( + "${CMAKE_CURRENT_BINARY_DIR}/test_add_${test_name}.launch.py" + TARGET "test_add_${test_name}" + TIMEOUT 30 + ENV + ) + endforeach() + + # SKIPPING FLAKY TEST + # add_launch_test( + # test/test_critical_pub.py + # TIMEOUT 30 + # ) + + # SKIPPING FLAKY TEST + # ament_add_pytest_test(test_discard_behavior + # "${CMAKE_CURRENT_SOURCE_DIR}/test/test_discard_behavior.py" + # TIMEOUT 60 + # ) endif() install( @@ -140,6 +170,11 @@ install( DESTINATION lib/${PROJECT_NAME} ) +install( + TARGETS add_analyzer + DESTINATION lib/${PROJECT_NAME} +) + install( TARGETS ${PROJECT_NAME} ${ANALYZERS} EXPORT ${PROJECT_NAME}Targets @@ -157,6 +192,7 @@ ament_python_install_package(${PROJECT_NAME}) # Install Example set(ANALYZER_PARAMS_FILEPATH "${CMAKE_INSTALL_PREFIX}/share/${PROJECT_NAME}/example_analyzers.yaml") +set(ADD_ANALYZER_PARAMS_FILEPATH "${CMAKE_INSTALL_PREFIX}/share/${PROJECT_NAME}/example_add_analyzers.yaml") configure_file(example/example.launch.py.in example.launch.py @ONLY) install( # launch descriptor FILES ${CMAKE_CURRENT_BINARY_DIR}/example.launch.py @@ -167,7 +203,7 @@ install( # example publisher DESTINATION lib/${PROJECT_NAME} ) install( # example aggregator configration - FILES example/example_analyzers.yaml + FILES example/example_analyzers.yaml example/example_add_analyzers.yaml DESTINATION share/${PROJECT_NAME} ) diff --git a/diagnostic_aggregator/README.md b/diagnostic_aggregator/README.md index 08178a066..fa0eea7a9 100644 --- a/diagnostic_aggregator/README.md +++ b/diagnostic_aggregator/README.md @@ -135,6 +135,33 @@ You can launch the `aggregator_node` like this (see [example.launch.py.in](examp ]) ``` +You can add analyzers at runtime using the `add_analyzer` node like this (see [example.launch.py.in](example/example.launch.py.in)): +``` + add_analyzer = 
launch_ros.actions.Node( + package='diagnostic_aggregator', + executable='add_analyzer', + output='screen', + parameters=[add_analyzer_params_filepath]) + return launch.LaunchDescription([ + add_analyzer, + ]) +``` +This node updates the parameters of the `aggregator_node` by calling the service `/analyzers/set_parameters_atomically`. +The `aggregator_node` will detect when a `parameter-event` has introduced new parameters to it. +When this happens the `aggregator_node` will reload all analyzers based on its new set of parameters. +Adding analyzers this way can be done at runtime and can be made conditional. + +In the example, `add_analyzer` will add an analyzer for diagnostics that are marked optional: +``` yaml +/**: + ros__parameters: + optional: + type: diagnostic_aggregator/GenericAnalyzer + path: Optional + contains: [ '/optional' ] +``` +This will move the `/optional/runtime/analyzer` diagnostic from "Other" to "Aggregation", where it will not go stale after 5 seconds and will be taken into account for the toplevel state. + # Basic analyzers The `diagnostic_aggregator` package provides a few basic analyzers that you can use to aggregate your diagnostics. diff --git a/diagnostic_aggregator/example/README.md b/diagnostic_aggregator/example/README.md index 10e9b2574..27c593be9 100644 --- a/diagnostic_aggregator/example/README.md +++ b/diagnostic_aggregator/example/README.md @@ -1,5 +1,7 @@ # Aggregator Example -This is a simple example to show the diagnostic_aggregator in action. It involves one python script producing dummy diagnostic data ([example_pub.py](./example_pub.py)), and one diagnostic aggregator configuration ([example.yaml](./example.yaml)) that provides analyzers aggregating it. +This is a simple example to show the diagnostic_aggregator and add_analyzer in action. 
It involves one Python script producing dummy diagnostic data ([example_pub.py](./example_pub.py)), one diagnostic aggregator configuration ([example_analyzers.yaml](./example_analyzers.yaml)), and one add_analyzer configuration ([example_add_analyzers.yaml](./example_add_analyzers.yaml)). + +The aggregator will launch and load all the analyzers listed in [example_analyzers.yaml](./example_analyzers.yaml). Then the aggregator will be notified that there are additional analyzers to load, listed in [example_add_analyzers.yaml](./example_add_analyzers.yaml). After this reload, all analyzers will be active. Run the example with `ros2 launch diagnostic_aggregator example.launch.py` diff --git a/diagnostic_aggregator/example/example.launch.py.in b/diagnostic_aggregator/example/example.launch.py.in index 48cd62f66..81a749220 100644 --- a/diagnostic_aggregator/example/example.launch.py.in +++ b/diagnostic_aggregator/example/example.launch.py.in @@ -4,6 +4,7 @@ import launch import launch_ros.actions analyzer_params_filepath = "@ANALYZER_PARAMS_FILEPATH@" +add_analyzer_params_filepath = "@ADD_ANALYZER_PARAMS_FILEPATH@" def generate_launch_description(): @@ -12,11 +13,18 @@ def generate_launch_description(): executable='aggregator_node', output='screen', parameters=[analyzer_params_filepath]) + add_analyzer = launch_ros.actions.Node( + package='diagnostic_aggregator', + executable='add_analyzer', + output='screen', + parameters=[add_analyzer_params_filepath] + ) diag_publisher = launch_ros.actions.Node( package='diagnostic_aggregator', executable='example_pub.py') return launch.LaunchDescription([ aggregator, + add_analyzer, diag_publisher, launch.actions.RegisterEventHandler( event_handler=launch.event_handlers.OnProcessExit( diff --git a/diagnostic_aggregator/example/example_add_analyzers.yaml b/diagnostic_aggregator/example/example_add_analyzers.yaml new file mode 100644 index 000000000..1c6c264c7 --- /dev/null +++ 
b/diagnostic_aggregator/example/example_add_analyzers.yaml @@ -0,0 +1,6 @@ +/**: + ros__parameters: + optional: + type: diagnostic_aggregator/GenericAnalyzer + path: Optional + contains: [ '/optional' ] diff --git a/diagnostic_aggregator/example/example_pub.py b/diagnostic_aggregator/example/example_pub.py index 887dc18df..0c1b10436 100755 --- a/diagnostic_aggregator/example/example_pub.py +++ b/diagnostic_aggregator/example/example_pub.py @@ -81,6 +81,10 @@ def __init__(self): name='/sensors/front/cam', message='OK'), DiagnosticStatus(level=DiagnosticStatus.OK, name='/sensors/rear/cam', message='OK'), + + # Optional + DiagnosticStatus(level=DiagnosticStatus.OK, + name='/optional/runtime/analyzer', message='OK'), ] def timer_callback(self): diff --git a/diagnostic_aggregator/include/diagnostic_aggregator/aggregator.hpp b/diagnostic_aggregator/include/diagnostic_aggregator/aggregator.hpp index b901acdc0..1d2c7e638 100644 --- a/diagnostic_aggregator/include/diagnostic_aggregator/aggregator.hpp +++ b/diagnostic_aggregator/include/diagnostic_aggregator/aggregator.hpp @@ -111,6 +111,13 @@ class Aggregator DIAGNOSTIC_AGGREGATOR_PUBLIC Aggregator(); + /*! + *\brief Constructor initializes with main prefix (ex: '/Robot') and custom node options + */ + DIAGNOSTIC_AGGREGATOR_PUBLIC + explicit Aggregator(rclcpp::NodeOptions options); + + DIAGNOSTIC_AGGREGATOR_PUBLIC virtual ~Aggregator(); @@ -133,6 +140,8 @@ class Aggregator rclcpp::Service::SharedPtr add_srv_; /// DiagnosticArray, /diagnostics rclcpp::Subscription::SharedPtr diag_sub_; + /// ParameterEvent, /parameter_events + rclcpp::Subscription::SharedPtr param_sub_; /// DiagnosticArray, /diagnostics_agg rclcpp::Publisher::SharedPtr agg_pub_; /// DiagnosticStatus, /diagnostics_toplevel_state @@ -165,6 +174,16 @@ class Aggregator /// Records all ROS warnings. No warnings are repeated. 
std::set ros_warnings_; + /* + *!\brief Checks for new parameters to trigger reinitialization of the AnalyzerGroup and OtherAnalyzer + */ + void parameterCallback(const rcl_interfaces::msg::ParameterEvent::SharedPtr param_msg); + + /* + *!\brief (re)initializes the AnalyzerGroup and OtherAnalyzer + */ + void initAnalyzers(); + /* *!\brief Checks timestamp of message, and warns if timestamp is 0 (not set) */ diff --git a/diagnostic_aggregator/include/diagnostic_aggregator/analyzer.hpp b/diagnostic_aggregator/include/diagnostic_aggregator/analyzer.hpp index e64d79a91..187f35159 100644 --- a/diagnostic_aggregator/include/diagnostic_aggregator/analyzer.hpp +++ b/diagnostic_aggregator/include/diagnostic_aggregator/analyzer.hpp @@ -94,8 +94,7 @@ class Analyzer /*! *\brief Default constructor, called by pluginlib. */ - Analyzer() - : clock_(std::make_shared()) {} + Analyzer() {} virtual ~Analyzer() {} diff --git a/diagnostic_aggregator/include/diagnostic_aggregator/generic_analyzer.hpp b/diagnostic_aggregator/include/diagnostic_aggregator/generic_analyzer.hpp index 7496780b0..4899f52ea 100644 --- a/diagnostic_aggregator/include/diagnostic_aggregator/generic_analyzer.hpp +++ b/diagnostic_aggregator/include/diagnostic_aggregator/generic_analyzer.hpp @@ -229,6 +229,7 @@ class GenericAnalyzer : public GenericAnalyzerBase virtual bool match(const std::string & name); private: + rclcpp::Node::SharedPtr node_; std::vector chaff_; /**< Removed from the start of node names. 
*/ std::vector expected_; std::vector startswith_; diff --git a/diagnostic_aggregator/include/diagnostic_aggregator/generic_analyzer_base.hpp b/diagnostic_aggregator/include/diagnostic_aggregator/generic_analyzer_base.hpp index d7cb8a485..9f1193341 100644 --- a/diagnostic_aggregator/include/diagnostic_aggregator/generic_analyzer_base.hpp +++ b/diagnostic_aggregator/include/diagnostic_aggregator/generic_analyzer_base.hpp @@ -101,7 +101,8 @@ class GenericAnalyzerBase : public Analyzer * Must be initialized in order to prepend the path to all outgoing status messages. */ bool init( - const std::string & path, const std::string & breadcrumb, double timeout = -1.0, + const std::string & path, const std::string & breadcrumb, + const rclcpp::Node::SharedPtr node, double timeout = -1.0, int num_items_expected = -1, bool discard_stale = false) { num_items_expected_ = num_items_expected; @@ -109,6 +110,7 @@ class GenericAnalyzerBase : public Analyzer path_ = path + "/" + nice_name_; discard_stale_ = discard_stale; breadcrumb_ = breadcrumb; + clock_ = node->get_clock(); if (discard_stale_ && timeout <= 0) { RCLCPP_WARN( diff --git a/diagnostic_aggregator/include/diagnostic_aggregator/other_analyzer.hpp b/diagnostic_aggregator/include/diagnostic_aggregator/other_analyzer.hpp index 67d0f1b0a..d7d421ae5 100644 --- a/diagnostic_aggregator/include/diagnostic_aggregator/other_analyzer.hpp +++ b/diagnostic_aggregator/include/diagnostic_aggregator/other_analyzer.hpp @@ -85,34 +85,15 @@ class OtherAnalyzer : public GenericAnalyzerBase *\param path Base path of Aggregator *\param breadcrumb Prefix for parameter getter. 
*/ - bool init(const std::string & path, const std::string & breadcrumb = "") + bool init( + const std::string & path, const std::string & breadcrumb, + const rclcpp::Node::SharedPtr node) { (void)breadcrumb; nice_name_ = "Other"; path_ = path; - return GenericAnalyzerBase::init(path_, "", 5.0, -1, true); - } - - /* - *\brief OtherAnalyzer cannot be initialized with a NodeHandle - * - *\return False, since NodeHandle initialization isn't valid - */ - bool init( - const std::string & base_path, const std::string & breadcrumb, - const rclcpp::Node::SharedPtr node) - { - (void)base_path; - (void)breadcrumb; - (void)node; - - RCLCPP_ERROR( - rclcpp::get_logger( - "generic_analyzer_base"), - R"(OtherAnalyzer was attempted to initialize with a NodeHandle. - This analyzer cannot be used as a plugin.)"); - return false; + return GenericAnalyzerBase::init(path_, "", node, 5.0, -1, true); } /* diff --git a/diagnostic_aggregator/include/diagnostic_aggregator/status_item.hpp b/diagnostic_aggregator/include/diagnostic_aggregator/status_item.hpp index e11dad5d6..3c24590b2 100644 --- a/diagnostic_aggregator/include/diagnostic_aggregator/status_item.hpp +++ b/diagnostic_aggregator/include/diagnostic_aggregator/status_item.hpp @@ -187,13 +187,16 @@ class StatusItem *\brief Constructed from const DiagnosticStatus* */ DIAGNOSTIC_AGGREGATOR_PUBLIC - explicit StatusItem(const diagnostic_msgs::msg::DiagnosticStatus * status); + StatusItem( + const diagnostic_msgs::msg::DiagnosticStatus * status, + rclcpp::Clock::SharedPtr clock); /*! 
*\brief Constructed from string of item name */ DIAGNOSTIC_AGGREGATOR_PUBLIC StatusItem( + rclcpp::Clock::SharedPtr clock, const std::string item_name, const std::string message = "Missing", const DiagnosticLevel level = Level_Stale); diff --git a/diagnostic_aggregator/include/diagnostic_aggregator/visibility_control.hpp b/diagnostic_aggregator/include/diagnostic_aggregator/visibility_control.hpp index 791232c03..aafcd10c1 100644 --- a/diagnostic_aggregator/include/diagnostic_aggregator/visibility_control.hpp +++ b/diagnostic_aggregator/include/diagnostic_aggregator/visibility_control.hpp @@ -1,16 +1,36 @@ -// Copyright 2015 Open Source Robotics Foundation, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2020, Karsten Knese + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. 
+ * * Neither the name of the Willow Garage nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *********************************************************************/ #ifndef DIAGNOSTIC_AGGREGATOR__VISIBILITY_CONTROL_HPP_ #define DIAGNOSTIC_AGGREGATOR__VISIBILITY_CONTROL_HPP_ diff --git a/diagnostic_aggregator/package.xml b/diagnostic_aggregator/package.xml index 677e04365..a14c049a6 100644 --- a/diagnostic_aggregator/package.xml +++ b/diagnostic_aggregator/package.xml @@ -2,7 +2,7 @@ diagnostic_aggregator - 3.2.0 + 4.0.2 diagnostic_aggregator Austin Hendrix Brice Rebsamen @@ -12,7 +12,7 @@ BSD-3-Clause http://www.ros.org/wiki/diagnostic_aggregator - + Kevin Watts Brice Rebsamen Arne Nordmann @@ -22,6 +22,7 @@ diagnostic_msgs pluginlib + rcl_interfaces rclcpp std_msgs diff --git a/diagnostic_aggregator/src/add_analyzer.cpp b/diagnostic_aggregator/src/add_analyzer.cpp new file mode 100644 index 000000000..42ff2b0aa --- /dev/null +++ b/diagnostic_aggregator/src/add_analyzer.cpp @@ -0,0 +1,110 @@ +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2024, Nobleo Technology + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + *********************************************************************/ + +/**< \author Martin Cornelis */ + +#include <chrono> + +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/srv/set_parameters_atomically.hpp" +#include "rcl_interfaces/msg/parameter.hpp" + +using namespace std::chrono_literals; +/// Helper node that forwards its own analyzer parameters to the aggregator's parameter service. +class AddAnalyzer : public rclcpp::Node +{ +public: + AddAnalyzer() + : Node("add_analyzer_node", "", rclcpp::NodeOptions().allow_undeclared_parameters( + true).automatically_declare_parameters_from_overrides(true)) + { + client_ = this->create_client<rcl_interfaces::srv::SetParametersAtomically>( + "/analyzers/set_parameters_atomically"); + } + /// Waits for the service, then sends every analyzer parameter in one atomic request. + void send_request() + { + while (!client_->wait_for_service(1s)) { + if (!rclcpp::ok()) { + RCLCPP_ERROR(this->get_logger(), "Interrupted while waiting for the service. 
Exiting."); + return; + } + RCLCPP_INFO_ONCE(this->get_logger(), "service not available, waiting ..."); + } + auto request = std::make_shared<rcl_interfaces::srv::SetParametersAtomically::Request>(); + std::map<std::string, rclcpp::Parameter> parameters; + + if (!this->get_parameters("", parameters)) { + RCLCPP_ERROR(this->get_logger(), "Failed to retrieve parameters"); + } + for (const auto & [param_name, param] : parameters) { + // Locate the last '.', which separates the analyzer prefix from the field name + size_t suffix_start = param_name.find_last_of('.'); + // Parameters without a '.' cannot belong to an analyzer description and are skipped + if (suffix_start != std::string::npos) { + std::string stripped_param_name = param_name.substr(0, suffix_start); + // Forward the parameter only if a sibling "<prefix>.path" parameter exists; + // this indicates the parameter is part of an analyzer description + if (parameters.count(stripped_param_name + ".path") > 0) { + auto parameter_msg = param.to_parameter_msg(); + request->parameters.push_back(parameter_msg); + } + } + } + + auto result = client_->async_send_request(request); + // A completed call is not enough: the response must also report the update as successful. 
+ if (rclcpp::spin_until_future_complete(this->get_node_base_interface(), result) == + rclcpp::FutureReturnCode::SUCCESS && result.get()->result.successful) + { + RCLCPP_INFO(this->get_logger(), "Parameters successfully set"); + } else { + RCLCPP_ERROR(this->get_logger(), "Failed to set parameters"); + } + } + +private: + rclcpp::Client<rcl_interfaces::srv::SetParametersAtomically>::SharedPtr client_; +}; + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + + auto add_analyzer = std::make_shared<AddAnalyzer>(); + add_analyzer->send_request(); + rclcpp::shutdown(); + + return 0; +} diff --git a/diagnostic_aggregator/src/aggregator.cpp b/diagnostic_aggregator/src/aggregator.cpp index d9576c737..9f0079318 100644 --- a/diagnostic_aggregator/src/aggregator.cpp +++ b/diagnostic_aggregator/src/aggregator.cpp @@ -57,9 +57,13 @@ using diagnostic_msgs::msg::DiagnosticStatus; * @todo(anordman): make aggregator a lifecycle node. 
*/ Aggregator::Aggregator() +: Aggregator(rclcpp::NodeOptions()) {} + +Aggregator::Aggregator(rclcpp::NodeOptions options) : n_(std::make_shared( "analyzers", "", - rclcpp::NodeOptions().automatically_declare_parameters_from_overrides(true))), + options.allow_undeclared_parameters(true). + automatically_declare_parameters_from_overrides(true))), logger_(rclcpp::get_logger("Aggregator")), pub_rate_(1.0), history_depth_(1000), @@ -69,6 +73,36 @@ Aggregator::Aggregator() last_top_level_state_(DiagnosticStatus::STALE) { RCLCPP_DEBUG(logger_, "constructor"); + initAnalyzers(); + + diag_sub_ = n_->create_subscription( + "/diagnostics", rclcpp::SystemDefaultsQoS().keep_last(history_depth_), + std::bind(&Aggregator::diagCallback, this, _1)); + agg_pub_ = n_->create_publisher("/diagnostics_agg", 1); + toplevel_state_pub_ = + n_->create_publisher("/diagnostics_toplevel_state", 1); + + int publish_rate_ms = 1000 / pub_rate_; + publish_timer_ = rclcpp::create_timer( + n_, clock_, std::chrono::milliseconds(publish_rate_ms), + std::bind(&Aggregator::publishData, this)); + + param_sub_ = n_->create_subscription( + "/parameter_events", 1, std::bind(&Aggregator::parameterCallback, this, _1)); +} + +void Aggregator::parameterCallback(const rcl_interfaces::msg::ParameterEvent::SharedPtr msg) +{ + if (msg->node == "/" + std::string(n_->get_name())) { + if (msg->new_parameters.size() != 0) { + base_path_ = ""; + initAnalyzers(); + } + } +} + +void Aggregator::initAnalyzers() +{ bool other_as_errors = false; std::map parameters; @@ -101,26 +135,17 @@ Aggregator::Aggregator() RCLCPP_DEBUG( logger_, "Aggregator critical publisher configured to: %s", (critical_ ? 
"true" : "false")); - analyzer_group_ = std::make_unique(); - if (!analyzer_group_->init(base_path_, "", n_)) { - RCLCPP_ERROR(logger_, "Analyzer group for diagnostic aggregator failed to initialize!"); - } - - // Last analyzer handles remaining data - other_analyzer_ = std::make_unique(other_as_errors); - other_analyzer_->init(base_path_); // This always returns true - - diag_sub_ = n_->create_subscription( - "/diagnostics", rclcpp::SystemDefaultsQoS().keep_last(history_depth_), - std::bind(&Aggregator::diagCallback, this, _1)); - agg_pub_ = n_->create_publisher("/diagnostics_agg", 1); - toplevel_state_pub_ = - n_->create_publisher("/diagnostics_toplevel_state", 1); + { // lock the mutex while analyzer_group_ and other_analyzer_ are being updated + std::lock_guard lock(mutex_); + analyzer_group_ = std::make_unique(); + if (!analyzer_group_->init(base_path_, "", n_)) { + RCLCPP_ERROR(logger_, "Analyzer group for diagnostic aggregator failed to initialize!"); + } - int publish_rate_ms = 1000 / pub_rate_; - publish_timer_ = n_->create_wall_timer( - std::chrono::milliseconds(publish_rate_ms), - std::bind(&Aggregator::publishData, this)); + // Last analyzer handles remaining data + other_analyzer_ = std::make_unique(other_as_errors); + other_analyzer_->init(base_path_, "", n_); // This always returns true + } } void Aggregator::checkTimestamp(const DiagnosticArray::SharedPtr diag_msg) @@ -151,29 +176,12 @@ void Aggregator::diagCallback(const DiagnosticArray::SharedPtr diag_msg) checkTimestamp(diag_msg); bool analyzed = false; + bool immediate_report = false; { // lock the whole loop to ensure nothing in the analyzer group changes during it. 
std::lock_guard lock(mutex_); for (auto j = 0u; j < diag_msg->status.size(); ++j) { analyzed = false; - - const bool top_level_state_transition_to_error = - (last_top_level_state_ != DiagnosticStatus::ERROR) && - (diag_msg->status[j].level == DiagnosticStatus::ERROR); - - if (critical_ && top_level_state_transition_to_error) { - RCLCPP_DEBUG( - logger_, "Received error message: %s, publishing error immediately", - diag_msg->status[j].name.c_str()); - DiagnosticStatus diag_toplevel_state; - diag_toplevel_state.name = "toplevel_state_critical"; - diag_toplevel_state.level = diag_msg->status[j].level; - toplevel_state_pub_->publish(diag_toplevel_state); - - // store the last published state - last_top_level_state_ = diag_toplevel_state.level; - } - - auto item = std::make_shared(&diag_msg->status[j]); + auto item = std::make_shared(&diag_msg->status[j], n_->get_clock()); if (analyzer_group_->match(item->getName())) { analyzed = analyzer_group_->analyze(item); @@ -182,8 +190,17 @@ void Aggregator::diagCallback(const DiagnosticArray::SharedPtr diag_msg) if (!analyzed) { other_analyzer_->analyze(item); } + + // In case there is a degraded state, publish immediately + if (critical_ && item->getLevel() > last_top_level_state_) { + immediate_report = true; + } } } + + if (immediate_report) { + publishData(); + } } Aggregator::~Aggregator() diff --git a/diagnostic_aggregator/src/analyzer_group.cpp b/diagnostic_aggregator/src/analyzer_group.cpp index 0873ac4c5..9a564c93f 100644 --- a/diagnostic_aggregator/src/analyzer_group.cpp +++ b/diagnostic_aggregator/src/analyzer_group.cpp @@ -66,6 +66,7 @@ bool AnalyzerGroup::init( path_ = path; breadcrumb_ = breadcrumb; nice_name_ = path; + clock_ = n->get_clock(); std::map parameters; if (!n->get_parameters(breadcrumb_, parameters)) { @@ -128,7 +129,8 @@ bool AnalyzerGroup::init( RCLCPP_ERROR( logger_, "Failed to load analyzer %s, type %s. 
Caught exception: %s", ns.c_str(), an_type.c_str(), e.what()); - auto item = std::make_shared(ns, "Pluginlib exception loading analyzer"); + auto item = std::make_shared( + n->get_clock(), ns, "Pluginlib exception loading analyzer"); aux_items_.push_back(item); init_ok = false; continue; @@ -139,7 +141,7 @@ bool AnalyzerGroup::init( logger_, "Pluginlib returned a null analyzer for %s, namespace %s.", an_type.c_str(), n->get_namespace()); std::shared_ptr item( - new StatusItem(ns, "Pluginlib return NULL Analyzer for " + an_type)); + new StatusItem(n->get_clock(), ns, "Pluginlib return NULL Analyzer for " + an_type)); aux_items_.push_back(item); init_ok = false; continue; @@ -158,7 +160,8 @@ bool AnalyzerGroup::init( RCLCPP_ERROR( logger_, "Unable to initialize analyzer NS: %s, type: %s", n->get_namespace(), an_type.c_str()); - std::shared_ptr item(new StatusItem(ns, "Analyzer init failed")); + std::shared_ptr item( + new StatusItem(n->get_clock(), ns, "Analyzer init failed")); aux_items_.push_back(item); init_ok = false; continue; diff --git a/diagnostic_aggregator/src/generic_analyzer.cpp b/diagnostic_aggregator/src/generic_analyzer.cpp index 6fa8efddd..8eef914e6 100644 --- a/diagnostic_aggregator/src/generic_analyzer.cpp +++ b/diagnostic_aggregator/src/generic_analyzer.cpp @@ -56,9 +56,11 @@ GenericAnalyzer::GenericAnalyzer() {} bool GenericAnalyzer::init( const std::string & path, const std::string & breadcrumb, const rclcpp::Node::SharedPtr n) { + node_ = n; path_ = path; breadcrumb_ = breadcrumb; nice_name_ = breadcrumb; + clock_ = n->get_clock(); RCLCPP_DEBUG( rclcpp::get_logger("GenericAnalyzer"), "GenericAnalyzer, breadcrumb: %s", breadcrumb_.c_str()); @@ -114,7 +116,7 @@ bool GenericAnalyzer::init( rclcpp::get_logger("GenericAnalyzer"), "GenericAnalyzer '%s' found expected: %s", nice_name_.c_str(), pvalue.value_to_string().c_str()); for (auto exp : pvalue.as_string_array()) { - auto item = std::make_shared(exp); + auto item = 
std::make_shared(n->get_clock(), exp); this->addItem(exp, item); } } else if (pname.compare("regex") == 0) { @@ -178,7 +180,8 @@ bool GenericAnalyzer::init( my_path = "/" + my_path; } - return GenericAnalyzerBase::init(path_, breadcrumb_, timeout, num_items_expected, discard_stale); + return GenericAnalyzerBase::init( + path_, breadcrumb_, node_, timeout, num_items_expected, discard_stale); } GenericAnalyzer::~GenericAnalyzer() {} @@ -282,7 +285,7 @@ vector> GenericAnalyzer: // Add missing names to header ... for (unsigned int i = 0; i < expected_names_missing.size(); ++i) { - std::shared_ptr item(new StatusItem(expected_names_missing[i])); + std::shared_ptr item(new StatusItem(node_->get_clock(), expected_names_missing[i])); processed.push_back(item->toStatusMsg(path_, true)); } diff --git a/diagnostic_aggregator/src/status_item.cpp b/diagnostic_aggregator/src/status_item.cpp index 23921301c..5a33844c8 100644 --- a/diagnostic_aggregator/src/status_item.cpp +++ b/diagnostic_aggregator/src/status_item.cpp @@ -45,8 +45,10 @@ using std::string; using rclcpp::get_logger; -StatusItem::StatusItem(const diagnostic_msgs::msg::DiagnosticStatus * status) -: clock_(new rclcpp::Clock()) +StatusItem::StatusItem( + const diagnostic_msgs::msg::DiagnosticStatus * status, + rclcpp::Clock::SharedPtr clock) +: clock_(clock) { level_ = valToLevel(status->level); name_ = status->name; @@ -59,8 +61,12 @@ StatusItem::StatusItem(const diagnostic_msgs::msg::DiagnosticStatus * status) update_time_ = clock_->now(); } -StatusItem::StatusItem(const string item_name, const string message, const DiagnosticLevel level) -: clock_(new rclcpp::Clock()) +StatusItem::StatusItem( + rclcpp::Clock::SharedPtr clock, + const string item_name, + const string message, + const DiagnosticLevel level) +: clock_(clock) { RCLCPP_DEBUG(rclcpp::get_logger("StatusItem"), "StatusItem constructor from string"); name_ = item_name; diff --git a/diagnostic_aggregator/test/add_analyzers.launch.py.in 
b/diagnostic_aggregator/test/add_analyzers.launch.py.in new file mode 100644 index 000000000..8e4eae8da --- /dev/null +++ b/diagnostic_aggregator/test/add_analyzers.launch.py.in @@ -0,0 +1,74 @@ +import os + +import unittest + +from launch import LaunchDescription +from launch.actions import ExecuteProcess +from launch.events import matches_action +from launch.events.process import ShutdownProcess + +import launch_testing +import launch_testing.actions +import launch_testing.asserts +import launch_testing.util +import launch_testing_ros + + +def generate_test_description(): + os.environ['OSPL_VERBOSITY'] = '8' + os.environ['RCUTILS_CONSOLE_OUTPUT_FORMAT'] = '{message}' + + aggregator_node = ExecuteProcess( + cmd=[ + "@AGGREGATOR_NODE@", + "--ros-args", + "--params-file", + "@PARAMETER_FILE@" + ], + name='aggregator_node', + emulate_tty=True, + output='screen') + + add_analyzer = ExecuteProcess( + cmd=[ + "@ADD_ANALYZER@", + "--ros-args", + "--params-file", + "@ADD_PARAMETER_FILE@" + ], + name='add_analyzer', + emulate_tty=True, + output='screen') + + launch_description = LaunchDescription() + launch_description.add_action(aggregator_node) + launch_description.add_action(add_analyzer) + launch_description.add_action(launch_testing.util.KeepAliveProc()) + launch_description.add_action(launch_testing.actions.ReadyToTest()) + return launch_description, locals() + +class TestAggregator(unittest.TestCase): + + def test_processes_output(self, proc_output, aggregator_node): + """Check aggregator logging output for expected strings.""" + + from launch_testing.tools.output import get_default_filtered_prefixes + output_filter = launch_testing_ros.tools.basic_output_filter( + filtered_prefixes=get_default_filtered_prefixes() + ['service not available, waiting...'], + filtered_rmw_implementation='@rmw_implementation@' + ) + proc_output.assertWaitFor( + expected_output=launch_testing.tools.expected_output_from_file(path="@EXPECTED_OUTPUT@"), + process=aggregator_node, + 
output_filter=output_filter, + timeout=15 + ) + + import time + time.sleep(1) + +@launch_testing.post_shutdown_test() +class TestAggregatorShutdown(unittest.TestCase): + + def test_last_process_exit_code(self, proc_info, aggregator_node): + launch_testing.asserts.assertExitCodes(proc_info, process=aggregator_node) diff --git a/diagnostic_aggregator/test/all_analyzers.yaml b/diagnostic_aggregator/test/all_analyzers.yaml index 84b330e34..4cb012a83 100644 --- a/diagnostic_aggregator/test/all_analyzers.yaml +++ b/diagnostic_aggregator/test/all_analyzers.yaml @@ -1,4 +1,4 @@ -analyzers: +/**: ros__parameters: path: BASIC prefix1: diff --git a/diagnostic_aggregator/test/analyzer_group.yaml b/diagnostic_aggregator/test/analyzer_group.yaml index 72bb5639d..14c938fff 100644 --- a/diagnostic_aggregator/test/analyzer_group.yaml +++ b/diagnostic_aggregator/test/analyzer_group.yaml @@ -1,4 +1,4 @@ -analyzers: +/**: ros__parameters: path: TEST primary: diff --git a/diagnostic_aggregator/test/default.yaml b/diagnostic_aggregator/test/default.yaml new file mode 100644 index 000000000..2da82b92f --- /dev/null +++ b/diagnostic_aggregator/test/default.yaml @@ -0,0 +1,9 @@ +/**: + ros__parameters: + path: BASIC + prefix0: + type: diagnostic_aggregator/GenericAnalyzer + path: Zeroth + contains: [ + 'contain0a', + 'contain0b' ] \ No newline at end of file diff --git a/diagnostic_aggregator/test/empty_root_path.yaml b/diagnostic_aggregator/test/empty_root_path.yaml index 391de4e99..b4b25509f 100644 --- a/diagnostic_aggregator/test/empty_root_path.yaml +++ b/diagnostic_aggregator/test/empty_root_path.yaml @@ -1,4 +1,4 @@ -analyzers: +/**: ros__parameters: primary: type: 'diagnostic_aggregator/AnalyzerGroup' diff --git a/diagnostic_aggregator/test/expected_output/add_all_analyzers.txt b/diagnostic_aggregator/test/expected_output/add_all_analyzers.txt new file mode 100644 index 000000000..9c5c2fc23 --- /dev/null +++ b/diagnostic_aggregator/test/expected_output/add_all_analyzers.txt @@ -0,0 
+1,6 @@ +/BASIC/Zeroth +prefix0 +/BASIC/First +prefix1 +/BASIC/Third +prefix3 \ No newline at end of file diff --git a/diagnostic_aggregator/test/expected_stale_analyzers.yaml b/diagnostic_aggregator/test/expected_stale_analyzers.yaml index 9110f84d6..9546204d5 100644 --- a/diagnostic_aggregator/test/expected_stale_analyzers.yaml +++ b/diagnostic_aggregator/test/expected_stale_analyzers.yaml @@ -1,4 +1,4 @@ -analyzers: +/**: my_path: type: diagnostic_aggregator/GenericAnalyzer path: My Path diff --git a/diagnostic_aggregator/test/multiple_match_analyzers.yaml b/diagnostic_aggregator/test/multiple_match_analyzers.yaml index 46153c6a7..3f0ec91f4 100644 --- a/diagnostic_aggregator/test/multiple_match_analyzers.yaml +++ b/diagnostic_aggregator/test/multiple_match_analyzers.yaml @@ -1,4 +1,4 @@ -analyzers: +/**: my_path: type: diagnostic_aggregator/GenericAnalyzer path: Header1 diff --git a/diagnostic_aggregator/test/primitive_analyzers.yaml b/diagnostic_aggregator/test/primitive_analyzers.yaml index 9601cce77..fc98e2ee8 100644 --- a/diagnostic_aggregator/test/primitive_analyzers.yaml +++ b/diagnostic_aggregator/test/primitive_analyzers.yaml @@ -1,4 +1,4 @@ -analyzers: +/**: ros__parameters: log_level: debug primary: diff --git a/diagnostic_aggregator/test/test_critical_pub.py b/diagnostic_aggregator/test/test_critical_pub.py index 279e82957..adad7ed7d 100644 --- a/diagnostic_aggregator/test/test_critical_pub.py +++ b/diagnostic_aggregator/test/test_critical_pub.py @@ -67,41 +67,56 @@ def publish_message(self, level): rclpy.spin_once(self.node) return self.node.get_clock().now() - def test_critical_publisher(self): + def critical_publisher_test( + self, initial_state=DiagnosticStatus.OK, new_state=DiagnosticStatus.ERROR + ): # Publish the ok message and wait till the toplevel state is received - state = DiagnosticStatus.OK - time_0 = self.publish_message(state) + time_0 = self.publish_message(initial_state) - assert (self.received_state[0] == state), \ + assert 
(self.received_state[0] == initial_state), \ ('Received state is not the same as the sent state:' - + f"'{self.received_state[0]}' != '{state}'") + + f"'{self.received_state[0]}' != '{initial_state}'") self.received_state.clear() # Publish the ok message and expect the toplevel state after 1 second period - time_1 = self.publish_message(state) + time_1 = self.publish_message(initial_state) assert (time_1 - time_0 > rclpy.duration.Duration(seconds=0.99)), \ 'OK message received too early' - assert (self.received_state[0] == state), \ + assert (self.received_state[0] == initial_state), \ ('Received state is not the same as the sent state:' - + f"'{self.received_state[0]}' != '{state}'") + + f"'{self.received_state[0]}' != '{initial_state}'") self.received_state.clear() # Publish the message and expect the critical error message immediately - state = DiagnosticStatus.ERROR - time_2 = self.publish_message(state) + time_2 = self.publish_message(new_state) assert (time_2 - time_1 < rclpy.duration.Duration(seconds=0.1)), \ 'Critical error message not received within 0.1 second' - assert (self.received_state[0] == state), \ + assert (self.received_state[0] == new_state), \ ('Received state is not the same as the sent state:' - + f"'{self.received_state[0]}' != '{state}'") + + f"'{self.received_state[0]}' != '{new_state}'") self.received_state.clear() # Next error message should be sent at standard 1 second rate - time_3 = self.publish_message(state) + time_3 = self.publish_message(new_state) assert (time_3 - time_1 > rclpy.duration.Duration(seconds=0.99)), \ 'Periodic error message received too early' - assert (self.received_state[0] == state), \ + assert (self.received_state[0] == new_state), \ ('Received state is not the same as the sent state:' - + f"'{self.received_state[0]}' != '{state}'") + + f"'{self.received_state[0]}' != '{new_state}'") + + def test_critical_publisher_ok_error(self): + self.critical_publisher_test( + initial_state=DiagnosticStatus.OK, 
new_state=DiagnosticStatus.ERROR + ) + + def test_critical_publisher_ok_warn(self): + self.critical_publisher_test( + initial_state=DiagnosticStatus.OK, new_state=DiagnosticStatus.WARN + ) + + def test_critical_publisher_warn_error(self): + self.critical_publisher_test( + initial_state=DiagnosticStatus.WARN, new_state=DiagnosticStatus.ERROR + ) diff --git a/diagnostic_aggregator/test/test_discard_behavior.py b/diagnostic_aggregator/test/test_discard_behavior.py index 16dee0162..18112b0eb 100644 --- a/diagnostic_aggregator/test/test_discard_behavior.py +++ b/diagnostic_aggregator/test/test_discard_behavior.py @@ -1,16 +1,34 @@ -# Copyright 2023 Open Source Robotics Foundation, Inc. +# Software License Agreement (BSD License) # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Copyright (c) 2024, Andrew Symington +# All rights reserved. # -# http://www.apache.org/licenses/LICENSE-2.0 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. 
+# * Neither the name of the Willow Garage nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. # # DESCRIPTION # This test ensures that a parent AnalyzerGroup does not roll up an ERROR state when a diff --git a/diagnostic_aggregator/test/test_listener.py b/diagnostic_aggregator/test/test_listener.py index 2d8de0738..838d372a7 100644 --- a/diagnostic_aggregator/test/test_listener.py +++ b/diagnostic_aggregator/test/test_listener.py @@ -1,16 +1,34 @@ -# Copyright 2016 Open Source Robotics Foundation, Inc. +# Software License Agreement (BSD License) # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Copyright (c) 2020, Arne Nordmann +# All rights reserved. 
# -# http://www.apache.org/licenses/LICENSE-2.0 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of the Willow Garage nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
import os diff --git a/diagnostic_common_diagnostics/CHANGELOG.rst b/diagnostic_common_diagnostics/CHANGELOG.rst index 0c53e9ca1..347d6587b 100644 --- a/diagnostic_common_diagnostics/CHANGELOG.rst +++ b/diagnostic_common_diagnostics/CHANGELOG.rst @@ -2,6 +2,27 @@ Changelog for package diagnostic_common_diagnostics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +4.0.2 (2025-02-10) +------------------ +* common_diagnostics cleaned hostname string (`#405 `_) (`#419 `_) + * Hostnames are properly cleaned to only contain alphanumeric characters or underscore. + Co-authored-by: sjusner +* Add missing rclpy dependency to common_diagnostics to fix rosdoc2 output (`#402 `_) (`#406 `_) + Co-authored-by: R Kent James +* [ros2-humble] Port hd_monitor to ROS2 (`#334 `_) (`#381 `_) + * Port hd_monitor to ROS2 (`#334 `_) + Co-authored-by: Antoine Lima <7421319+limaanto@users.noreply.github.com> +* Contributors: Christian Henkel + +3.2.1 (2024-06-27) +------------------ +* refactor(sensors_monitor): ros2 port `#339 `_ (`#365 `_) +* refactor(ram_monitor): ros2 port (`#338 `_) +* NTP monitor improvements (`#342 `_) (`#350 `_) +* Using ubuntu ntp server in systemtest (`#346 `_) (`#347 `_) +* Fixing ntp launchtest (`#330 `_) +* Contributors: Christian Henkel, Rein Appeldoorn + 3.2.0 (2024-03-22) ------------------ * Port cpu_monitor to ROS2 (`#326 `_) diff --git a/diagnostic_common_diagnostics/CMakeLists.txt b/diagnostic_common_diagnostics/CMakeLists.txt index e62c86ec0..261253671 100644 --- a/diagnostic_common_diagnostics/CMakeLists.txt +++ b/diagnostic_common_diagnostics/CMakeLists.txt @@ -10,6 +10,9 @@ ament_python_install_package(${PROJECT_NAME}) install(PROGRAMS ${PROJECT_NAME}/cpu_monitor.py ${PROJECT_NAME}/ntp_monitor.py + ${PROJECT_NAME}/ram_monitor.py + ${PROJECT_NAME}/sensors_monitor.py + ${PROJECT_NAME}/hd_monitor.py DESTINATION lib/${PROJECT_NAME} ) @@ -27,6 +30,10 @@ if(BUILD_TESTING) test/systemtest/test_ntp_monitor_launchtest.py TARGET ntp_monitor_launchtest 
TIMEOUT 20) + add_launch_test( + test/systemtest/test_hd_monitor_launchtest.py + TARGET hd_monitor_launchtest + TIMEOUT 20) endif() ament_package() diff --git a/diagnostic_common_diagnostics/README.md b/diagnostic_common_diagnostics/README.md index 42df1c8dc..40857d353 100644 --- a/diagnostic_common_diagnostics/README.md +++ b/diagnostic_common_diagnostics/README.md @@ -67,13 +67,67 @@ Computer name in diagnostics output (ex: 'c1') Disable self test. ## hd_monitor.py -**To be ported** +Runs 'shutil.disk_usage' to check if there is enough space left on a given device. With default parameters, the following thresholds are used: +* Above 5% of free space left, an `OK` status will be published. +* Between 5% and 1%, a `WARN` status will be published, +* Below 1%, an `ERROR` status will be published. + +### Published Topics +#### /diagnostics +diagnostic_msgs/DiagnosticArray +The diagnostics information. + +### Parameters +#### path +(default: home directory "~") +Path in which to check remaining space. + +#### free_percent_low +(default: 5%) +Warning threshold. + +#### free_percent_crit +(default: 1%) +Error threshold. ## ram_monitor.py -**To be ported** +The `ram_monitor` module allows users to monitor the RAM usage of their system in real-time. +It publishes the usage percentage in a diagnostic message. + +* Name of the node is "ram_monitor_" + hostname. +* Uses the following args: + * warning_percentage: If the RAM usage is > warning_percentage, a WARN status will be published. + * window: the maximum length of the used collections.deque for queuing RAM readings. + +### Published Topics +#### /diagnostics +diagnostic_msgs/DiagnosticArray +The diagnostics information. + +### Parameters +#### warning_percentage +(default: 90) +warning percentage threshold. + +#### window +(default: 1) +Length of RAM readings queue. 
## sensors_monitor.py -**To be ported** +The `sensors_monitor` module allows users to monitor the temperature, volt and fan speeds of their system in real-time. +It uses the [`LM Sensors` package](https://packages.debian.org/sid/utils/lm-sensors) to get the data. + +* Name of the node is "sensors_monitor_" + hostname. + +### Published Topics +#### /diagnostics +diagnostic_msgs/DiagnosticArray +The diagnostics information. + +### Parameters +#### ignore_fans +(default: false) +Whether to ignore the fan speed. ## tf_monitor.py **To be ported** diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py index 866629572..32dd60eb3 100755 --- a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py @@ -92,7 +92,11 @@ def main(args=None): # Create the node hostname = socket.gethostname() - node = Node(f'cpu_monitor_{hostname.replace("-", "_")}') + # Every invalid symbol is replaced by underscore. + # isalnum() alone also allows invalid symbols depending on the locale + cleaned_hostname = ''.join( + c if (c.isascii() and c.isalnum()) else '_' for c in hostname) + node = Node(f'cpu_monitor_{cleaned_hostname}') # Declare and get parameters node.declare_parameter('warning_percentage', 90) diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py new file mode 100755 index 000000000..14b968a71 --- /dev/null +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py @@ -0,0 +1,164 @@ +#! /usr/bin/env python3 +"""Hard Drive (or any other memory) monitor. Contains a the monitor node and its main function.""" +# -*- coding: utf-8 -*- +# +# Software License Agreement (BSD License) +# +# Copyright (c) 2009, Willow Garage, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of the Willow Garage nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +# \author Kevin Watts +# \author Antoine Lima + +from pathlib import Path +from shutil import disk_usage +from socket import gethostname +from typing import List + +from diagnostic_msgs.msg import DiagnosticStatus, KeyValue +from diagnostic_updater import Updater +from rcl_interfaces.msg import ParameterDescriptor, SetParametersResult +import rclpy +from rclpy.node import Node + + +FREE_PERCENT_LOW = 5 +FREE_PERCENT_CRIT = 1 +DICT_STATUS = { + DiagnosticStatus.OK: 'OK', + DiagnosticStatus.WARN: 'Warning', + DiagnosticStatus.ERROR: 'Error', +} +DICT_USAGE = { + DiagnosticStatus.OK: 'OK', + DiagnosticStatus.WARN: 'Low Disk Space', + DiagnosticStatus.ERROR: 'Very Low Disk Space', +} + + +class HDMonitor(Node): + """ + Diagnostic node checking the remaining space on the specified hard drive. + + Three ROS parameters: + - path: Path on the filesystem to check (string, default: home directory) + - free_percent_low: Percentage at which to consider the space left as low + - free_percent_crit: Percentage at which to consider the space left as critical + """ + + def __init__(self): + hostname = gethostname() + # Every invalid symbol is replaced by underscore. 
+ # isalnum() alone also allows invalid symbols depending on the locale + cleaned_hostname = ''.join( + c if (c.isascii() and c.isalnum()) else '_' for c in hostname) + super().__init__(f'hd_monitor_{cleaned_hostname}') + + self._path = '~' + self._free_percent_low = FREE_PERCENT_LOW + self._free_percent_crit = FREE_PERCENT_CRIT + + self.add_on_set_parameters_callback(self.callback_config) + self.declare_parameter('path', self._path, ParameterDescriptor( + description='Path in which to check remaining space.')) + self.declare_parameter( + 'free_percent_low', self._free_percent_low, ParameterDescriptor( + description='Warning threshold.', type=int())) + self.declare_parameter( + 'free_percent_crit', self._free_percent_crit, ParameterDescriptor( + description='Error threshold.', type=int())) + + self._updater = Updater(self) + self._updater.setHardwareID(hostname) + self._updater.add(f'{hostname} HD Usage', self.check_disk_usage) + + def callback_config(self, params: List[rclpy.Parameter]): + """ + Retrieve ROS parameters. + + see the class documentation for declared parameters. + """ + for param in params: + if param.name == 'path': + self._path = str( + Path(param.value).expanduser().resolve(strict=True) + ) + elif param.name == 'free_percent_low': + self._free_percent_low = param.value + elif param.name == 'free_percent_crit': + self._free_percent_crit = param.value + + return SetParametersResult(successful=True) + + def check_disk_usage(self, diag: DiagnosticStatus) -> DiagnosticStatus: + """ + Compute the disk usage and derive a status from it. + + Task periodically ran by the diagnostic updater. 
+ """ + diag.level = DiagnosticStatus.OK + + total, _, free = disk_usage(self._path) + percent = free / total * 100.0 + + if percent > self._free_percent_low: + diag.level = DiagnosticStatus.OK + elif percent > self._free_percent_crit: + diag.level = DiagnosticStatus.WARN + else: + diag.level = DiagnosticStatus.ERROR + + total_go = total // (1024 * 1024) + diag.values.extend( + [ + KeyValue(key='Name', value=self._path), + KeyValue(key='Status', value=DICT_STATUS[diag.level]), + KeyValue(key='Total (Go)', value=str(total_go)), + KeyValue(key='Available (%)', value=str(round(percent, 1))), + ] + ) + + diag.message = DICT_USAGE[diag.level] + return diag + + +def main(args=None): + """Run the HDMonitor class.""" + rclpy.init(args=args) + + node = HDMonitor() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + + +if __name__ == '__main__': + main() diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ntp_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ntp_monitor.py index 9462cebb3..461d17bde 100755 --- a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ntp_monitor.py +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ntp_monitor.py @@ -37,9 +37,7 @@ import threading import diagnostic_updater as DIAG - import ntplib - import rclpy from rclpy.node import Node @@ -52,6 +50,8 @@ def __init__(self, ntp_hostname, ntp_port, offset=500, self_offset=500, do_self_test=True): """Initialize the NTPMonitor.""" super().__init__(__class__.__name__) + self.declare_parameter('frequency', 10.0) + frequency = self.get_parameter('frequency').get_parameter_value().double_value self.ntp_hostname = ntp_hostname self.ntp_port = ntp_port @@ -87,8 +87,8 @@ def __init__(self, ntp_hostname, ntp_port, offset=500, self_offset=500, # we need to periodically republish this self.current_msg = None - self.pubtimer = self.create_timer(0.1, self.pubCB) - self.checktimer = self.create_timer(0.1, self.checkCB) + 
self.pubtimer = self.create_timer(1/frequency, self.pubCB) + self.checktimer = self.create_timer(1/frequency, self.checkCB) def pubCB(self): with self.mutex: @@ -97,6 +97,7 @@ def pubCB(self): def checkCB(self): new_msg = DIAG.DiagnosticArray() + new_msg.header.stamp = self.get_clock().now().to_msg() st = self.ntp_diag(self.stat) if st is not None: @@ -145,23 +146,23 @@ def add_kv(stat_values, key, value): if (abs(measured_offset) > self.offset): st.level = DIAG.DiagnosticStatus.WARN st.message = \ - f'NTP offset above threshold: {measured_offset}>'\ + f'NTP offset above threshold: abs({measured_offset})>'\ f'{self.offset} us' if (abs(measured_offset) > self.error_offset): st.level = DIAG.DiagnosticStatus.ERROR st.message = \ - f'NTP offset above error threshold: {measured_offset}>'\ + f'NTP offset above error threshold: abs({measured_offset})>'\ f'{self.error_offset} us' if (abs(measured_offset) < self.offset): st.level = DIAG.DiagnosticStatus.OK - st.message = f'NTP Offset OK: {measured_offset} us' + st.message = f'NTP Offset OK: abs({measured_offset}) us' return st def ntp_monitor_main(argv=sys.argv[1:]): # filter out ROS args - argv = [a for a in argv if not a.startswith('__') and not a == '--ros-args' and not a == '-r'] + argv = argv[:argv.index('--ros-args')] if '--ros-args' in argv else argv import argparse parser = argparse.ArgumentParser() diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ram_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ram_monitor.py new file mode 100755 index 000000000..da59a6d25 --- /dev/null +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ram_monitor.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +# +# Software License Agreement (BSD License) +# +# Copyright (c) 2017, TNO IVS, Helmond, Netherlands +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of the TNO IVS nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
class RamTask(DiagnosticTask):
    """Diagnostic task that reports RAM usage averaged over recent readings."""

    def __init__(self, warning_percentage, window):
        """
        Create the task.

        :param warning_percentage: Average usage (in percent) above which the
            task reports WARN. Converted with ``int()``.
        :param window: Number of most recent readings that are averaged.
            Values below 1 are clamped to 1 so that the average is always
            computed over at least one reading.
        """
        DiagnosticTask.__init__(self, 'RAM Information')
        self._warning_percentage = int(warning_percentage)
        # A deque with maxlen=0 discards every append, which would make the
        # average in run() divide by zero; a negative maxlen raises outright.
        self._readings = collections.deque(maxlen=max(1, int(window)))

    def run(self, stat):
        """
        Take a new reading and summarise the windowed average in ``stat``.

        :param stat: Status wrapper that receives the key/value pair and the
            summary.
        :return: The same ``stat`` object.
        """
        self._readings.append(psutil.virtual_memory().percent)
        ram_average = sum(self._readings) / len(self._readings)

        stat.add('RAM Load Average', f'{ram_average:.2f}')

        if ram_average > self._warning_percentage:
            stat.summary(
                DiagnosticStatus.WARN,
                f'RAM Average exceeds {self._warning_percentage:d} percent',
            )
        else:
            stat.summary(DiagnosticStatus.OK, f'RAM Average {ram_average:.2f} percent')

        return stat


def main():
    """Create the ``ram_monitor_<hostname>`` node, register RamTask and spin."""
    hostname = socket.gethostname()
    # Every invalid symbol is replaced by underscore.
    # isalnum() alone also allows invalid symbols depending on the locale
    cleaned_hostname = ''.join(
        c if (c.isascii() and c.isalnum()) else '_' for c in hostname)
    rclpy.init()
    node = rclpy.create_node(f'ram_monitor_{cleaned_hostname}')

    updater = Updater(node)
    # The hardware ID keeps the raw hostname; only the node name is sanitised.
    updater.setHardwareID(hostname)
    updater.add(
        RamTask(
            node.declare_parameter('warning_percentage', 90).value,
            node.declare_parameter('window', 1).value,
        )
    )

    rclpy.spin(node)


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        pass
class Sensor(object):
    """Plain container for one reading parsed from the ``sensors`` output."""

    def __init__(self):
        """Create a sensor with every field unset (``None``)."""
        self.critical = None
        self.min = None
        self.max = None
        self.input = None
        self.name = None
        self.type = None
        self.high = None
        self.alarm = None

    def __repr__(self):
        return 'Sensor object (name: {}, type: {})'.format(self.name, self.type)

    def getCrit(self):
        """Return the critical threshold, or None if not reported."""
        return self.critical

    def getMin(self):
        """Return the minimum threshold, or None if not reported."""
        return self.min

    def getMax(self):
        """Return the maximum threshold, or None if not reported."""
        return self.max

    def getInput(self):
        """Return the current reading."""
        return self.input

    def getName(self):
        """Return the sensor name."""
        return self.name

    def getType(self):
        """Return the sensor type string."""
        return self.type

    def getHigh(self):
        """Return the high threshold, or None if not reported."""
        return self.high

    def getAlarm(self):
        """Return whether the line carried an ALARM marker."""
        return self.alarm

    def __str__(self):
        lines = [str(self.name), '\t' + 'Type: ' + str(self.type)]
        optional_fields = (
            ('Input', self.input),
            ('Min', self.min),
            ('Max', self.max),
            ('High', self.high),
            ('Crit', self.critical),
        )
        for label, value in optional_fields:
            # Compare against None: a value of 0.0 is a real reading or
            # threshold and must not be hidden by a truthiness check.
            if value is not None:
                lines.append(f'\t{label}: {value}')
        lines.append('\t' + 'Alarm: ' + str(self.alarm))
        return '\n'.join(lines)


# Signed decimal number. The sign matters for thresholds such as
# "min = -13.18 V", and the decimal point is matched literally.
_THRESHOLD_RE = re.compile(r'[-+]?[0-9]+(?:\.[0-9]+)?')

# Keyword looked for in a threshold entry -> Sensor attribute that stores it.
# The order is the precedence used when an entry contains several keywords.
_THRESHOLD_ATTRS = (
    ('min', 'min'),
    ('max', 'max'),
    ('high', 'high'),
    ('crit', 'critical'),
)


def parse_sensor_line(line):
    """
    Parse one ``name: reading (thresholds)`` line into a Sensor.

    The last word of the part before the colon becomes the sensor type;
    ``Core N`` and ``Physical id N`` lines are treated as temperatures and
    keep their full label as name.

    :param line: One line of text containing exactly one colon.
    :return: The parsed Sensor, or None when the label has no space separated
        type or the reading is not followed by exactly one parenthesised
        threshold list.
    :raises ValueError: If the line does not contain exactly one colon or the
        reading is not a number.
    :raises IndexError: If the reading is empty.
    """
    sensor = Sensor()
    line = line.lstrip()
    [name, reading] = line.split(':')

    try:
        [sensor.name, sensor.type] = name.rsplit(' ', 1)
    except ValueError:
        return None

    if sensor.name == 'Core' or 'Physical id' in sensor.name:
        sensor.name = name
        sensor.type = 'Temperature'

    try:
        [reading, params] = reading.lstrip().split('(')
    except ValueError:
        return None

    sensor.alarm = 'ALARM' in line

    if '°C' in reading:
        sensor.input = float(reading.split('°C')[0])
    else:
        sensor.input = float(reading.split()[0])

    for param in params.split(','):
        # Only look for the number to the right of '=' so that digits in the
        # keyword part can never be mistaken for the value.
        _, separator, value = param.partition('=')
        match = _THRESHOLD_RE.search(value if separator else param)
        if match is None:
            continue
        for keyword, attribute in _THRESHOLD_ATTRS:
            if keyword in param:
                setattr(sensor, attribute, float(match.group(0)))
                break

    return sensor


def _rads_to_rpm(rads):
    """Convert radians per second to revolutions per minute."""
    return rads / (2 * math.pi) * 60


def _rpm_to_rads(rpm):
    """Convert revolutions per minute to radians per second."""
    return rpm * (2 * math.pi) / 60


def parse_sensors_output(node: 'Node', output):
    """
    Parse the complete ``sensors`` output into a list of Sensor objects.

    Lines without a colon and lines containing ``Adapter`` are skipped. A
    line that cannot be parsed is reported through the node's logger and
    does not stop the remaining lines from being parsed.

    :param node: Object whose ``get_logger()`` is used for warnings.
    :param output: Text of the ``sensors`` output, as ``str`` or UTF-8
        encoded ``bytes``.
    :return: List of the successfully parsed Sensor objects.
    """
    text = output if isinstance(output, str) else output.decode('utf-8')

    sensor_list = []
    for line in StringIO(text).readlines():
        if ':' not in line or 'Adapter' in line:
            continue
        try:
            sensor = parse_sensor_line(line)
        except Exception as exc:
            # rclpy loggers take one preformatted message; printf-style
            # positional arguments would raise a TypeError right here.
            node.get_logger().warning(
                f'Unable to parse line "{line.rstrip()}", due to {exc}'
            )
            continue
        if sensor is not None:
            sensor_list.append(sensor)
    return sensor_list


def get_sensors():
    """
    Run the ``sensors`` command and return what it printed.

    :return: The raw stdout bytes, or an empty string when the command is
        missing, exits with a non-zero status or prints nothing.
    """
    try:
        # No shell is needed to start a single fixed executable.
        result = subprocess.run(
            ['sensors'], stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError:
        return ''
    if result.returncode != 0 or not result.stdout:
        return ''
    return result.stdout


class SensorsMonitor(object):
    """Publish the lm-sensors readings of this host as a diagnostic status."""

    def __init__(self, node: 'Node', hostname):
        """
        Declare the ``ignore_fans`` parameter and register the monitor task.

        :param node: Node used for parameters, logging and the updater.
        :param hostname: Name used in the title of the diagnostic task.
        """
        self.node = node
        self.hostname = hostname
        self.ignore_fans = node.declare_parameter('ignore_fans', False).value
        node.get_logger().info('Ignore fanspeed warnings: %s' % self.ignore_fans)

        self.updater = DIAG.Updater(node)
        self.updater.setHardwareID('none')
        self.updater.add('%s Sensor Status' % self.hostname, self.monitor)

    @staticmethod
    def _exceeds(reading, limit):
        """Return True if both values are known and reading is above limit."""
        return reading is not None and limit is not None and reading > limit

    @staticmethod
    def _falls_below(reading, limit):
        """Return True if both values are known and reading is below limit."""
        return reading is not None and limit is not None and reading < limit

    def monitor(self, stat):
        """
        Read all sensors and merge their state into ``stat``.

        Thresholds that a sensor does not report are skipped instead of being
        compared with None, which would raise and abort the whole update.

        :param stat: Status wrapper that receives the summary and one
            key/value pair per temperature, voltage and speed sensor.
        :return: The same ``stat`` object.
        """
        try:
            stat.summary(DiagnosticStatus.OK, 'OK')
            for sensor in parse_sensors_output(self.node, get_sensors()):
                sensor_type = sensor.getType()
                reading = sensor.getInput()

                if sensor_type == 'Temperature':
                    if self._exceeds(reading, sensor.getCrit()):
                        stat.mergeSummary(
                            DiagnosticStatus.ERROR, 'Critical Temperature'
                        )
                    elif self._exceeds(reading, sensor.getHigh()):
                        stat.mergeSummary(DiagnosticStatus.WARN, 'High Temperature')
                elif sensor_type == 'Voltage':
                    if self._falls_below(reading, sensor.getMin()):
                        stat.mergeSummary(DiagnosticStatus.ERROR, 'Low Voltage')
                    elif self._exceeds(reading, sensor.getMax()):
                        stat.mergeSummary(DiagnosticStatus.ERROR, 'High Voltage')
                elif sensor_type == 'Speed':
                    if not self.ignore_fans and self._falls_below(
                        reading, sensor.getMin()
                    ):
                        stat.mergeSummary(DiagnosticStatus.ERROR, 'No Fan Speed')
                else:
                    # Other sensor types are neither evaluated nor published.
                    continue

                stat.add(' '.join([sensor.getName(), sensor_type]), str(reading))
        except Exception:
            # Boundary of the periodic diagnostic callback: log the failure
            # and still return the status instead of propagating.
            import traceback

            self.node.get_logger().error('Unable to process lm-sensors data')
            self.node.get_logger().error(traceback.format_exc())
        return stat


if __name__ == '__main__':
    rclpy.init()
    hostname = socket.gethostname()
    # Every invalid symbol is replaced by underscore.
    # isalnum() alone also allows invalid symbols depending on the locale
    cleaned_hostname = ''.join(
        c if (c.isascii() and c.isalnum()) else '_' for c in hostname)
    node = rclpy.create_node('sensors_monitor_%s' % cleaned_hostname)

    monitor = SensorsMonitor(node, hostname)
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass