Description
Hi rclpy team,
Please see the self-contained code examples below, which show a simplified case of the issue. I am on Ubuntu 24.04 (Noble) with the Jazzy distro.
Here is a simple node that publishes to /clock
#!/usr/bin/env python3
import rclpy
from rclpy.node import Node
from rosgraph_msgs.msg import Clock


class ClockPublisher(Node):
    def __init__(self):
        super().__init__('clock_publisher')
        self.publisher_ = self.create_publisher(Clock, '/clock', 10)
        publish_hz = 10
        self.timer = self.create_timer(1 / publish_hz, self.timer_callback)
        self.get_logger().info('Publishing rosgraph_msgs/Clock to /clock')

    def timer_callback(self):
        now = self.get_clock().now().to_msg()
        clock_msg = Clock(clock=now)
        self.publisher_.publish(clock_msg)
        self.get_logger().debug(f'Published Clock: {clock_msg.clock.sec}.{clock_msg.clock.nanosec:09d}')


def main(args=None):
    rclpy.init(args=args)
    node = ClockPublisher()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()
Outputs
(venv) bam@bam-GPU:~/bam_ws/src$ ros2 topic hz /clock
average rate: 10.001
min: 0.100s max: 0.100s std dev: 0.00016s window: 11
average rate: 10.001
min: 0.100s max: 0.100s std dev: 0.00014s window: 22
average rate: 10.001
min: 0.100s max: 0.100s std dev: 0.00014s window: 33
average rate: 10.001
min: 0.100s max: 0.100s std dev: 0.00014s window: 43
average rate: 10.000
min: 0.100s max: 0.100s std dev: 0.00013s window: 53
(venv) bam@bam-GPU:~/bam_ws/src$ ros2 topic echo /clock
clock:
  sec: 1746021058
  nanosec: 425310669
---
clock:
  sec: 1746021058
  nanosec: 525296735
---
clock:
  sec: 1746021058
  nanosec: 625305596
Here is a node, run with a MultiThreadedExecutor, that subscribes to /clock and also runs an internal timer at 10 Hz:
#!/usr/bin/env python3
import rclpy
from rclpy.node import Node
from rclpy.executors import MultiThreadedExecutor
from rosgraph_msgs.msg import Clock
import time
from collections import deque
import numpy as np


class ClockSubscriberWithTimer(Node):
    def __init__(self):
        super().__init__('clock_sub_with_timer')

        # Subscribe to /clock
        if True:
            self.subscription = self.create_subscription(
                Clock,
                '/clock',
                self.clock_callback,
                10
            )

        # Timer
        timer_hz = 10
        self.timer_period = 1 / timer_hz  # seconds
        self.timer = self.create_timer(self.timer_period, self.timer_callback)

        # Period tracking
        self.timer_periods = deque(maxlen=10)
        self.clock_periods = deque(maxlen=10)

        # Last timestamps
        self.last_timer_time = time.time()
        self.last_clock_time = time.time()

        # Throttle
        self.log_throttle_sec = 5.0

        self.get_logger().info("Node started with multithreaded executor")

    def timer_callback(self):
        now = time.time()
        period = now - self.last_timer_time
        self.last_timer_time = now
        self.timer_periods.append(period)
        if len(self.timer_periods) < 5:
            return
        avg = np.average(self.timer_periods)
        std = np.std(self.timer_periods)
        freq = 1.0 / avg if avg > 0 else float('inf')
        self.get_logger().info(
            f"[timer] freq avg: {freq:.2f} Hz | period avg: {avg * 1000:.1f} ms ± {std * 1000:.1f} ms",
            throttle_duration_sec=self.log_throttle_sec
        )

    def clock_callback(self, msg: Clock):
        now = time.time()
        period = now - self.last_clock_time
        self.last_clock_time = now
        self.clock_periods.append(period)
        if len(self.clock_periods) < 5:
            return
        avg = np.average(self.clock_periods)
        std = np.std(self.clock_periods)
        freq = 1.0 / avg if avg > 0 else float('inf')
        self.get_logger().info(
            f"[clock] freq avg: {freq:.2f} Hz | period avg: {avg * 1000:.1f} ms ± {std * 1000:.1f} ms",
            throttle_duration_sec=self.log_throttle_sec
        )


def main(args=None):
    rclpy.init(args=args)
    node = ClockSubscriberWithTimer()
    executor = MultiThreadedExecutor(num_threads=4)
    executor.add_node(node)
    try:
        executor.spin()
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        rclpy.shutdown()
        exit(0)


if __name__ == '__main__':
    main()
Outputs
(venv) bam@bam-GPU:~/bam_ws/src$ /home/bam/bam_ws/venv/bin/python /home/bam/bam_ws/src/bam_dev/tests/multithreaded_node.py
[INFO] [1746022020.321751585] [clock_sub_with_timer]: Node started with multithreaded executor
[INFO] [1746022020.741687309] [clock_sub_with_timer]: [clock] freq avg: 11.64 Hz | period avg: 85.9 ms ± 36.0 ms
[INFO] [1746022020.811861409] [clock_sub_with_timer]: [timer] freq avg: 9.99 Hz | period avg: 100.1 ms ± 0.1 ms
[INFO] [1746022026.015217911] [clock_sub_with_timer]: [clock] freq avg: 4.90 Hz | period avg: 204.1 ms ± 408.7 ms
[INFO] [1746022026.018357355] [clock_sub_with_timer]: [timer] freq avg: 2.55 Hz | period avg: 392.0 ms ± 476.3 ms
[INFO] [1746022031.116420786] [clock_sub_with_timer]: [clock] freq avg: 8.12 Hz | period avg: 123.2 ms ± 167.2 ms
[INFO] [1746022031.117956087] [clock_sub_with_timer]: [timer] freq avg: 3.33 Hz | period avg: 300.6 ms ± 286.7 ms
[INFO] [1746022036.654749105] [clock_sub_with_timer]: [clock] freq avg: 5.08 Hz | period avg: 196.8 ms ± 390.3 ms
[INFO] [1746022036.658352785] [clock_sub_with_timer]: [timer] freq avg: 3.72 Hz | period avg: 268.8 ms ± 357.0 ms
[INFO] [1746022041.711617864] [clock_sub_with_timer]: [timer] freq avg: 2.44 Hz | period avg: 410.0 ms ± 428.7 ms
Now... if I disable the subscription to the clock topic (flipping the if True above to if False):
class ClockSubscriberWithTimer(Node):
    def __init__(self):
        super().__init__('clock_sub_with_timer')

        # Subscribe to /clock
        if False:
            self.subscription = self.create_subscription(
                Clock,
                '/clock',
                self.clock_callback,
                10
            )
The timer works basically perfectly!
(venv) bam@bam-GPU:~/bam_ws/src$ /home/bam/bam_ws/venv/bin/python /home/bam/bam_ws/src/bam_dev/tests/multithreaded_node.py
[INFO] [1746022154.717450586] [clock_sub_with_timer]: Node started with multithreaded executor
[INFO] [1746022155.208188099] [clock_sub_with_timer]: [timer] freq avg: 9.99 Hz | period avg: 100.1 ms ± 0.2 ms
[INFO] [1746022160.307643213] [clock_sub_with_timer]: [timer] freq avg: 10.00 Hz | period avg: 100.0 ms ± 0.1 ms
[INFO] [1746022165.307875869] [clock_sub_with_timer]: [timer] freq avg: 10.00 Hz | period avg: 100.0 ms ± 0.1 ms
[INFO] [1746022170.407949066] [clock_sub_with_timer]: [timer] freq avg: 10.00 Hz | period avg: 100.0 ms ± 0.2 ms
[INFO] [1746022175.408043497] [clock_sub_with_timer]: [timer] freq avg: 10.00 Hz | period avg: 100.0 ms ± 0.1 ms
[INFO] [1746022180.507926679] [clock_sub_with_timer]: [timer] freq avg: 10.00 Hz | period avg: 100.0 ms ± 0.1 ms
With both the timer and the subscription active, the node's performance is really poor. It seems that the executor's round-robin scheduling is not working properly.
I remember speaking to @clalancette at last year's Foxglove Actuate summit, and he mentioned the team was aware of rclpy performance issues; I am not sure if this is related.
For now, my workaround is to create C++ buffer nodes and then make a service call from the rclpy node as needed. This fixes the problem for now. I do this for /jointstates, /tf, /camera/color, /camera/depth (basically all topics), since it seems that adding any subscription to a multithreaded rclpy node tanks its performance.
Here is an example buffer node for /clock
#include <memory>
#include <mutex>

#include "rclcpp/rclcpp.hpp"
#include "rosgraph_msgs/msg/clock.hpp"
#include "bam_msgs/srv/get_clock.hpp"

using std::placeholders::_1;
using std::placeholders::_2;

class ClockBufferNode : public rclcpp::Node
{
public:
  ClockBufferNode()
  : Node("clock_buffer", "bam_GPU")
  {
    RCLCPP_INFO(this->get_logger(), "Setting up ClockBufferNode...");

    // Subscription to /clock
    subscription_ = this->create_subscription<rosgraph_msgs::msg::Clock>(
      "/clock", rclcpp::QoS(10),
      std::bind(&ClockBufferNode::clock_callback, this, _1));

    // Service to get latest clock
    service_ = this->create_service<bam_msgs::srv::GetClock>(
      "get_clock",
      std::bind(&ClockBufferNode::handle_get_clock, this, _1, _2));

    RCLCPP_INFO(this->get_logger(), "ClockBufferNode is ready");
  }

private:
  void clock_callback(const rosgraph_msgs::msg::Clock::SharedPtr msg)
  {
    std::lock_guard<std::mutex> lock(mutex_);
    latest_clock_ = *msg;
  }

  void handle_get_clock(
    const std::shared_ptr<bam_msgs::srv::GetClock::Request> /*request*/,
    std::shared_ptr<bam_msgs::srv::GetClock::Response> response)
  {
    std::lock_guard<std::mutex> lock(mutex_);
    response->clock = latest_clock_;
    RCLCPP_INFO(this->get_logger(), "Returning latest clock time: %.3f",
      rclcpp::Time(latest_clock_.clock).seconds());
  }

  rclcpp::Subscription<rosgraph_msgs::msg::Clock>::SharedPtr subscription_;
  rclcpp::Service<bam_msgs::srv::GetClock>::SharedPtr service_;
  rosgraph_msgs::msg::Clock latest_clock_;
  std::mutex mutex_;
};

int main(int argc, char ** argv)
{
  rclcpp::init(argc, argv);
  auto node = std::make_shared<ClockBufferNode>();
  rclcpp::executors::SingleThreadedExecutor executor;
  executor.add_node(node);
  executor.spin();
  rclcpp::shutdown();
  return 0;
}
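And here is roughly the rclpy side of that workaround: a minimal sketch that polls the buffer node with a one-off service call instead of holding a subscription. (bam_msgs/srv/GetClock is my own custom service, and I am assuming the service resolves to /bam_GPU/get_clock given the node's namespace.)

#!/usr/bin/env python3
import rclpy
from rclpy.node import Node
from bam_msgs.srv import GetClock


class ClockClient(Node):
    def __init__(self):
        super().__init__('clock_client')
        # Client for the C++ buffer node's service instead of a /clock subscription.
        self.client = self.create_client(GetClock, '/bam_GPU/get_clock')
        while not self.client.wait_for_service(timeout_sec=1.0):
            self.get_logger().info('Waiting for the clock buffer service...')

    def get_latest_clock(self):
        # Blocks this (otherwise idle) node until the buffered clock comes back.
        future = self.client.call_async(GetClock.Request())
        rclpy.spin_until_future_complete(self, future)
        return future.result().clock


def main():
    rclpy.init()
    node = ClockClient()
    clock = node.get_latest_clock()
    node.get_logger().info(
        f'Latest buffered clock: {clock.clock.sec}.{clock.clock.nanosec:09d}')
    node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()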
Previously, I had experienced this performance issue when making subscriptions to /camera/color (which I thought was due to the large message size), and to /tf and /jointstates (which I thought was primarily due to their very high frequency).
I also experienced it when setting use_sim_time:=true for an rclpy multithreaded node, and the performance became terrible. I realized this is because it creates a subscription to /clock.
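For reference, I believe this is all it takes to end up in that state, since use_sim_time makes the node's time source attach a /clock subscription internally (a minimal sketch; the node name is arbitrary):

from rclpy.node import Node
from rclpy.parameter import Parameter


class SimTimeNode(Node):
    def __init__(self):
        # Overriding use_sim_time to true makes rclpy subscribe to /clock
        # internally, which was enough to trigger the slowdown for me.
        super().__init__(
            'sim_time_node',
            parameter_overrides=[Parameter('use_sim_time', Parameter.Type.BOOL, True)],
        )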
I think there may be a more serious underlying issue, since in this example the node is unable to keep up with a simple /clock signal at 10 Hz.
--
It would be helpful to get your thoughts on:
- What is the best practice for making subscriptions to topics from multi-threaded executors in rclpy? (For example, should callbacks be split into explicit callback groups? See the sketch after this list.)
- What are your thoughts on using C++ buffer nodes and making service calls to deal with this issue? (I got this idea after seeing the tf2_buffer_client.)
- Longer term, I imagine I could implement the nodes in C++, but I love rclpy for its Pythonic simplicity. Would using rclcpp fix the issues I am experiencing right now? Is there a way to get better performance in rclpy?
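To make the first question concrete, here is a sketch of the kind of pattern I am asking about: explicit callback groups for the subscription and the timer. The callbacks are stubs standing in for the measurement logic from my example above, and I have not confirmed this actually helps.

from rclpy.node import Node
from rclpy.callback_groups import MutuallyExclusiveCallbackGroup
from rosgraph_msgs.msg import Clock


class ClockSubscriberWithTimer(Node):
    def __init__(self):
        super().__init__('clock_sub_with_timer')
        # One group per callback so the MultiThreadedExecutor can schedule
        # the subscription and the timer on separate threads.
        self.sub_group = MutuallyExclusiveCallbackGroup()
        self.timer_group = MutuallyExclusiveCallbackGroup()
        self.subscription = self.create_subscription(
            Clock, '/clock', self.clock_callback, 10,
            callback_group=self.sub_group)
        self.timer = self.create_timer(
            0.1, self.timer_callback,
            callback_group=self.timer_group)

    def clock_callback(self, msg: Clock):
        # Same period-tracking logic as in my example above.
        pass

    def timer_callback(self):
        pass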
Thank you for all your work on this excellent library! I ❤️ ROS2 :)
Best,
Zach