Update multi-dataset benchmark definition (#117)

Signed-off-by: Gerardo Puga <[email protected]>
glpuga authored Dec 2, 2024
1 parent b040703 commit 86a6c33
Showing 14 changed files with 107 additions and 115 deletions.
2 changes: 0 additions & 2 deletions src/benchmarks/beluga_vs_nav2_multi_dataset/Earthfile
Original file line number Diff line number Diff line change
@@ -26,7 +26,6 @@ devel:
FROM lambkin+embed-ubuntu-devel --distro=${distro} --rosdistro=${rosdistro}
RUN mkdir -p /workspace/src
WORKDIR /workspace
RUN cd src && git clone https://github.com/Ekumen-OS/beluga
COPY package.xml src/lambkin/benchmarks/beluga_vs_nav2_multi_dataset/package.xml
RUN . /etc/profile && apt update && rosdep update && \
rosdep install -y -i --from-paths src \
@@ -53,7 +52,6 @@ build:
FROM lambkin+embed-ubuntu-devel --distro=${distro} --rosdistro=${rosdistro} --components="external/ros2"
RUN mkdir -p /workspace/src
WORKDIR /workspace
RUN cd src && git clone https://github.com/Ekumen-OS/beluga
COPY . src/beluga_vs_nav2_multi_dataset
RUN . /etc/profile && apt update && rosdep update && \
rosdep install -y -i --from-paths src -t build -t buildtool -t test \
@@ -22,7 +22,7 @@

<set_parameter name="use_sim_time" value="$(var use_sim_time)"/>

<node pkg="nav2_amcl" exec="amcl" name="nav2_amcl" launch-prefix="$(env nav2_amcl_PREFIX '')" sigkill_timeout="20.0" sigterm_timeout="40.0" >
<node pkg="nav2_amcl" exec="amcl" name="nav2_amcl" launch-prefix="$(env nav2_amcl_PREFIX '')" >
<param from="$(find-pkg-share beluga_vs_nav2_multi_dataset)/params/amcl.yaml"/>
<param name="laser_model_type" value="$(var laser_model_type)"/>
<param name="max_particles" value="$(var max_particles)"/>
@@ -44,7 +44,7 @@
<remap from="particle_cloud" to="/nav2_amcl/particle_cloud"/>
</node>

<node pkg="beluga_amcl" exec="amcl_node" name="beluga_amcl" launch-prefix="$(env beluga_amcl_PREFIX '')" sigkill_timeout="20.0" sigterm_timeout="40.0" >
<node pkg="beluga_amcl" exec="amcl_node" name="beluga_amcl" launch-prefix="$(env beluga_amcl_PREFIX '')" >
<param from="$(find-pkg-share beluga_vs_nav2_multi_dataset)/params/amcl.yaml"/>
<param name="laser_model_type" value="$(var laser_model_type)"/>
<param name="max_particles" value="$(var max_particles)"/>
@@ -65,6 +65,7 @@

<remap from="pose" to="/beluga_amcl/pose"/>
<remap from="particle_cloud" to="/beluga_amcl/particle_cloud"/>
<remap from="particle_markers" to="/beluga_amcl/particle_markers"/>
</node>

<node pkg="nav2_map_server" exec="map_server" name="map_server">
1 change: 1 addition & 0 deletions src/benchmarks/beluga_vs_nav2_multi_dataset/package.xml
@@ -21,6 +21,7 @@

<exec_depend>nav2_amcl</exec_depend>
<exec_depend>nav2_lifecycle_manager</exec_depend>
<exec_depend>nav2_map_server</exec_depend>

<exec_depend>beluga_amcl</exec_depend>

@@ -22,12 +22,12 @@

# -- Project information -----------------------------------------------------

project = 'Nominal Beluga AMCL Benchmark Report'
copyright = '2023, Ekumen Inc.'
project = 'Beluga AMCL Performance Benchmarking Results'
copyright = '2024, Ekumen Inc.'
author = 'Ekumen Inc.'

version = '0.1.0'
release = '0.1.0-alpha'
release = '0.1.0'

# -- General configuration ---------------------------------------------------

@@ -8,8 +8,8 @@
os.makedirs('_generated', exist_ok=True)


Nominal Beluga AMCL vs Nav2 AMCL benchmark
==========================================
Beluga AMCL Performance Benchmarking Results
============================================

.. toctree::
:maxdepth: 2
@@ -13,12 +13,12 @@ The set contains only two bagfiles, corresponding to two different trajectories
"Hallway Localization" is a bit over 2 minutes long, while "Hallway Return" is a little below 4
minutes long.

The following is a representative map from the set, to exemplify the type of environment:
The following is a representative map from the set:

.. figure:: assets/representative_cartographer_magazino_map.png
:scale: 99 %

Localization map of one of the trajectories in the Cartographer Magazino dataset, representative of the environment.
Localization map of one of the trajectories in the Cartographer Magazino dataset.

Transformations to the original dataset
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -28,13 +28,12 @@ About the hardware used to record the dataset,

This analysis only uses the "Office" subset of the OpenLORIS-Scene dataset.

The following is a representative map from the set, to exemplify the type of environment:
The following is a representative map from the set:

.. figure:: assets/representative_openloris_office_map.png
:scale: 99 %

Localization map of one of the trajectories in the OpenLORIS Scene, representative of the environment.

Localization map of a representative example in the OpenLORIS Scene dataset.

Transformations to the original dataset
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -37,12 +37,12 @@ Moving fences 3 Fences are shifted outwards and inwards
Adding new boxes 1 The four fences are covered with stacks of boxes.
========================= ============ ============================

The following is a representative map from the set, to exemplify the type of environment:
The following is a representative map from the set:

.. figure:: assets/representative_torwic_mapping_map.png
:scale: 99 %

Localization map of one of the "baseline" trajectory in the TorWIC Mapping dataset.
Localization map of the "baseline" trajectory in the TorWIC Mapping dataset.


Transformations to the original dataset
@@ -48,12 +48,13 @@ Jun. 23, 2022 7
Oct. 12, 2022 6
================ ===========================

The following is a representative map from the set, to exemplify the type of environment:
The following is a representative map from the set:

.. figure:: assets/representative_torwic_slam_map.png
:scale: 50 %

Localization map of one of the TorWIC SLAM dataset trajectories, representative of the environment.
Localization map of a representative example in the TorWIC SLAM dataset.


Transformations to the original dataset
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -64,5 +65,4 @@ To repurpose the original dataset for 2D localization evaluation, we made the fo
- All topics not related to lidar-based 2D localization were removed to reduce file size.
- The map->odom TF transformations were removed from the transform tree.
- A reference ground-truth and a matching occupancy map were generated for each bagfile using SLAM.


- The SLAM-generated occupancy maps were manually cleaned to remove "unknown" patches and small obstacles along the ground-truth trajectory caused by the robot operator's presence.
@@ -8,7 +8,7 @@ dataset is no longer available online, but it was described in the following ref
“`An Object-Based Semantic World Model for Long-Term Change Detection and Semantic
Querying. <https://web.archive.org/web/20151207202459id_/http://cgi.cs.duke.edu/~mac/papers/iros12_sm.pdf>`_”, by Julian Mason and Bhaskara Marthi, IROS 2012.

About the hardware used to record the dataset, as described in the reference quoted above:
The dataset was collected using a PR2 omnidirectional mobile robot developed by Willow Garage.

Our mobile robot is a `Willow Garage PR2 <https://robotsguide.com/robots/pr2>`_ with a headmounted Microsoft Kinect. When the robot is in motion,
the Kinect is roughly 1.5 meters off the floor, and captures
@@ -22,16 +22,16 @@ About the hardware used to record the dataset, as described in the reference quo
do require that the sensor be fairly far from the floor, in order
to perceive supporting surfaces and the objects on them

The dataset provides 67 trajectories distributed over multiple days and time periods. All of the trajectories
are in the same office-like environment, and represent on average 15 minutes of robot operation. The shortest
The dataset provides 67 trajectories recorded over multiple days and time periods. All of the trajectories
are in the same office-like environment, and each represents about 15 minutes of robot operation on average. The shortest
trajectory is 8.5 minutes long, and the longest is 30 minutes long.

The following is a representative map from the set, to exemplify the type of environment:
The following is a representative map from the set:

.. figure:: assets/representative_willow_garage_map.png
:scale: 99 %

Localization map of one of the trajectories in the Willow Garage dataset, representative of the environment.
Localization map of a representative example in the Willow Garage dataset.

Transformations to the original dataset
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -6,19 +6,21 @@ Objective

To gather data on the localization performance and resource usage of both
`Beluga AMCL <https://github.com/Ekumen-OS/beluga>`_ and `Nav2 AMCL <https://github.com/ros-planning/navigation2/tree/main/nav2_amcl>`_
when used in a diversity of pre-recorded environments.
when used in a variety of pre-recorded environments.

This data will be used to further improve the state of localization in ROS 2 and detect potential issues in the current implementations.

Scope
^^^^^

This document is limited to the presentation of the methodology and the results used to gather the performance data.
This document is limited to the presentation of the methodology, description of the input datasets, and enumeration of the results.
Analysis and interpretation of the results will be performed in the article or blog post that will be created
to make these results public.

No root-cause analysis of issues or performance tuning is performed in this document.

For the sake of brevity, a limited number of configuration variants are used in the experiments: the sensor model and the motion model.
The rest of the configuration parameters are kept at their default values, which are the same for both AMCL implementations.
For the sake of brevity, a limited number of configuration variants are tested in this benchmark. For each dataset,
both the beam and the likelihood field sensor models are evaluated. Both differential-drive
and omnidirectional robots are represented in the input datasets.
The rest of the configuration parameters are kept at their default values, which are the same for both Beluga AMCL and Nav2 AMCL.



@@ -5,7 +5,7 @@ Datasets used
^^^^^^^^^^^^^

A number of pre-recorded datasets were selected to evaluate the performance of the Beluga AMCL and Nav2 AMCL localization systems.
A number of public datasets were considered for this evaluation, looking for a diversity of environments and robots that were representative of a wide range of real-world scenarios.
A number of public datasets were considered for this evaluation, looking for a variety of environments and robots that were representative of a wide range of real-world scenarios.

The following datasets were selected based on their characteristics and availability for this evaluation:

@@ -43,22 +43,19 @@ These real-world datasets have two more limitations worth mentioning:
- The longest run-time of any of the real-world datasets listed above is around 30 minutes, which may fail to flag longer-term resource usage issues, such as memory leaks.
- In all cases the datasets were recorded using differential-drive robots.

To somewhat mitigate these issues two additional synthetic datasets were added to the evaluation:
To somewhat mitigate these issues an additional synthetic dataset was added to the evaluation:

.. list-table:: Synthetic datasets
:widths: 25 70
:header-rows: 1

* - Name
- Description
* - Diff Drive Sim 24hs
- Gazebo Classic simulation of a diff-drive robot randomly wandering around a 450m^2 office environment with both mapped and unmapped furniture for just over 24hs. The simulated robot is modelled on a Kobuki platform with a mounted RPLidar A1.
* - Omni Drive Sim 24hs
- Gazebo Sim simulation of a omni-drive robot randomly wandering around the `AWS Robomaker Bookstore World <https://github.com/aws-robotics/aws-robomaker-bookstore-world>`_ with both mapped and unmapped furniture for just over 24hs. The simulated robot is modelled after a customized Robomaster EP with a mounted RPLidar A2 M12.

In both cases small imperfections were added to the simulated models to cause a small amount of drift in the odometry. Ground-truth was provided by Gazebo plugins from
world state information, and the occupancy grid maps were generated with using `SLAM Toolbox <https://github.com/SteveMacenski/slam_toolbox>`_.

For these simulations, small imperfections were added to the robot models to cause a small amount of drift in the odometry. Ground-truth was generated from actual
world-state information, and the occupancy grid maps were generated using `SLAM Toolbox <https://github.com/SteveMacenski/slam_toolbox>`_.
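The odometry-drift idea above can be sketched in isolation as follows. This is a hypothetical illustration, not the actual simulation setup: the function name, the 1% wheel-scale error, and the noise level are all made up for the example.

```python
import math
import random

def drift_odometry(true_poses, scale_error=1.01, noise_std=0.002, seed=42):
    """Re-integrate a ground-truth 2D trajectory (x, y, theta) with a small
    systematic scale error plus per-step noise, producing drifting odometry."""
    rng = random.Random(seed)
    x, y, theta = true_poses[0]
    odom = [(x, y, theta)]
    for (px, py, pt), (nx, ny, nt) in zip(true_poses, true_poses[1:]):
        # True incremental motion, corrupted by scale error and noise.
        ds = math.hypot(nx - px, ny - py) * scale_error + rng.gauss(0.0, noise_std)
        dtheta = (nt - pt) + rng.gauss(0.0, noise_std)
        theta += dtheta
        x += ds * math.cos(theta)
        y += ds * math.sin(theta)
        odom.append((x, y, theta))
    return odom

# A straight 10 m ground-truth run: the integrated odometry drifts away
# from the true endpoint by roughly the injected scale error.
truth = [(0.1 * i, 0.0, 0.0) for i in range(101)]
noisy = drift_odometry(truth)
```

The point of the exercise is that the localization systems then have to correct this drift against the map, as they would on a real robot.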


Alterations to the real robot datasets
@@ -75,34 +72,32 @@ such as map/odom transforms, were removed to prevent conflicts with the systems
Additionally, all topics not related to lidar-based 2D localization were removed to reduce file size. This was needed because
the evaluation process is very intensive in terms of storage requirements.

The two synthetic datasets were constructed for this evaluation and therefore were ROS 2 native with no missing data.

Evaluation procedure
^^^^^^^^^^^^^^^^^^^^

The evaluation was performed using the `LAMBKIN <https://gitlab.com/ternaris/lambkin>`_ framework,
which is tool described as "a mixture of automation and conventions to facilitate reproducible
benchmarking and evaluation of localization and mapping systems".
The evaluation was performed using the `LAMBKIN <https://gitlab.com/ternaris/lambkin>`_ toolkit,
which provides a mixture of automation tools and conventions to facilitate reproducible
benchmarking and evaluation of localization and mapping systems.

For each bagfile in each dataset and each tested configuration (likelihood or beam), LAMBKIN
replays the data through both Beluga AMCL and Nav2 AMCL at the same time, recording the output of both
in a new bagfile. This bagfile is then processed using the `evo <https://github.com/MichaelGrupp/evo>`_ tool to
summarize the localization performance of both systems against the ground-truth data.
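In essence, the APE metric evo computes here is the pointwise distance between time-associated estimated and ground-truth poses. A minimal sketch of the translational component, assuming already-associated 2D positions and skipping the trajectory alignment evo can also perform:

```python
import math

def translational_ape(ground_truth, estimate):
    """Pointwise translation error between two time-aligned 2D trajectories."""
    return [math.hypot(gx - ex, gy - ey)
            for (gx, gy), (ex, ey) in zip(ground_truth, estimate)]

# Toy trajectories with a constant 0.1 m offset per sample.
errors = translational_ape([(0, 0), (1, 0), (2, 0)],
                           [(0, 0.1), (1.1, 0), (2, -0.1)])
```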

During execution LAMBKIN also instruments both localization nodes using `timememory <https://timemory.readthedocs.io/en/develop/>`_ to
collect resource usage metrics such as CPU time, memory usage, and other system-level metrics. This information is stored along with the
results of the evo.
During execution LAMBKIN also wraps both localization nodes using the `timem` command line tool from
the `timemory <https://timemory.readthedocs.io/en/develop/>`_ toolkit to
collect average CPU usage and peak RSS (Resident Set Size). This information is stored along with the evo results.
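Given the `launch-prefix="$(env nav2_amcl_PREFIX '')"` hooks visible in the launch file changed by this commit, the wrapping presumably amounts to setting those environment variables before launching. A sketch under that assumption; the exact `timem` flags and output paths are illustrative:

```shell
# Prefix each AMCL node with `timem` so that average CPU usage and peak
# RSS are sampled per process (flags and output paths are illustrative).
export nav2_amcl_PREFIX="timem -o /tmp/nav2_amcl_metrics --"
export beluga_amcl_PREFIX="timem -o /tmp/beluga_amcl_metrics --"
# The launch file then picks these up via its $(env ...) substitutions:
# ros2 launch beluga_vs_nav2_multi_dataset <launch arguments elided>
```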

Each evaluation can be iterated multiple times to improve the statistical significance of the results. This comes
at the expense of increased execution time and storage requirements for the results, which can be substantial. As a
compromise, the results in this report are based on 3 iterations of each bagfile/configuration combination for real robot datasets,
and a single iteration for the synthetic datasets accounting for their very long length.
compromise, the results in this report are based on a single iteration of each bagfile/configuration combination
for real robot datasets. The one exception is the OpenLORIS dataset, which was evaluated using 5 independent
iterations due to the very short duration of its bagfiles.

For each bagfile/configuration combination, the resulting APE metrics for all iterations are processed to produce
the median, mean, standard deviation and worst-case value. These are the values reported in the following pages.
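The aggregation described above is plain descriptive statistics over the per-iteration APE samples; a minimal sketch with made-up numbers, not the actual report tooling:

```python
import statistics

def summarize_ape(ape_samples):
    """Reduce per-iteration APE values (meters) to the reported statistics."""
    return {
        "median": statistics.median(ape_samples),
        "mean": statistics.mean(ape_samples),
        "stddev": statistics.stdev(ape_samples) if len(ape_samples) > 1 else 0.0,
        "worst": max(ape_samples),  # worst-case value across iterations
    }

# Hypothetical APE samples from five iterations of one bagfile/configuration.
summary = summarize_ape([0.12, 0.15, 0.11, 0.14, 0.13])
```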

We also reported the peak RSS (Resident Set Size) and CPU use across all iterations for each bagfile/configuration combination, as
recorded by timememory.
The maximum values of both peak RSS and average CPU usage across all iterations are reported as well.


Evaluation Host Platform