diff --git a/.bazelignore b/.bazelignore new file mode 100644 index 00000000..87508c8e --- /dev/null +++ b/.bazelignore @@ -0,0 +1,4 @@ +#ignore typical cmake build folders +build +out +cmake-build-debug diff --git a/.bazelrc b/.bazelrc index adcf592b..ce4a80e3 100644 --- a/.bazelrc +++ b/.bazelrc @@ -26,6 +26,8 @@ build:ci --announce_rc #build:linux --copt="-O1" #build:linux --copt="-march=skylake" +#build:linux --copt="-march=haswell" +#build:linux --copt="-march=native" build:linux --copt="-fvisibility=hidden" build:linux --copt="-fno-omit-frame-pointer" # for friendlier stack traces build:linux --copt="-Wno-error" @@ -34,6 +36,8 @@ build:linux --copt="-Wextra" build:linux --copt="-Werror=return-type" build:linux --copt="-Werror=switch" build:linux --copt="-mavx" +# Enable CLZ (count leading zeros). This is equivalent to "-march=haswell" +build:linux --copt="-mbmi2" build:linux --copt="-Wsequence-point" build:linux --copt="-Wsign-compare" build:linux --cxxopt="-std=c++17" @@ -101,9 +105,16 @@ build:ubsan --linkopt="-lubsan" test:ubsan --run_under=//tools/runners/sanitizers/ubsan # MSAN is disabled for now, as there are false positives and we can't suppress them easily. 
-#build:msan --config=base-sanitizer -#build:msan --copt="-fsanitize=memory" -#build:msan --linkopt="-fsanitize=memory" -#test:msan --run_under=//tools/runners/sanitizers/msan +build:msan --config=base-sanitizer +build:msan --copt="-fsanitize=memory" +build:msan --linkopt="-fsanitize=memory" +test:msan --run_under=//tools/runners/sanitizers/msan build:lint --define linting_only=true + +build:fuzz --action_env=CC=clang +build:fuzz --action_env=CXX=clang++ +build:fuzz --config=base-sanitizer +build:fuzz --copt="-g" +build:fuzz --copt="-fsanitize=fuzzer" +build:fuzz --linkopt="-fsanitize=fuzzer" diff --git a/.bazelversion b/.bazelversion deleted file mode 100644 index 078bf8b7..00000000 --- a/.bazelversion +++ /dev/null @@ -1 +0,0 @@ -4.2.2 \ No newline at end of file diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index 030eaaea..8218b7b0 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -1,6 +1,6 @@ name: Bazel build -on: [push, pull_request] +on: [ push ] jobs: build: @@ -13,14 +13,25 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup bazel - # install bazelisk to install the appropriate bazel version - run: | - export PATH=$PATH:$HOME/bin && mkdir -p $HOME/bin - wget https://github.com/bazelbuild/bazelisk/releases/download/v1.5.0/bazelisk-linux-amd64 && chmod +x bazelisk-linux-amd64 && mv bazelisk-linux-amd64 $HOME/bin/bazel - wget https://github.com/bazelbuild/buildtools/releases/download/0.22.0/buildifier && chmod +x buildifier && mv buildifier $HOME/bin/ + uses: bazelbuild/setup-bazelisk@v2 + + # This causes build failures + # - name: Mount bazel cache # Optional + # uses: actions/cache@v3 + # with: + # path: "~/.cache/bazel" + # key: bazel + + - name: Clang format + shell: bash + run: ./ci/linting/clang-format.sh + + - name: Bazel format + shell: bash + run: ./ci/linting/buildifier.sh - name: Build shell: bash @@ -29,3 +40,7 @@ jobs: - name: Test shell: bash 
run: bazel test ... + + - name: Test + shell: bash + run: bazel test //test:phtree_test --config=asan diff --git a/.github/workflows/cmake-codecov.yml b/.github/workflows/cmake-codecov.yml new file mode 100644 index 00000000..6b970aea --- /dev/null +++ b/.github/workflows/cmake-codecov.yml @@ -0,0 +1,49 @@ +name: CMake Codecov + +on: [ push ] + +env: + BUILD_TYPE: Debug + +defaults: + run: + shell: bash + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - uses: hendrikmuhs/ccache-action@v1.2 + + - name: Install lcov + run: sudo apt-get install lcov -y + + - name: Create Build Environment + run: | + cmake -E make_directory ${{github.workspace}}/build + cd build + + - name: Configure CMake + working-directory: ${{github.workspace}}/build + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DPHTREE_CODE_COVERAGE=ON + + - name: Build + working-directory: ${{github.workspace}}/build + run: cmake --build . --config $BUILD_TYPE -j2 + + - name: Run tests + working-directory: ${{github.workspace}}/build + run: ctest -C $BUILD_TYPE + + - name: Create and upload coverage + working-directory: ${{github.workspace}}/build + run: | + cd test/CMakeFiles/all_tests.dir/ + lcov --directory . 
--capture -o coverage.info + lcov -r coverage.info */build/* */test/* */c++/* */gtest/* -o coverageFiltered.info + lcov --list coverageFiltered.info + bash <(curl -s https://codecov.io/bash) -f coverageFiltered.info || echo "Upload failed" + diff --git a/.github/workflows/cmake-windows.yml b/.github/workflows/cmake-windows.yml new file mode 100644 index 00000000..7cf6c607 --- /dev/null +++ b/.github/workflows/cmake-windows.yml @@ -0,0 +1,32 @@ +name: CMake Windows build + +on: [ push ] + +env: + BUILD_TYPE: Release + +jobs: + build: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v3 + + - uses: hendrikmuhs/ccache-action@v1.2 + + - uses: ilammy/msvc-dev-cmd@v1 + + - name: Create Build Environment + run: cmake -E make_directory ${{github.workspace}}\out + + - name: Configure CMake + working-directory: ${{github.workspace}}\out + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -S ${{github.workspace}} -B ${{github.workspace}}\out -DPHTREE_BUILD_EXAMPLES=ON -DPHTREE_BUILD_TESTS=ON + + - name: Build + working-directory: ${{github.workspace}}\out + run: cmake --build . 
--config ${env:BUILD_TYPE} -j2 + + - name: Test + working-directory: ${{github.workspace}}\out + run: ctest -C ${env:BUILD_TYPE} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 22599941..de6e9884 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -1,40 +1,41 @@ name: CMake build -on: [push, pull_request] +on: [ push ] env: BUILD_TYPE: Release +defaults: + run: + shell: bash + jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - - name: Create Build Environment - run: cmake -E make_directory ${{github.workspace}}/build - - - name: Configure CMake - # Use a bash shell so we can use the same syntax for environment variable - # access regardless of the host operating system - shell: bash - working-directory: ${{github.workspace}}/build - # Note the current convention is to use the -S and -B options here to specify source - # and build directories, but this is only available with CMake 3.13 and higher. - # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE - - - name: Build - working-directory: ${{github.workspace}}/build - shell: bash - # Execute the build. You can specify a specific target with "--target " - run: cmake --build . --config $BUILD_TYPE - - - name: Test - working-directory: ${{github.workspace}}/build - shell: bash - # Execute tests defined by the CMake configuration. - # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail - # TODO Currently tests are run via bazel only. 
- run: ctest -C $BUILD_TYPE + - uses: actions/checkout@v3 + + - uses: hendrikmuhs/ccache-action@v1.2 + + - name: Create Build Environment + run: cmake -E make_directory ${{github.workspace}}/build + + - name: Configure CMake + working-directory: ${{github.workspace}}/build + # Note the current convention is to use the -S and -B options here to specify source + # and build directories, but this is only available with CMake 3.13 and higher. + # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DPHTREE_BUILD_ALL=ON + + - name: Build + working-directory: ${{github.workspace}}/build + run: cmake --build . --config $BUILD_TYPE -j2 + + - name: Test + working-directory: ${{github.workspace}}/build + run: ctest -C $BUILD_TYPE + + - name: Example + working-directory: ${{github.workspace}}/build + run: examples/Example diff --git a/.github/workflows/codcecov.yml_old b/.github/workflows/codcecov.yml_old new file mode 100644 index 00000000..e5eca13a --- /dev/null +++ b/.github/workflows/codcecov.yml_old @@ -0,0 +1,42 @@ +name: Upload CodeCov Report +on: [ push ] +jobs: + run: + runs-on: windows-latest + name: Build, Test , Upload Code Coverage Report + steps: + - name: Checkout code + uses: actions/checkout@v2 + with: + fetch-depth: ‘2’ + id: checkout_code + - name: Setup MSBuild and add to PATH + uses: microsoft/setup-msbuild@v1.0.2 + id: setup_msbuild + + - name: Generate Solution + run: cmake -G "Visual Studio 17 2022" -A x64 . 
-DPHTREE_CODE_COVERAGE=ON -DCMAKE_BUILD_TYPE=Debug + + - name: Run MSBuild + id: run_msbuild + run: msbuild /p:Configuration=Debug /p:Platform=x64 /p:gtest_force_shared_crt=on phtree.sln + - name: Setup VSTest and add to PATH + uses: darenm/Setup-VSTest@v1 + id: setup_vstest + + - name: Setup OpenCppCoverage and add to PATH + id: setup_opencppcoverage + run: | + choco install OpenCppCoverage -y + echo "C:\Program Files\OpenCppCoverage" >> $env:GITHUB_PATH + + - name: Generate Report + id: generate_test_report + shell: cmd + run: OpenCppCoverage.exe --modules phtree --export_type cobertura:phtree.xml -- "vstest.console.exe" test\Debug\all_tests.exe + - name: Upload Report to Codecov + uses: codecov/codecov-action@v3 + with: + files: ./phtree.xml + fail_ci_if_error: true + functionalities: fix diff --git a/.gitignore b/.gitignore index 55098c94..d02781ed 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,16 @@ .* !.bazelrc -!.bazelversion !.clang-format !.gitignore !.github +!*.yml bazel-* !bazel-*.sh compile_commands.json perf.data* build +out +cygwin +CMakeSettings.json +**/cmake-build-debug/ diff --git a/BUILD b/BUILD index 0bf4e407..d4d693db 100644 --- a/BUILD +++ b/BUILD @@ -1,30 +1,37 @@ package(default_visibility = ["//visibility:public"]) +licenses(["notice"]) # Apache 2.0 + +# Expose license for external usage through bazel. 
+exports_files([ + "LICENSE", +]) + # Platform configuration definitions for select() config_setting( name = "linux", - constraint_values = ["@bazel_tools//platforms:linux"], + constraint_values = ["@platforms//os:linux"], ) config_setting( name = "macos", - constraint_values = ["@bazel_tools//platforms:osx"], + constraint_values = ["@platforms//os:osx"], ) config_setting( name = "macos_not_ios", - constraint_values = ["@bazel_tools//platforms:osx"], + constraint_values = ["@platforms//os:osx"], ) config_setting( name = "windows", - constraint_values = ["@bazel_tools//platforms:windows"], + constraint_values = ["@platforms//os:windows"], ) config_setting( name = "windows_debug", - constraint_values = ["@bazel_tools//platforms:windows"], + constraint_values = ["@platforms//os:windows"], values = { "compilation_mode": "dbg", }, @@ -32,7 +39,7 @@ config_setting( config_setting( name = "windows_release", - constraint_values = ["@bazel_tools//platforms:windows"], + constraint_values = ["@platforms//os:windows"], values = { "compilation_mode": "opt", }, @@ -40,7 +47,7 @@ config_setting( config_setting( name = "windows-x86_64", - constraint_values = ["@bazel_tools//platforms:windows"], + constraint_values = ["@platforms//os:windows"], ) # Buildifier @@ -62,3 +69,30 @@ filegroup( name = "dot_clang_format", srcs = [".clang-format"], ) + +cc_library( + name = "phtree", + srcs = glob( + include = [ + "include/**/*.h", + ], + ), + hdrs = [ + "include/phtree/converter.h", + "include/phtree/distance.h", + "include/phtree/filter.h", + "include/phtree/phtree.h", + "include/phtree/phtree_multimap.h", + ], + includes = [ + "include", + ], + linkstatic = True, + visibility = [ + "//visibility:public", + ], + deps = [ + "//include/phtree/common", + "//include/phtree/v16", + ], +) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fd2a904..ed4a5025 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,138 @@ and this project adheres to [Semantic 
Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] Nothing yet. +## [1.5.0] - 2023-02-09 +### Added +- Added B+tree multimap for internal (future) use. [#93](https://github.com/tzaeschke/phtree-cpp/issues/93) +- Added some fuzz tests. Note that these require manual compilation, see [fuzzer/README.md](fuzzer/README.md). + [#114](https://github.com/tzaeschke/phtree-cpp/pull/114) +- Added float-32 variants to multimap: PhTreeMultiMapF, PhTreeMultiMapBoxF. + [#117](https://github.com/tzaeschke/phtree-cpp/pull/117) + +### Changed +- Clean up array_map. [#107](https://github.com/tzaeschke/phtree-cpp/issues/107), +- Fixed compatibility with bazel 6.0.0. [#109](https://github.com/tzaeschke/phtree-cpp/issues/109), +- Added missing compiler flag for TZCNT/CTZ (count trailing zeros). This should be much faster on haswell or later CPUs. + [#103](https://github.com/tzaeschke/phtree-cpp/issues/103), +- Rewrote relocate(). This should be much cleaner now and slightly faster. + [#98](https://github.com/tzaeschke/phtree-cpp/pull/98), + [#99](https://github.com/tzaeschke/phtree-cpp/pull/99), + [#101](https://github.com/tzaeschke/phtree-cpp/pull/101), + [#104](https://github.com/tzaeschke/phtree-cpp/pull/104), + [#115](https://github.com/tzaeschke/phtree-cpp/issues/115) +- Cleaned up HandleCollision() and key comparison functions. [#97](https://github.com/tzaeschke/phtree-cpp/pull/97) +- Improved performance by eliminating memory indirection for DIM > 3. + This was enabled by referencing "Node" directly in "Entry" which was enabled by + implanting an indirection in array_map. [#96](https://github.com/tzaeschke/phtree-cpp/pull/96) +- Improved performance of window queries by executing them partially as point queries. + This works best for point datasets, and somewhat for box datasets with "include" queries. + There is no benefit for "intersection" queries. 
[#88](https://github.com/tzaeschke/phtree-cpp/issues/88) +- Improved benchmarks for insert and query to use a more compact format. + [#91](https://github.com/tzaeschke/phtree-cpp/pull/91) +- Improved performance of window queries by optimizing calculation of min/max masks. + Improved performance of queries and updates by changing bit-width of min/max masks and + hc_pos_t. [#95](https://github.com/tzaeschke/phtree-cpp/pull/95) + +### Removed +- bazel version requirement file `.bazelversion`. [#89](https://github.com/tzaeschke/phtree-cpp/issues/89) + +### Fixed +- Fixed copy cstr/assignment of B+trees, see also #102. [#119](https://github.com/tzaeschke/phtree-cpp/pull/119) +- Fixed numerous warnings when compiling with MSVC. [#120](https://github.com/tzaeschke/phtree-cpp/issues/120) + +## [1.4.0] - 2022-09-09 +### Added +- Added build features: [#53](https://github.com/tzaeschke/phtree-cpp/issues/53) + - linting for C++ and bazel files. + - Added CI status badges. + - Added test coverage +- Added support for cmake `FetchContent`. + See README for details. [#75](https://github.com/tzaeschke/phtree-cpp/issues/75) +- Added support for cmake `find_package()` and direct import via `add_subdirectory()`. + See README for details. [#83](https://github.com/tzaeschke/phtree-cpp/issues/83) + +### Changed +- Cleaned up build scripts. [#53](https://github.com/tzaeschke/phtree-cpp/issues/53) +- Fixed code coverage + migrate to linux. [#80](https://github.com/tzaeschke/phtree-cpp/issues/80) +- ***BREAKING CHANGE*** The project has been restructured to have a more "standard" directory structure. + This affects how **bazel** dependencies work (use `deps = ["@phtree//:phtree",]`) and enables **cmake FetchContent_**. + See README for details. [#75](https://github.com/tzaeschke/phtree-cpp/issues/75) + +### Removed +- Nothing. + +### Fixed +- Nothing. + +## [1.3.0] - 2022-08-28 +### Added +- Added flag to relocate() allow short cutting in case of identical keys. 
+ [#68](https://github.com/tzaeschke/phtree-cpp/issues/68) +- Added tested support for move-only and copy-only value objects. + [#56](https://github.com/tzaeschke/phtree-cpp/issues/56) +- Added custom bucket implementation (similar to std::unordered_set). This improves update performance by 5%-20%. + [#44](https://github.com/tzaeschke/phtree-cpp/issues/44) +- Added `PhTree.relocate(old_key, new_key)` and `PhTree.relocate_if(old_key, new_key, predicate)`. + This is **a lot faster** than using other methods. + [#43](https://github.com/tzaeschke/phtree-cpp/issues/43) +- Added try_emplace(key, value) and try_emplace(iter_hint, key, value) + [#40](https://github.com/tzaeschke/phtree-cpp/issues/40) +- Added FilterBoxAABB and FilterSphereAABB as examples for filtering a PH-Tree with box keys + [#33](https://github.com/tzaeschke/phtree-cpp/issues/33) +### Changed +- Moved tests and benchmarks into separate folders. [#67](https://github.com/tzaeschke/phtree-cpp/pull/67) +- Cleaned up unit tests. [#54](https://github.com/tzaeschke/phtree-cpp/pull/54) +- Simplified internals of `erase()`. [#47](https://github.com/tzaeschke/phtree-cpp/pull/47) +- Removed internal use of `std::optional()` to slightly reduce memory overhead + [#38](https://github.com/tzaeschke/phtree-cpp/issues/38) +- Removed restrictions on bazel version [#35](https://github.com/tzaeschke/phtree-cpp/issues/35) +- **API BREAKING CHANGE**: API of filters have been changed to be more correct, explicit and flexible. + [#21](https://github.com/tzaeschke/phtree-cpp/issues/21) + - Correctness: Converters and distance functions are not copied unnecessarily anymore. + - Explicit: + Filters *must* have a mandatory parameter for a converter reference. This ensures that the correct + converter is used, probably `tree.converter()`. + - Flexible: + Distance functions can be provided through a universal reference (forwarding reference). + Also, filters are now movable and copyable. 
+ +- **API BREAKING CHANGE**: Allow filtering on buckets in multimaps. Multimap filters have different functions + and function signatures than normal `PhTree` filters. [#26](https://github.com/tzaeschke/phtree-cpp/issues/26) + +### Fixed +- Fixed compiler warnings when compiling with Visual Studio 2019. + [#74](https://github.com/tzaeschke/phtree-cpp/issues/74) +- Fixed cmake to work with Visual Studio 2019. Added tests and benchmarks to cmake. + (benchmarks still do not work with VS at the moment). + [#62](https://github.com/tzaeschke/phtree-cpp/issues/62) +- Fixed compilation problems and a memory leak when compiling with Visual Studio 2019. + (also added `msan` support). [#64](https://github.com/tzaeschke/phtree-cpp/pull/64) + +## [1.2.0] - 2022-04-14 +### Changed +- Bugfix: FilterSphere was not working correctly. [#27](https://github.com/tzaeschke/phtree-cpp/issues/27) +- Potentially **BREAKING CHANGE**: Refactored API of all methods that accept callbacks and filters to + accept universal/forwarding references. + Also changed filters and callback to not require `const` methods. + [#22](https://github.com/tzaeschke/phtree-cpp/issues/22) +- Clean up iterator implementations. [#19](https://github.com/tzaeschke/phtree-cpp/issues/19) +- Make PhTree and PhTreeMultimap movable (move-assign/copy). [#18](https://github.com/tzaeschke/phtree-cpp/issues/18) +- Potentially **BREAKING CHANGE** when using `IsNodeValid()` in provided filters: + Changed `bit_width_t` from `uint16_t` to `uint32_t`. This improves performance of 3D insert/emplace + on small datasets by up to 15%. To avoid warnings that meant that the API of `FilterAABB` and `FilterSphere` + had to be changed to accept `uint32_t` instead of `int`. This may break some implementations. + [#17](https://github.com/tzaeschke/phtree-cpp/pull/17) +- DIM>8 now uses custom b_plus_tree_map instead of std::map. This improves performance for all operations, e.g. + window queries on large datasets are up to 4x faster. 
Benchmarks results can be found in the issue. + [#14](https://github.com/tzaeschke/phtree-cpp/issues/14) +- postfix/infix field moved from Node to Entry. This avoids indirections and improves performance of most by ~10%. + operations by 5-15%. [#11](https://github.com/tzaeschke/phtree-cpp/issues/11) +- Entries now use 'union' to store children. [#9](https://github.com/tzaeschke/phtree-cpp/issues/9) +- Avoid unnecessary find() when removing a node. [#5](https://github.com/tzaeschke/phtree-cpp/issues/5) +- Avoid unnecessary key copy when inserting a node. [#4](https://github.com/tzaeschke/phtree-cpp/issues/4) +- for_each(callback, filter) was traversing too many nodes. [#2](https://github.com/tzaeschke/phtree-cpp/issues/2) +- Build improvements for bazel/cmake + ## [1.1.1] - 2022-01-30 ### Changed - Replaced size() in filters with DIM [#26](https://github.com/improbable-eng/phtree-cpp/pull/26) @@ -70,7 +202,11 @@ Nothing yet. - Nothing. -[Unreleased]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.1...HEAD +[Unreleased]: https://github.com/improbable-eng/phtree-cpp/compare/v1.5.0...HEAD +[1.5.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.4.0...v1.5.0 +[1.4.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.3.0...v1.4.0 +[1.3.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.2.0...v1.3.0 +[1.2.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.0...v1.2.0 [1.1.1]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.0...v1.1.1 [1.1.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.0.0...v1.1.0 [1.0.1]: https://github.com/improbable-eng/phtree-cpp/compare/v1.0.0...v1.0.1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 18a5da8a..f9d540ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,19 +1,163 @@ cmake_minimum_required(VERSION 3.14) -# set the project name -project(PH_Tree_Main VERSION 1.1.1 +project(phtree VERSION 1.5.0 DESCRIPTION "PH-Tree C++" + HOMEPAGE_URL 
"https://github.com/tzaeschke/phtree-cpp" LANGUAGES CXX) -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Release) -endif() + +cmake_policy(SET CMP0077 NEW) + +# --------------------------------------------------------------------------------------- +# Set default build to release +# --------------------------------------------------------------------------------------- +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose Release or Debug" FORCE) +endif () + + +# --------------------------------------------------------------------------------------- +# Build options +# --------------------------------------------------------------------------------------- +option(PHTREE_BUILD_ALL "Build examples, tests and benchmarks" OFF) + +# example options +option(PHTREE_BUILD_EXAMPLES "Build examples" OFF) + +# testing options +option(PHTREE_BUILD_TESTS "Build tests" OFF) +option(PHTREE_CODE_COVERAGE "Collect coverage from test library" OFF) +if (PHTREE_CODE_COVERAGE) + set(PHTREE_BUILD_TESTS ON) +endif () + +# bench options +option(PHTREE_BUILD_BENCHMARKS "Build benchmarks (Requires https://github.com/google/benchmark.git to be installed)" OFF) + +# install options +option(PHTREE_INSTALL "Generate the install target" OFF) + + +# --------------------------------------------------------------------------------------- +# Compiler config +# --------------------------------------------------------------------------------------- +find_program(CCACHE_FOUND ccache) +if (CCACHE_FOUND) + message("CCACHE is found") + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) +else (CCACHE_FOUND) + message("CCACHE is NOT found") +endif (CCACHE_FOUND) # specify the C++ standard -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED True) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -Werror") -set(CMAKE_CXX_FLAGS_RELEASE "-O3") +if (NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) + 
set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif () + +if (MSVC) + #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Wall") + set(CMAKE_CXX_FLAGS_RELEASE "/O2") + + if (PHTREE_BUILD_TESTS OR PHTREE_BUILD_ALL) + add_compile_options(/bigobj) + endif () + + # For google benchmark + if (PHTREE_BUILD_BENCHMARKS) # OR PHTREE_BUILD_ALL) + # This still doesn't work. This also breaks gtest + # See for example + # https://stackoverflow.com/questions/55376111/how-to-build-and-link-google-benchmark-using-cmake-in-windows + # https://github.com/google/benchmark/issues/1348 + # https://github.com/google/benchmark/issues/639 + # set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) + # set(BUILD_SHARED_LIBS TRUE) #=TRUE + # set(BENCHMARK_DOWNLOAD_DEPENDENCIES on) + # set(BENCHMARK_ENABLE_GTEST_TESTS OFF) + endif () +else () + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror") + if (PHTREE_BUILD_BENCHMARKS) + # Enable vectorization and TZCNT/CTZ + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -mavx -mbmi2 -pthread") + else () + # Enable vectorization and TZCNT/CTZ + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -mavx -mbmi2 ") + endif () + if (PHTREE_CODE_COVERAGE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") # -Wa,-mbig-obj") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage") + endif () +endif () + +# --------------------------------------------------------------------------------------- +# Build binaries +# --------------------------------------------------------------------------------------- + +# --------------------------------------------------------------------------------------- +# Header only version +# --------------------------------------------------------------------------------------- +add_library(phtree INTERFACE) +add_library(phtree::phtree ALIAS phtree) +target_compile_features(phtree INTERFACE cxx_std_17) + +target_include_directories(phtree INTERFACE + $ + $) + +if (PHTREE_BUILD_EXAMPLES OR PHTREE_BUILD_ALL) + message(STATUS "Generating examples") + add_subdirectory(examples) 
+endif () + +if ((PHTREE_BUILD_BENCHMARKS OR PHTREE_BUILD_ALL) AND NOT MSVC) + message(STATUS "Generating benchmarks") + add_subdirectory(benchmark) +endif () + +if (PHTREE_BUILD_TESTS OR PHTREE_BUILD_ALL) + message(STATUS "Generating tests") + enable_testing() + include(GoogleTest) + add_subdirectory(test) +endif () + +# --------------------------------------------------------------------------------------- +# Install +# --------------------------------------------------------------------------------------- +if (PHTREE_INSTALL) + include(GNUInstallDirs) + + install(TARGETS phtree + EXPORT ${PROJECT_NAME}_Targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + + include(CMakePackageConfigHelpers) + write_basic_package_version_file("phtreeConfigVersion.cmake" + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion) + + configure_package_config_file( + "${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + INSTALL_DESTINATION + ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) + + install(EXPORT ${PROJECT_NAME}_Targets + FILE ${PROJECT_NAME}Targets.cmake + NAMESPACE phtree:: + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) + + install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) + + install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/phtree + DESTINATION include + PATTERN "BUILD" EXCLUDE + PATTERN "*.md" EXCLUDE) -add_subdirectory(phtree) -add_subdirectory(examples) +endif () diff --git a/LICENSE b/LICENSE index e46c5961..13cd100a 100644 --- a/LICENSE +++ b/LICENSE @@ -188,6 +188,7 @@ identification within third-party archives. 
Copyright 2020 Improbable Worlds Limited + Copyright 2022 Tilmann Zäschke Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index fad24140..0812b418 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,28 @@ -**Note: for updates please also check the [fork](https://github.com/tzaeschke/phtree-cpp) by the original PH-Tree developer.** +**This is a fork of [Improbable's (currently unmaintained) PH-tree](https://github.com/improbable-eng/phtree-cpp)**. + +Multi-dimensional / spatial index with very fast insert/erase/relocate operations and scalability with large datasets. +This library is C++ / header only. + +![Bazel Linux build](https://github.com/tzaeschke/phtree-cpp/actions/workflows/bazel.yml/badge.svg) +![CMake Linux build](https://github.com/tzaeschke/phtree-cpp/actions/workflows/cmake.yml/badge.svg) +![CMake MSBuild 17.3.1](https://github.com/tzaeschke/phtree-cpp/actions/workflows/cmake-windows.yml/badge.svg) +[![codecov](https://codecov.io/gh/tzaeschke/phtree-cpp/branch/master/graph/badge.svg?token=V5XVRQG754)](https://codecov.io/gh/tzaeschke/phtree-cpp) +[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) # PH-Tree C++ -The PH-Tree is an ordered index on an n-dimensional space (quad-/oct-/2^n-tree) where each dimension is (by default) +The [PH-Tree](https://tzaeschke.github.io/phtree-site/) is an ordered index on an n-dimensional space +(quad-/oct-/2^n-tree) where each dimension is (by default) indexed by a 64bit integer. The index order follows z-order / Morton order. The default implementation is effectively -a 'map', i.e. *each key is associated with at most one value.* +a 'map', i.e. *each key is associated with at most one value.* For convenience there is also a multimap implementation +that supports multiple entries with identical keys. 
Keys are points or boxes in n-dimensional space. Two strengths of PH-Trees are fast insert/removal operations and scalability with large datasets. It also provides fast window queries and _k_-nearest neighbor queries, and it scales well with higher dimensions. The default implementation is limited to 63 dimensions. -The API ist mostly analogous to STL's `std::map`, see function descriptions for details. +The API ist mostly analogous to STL's `std::map` and `std::multimap`, see function descriptions for details. Theoretical background is listed [here](#theory). @@ -23,45 +34,33 @@ More information about PH-Trees (including a Java implementation) is available [ ### API Usage -[Key Types](#key-types) - -[Basic operations](#basic-operations) - -[Queries](#queries) - -* [for_each](#for-each-example) - -* [Iterators](#iterator-examples) - -* [Filters](#filters) - -* [Distance Functions](#distance-functions) - -[Converters](#converters) - -[Custom Key Types](#custom-key-types) - -[Restrictions](#restrictions) - -[Troubleshooting / FAQ](#troubleshooting-faq) +* [Key Types](#key-types) +* [Basic operations](#basic-operations) +* [Queries](#queries) + * [for_each](#for-each-example) + * [Iterators](#iterator-examples) + * [Filters](#filters) + * [Filters for MultiMaps](#filters-for-multimaps) + * [Distance Functions](#distance-functions) +* [Converters](#converters) +* [Custom Key Types](#custom-key-types) +* [Restrictions](#restrictions) +* [Troubleshooting / FAQ](#troubleshooting-faq) ### Performance -[When to use a PH-Tree](#when-to-use-a-ph-tree) - -[Optimising Performance](#optimising-performance) +* [When to use a PH-Tree](#when-to-use-a-ph-tree) +* [Optimising Performance](#optimizing-performance) ### Compiling / Building -[Build system & dependencies](#build-system-and-dependencies) - -[bazel](#bazel) - -[cmake](#cmake) +* [Build system & dependencies](#build-system-and-dependencies) +* [bazel](#bazel) +* [cmake](#cmake) ## Further Resources -[Theory](#theory) +* 
[Theory](#theory) ---------------------------------- @@ -71,7 +70,7 @@ More information about PH-Trees (including a Java implementation) is available [ #### Key Types -The **PH-Tree Map** supports out of the box five types: +The **PH-Tree Map** has five predefined tree types: - `PhTreeD` uses `PhPointD` keys, which are vectors/points of 64 bit `double`. - `PhTreeF` uses `PhPointF` keys, which are vectors/points of 32 bit `float`. @@ -79,18 +78,20 @@ The **PH-Tree Map** supports out of the box five types: - `PhTreeBoxF` uses `PhBoxF` keys, which consist of two `PhPointF` that define an axis-aligned rectangle/box. - `PhTree` uses `PhPoint` keys, which are vectors/points of `std::int64` -The **PH-Tree MultiMap** supports out of the box three types: +The **PH-Tree MultiMap** has three predefined tree types: - `PhTreeMultiMapD` uses `PhPointD` keys, which are vectors/points of 64 bit `double`. +- `PhTreeMultiMapF` uses `PhPointF` keys, which are vectors/points of 32 bit `float`. - `PhTreeMultiMapBoxD` uses `PhBoxD` keys, which consist of two `PhPointD` that define an axis-aligned rectangle/box. +- `PhTreeMultiMapBoxF` uses `PhBoxF` keys, which consist of two `PhPointF` that define an axis-aligned rectangle/box. - `PhTreeMultiMap` uses `PhPoint` keys, which are vectors/points of `std::int64` -Additional tree types can be defined easily analogous to the types above, please refer to the declaration of the tree +Additional key types and tree types can be defined easily analogous to the types above, please refer to the declaration of the types for an example. Support for custom key classes (points and boxes) as well as custom coordinate mappings can be implemented using custom `Converter` classes, see below. The `PhTreeMultiMap` is by default backed by `std::unordered_set` but this can be changed via a template parameter. -The `PhTree` and `PhTreeMultiMap` types are available from `phtree.h` and `phtree_multimap.h`. 
+The `PhTree` and `PhTreeMultiMap` types are declared in `phtree.h` and `phtree_multimap.h`. @@ -107,8 +108,12 @@ auto tree = PhTreeD<3, MyData>(); PhPointD<3> p{1.1, 1.0, 10.}; // Some operations +tree.relocate(p1, p2); // Move an entry from point 1 to point 2 +tree.relocate_if(p1, p2, predicate); // Conditionally move an entry from point 1 to point 2 tree.emplace(p, my_data); tree.emplace_hint(hint, p, my_data); +tree.try_emplace(p, my_data); +tree.try_emplace(hint, p, my_data); tree.insert(p, my_data); tree[p] = my_data; tree.count(p); @@ -120,7 +125,6 @@ tree.empty(); tree.clear(); // Multi-map only -tree.relocate(p_old, p_new, value); tree.estimate_count(query); ``` @@ -128,9 +132,10 @@ tree.estimate_count(query); #### Queries -* For-each over all elements: `tree.fore_each(callback);` +* For-each over all elements: `tree.for_each(callback);` + **Note that `for_each` tends to be 10%-20% faster than using an iterator.** * Iterator over all elements: `auto iterator = tree.begin();` -* For-each with box shaped window queries: `tree.fore_each(PhBoxD(min, max), callback);` +* For-each with box shaped window queries: `tree.for_each(PhBoxD(min, max), callback);` * Iterator for box shaped window queries: `auto q = tree.begin_query(PhBoxD(min, max));` * Iterator for _k_ nearest neighbor queries: `auto q = tree.begin_knn_query(k, center_point, distance_function);` * Custom query shapes, such as spheres: `tree.for_each(callback, FilterSphere(center, radius, tree.converter()));` @@ -148,7 +153,7 @@ struct Counter { size_t n_ = 0; }; -// Count entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3) +// Count entries inside an axis aligned box defined by the two points (1,1,1) and (3,3,3) Counter callback; tree.for_each({{1, 1, 1}, {3, 3, 3}}, callback); // callback.n_ is now the number of entries in the box. @@ -164,18 +169,18 @@ for (auto it : tree) { ... 
} -// Iterate over all entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3) +// Iterate over all entries inside an axis aligned box defined by the two points (1,1,1) and (3,3,3) for (auto it = tree.begin_query({{1, 1, 1}, {3, 3, 3}}); it != tree.end(); ++it) { ... } // Find 5 nearest neighbors of (1,1,1) -for (auto it = tree.begin_knn_query(5, {1, 1, 1}); it != tree.end(); ++it) { +for (auto it = tree.begin_knn_query(5, {1, 1, 1}, DistanceEuclidean<3>()); it != tree.end(); ++it) { ... } ``` - + ##### Filters @@ -183,7 +188,8 @@ All queries allow specifying an additional filter. The filter is called for ever returned (subject to query constraints) and to every node in the tree that the query decides to traverse (also subject to query constraints). Returning `true` in the filter does not change query behaviour, returning `false` means that the current value or child node is not returned or traversed. An example of a geometric filter can be found -in `phtree/common/filter.h` in `FilterAABB`. +in `phtree/common/filter.h` in `FilterAABB` or `FilterSphere` (for examples with box keys see +`FilterBoxAABB` or `FilterBoxSphere`). ```C++ template @@ -198,13 +204,47 @@ struct FilterByValueId { } }; -// Iterate over all entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3). +// Iterate over all entries inside an axis aligned box defined by the two points (1,1,1) and (3,3,3). // Return only entries that suffice the filter condition. for (auto it = tree.begin_query({1, 1, 1}, {3, 3, 3}, FilterByValueId<3, T>())); it != tree.end(); ++it) { ... } ``` +Note: The filter example works only for the 'map' version of the PH-Tree, such as `PhTree`, `PhTreeD`, ... . Filters for +the `PhTreeMultiMap` are discussed in the next section. + + + +#### Filters for MultiMaps + +The `PhTreeMultiMap` requires a different type of filter. 
In order to function as a multimap, it uses collections +("buckets") as entries for each occupied coordinate. The buckets allow it to store several values per coordinate. When +using a filter, the PH-Tree will check `IsEntryValid` for every *bucket* (this is different from version 1.x.x where it +called `IsEntryValid` for every entry in a bucket but never for the bucket itself). Since 2.0.0 there is a new function +required in every multimap filter: `IsBucketEntryValid`. It is called once for every entry in a bucket if the bucket +passed `IsEntryValid`. An example of a geometric filter can be found in `phtree/common/filter.h` in `FilterMultiMapAABB` +. + +```C++ +template +struct FilterMultiMapByValueId { + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const BucketT& bucket) const { + // Arbitrary example: Only allow keys/buckets with a certain property, e.g. keys that lie within a given sphere. + return check_some_geometric_property_of_key(key); + } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint& key, const T& value) const { + // Arbitrary example: Only allow values with even values of id_ + return value.id_ % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + // Allow all nodes + return true; + } +}; +``` + ##### Distance function @@ -250,7 +290,14 @@ double resultung_float = ((double)my_int) / 1000000.; It is obvious that this approach leads to a loss of numerical precision. Moreover, the loss of precision depends on the actual range of the double values and the constant. The chosen constant should probably be as large as possible but small enough such that converted values do not exceed the 64bit limit of `std::int64_t`. Note that the PH-Tree provides -several `ConverterMultiply` implementations for point/box and double/float. +several `ConverterMultiply` implementations for point/box and double/float. 
For example: + +```C++ +// Multiply converter that multiplies by 1'000'000 (and divides by 1). +auto tree = PhTreeD>(); +``` + +You can also write your own converter. For example: ```C++ template @@ -371,10 +418,10 @@ void test() { **Problem**: The PH-Tree appears to be losing updates/insertions. **Solution**: Remember that the PH-Tree is a *map*, keys will not be inserted if an identical key already exists. The -easiest solution is to use one of the `PhTreeMultiMap` implementations. Alternatively, this can be solved by turning the -PH-Tree into a multi-map, for example by using something like `std::map` or `std::set` as member type: -`PhTree<3, std::set>`. The `set` instances can then be used to handle key conflicts by storing multiple -entries for the same key. The logic to handle conflicts must currently be implemented manually by the user. +easiest solution is to use one of the `PhTreeMultiMap` implementations. Alternatively, this can be solved by turning a +`PhTree` into a multi-map, for example by using something like `std::map` or `std::set` as member type: +`PhTree<3, T, CONVERTER, std::set>`. The `set` instances can then be used to handle key conflicts by +storing multiple entries for the same key. The logic to handle conflicts must currently be implemented manually. ---------------------------------- @@ -393,7 +440,7 @@ heavily on the actual dataset, usage patterns, hardware, ... . **Generally, the PH-Tree tends to have the following advantages:** * Fast insertion/removal times. While some indexes, such as *k*-D-trees, trees can be build from scratch very fast, they - tend to be be much slower when removing entries or when indexing large datasets. Also, most indexes require + tend to be much slower when removing entries or when indexing large datasets. Also, most indexes require rebalancing which may result in unpredictable latency (R-trees) or may result in index degradation if delayed (*k*D-trees). 
@@ -407,7 +454,7 @@ heavily on the actual dataset, usage patterns, hardware, ... . * Scalability with the number of dimensions. The PH-Tree has been shown to deal "well" with high dimensional data ( 1000k+ dimensions). What does "well" mean? * It works very well for up to 30 (sometimes 50) dimensions. **Please note that the C++ implementation has not been - optimised nearly as much as the Java implementation.** + optimized nearly as much as the Java implementation.** * For more dimensions (Java was tested with 1000+ dimensions) the PH-Tree still has excellent insertion/deletion performance. However, the query performance cannot compete with specialised high-dim indexes such as cover-trees or pyramid-trees (these tend to be *very slow* on insertion/deletion though). @@ -426,54 +473,60 @@ heavily on the actual dataset, usage patterns, hardware, ... . * PH-Trees are not very efficient in scenarios where queries tend to return large result sets in the order of 1000 or more. - + -### Optimising Performance +### Optimizing Performance There are numerous ways to improve performance. The following list gives an overview over the possibilities. -1) **Use `for_each` instead of iterators**. This should improve performance of queries by 5%-10%. +1) **Use `-O3 -mavx -mbmi2` compiler flags**. Ensure that vectorization and count trailing zeros (CTZ/TZCNT) are + enabled. -2) **Use `emplace_hint` if possible**. When updating the position of an entry, the naive way is to use `erase()` - /`emplace()`. With `emplace_hint`, insertion can avoid navigation to the target node if the insertion coordinate is - close to the removal coordinate. - ```c++ - auto iter = tree.find(old_position); - tree.erase(iter); - tree.emplace_hint(iter, new_position, value); - ``` +2) **Use `for_each` instead of iterators**. This should improve performance of queries by 10%-20%. -3) **Store pointers instead of large data objects**. 
For example, use `PhTree<3, MyLargeClass*>` instead of +3) **Use `relocate()` / `relocate_if()` if possible**. When updating the position of an entry, the naive way is + to use `erase()` / `emplace()`. With `relocate` / `relocate_if()`, insertion can avoid a lot of duplicate + navigation in the tree if the new coordinate is close to the old coordinate. + ```c++ + relocate(old_position, new_position); + relocate_if(old_position, new_position, [](const T& value) { return [true/false]; }); + ``` + The multi-map version relocates all values unless a 'value' is specified to identify the value to be relocated: + ```c++ + relocate(old_position, new_position, value); + ``` + +4) **Store pointers instead of large data objects**. For example, use `PhTree<3, MyLargeClass*>` instead of `PhTree<3, MyLargeClass>` if `MyLargeClass` is large. * This prevents the PH-Tree from storing the values inside the tree. This should improve cache-locality and thus performance when operating on the tree. * Using pointers is also useful if construction/destruction of values is expensive. The reason is that the tree has to construct and destruct objects internally. This may be avoidable but is currently still happening. -4) **Use non-box query shapes**. Depending on the use case it may be more suitable to use a custom filter for queries. +5) **Use non-box query shapes**. Depending on the use case it may be more suitable to use a custom filter for queries. For example: `tree.for_each(callback, FilterSphere(center, radius, tree.converter()));` -5) **Use a different data converter**. The default converter of the PH-Tree results in a reasonably fast index. Its +6) **Use a different data converter**. The default converter of the PH-Tree results in a reasonably fast index. Its biggest advantage is that it provides lossless conversion from floating point coordinates to PH-Tree coordinates (integers) and back to floating point coordinates. 
* The `ConverterMultiply` is a lossy converter but it tends to improve performance by 10% or more. This is not caused by faster operation in the converter itself but by a more compact tree shape. The example shows how to use a converter that multiplies coordinates by 100'000, thus preserving roughly 5 fractional digits: - `PhTreeD>` + `PhTreeD>()` -6) **Use custom key types**. By default, the PH-Tree accepts only coordinates in the form of its own key types, such +7) **Use custom key types**. By default, the PH-Tree accepts only coordinates in the form of its own key types, such as `PhPointD`, `PhBoxF` or similar. To avoid conversion from custom types to PH-Tree key types, custom classes can often be adapted to be accepted directly by the PH-Tree without conversion. This requires implementing a custom converter as described in the section about [Custom Key Types](#custom-key-types). -7) Advanced: **Adapt internal Node representation**. Depending on the dimensionality `DIM`, the PH-Tree uses internally - in - `Nodes` different container types to hold entries. By default, it uses an array for `DIM<=3`, a vector for `DIM<=8` - and an ordered map for `DIM>8`. Adapting these thresholds can have strong effects on performance as well as memory - usage. One example: Changing the threshold to use vector for `DIM==3` reduced performance of the `update_d` benchmark +8) Advanced: **Adapt internal Node representation**. Depending on the dimensionality `DIM`, the PH-Tree uses + internally in `Nodes` different container types to hold entries. + By default, it uses an array for `DIM<=3`, a vector for `DIM<=8` and an ordered map for `DIM>8`. + Adapting these thresholds can have strong effects on performance as well as memory usage. + One example: Changing the threshold to use vector for `DIM==3` reduced performance of the `update_d` benchmark by 40%-50% but improved performance of `query_d` by 15%-20%. The threshold is currently hardcoded. 
The effects are not always easy to predict but here are some guidelines: * "array" is the fastest solution for insert/update/remove type operations. Query performance is "ok". Memory @@ -487,67 +540,126 @@ There are numerous ways to improve performance. The following list gives an over ## Compiling the PH-Tree -This section will guide you through the initial build system and IDE you need to go through in order to build and run -custom versions of the PH-Tree on your machine. +The PH-Tree index itself is a *header-only* library; it can be used by simply copying everything in the +`include/phtree` folder. +The examples, tests and benchmarks can be built with bazel or cmake. ### Build system & dependencies -PH-Tree can be built with *cmake 3.14* or [Bazel](https://bazel.build) as build system. All code is written in C++ -targeting the C++17 standard. The code has been verified to compile on Linux with Clang 9, 10, 11, 12, and GCC 9, 10, -11, and on Windows with Visual Studio 2019. - -#### Ubuntu Linux - -* Installing [clang](https://apt.llvm.org/) - -* Installing [bazel](https://docs.bazel.build/versions/main/install-ubuntu.html) - -* To install [cmake](https://launchpad.net/~hnakamur/+archive/ubuntu/cmake): - +PH-Tree can be built with [Bazel](https://bazel.build) (primary build system) or with +[cmake](https://cmake.org/) *3.14*. +All code is written in C++ targeting the C++17 standard. +The code has been verified to compile on Linux with Clang 11 and GCC 9, and on Windows with Visual Studio 2019 +(except benchmarks, which don't work with VS). 
+The PH-tree makes use of vectorization and CountTrailingZeros/CTZ/TZCNT, so suggested compilation options for +clang/gcc are: ``` -sudo add-apt-repository ppa:hnakamur/libarchive -sudo add-apt-repository ppa:hnakamur/libzstd -sudo add-apt-repository ppa:hnakamur/cmake -sudo apt update -sudo apt install cmake +-O3 -mavx -mbmi2 ``` -#### Windows - -To build on Windows, you'll need to have a version of Visual Studio 2019 installed (likely Professional), in addition to -[Bazel](https://docs.bazel.build/versions/master/windows.html) or -[cmake](https://cmake.org/download/). ### Bazel +`WORKSPACE` file: +``` +http_archive( + name = "phtree", + strip_prefix = "phtree-cpp-v1.5.0", + url = "https://github.com/tzaeschke/phtree-cpp", +) +``` +`BUILD` file: +``` +cc_binary( + ... + deps = [ + "@phtree//:phtree", + ], +) +``` Once you have set up your dependencies, you should be able to build the PH-Tree repository by running: - ``` bazel build ... ``` Similarly, you can run all unit tests with: - ``` bazel test ... ``` +Benchmarks: +``` +bazel run //benchmark:update_mm_d_benchmark --config=benchmark -- --benchmark_counters_tabular=true +``` + + -### cmake +### cmake dependency +The library supports three types of cmake dependency management, `FetchContent`, `find_package()` and `add_subfolder()`. +All three approaches are used in [this example project](https://github.com/tzaeschke/test-phtree-cpp-cmake). +#### FetchContent +With `FetchContent_...()`: +``` +include(FetchContent) +FetchContent_Declare( + phtree + GIT_REPOSITORY https://github.com/tzaeschke/phtree-cpp.git + GIT_TAG v1.5.0 +) +FetchContent_MakeAvailable(phtree) +``` + +#### find_package() +You need to build the library with: +``` +mkdir out && cd out +cmake .. -DPHTREE_INSTALL=on +sudo cmake --build . --config Release --target install -- -j +``` +Note that the option `CMAKE_INSTALL_PREFIX:PATH=...` does _not_ work. 
+The library can then be included with: +``` +find_package(phtree CONFIG REQUIRED) +add_executable(ExampleProject example.cc) +target_link_libraries(ExampleProject phtree::phtree) +``` + +#### add_subfolder() +For this you can simply copy the PH-Tree source code into your project (you can skip `benchmark` and `test`) and +then include the folder with `add_subdirectory(phtree-cpp)`. + +### cmake build +`cmake` uses `ccache` when available. ``` mkdir build cd build cmake .. cmake --build . +``` + +Run example: +``` +cmake .. -DPHTREE_BUILD_EXAMPLES=ON +cmake --build . ./example/Example ``` +Run tests: +``` +cmake .. -DPHTREE_BUILD_TESTS=ON +cmake --build . +ctest +``` +Next to the example (`PHTREE_BUILD_EXAMPLES`) there are also tests (`PHTREE_BUILD_TESTS`) and +benchmarks (`PHTREE_BUILD_BENCHMARKS`). To build all, use `PHTREE_BUILD_ALL`. +**Note that the benchmarks currently don't work on Windows.** + ## Further Resources diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 00000000..5d168dc9 --- /dev/null +++ b/TODO.txt @@ -0,0 +1,88 @@ +Ideas that didn't work +====================== +#39 Store nodes flat in Entries. + Some improvement (5-10%), but it doesn't work for flat_array_map, because that + is already a "flat" std::array and would cause the whole tree to materialize during compilation time. + Lesson: Try to make flat_sparse_map "flat" -> see #86 +#88 Using PQ for upper part of WQ. This had absolutely no effect (testing with query_mm_d_benchmark with 100K-10M). + Counting showed that PQ would go 3-5 nodes deep (100K:3, 10M: 5) but that had no effect. + Lesson: Look at WQ initialization, it may be too expensive. Why is WQ traversal so slow??? 
+ + + +Fix const-ness +============== +- operator[] should have a const overload +- find() should have a non-const overload +- test: + +TEST(PhTreeTest, SmokeTestConstTree) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + tree1.emplace(p, Id{2}); + Id id3{3}; + tree1.insert(p, id3); + Id id4{4}; + tree1.insert(p, id4); + const auto& tree = tree1; + ASSERT_EQ(tree.size(), 1); + ASSERT_EQ(tree.find(p).second()._i, 1); + ASSERT_EQ(tree[p]._i, 1); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree1.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree1.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + + +b_plus_tree_map - binary search +=============== +Use custom binary search: + + // return BptEntry* ?!?!? 
+ template + [[nodiscard]] auto lower_bound(key_t key, std::vector& data) noexcept { + return std::lower_bound(data.begin(), data.end(), key, [](E& left, const key_t key) { + return left.first < key; + }); + // auto pos = __lower_bound(&*data_leaf_.begin(), &*data_leaf_.end(), key); + // return data_leaf_.begin() + pos; + } + + template + inline auto __lower_bound(const TT* __first, const TT* __last, key_t __val) const noexcept { + const TT* const_first = __first; + auto __len = __last - __first; + + while (__len > 0) { + auto __half = __len >> 1; + const TT* __middle = __first + __half; + if (__middle->first < __val) { + __first = __middle; + ++__first; + __len = __len - __half - 1; + } else + __len = __half; + } + return __first - const_first; + } + diff --git a/WORKSPACE b/WORKSPACE index 0bd3d32b..4520a3c8 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,19 +1,6 @@ # Bazel bootstrapping -load("//tools/build_rules:http.bzl", "http_archive", "http_file") - -http_archive( - name = "bazel_skylib", - sha256 = "1dde365491125a3db70731e25658dfdd3bc5dbdfd11b840b3e987ecf043c7ca0", - url = "https://github.com/bazelbuild/bazel-skylib/releases/download/0.9.0/bazel_skylib-0.9.0.tar.gz", -) - -load("@bazel_skylib//lib:versions.bzl", "versions") - -versions.check( - minimum_bazel_version = "4.2.2", - maximum_bazel_version = "4.2.2", -) +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file") # NOTE: We make third_party/ its own bazel workspace because it allows to run `bazel build ...` without # having all targets defined in third-party BUILD files in that directory buildable. 
@@ -34,17 +21,16 @@ http_archive( http_archive( name = "gbenchmark", - sha256 = "dccbdab796baa1043f04982147e67bb6e118fe610da2c65f88912d73987e700c", - strip_prefix = "benchmark-1.5.2", - url = "https://github.com/google/benchmark/archive/v1.5.2.tar.gz", + sha256 = "6132883bc8c9b0df5375b16ab520fac1a85dc9e4cf5be59480448ece74b278d4", + strip_prefix = "benchmark-1.6.1", + url = "https://github.com/google/benchmark/archive/v1.6.1.tar.gz", ) http_archive( name = "gtest", - build_file = "@third_party//gtest:BUILD", - sha256 = "9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb", - strip_prefix = "googletest-release-1.10.0", - url = "https://github.com/google/googletest/archive/release-1.10.0.tar.gz", + sha256 = "b4870bf121ff7795ba20d20bcdd8627b8e088f2d1dab299a031c1034eddc93d5", + strip_prefix = "googletest-release-1.11.0", + url = "https://github.com/google/googletest/archive/release-1.11.0.tar.gz", ) # Development environment tooling diff --git a/phtree/benchmark/BUILD b/benchmark/BUILD similarity index 56% rename from phtree/benchmark/BUILD rename to benchmark/BUILD index 95315788..c6549ccd 100644 --- a/phtree/benchmark/BUILD +++ b/benchmark/BUILD @@ -3,9 +3,6 @@ package(default_visibility = ["//visibility:private"]) cc_library( name = "benchmark", testonly = True, - srcs = [ - "logging.cc", - ], hdrs = [ "benchmark_util.h", "logging.h", @@ -21,15 +18,80 @@ cc_library( ) cc_binary( - name = "count_mm_d_benchmark", + name = "bpt_insert_benchmark", + testonly = True, + srcs = [ + "bpt_insert_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "//include/phtree/common", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "bpt_erase_benchmark", testonly = True, srcs = [ - "count_mm_d_benchmark.cc", + "bpt_erase_benchmark.cc", ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", + "//include/phtree/common", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + 
+cc_binary( + name = "bpt_erase_it_benchmark", + testonly = True, + srcs = [ + "bpt_erase_it_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "//include/phtree/common", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "bpt_lower_bound_benchmark", + testonly = True, + srcs = [ + "bpt_lower_bound_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "//include/phtree/common", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "bpt_iter_benchmark", + testonly = True, + srcs = [ + "bpt_iter_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "//include/phtree/common", "@gbenchmark//:benchmark", "@spdlog", ], @@ -43,8 +105,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -58,8 +120,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -73,8 +135,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -88,8 +150,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -103,8 +165,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -118,8 +180,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -133,8 +195,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -148,8 +210,8 @@ cc_binary( ], 
linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -163,8 +225,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -178,8 +240,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -193,8 +255,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -208,8 +270,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -223,8 +285,23 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "query_mm_d_filter_benchmark", + testonly = True, + srcs = [ + "query_mm_d_filter_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -238,8 +315,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -253,8 +330,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -268,8 +345,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -283,8 +360,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -298,8 +375,68 @@ cc_binary( ], linkstatic = True, deps = [ - 
"//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_insert_d_benchmark", + testonly = True, + srcs = [ + "hd_insert_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_erase_d_benchmark", + testonly = True, + srcs = [ + "hd_erase_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_query_d_benchmark", + testonly = True, + srcs = [ + "hd_query_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_knn_d_benchmark", + testonly = True, + srcs = [ + "hd_knn_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt new file mode 100644 index 00000000..bb75b306 --- /dev/null +++ b/benchmark/CMakeLists.txt @@ -0,0 +1,60 @@ +cmake_minimum_required(VERSION 3.14) +project(phtree-benchmarks) + +set(BENCHMARK_ENABLE_TESTING OFF) + +include(FetchContent) + +FetchContent_Declare( + googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.7.0 +) +FetchContent_MakeAvailable(googlebenchmark) + +FetchContent_Declare( + spdlog + GIT_REPOSITORY https://github.com/gabime/spdlog.git + GIT_TAG v1.10.0 +) +FetchContent_MakeAvailable(spdlog) + +macro(package_add_benchmark TESTNAME) + add_executable(${TESTNAME} ${ARGN} benchmark_util.h logging.h) + target_link_libraries(${TESTNAME} PRIVATE benchmark::benchmark) + target_link_libraries(${TESTNAME} PRIVATE spdlog::spdlog) + target_link_libraries(${TESTNAME} PRIVATE phtree::phtree) +endmacro() + +add_compile_definitions(RUN_HAVE_STD_REGEX=0 RUN_HAVE_POSIX_REGEX=0 
COMPILE_HAVE_GNU_POSIX_REGEX=0) + +package_add_benchmark(bpt_erase_benchmark bpt_erase_benchmark.cc) +package_add_benchmark(bpt_erase_it_benchmark bpt_erase_it_benchmark.cc) +package_add_benchmark(bpt_insert_benchmark bpt_insert_benchmark.cc) +package_add_benchmark(bpt_iter_benchmark bpt_iter_benchmark.cc) +package_add_benchmark(bpt_lower_bound_benchmark bpt_lower_bound_benchmark.cc) + +package_add_benchmark(count_mm_d_benchmark count_mm_d_benchmark.cc) +package_add_benchmark(erase_benchmark erase_benchmark.cc) +package_add_benchmark(erase_d_benchmark erase_d_benchmark.cc) +package_add_benchmark(extent_benchmark extent_benchmark.cc) +package_add_benchmark(extent_benchmark_weird extent_benchmark_weird.cc) +package_add_benchmark(find_benchmark find_benchmark.cc) +package_add_benchmark(hd_erase_d_benchmark hd_erase_d_benchmark.cc) +package_add_benchmark(hd_insert_d_benchmark hd_insert_d_benchmark.cc) +package_add_benchmark(hd_knn_d_benchmark hd_knn_d_benchmark.cc) +package_add_benchmark(hd_query_d_benchmark hd_query_d_benchmark.cc) +package_add_benchmark(insert_benchmark insert_benchmark.cc) +package_add_benchmark(insert_box_d_benchmark insert_box_d_benchmark.cc) +package_add_benchmark(insert_d_benchmark insert_d_benchmark.cc) +package_add_benchmark(knn_d_benchmark knn_d_benchmark.cc) +package_add_benchmark(query_benchmark query_benchmark.cc) +package_add_benchmark(query_box_d_benchmark query_box_d_benchmark.cc) +package_add_benchmark(query_d_benchmark query_d_benchmark.cc) +package_add_benchmark(query_mm_box_d_benchmark query_mm_box_d_benchmark.cc) +package_add_benchmark(query_mm_d_benchmark query_mm_d_benchmark.cc) +package_add_benchmark(query_mm_d_filter_benchmark query_mm_d_filter_benchmark.cc) +package_add_benchmark(update_box_d_benchmark update_box_d_benchmark.cc) +package_add_benchmark(update_d_benchmark update_d_benchmark.cc) +package_add_benchmark(update_mm_box_d_benchmark update_mm_box_d_benchmark.cc) +package_add_benchmark(update_mm_d_benchmark 
update_mm_d_benchmark.cc) diff --git a/phtree/benchmark/benchmark_util.h b/benchmark/benchmark_util.h similarity index 93% rename from phtree/benchmark/benchmark_util.h rename to benchmark/benchmark_util.h index 5af70367..73069710 100644 --- a/phtree/benchmark/benchmark_util.h +++ b/benchmark/benchmark_util.h @@ -81,7 +81,7 @@ auto CreateDataCLUSTER = [](auto& points, }; auto CreateDuplicates = - [](auto& points, size_t num_unique_entries, size_t num_total_entities, std::uint32_t seed) { + [](auto& points, int num_unique_entries, size_t num_total_entities, std::uint32_t seed) { std::default_random_engine random_engine{seed}; std::uniform_int_distribution<> distribution(0, num_unique_entries); for (size_t i = num_unique_entries; i < num_total_entities; ++i) { @@ -91,7 +91,7 @@ auto CreateDuplicates = }; } // namespace -enum TestGenerator { CUBE, CLUSTER }; +enum TestGenerator { CUBE = 4, CLUSTER = 7 }; template auto CreatePointDataMinMax = [](auto& points, @@ -101,11 +101,13 @@ auto CreatePointDataMinMax = [](auto& points, double world_minimum, double world_maximum, double fraction_of_duplicates) { - auto set_coordinate_lambda = [](auto& p, dimension_t dim, auto value) { p[dim] = value; }; + auto set_coordinate_lambda = [](auto& p, dimension_t dim, auto value) { + p[dim] = static_cast < typename std::remove_reference_t>(value); + }; // Create at least 1 unique point // Note that the following point generator is likely, but not guaranteed, to created unique // points. - size_t num_unique_entries = 1 + (num_entities - 1) * (1. - fraction_of_duplicates); + int num_unique_entries = static_cast(1 + (num_entities - 1) * (1. - fraction_of_duplicates)); points.reserve(num_entities); switch (test_generator) { case CUBE: @@ -140,7 +142,7 @@ auto CreateBoxDataMinMax = [](auto& points, // Create at least 1 unique point // Note that the following point generator is likely, but not guaranteed, to created unique // points. - int num_unique_entries = 1 + (num_entities - 1) * (1. 
- fraction_of_duplicates); + int num_unique_entries = static_cast(1 + (num_entities - 1) * (1. - fraction_of_duplicates)); points.reserve(num_entities); switch (test_generator) { case CUBE: diff --git a/benchmark/bpt_erase_benchmark.cc b/benchmark/bpt_erase_benchmark.cc new file mode 100644 index 00000000..976fc875 --- /dev/null +++ b/benchmark/bpt_erase_benchmark.cc @@ -0,0 +1,185 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/common/b_plus_tree_hash_map.h" +#include "phtree/common/b_plus_tree_map.h" +#include "phtree/common/b_plus_tree_multimap.h" +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +enum Scenario { + MAP, + MULTIMAP, + HASH_MAP, + STD_MAP, +}; + +using payload_t = int; +using key_t = uint32_t; + +template +using TestIndex = typename std::conditional_t< + SCENARIO == MAP, + b_plus_tree_map, + typename std::conditional_t< + SCENARIO == MULTIMAP, + b_plus_tree_multimap, + typename std::conditional_t< + SCENARIO == HASH_MAP, + b_plus_tree_hash_map, + std::map>>>; + +/* + * Benchmark for removing entries. 
+ */ +template +class IndexBenchmark { + using Index = TestIndex; + + public: + explicit IndexBenchmark(benchmark::State& state, double fraction_of_duplicates); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, Index& tree); + void Remove(benchmark::State& state, Index& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + const double fraction_of_duplicates_; + + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double fraction_of_duplicates) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, fraction_of_duplicates_(fraction_of_duplicates) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new Index(); + Insert(state, *tree); + state.ResumeTiming(); + + Remove(state, *tree); + + state.PauseTiming(); + // avoid measuring deallocation + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Creating {} entities with DIM={}.", num_entities_, 1); + CreatePointData<1>(points_, data_type_, num_entities_, 0, GLOBAL_MAX, fraction_of_duplicates_); + + state.counters["total_remove_count"] = benchmark::Counter(0); + state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State&, Index& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i][0], (payload_t)i); + } +} + +template +void IndexBenchmark::Remove(benchmark::State& state, Index& tree) { + 
size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { + // n += tree.erase(points_[i][0]); + // TODO + tree.erase(points_[i][0]); + ++n; + } + + state.counters["total_remove_count"] += n; + state.counters["remove_rate"] += n; +} + +} // namespace + +template +void PhTree3D_MAP_REM(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_MM_REM(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MULTIMAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_HM_REM(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, HASH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_STD_MAP_REM(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, STD_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, function_to_call +BENCHMARK_CAPTURE(PhTree3D_MAP_REM, MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MM_REM, MULTIMAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_HM_REM, HASH_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_STD_MAP_REM, STD_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/bpt_erase_it_benchmark.cc b/benchmark/bpt_erase_it_benchmark.cc new file mode 100644 index 00000000..edc05ade --- /dev/null +++ 
b/benchmark/bpt_erase_it_benchmark.cc @@ -0,0 +1,186 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/common/b_plus_tree_hash_map.h" +#include "phtree/common/b_plus_tree_map.h" +#include "phtree/common/b_plus_tree_multimap.h" +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +enum Scenario { + MAP, + MULTIMAP, + HASH_MAP, + STD_MAP, +}; + +using payload_t = int; +using key_t = uint32_t; + +template +using TestIndex = typename std::conditional_t< + SCENARIO == MAP, + b_plus_tree_map, + typename std::conditional_t< + SCENARIO == MULTIMAP, + b_plus_tree_multimap, + typename std::conditional_t< + SCENARIO == HASH_MAP, + b_plus_tree_hash_map, + std::map>>>; + +/* + * Benchmark for removing entries. 
+ */ +template +class IndexBenchmark { + using Index = TestIndex; + + public: + explicit IndexBenchmark(benchmark::State& state, double fraction_of_duplicates); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, Index& tree); + void Remove(benchmark::State& state, Index& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + const double fraction_of_duplicates_; + + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double fraction_of_duplicates) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, fraction_of_duplicates_(fraction_of_duplicates) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new Index(); + Insert(state, *tree); + state.ResumeTiming(); + + Remove(state, *tree); + + state.PauseTiming(); + // avoid measuring deallocation + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Creating {} entities with DIM={}.", num_entities_, 1); + CreatePointData<1>(points_, data_type_, num_entities_, 0, GLOBAL_MAX, fraction_of_duplicates_); + + state.counters["total_remove_count"] = benchmark::Counter(0); + state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State&, Index& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i][0], (payload_t)i); + } +} + +template +void IndexBenchmark::Remove(benchmark::State& state, Index& tree) { + 
size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { + auto iter = tree.find(points_[i][0]); + if (iter != tree.end()) { + tree.erase(iter); + ++n; + } + } + + state.counters["total_remove_count"] += n; + state.counters["remove_rate"] += n; +} + +} // namespace + +template +void PhTree3D_MAP_REM_IT(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_MM_REM_IT(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MULTIMAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_HM_REM_IT(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, HASH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_STD_MAP_REM_IT(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, STD_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, function_to_call +BENCHMARK_CAPTURE(PhTree3D_MAP_REM_IT, MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MM_REM_IT, MULTIMAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_HM_REM_IT, HASH_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_STD_MAP_REM_IT, STD_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/bpt_insert_benchmark.cc b/benchmark/bpt_insert_benchmark.cc new file mode 100644 index 
00000000..539e2d90 --- /dev/null +++ b/benchmark/bpt_insert_benchmark.cc @@ -0,0 +1,217 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/common/b_plus_tree_hash_map.h" +#include "phtree/common/b_plus_tree_map.h" +#include "phtree/common/b_plus_tree_multimap.h" +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +enum Scenario { + MAP, + MULTIMAP, + HASH_MAP, + STD_MAP, +}; + +using payload_t = int; +using key_t = uint32_t; + +template +using TestIndex = typename std::conditional_t< + SCENARIO == MAP, + b_plus_tree_map, + typename std::conditional_t< + SCENARIO == MULTIMAP, + b_plus_tree_multimap, + typename std::conditional_t< + SCENARIO == HASH_MAP, + b_plus_tree_hash_map, + std::map>>>; + +/* + * Benchmark for adding entries to the index. 
+ */ +template +class IndexBenchmark { + using Index = TestIndex; + + public: + explicit IndexBenchmark(benchmark::State& state, double fraction_of_duplicates); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, Index& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + const double fraction_of_duplicates_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double fraction_of_duplicates) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, fraction_of_duplicates_(fraction_of_duplicates) +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new Index(); + state.ResumeTiming(); + + Insert(state, *tree); + + // we do this to avoid measuring deallocation + state.PauseTiming(); + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Creating {} entities with DIM={}.", num_entities_, 1); + CreatePointData<1>(points_, data_type_, num_entities_, 0, GLOBAL_MAX, fraction_of_duplicates_); + + state.counters["total_insert_count"] = benchmark::Counter(0); + state.counters["insert_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + switch (TYPE) { + case MAP: { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i][0], (payload_t)i); + } + break; + } + case MULTIMAP: { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i][0], (payload_t)i); + } + break; + } + default: { // HASH_MAP and STD_MAP: without this, STD_MAP would insert nothing + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i][0], (payload_t)i); + } + break; + } + } 
+ + state.counters["total_insert_count"] += num_entities_; + state.counters["insert_rate"] += num_entities_; +} + +} // namespace + +template +void PhTree3D_MAP_INS_3(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree4D_MAP_INS_4(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<4, MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree6D_MAP_INS_6(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<6, MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_MAP_INS(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_MM_INS(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MULTIMAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_HM_INS(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, HASH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_STD_MAP_INS(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, STD_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, function_to_call +BENCHMARK_CAPTURE(PhTree3D_MAP_INS_3, MAP, 0.0) + ->RangeMultiplier(2) + ->Ranges({{2, 8}, {TestGenerator::CUBE, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree4D_MAP_INS_4, MAP, 0.0) + ->RangeMultiplier(2) + ->Ranges({{2, 16}, {TestGenerator::CUBE, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree6D_MAP_INS_6, MAP, 0.0) + ->RangeMultiplier(2) + ->Ranges({{2, 32}, {TestGenerator::CUBE, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MAP_INS, MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MM_INS, MULTIMAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_HM_INS, HASH_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_STD_MAP_INS, STD_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/bpt_iter_benchmark.cc b/benchmark/bpt_iter_benchmark.cc new file mode 100644 index 00000000..fc12a3c1 --- /dev/null +++ b/benchmark/bpt_iter_benchmark.cc @@ -0,0 +1,171 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/common/b_plus_tree_hash_map.h" +#include "phtree/common/b_plus_tree_map.h" +#include "phtree/common/b_plus_tree_multimap.h" +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +enum Scenario { + MAP, + MULTIMAP, + HASH_MAP, + STD_MAP, +}; + +using payload_t = int; +using key_t = uint32_t; + +template +using TestIndex = typename std::conditional_t< + SCENARIO == MAP, + b_plus_tree_map, + typename std::conditional_t< + SCENARIO == MULTIMAP, + b_plus_tree_multimap, + typename std::conditional_t< + SCENARIO == HASH_MAP, + b_plus_tree_hash_map, + std::map>>>; + +/* + * Benchmark for iterating over all entries. 
+ */ +template +class IndexBenchmark { + using Index = TestIndex; + + public: + explicit IndexBenchmark(benchmark::State& state, double fraction_of_duplicates); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state); + + const TestGenerator data_type_; + const size_t num_entities_; + const double fraction_of_duplicates_; + + Index tree_; + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double fraction_of_duplicates) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, fraction_of_duplicates_(fraction_of_duplicates) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + QueryWorld(state); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Creating {} entities with DIM={}.", num_entities_, 1); + CreatePointData<1>(points_, data_type_, num_entities_, 0, GLOBAL_MAX, fraction_of_duplicates_); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i][0], (payload_t)i); + } + + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state) { + size_t n = 0; + for (auto q = tree_.begin(); q != tree_.end(); ++q) { + ++n; + } + + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +} 
// namespace + +template +void PhTree3D_MAP_ITER(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_MM_ITER(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MULTIMAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_HM_ITER(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, HASH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_STD_MAP_ITER(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, STD_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, function_to_call +BENCHMARK_CAPTURE(PhTree3D_MAP_ITER, MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MM_ITER, MULTIMAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_HM_ITER, HASH_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_STD_MAP_ITER, STD_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/bpt_lower_bound_benchmark.cc b/benchmark/bpt_lower_bound_benchmark.cc new file mode 100644 index 00000000..9bafa8e6 --- /dev/null +++ b/benchmark/bpt_lower_bound_benchmark.cc @@ -0,0 +1,187 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance 
with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/common/b_plus_tree_hash_map.h" +#include "phtree/common/b_plus_tree_map.h" +#include "phtree/common/b_plus_tree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +enum Scenario { + MAP, + MULTIMAP, + HASH_MAP, + STD_MAP, +}; + +using payload_t = int; +using key_t = uint32_t; + +template +using TestIndex = typename std::conditional_t< + SCENARIO == MAP, + b_plus_tree_map, + typename std::conditional_t< + SCENARIO == MULTIMAP, + b_plus_tree_multimap, + typename std::conditional_t< + SCENARIO == HASH_MAP, + b_plus_tree_hash_map, + std::map>>>; + +/* + * Benchmark for looking up entries by their key. 
+ */ +template +class IndexBenchmark { + using Index = TestIndex; + + public: + explicit IndexBenchmark(benchmark::State& state, double fraction_of_duplicates); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + bool QueryWorld(); + + const TestGenerator data_type_; + const size_t num_entities_; + const double fraction_of_duplicates_; + + Index tree_; + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double fraction_of_duplicates) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, fraction_of_duplicates_(fraction_of_duplicates) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + int num_inner = 0; + int num_found = 0; + for (auto _ : state) { + num_found += QueryWorld(); + ++num_inner; + } + + // Moved outside of the loop because EXPENSIVE + state.counters["total_result_count"] += num_found; + state.counters["query_rate"] += num_inner; + state.counters["result_rate"] += num_found; + state.counters["avg_result_count"] += num_found; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Creating {} entities with DIM={}.", num_entities_, 1); + CreatePointData<1>(points_, data_type_, num_entities_, 0, GLOBAL_MAX, fraction_of_duplicates_); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i][0], (payload_t)i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, 
benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +bool IndexBenchmark::QueryWorld() { + static int pos = 0; + pos = (pos + 1) % num_entities_; + bool found = true; + if (pos % 2 == 0) { + // This should always be a match + auto iter = tree_.lower_bound(points_.at(pos)[0]); + found = iter != tree_.end() && iter->second == pos; + } else { + // This should rarely be a match + payload_t x = pos % GLOBAL_MAX; + found = tree_.find(x) != tree_.end(); + } + return found; +} + +} // namespace + +template +void PhTree3D_MAP_LOWER(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_MM_LOWER(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MULTIMAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_HM_LOWER(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, HASH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_STD_MAP_LOWER(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, STD_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, function_to_call +BENCHMARK_CAPTURE(PhTree3D_MAP_LOWER, MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MM_LOWER, MULTIMAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_HM_LOWER, HASH_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_STD_MAP_LOWER, STD_MAP, 0.0) + ->RangeMultiplier(10) + ->Ranges({{100, 100 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/bpt_results.txt b/benchmark/bpt_results.txt new file mode 100644 index 00000000..99d536d7 --- /dev/null +++ b/benchmark/bpt_results.txt @@ -0,0 +1,139 @@ +----------------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations insert_rate total_insert_count +----------------------------------------------------------------------------------------------------------- +PhTree3D_MAP_INS/MAP/100/7 0.001 ms 0.001 ms 785258 113.224M/s 78.5258M +PhTree3D_MAP_INS/MAP/1000/7 0.035 ms 0.035 ms 20049 28.6738M/s 20.049M +PhTree3D_MAP_INS/MAP/10000/7 0.805 ms 0.804 ms 867 12.4313M/s 8.67M +PhTree3D_MAP_INS/MAP/100000/7 13.6 ms 13.6 ms 52 7.35575M/s 5.2M +PhTree3D_MAP_INS/MAP/100/4 0.004 ms 0.004 ms 171983 24.7352M/s 17.1983M +PhTree3D_MAP_INS/MAP/1000/4 0.105 ms 0.105 ms 6658 9.54912M/s 6.658M +PhTree3D_MAP_INS/MAP/10000/4 2.07 ms 2.06 ms 334 4.84615M/s 3.34M +PhTree3D_MAP_INS/MAP/100000/4 25.4 ms 25.4 ms 28 3.93662M/s 2.8M 
+PhTree3D_MM_INS/MULTIMAP/100/7 0.002 ms 0.002 ms 352242 50.795M/s 35.2242M +PhTree3D_MM_INS/MULTIMAP/1000/7 0.041 ms 0.041 ms 16949 24.3583M/s 16.949M +PhTree3D_MM_INS/MULTIMAP/10000/7 0.591 ms 0.590 ms 1178 16.9395M/s 11.78M +PhTree3D_MM_INS/MULTIMAP/100000/7 6.98 ms 6.97 ms 100 14.3413M/s 10M +PhTree3D_MM_INS/MULTIMAP/100/4 0.002 ms 0.002 ms 356972 50.9751M/s 35.6972M +PhTree3D_MM_INS/MULTIMAP/1000/4 0.048 ms 0.048 ms 14495 20.6886M/s 14.495M +PhTree3D_MM_INS/MULTIMAP/10000/4 0.822 ms 0.822 ms 853 12.1727M/s 8.53M +PhTree3D_MM_INS/MULTIMAP/100000/4 12.7 ms 12.7 ms 55 7.85215M/s 5.5M +PhTree3D_HM_INS/HASH_MAP/100/7 0.001 ms 0.001 ms 724308 104.457M/s 72.4308M +PhTree3D_HM_INS/HASH_MAP/1000/7 0.015 ms 0.015 ms 45433 65.3682M/s 45.433M +PhTree3D_HM_INS/HASH_MAP/10000/7 0.251 ms 0.251 ms 2796 39.8325M/s 27.96M +PhTree3D_HM_INS/HASH_MAP/100000/7 3.01 ms 3.01 ms 232 33.2264M/s 23.2M +PhTree3D_HM_INS/HASH_MAP/100/4 0.002 ms 0.002 ms 316381 45.3375M/s 31.6381M +PhTree3D_HM_INS/HASH_MAP/1000/4 0.059 ms 0.059 ms 11810 16.903M/s 11.81M +PhTree3D_HM_INS/HASH_MAP/10000/4 0.824 ms 0.824 ms 845 12.141M/s 8.45M +PhTree3D_HM_INS/HASH_MAP/100000/4 8.51 ms 8.50 ms 83 11.7665M/s 8.3M + +----------------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations remove_rate total_remove_count +----------------------------------------------------------------------------------------------------------- +PhTree3D_MAP_REM/MAP/100/7 0.001 ms 0.001 ms 1134213 161.146M/s 113.421M +PhTree3D_MAP_REM/MAP/1000/7 0.012 ms 0.012 ms 56164 80.3349M/s 56.164M +PhTree3D_MAP_REM/MAP/10000/7 0.282 ms 0.282 ms 2484 35.4535M/s 24.84M +PhTree3D_MAP_REM/MAP/100000/7 3.95 ms 3.95 ms 177 25.3126M/s 17.7M +PhTree3D_MAP_REM/MAP/100/4 0.004 ms 0.004 ms 194002 27.839M/s 19.4002M +PhTree3D_MAP_REM/MAP/1000/4 0.107 ms 0.107 ms 6534 9.32138M/s 6.534M +PhTree3D_MAP_REM/MAP/10000/4 1.73 ms 1.73 ms 414 5.77979M/s 4.14M +PhTree3D_MAP_REM/MAP/100000/4 7.26 
ms 7.26 ms 98 13.7833M/s 9.8M +PhTree3D_MM_REM/MULTIMAP/100/7 0.002 ms 0.002 ms 418182 59.7288M/s 41.8182M +PhTree3D_MM_REM/MULTIMAP/1000/7 0.037 ms 0.037 ms 18841 27.0127M/s 18.841M +PhTree3D_MM_REM/MULTIMAP/10000/7 0.602 ms 0.602 ms 1160 16.6031M/s 11.6M +PhTree3D_MM_REM/MULTIMAP/100000/7 7.52 ms 7.52 ms 92 13.2971M/s 9.2M +PhTree3D_MM_REM/MULTIMAP/100/4 0.004 ms 0.004 ms 182417 26.114M/s 18.2417M +PhTree3D_MM_REM/MULTIMAP/1000/4 0.091 ms 0.092 ms 7628 10.9284M/s 7.628M +PhTree3D_MM_REM/MULTIMAP/10000/4 1.21 ms 1.21 ms 575 8.23784M/s 5.75M +PhTree3D_MM_REM/MULTIMAP/100000/4 8.71 ms 8.71 ms 80 11.4819M/s 8M +PhTree3D_HM_REM/HASH_MAP/100/7 0.001 ms 0.001 ms 936039 132.9M/s 93.6039M +PhTree3D_HM_REM/HASH_MAP/1000/7 0.007 ms 0.007 ms 96077 137.52M/s 96.077M +PhTree3D_HM_REM/HASH_MAP/10000/7 0.150 ms 0.150 ms 4654 66.8036M/s 46.54M +PhTree3D_HM_REM/HASH_MAP/100000/7 1.80 ms 1.80 ms 388 55.4664M/s 38.8M +PhTree3D_HM_REM/HASH_MAP/100/4 0.002 ms 0.002 ms 348380 50.0444M/s 34.838M +PhTree3D_HM_REM/HASH_MAP/1000/4 0.066 ms 0.066 ms 10532 15.1587M/s 10.532M +PhTree3D_HM_REM/HASH_MAP/10000/4 0.814 ms 0.814 ms 859 12.2878M/s 8.59M +PhTree3D_HM_REM/HASH_MAP/100000/4 3.92 ms 3.92 ms 179 25.4785M/s 17.9M + +-------------------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations remove_rate total_remove_count +-------------------------------------------------------------------------------------------------------------- +PhTree3D_MAP_REM_IT/MAP/100/7 0.001 ms 0.001 ms 1109414 11.0697M/s 7.7659M +PhTree3D_MAP_REM_IT/MAP/1000/7 0.012 ms 0.012 ms 60138 5.31403M/s 3.72856M +PhTree3D_MAP_REM_IT/MAP/10000/7 0.274 ms 0.273 ms 2559 2.13396M/s 1.4919M +PhTree3D_MAP_REM_IT/MAP/100000/7 3.93 ms 3.92 ms 180 1.14716M/s 809.82k +PhTree3D_MAP_REM_IT/MAP/100/4 0.004 ms 0.004 ms 194959 28.1196M/s 19.3009M +PhTree3D_MAP_REM_IT/MAP/1000/4 0.108 ms 0.108 ms 6561 8.85178M/s 6.25919M +PhTree3D_MAP_REM_IT/MAP/10000/4 1.73 ms 1.73 ms 
406 3.65339M/s 2.56511M +PhTree3D_MAP_REM_IT/MAP/100000/4 7.10 ms 7.08 ms 101 1.41156M/s 1010k +PhTree3D_MM_REM_IT/MULTIMAP/100/7 0.002 ms 0.002 ms 342783 48.476M/s 34.2783M +PhTree3D_MM_REM_IT/MULTIMAP/1000/7 0.050 ms 0.050 ms 14134 20.0733M/s 14.134M +PhTree3D_MM_REM_IT/MULTIMAP/10000/7 0.645 ms 0.644 ms 1068 15.5355M/s 10.68M +PhTree3D_MM_REM_IT/MULTIMAP/100000/7 7.33 ms 7.31 ms 95 13.6752M/s 9.5M +PhTree3D_MM_REM_IT/MULTIMAP/100/4 0.002 ms 0.002 ms 358777 49.0992M/s 35.8777M +PhTree3D_MM_REM_IT/MULTIMAP/1000/4 0.060 ms 0.059 ms 11738 16.8332M/s 11.738M +PhTree3D_MM_REM_IT/MULTIMAP/10000/4 0.893 ms 0.892 ms 785 11.2159M/s 7.85M +PhTree3D_MM_REM_IT/MULTIMAP/100000/4 14.0 ms 14.0 ms 50 7.16646M/s 5M +PhTree3D_HM_REM_IT/HASH_MAP/100/7 0.001 ms 0.001 ms 1048725 10.6901M/s 7.34108M +PhTree3D_HM_REM_IT/HASH_MAP/1000/7 0.007 ms 0.007 ms 107111 9.49133M/s 6.64088M +PhTree3D_HM_REM_IT/HASH_MAP/10000/7 0.134 ms 0.134 ms 5246 4.36381M/s 3.05842M +PhTree3D_HM_REM_IT/HASH_MAP/100000/7 1.61 ms 1.61 ms 436 2.79811M/s 1.96156M +PhTree3D_HM_REM_IT/HASH_MAP/100/4 0.002 ms 0.002 ms 347320 49.8532M/s 34.3847M +PhTree3D_HM_REM_IT/HASH_MAP/1000/4 0.064 ms 0.064 ms 10810 15.0175M/s 10.3127M +PhTree3D_HM_REM_IT/HASH_MAP/10000/4 0.770 ms 0.769 ms 907 8.21894M/s 5.73043M +PhTree3D_HM_REM_IT/HASH_MAP/100000/4 3.67 ms 3.67 ms 190 2.72647M/s 1.9M + +----------------------------------------------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations avg_result_count query_rate result_rate total_result_count +----------------------------------------------------------------------------------------------------------------------------------------- +PhTree3D_MAP_LOWER/MAP/100/7 0.000 ms 0.000 ms 108527761 0.03 154.585M/s 4.63756M/s 3.25583M +PhTree3D_MAP_LOWER/MAP/1000/7 0.000 ms 0.000 ms 24656734 0.035 35.2453M/s 1.23359M/s 862.985k +PhTree3D_MAP_LOWER/MAP/10000/7 0.000 ms 0.000 ms 11105985 0.0597996 15.9078M/s 951.278k/s 
664.133k +PhTree3D_MAP_LOWER/MAP/100000/7 0.000 ms 0.000 ms 7059850 0.248968 9.76046M/s 2.43004M/s 1.75768M +PhTree3D_MAP_LOWER/MAP/100/4 0.000 ms 0.000 ms 42177752 0.5 60.1509M/s 30.0755M/s 21.0889M +PhTree3D_MAP_LOWER/MAP/1000/4 0.000 ms 0.000 ms 12767100 0.507 18.4261M/s 9.34202M/s 6.47292M +PhTree3D_MAP_LOWER/MAP/10000/4 0.000 ms 0.000 ms 5861927 0.632024 8.38577M/s 5.30001M/s 3.70488M +PhTree3D_MAP_LOWER/MAP/100000/4 0.000 ms 0.000 ms 5154060 0.549654 7.31538M/s 4.02092M/s 2.83295M +PhTree3D_MM_LOWER/MULTIMAP/100/7 0.000 ms 0.000 ms 61857402 0.03 88.7629M/s 2.66289M/s 1.85572M +PhTree3D_MM_LOWER/MULTIMAP/1000/7 0.000 ms 0.000 ms 43391899 0.031 61.9978M/s 1.92193M/s 1.34515M +PhTree3D_MM_LOWER/MULTIMAP/10000/7 0.000 ms 0.000 ms 25851325 0.0556007 37.9624M/s 2.11073M/s 1.43735M +PhTree3D_MM_LOWER/MULTIMAP/100000/7 0.000 ms 0.000 ms 14594151 0.244103 21.1417M/s 5.16075M/s 3.56247M +PhTree3D_MM_LOWER/MULTIMAP/100/4 0.000 ms 0.000 ms 75162827 0.5 107.597M/s 53.7983M/s 37.5814M +PhTree3D_MM_LOWER/MULTIMAP/1000/4 0.000 ms 0.000 ms 35007278 0.515 50.1738M/s 25.8395M/s 18.0287M +PhTree3D_MM_LOWER/MULTIMAP/10000/4 0.000 ms 0.000 ms 13386620 0.632095 19.0946M/s 12.0696M/s 8.46162M +PhTree3D_MM_LOWER/MULTIMAP/100000/4 0.000 ms 0.000 ms 7956589 0.549539 11.2079M/s 6.15921M/s 4.37246M +PhTree3D_HM_LOWER/HASH_MAP/100/7 0.000 ms 0.000 ms 101800469 0.03 146.587M/s 4.39759M/s 3.05401M +PhTree3D_HM_LOWER/HASH_MAP/1000/7 0.000 ms 0.000 ms 52684519 0.035 76.3512M/s 2.67229M/s 1.84396M +PhTree3D_HM_LOWER/HASH_MAP/10000/7 0.000 ms 0.000 ms 34751990 0.0598003 49.6004M/s 2.96612M/s 2.07818M +PhTree3D_HM_LOWER/HASH_MAP/100000/7 0.000 ms 0.000 ms 21139383 0.248973 30.1455M/s 7.50542M/s 5.26314M +PhTree3D_HM_LOWER/HASH_MAP/100/4 0.000 ms 0.000 ms 79877507 0.5 114.73M/s 57.3652M/s 39.9388M +PhTree3D_HM_LOWER/HASH_MAP/1000/4 0.000 ms 0.000 ms 27626562 0.507 39.72M/s 20.1381M/s 14.0067M +PhTree3D_HM_LOWER/HASH_MAP/10000/4 0.000 ms 0.000 ms 12454253 0.632008 17.8635M/s 11.2899M/s 7.87119M 
+PhTree3D_HM_LOWER/HASH_MAP/100000/4 0.000 ms 0.000 ms 11540737 0.549849 16.5351M/s 9.09181M/s 6.34567M + +--------------------------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations avg_result_count query_rate result_rate +--------------------------------------------------------------------------------------------------------------------- +PhTree3D_MAP_ITER/MAP/100/7 0.000 ms 0.000 ms 11384242 7 16.2563M/s 113.794M/s +PhTree3D_MAP_ITER/MAP/1000/7 0.000 ms 0.000 ms 7744321 62 10.8926M/s 675.34M/s +PhTree3D_MAP_ITER/MAP/10000/7 0.000 ms 0.000 ms 1883198 583 2.67577M/s 1.55997G/s +PhTree3D_MAP_ITER/MAP/100000/7 0.007 ms 0.007 ms 99600 4.499k 137.937k/s 620.577M/s +PhTree3D_MAP_ITER/MAP/100/4 0.000 ms 0.000 ms 6536021 99 9.43801M/s 934.363M/s +PhTree3D_MAP_ITER/MAP/1000/4 0.001 ms 0.001 ms 1000000 954 1.94032M/s 1.85107G/s +PhTree3D_MAP_ITER/MAP/10000/4 0.011 ms 0.011 ms 63214 6.318k 90.7464k/s 573.336M/s +PhTree3D_MAP_ITER/MAP/100000/4 0.024 ms 0.024 ms 30684 10k 42.095k/s 420.95M/s +PhTree3D_MM_ITER/MULTIMAP/100/7 0.000 ms 0.000 ms 7277771 100 9.63493M/s 963.493M/s +PhTree3D_MM_ITER/MULTIMAP/1000/7 0.001 ms 0.001 ms 1236789 1000 1.78368M/s 1.78368G/s +PhTree3D_MM_ITER/MULTIMAP/10000/7 0.008 ms 0.008 ms 88764 10k 128.154k/s 1.28154G/s +PhTree3D_MM_ITER/MULTIMAP/100000/7 0.128 ms 0.128 ms 5518 100k 7.84115k/s 784.115M/s +PhTree3D_MM_ITER/MULTIMAP/100/4 0.000 ms 0.000 ms 7093222 100 9.96576M/s 996.576M/s +PhTree3D_MM_ITER/MULTIMAP/1000/4 0.001 ms 0.001 ms 1281482 1000 1.84305M/s 1.84305G/s +PhTree3D_MM_ITER/MULTIMAP/10000/4 0.009 ms 0.009 ms 81319 10k 116.382k/s 1.16382G/s +PhTree3D_MM_ITER/MULTIMAP/100000/4 0.164 ms 0.164 ms 4277 100k 6.08844k/s 608.844M/s +PhTree3D_HM_ITER/HASH_MAP/100/7 0.000 ms 0.000 ms 11617119 7 16.3329M/s 114.33M/s +PhTree3D_HM_ITER/HASH_MAP/1000/7 0.000 ms 0.000 ms 8866499 62 12.3994M/s 768.762M/s +PhTree3D_HM_ITER/HASH_MAP/10000/7 0.000 ms 0.000 ms 1901829 583 2.79653M/s 
1.63038G/s +PhTree3D_HM_ITER/HASH_MAP/100000/7 0.004 ms 0.004 ms 166174 4.499k 238.732k/s 1074.06M/s +PhTree3D_HM_ITER/HASH_MAP/100/4 0.000 ms 0.000 ms 7324969 99 10.155M/s 1005.34M/s +PhTree3D_HM_ITER/HASH_MAP/1000/4 0.001 ms 0.001 ms 1104930 954 1.63454M/s 1.55935G/s +PhTree3D_HM_ITER/HASH_MAP/10000/4 0.006 ms 0.006 ms 110415 6.318k 159.513k/s 1007.8M/s +PhTree3D_HM_ITER/HASH_MAP/100000/4 0.011 ms 0.011 ms 63458 10k 91.0623k/s 910.623M/s diff --git a/phtree/benchmark/count_mm_d_benchmark.cc b/benchmark/count_mm_d_benchmark.cc similarity index 95% rename from phtree/benchmark/count_mm_d_benchmark.cc rename to benchmark/count_mm_d_benchmark.cc index b05987bd..a2840cb7 100644 --- a/phtree/benchmark/count_mm_d_benchmark.cc +++ b/benchmark/count_mm_d_benchmark.cc @@ -67,7 +67,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); }; @@ -165,11 +165,13 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - query.box.min()[d] = s - radius; - query.box.max()[d] = s + radius; + auto s = shift * cube_distribution_(random_engine_); + query.box.min()[d] = s; + query.box.max()[d] = s + length; } } diff --git a/phtree/benchmark/erase_benchmark.cc b/benchmark/erase_benchmark.cc similarity index 89% rename from phtree/benchmark/erase_benchmark.cc rename to benchmark/erase_benchmark.cc index 1e59a6d2..99881ff4 100644 --- a/phtree/benchmark/erase_benchmark.cc +++ b/benchmark/erase_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -26,6 +26,7 @@ using namespace improbable::phtree::phbenchmark; namespace { const int GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; /* * Benchmark for removing entries. 
@@ -39,11 +40,11 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTree& tree); - void Remove(benchmark::State& state, PhTree& tree); + void Insert(benchmark::State& state, PhTree& tree); + void Remove(benchmark::State& state, PhTree& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::default_random_engine random_engine_; std::uniform_int_distribution<> cube_distribution_; @@ -66,7 +67,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTree(); + auto* tree = new PhTree(); Insert(state, *tree); state.ResumeTiming(); @@ -91,16 +92,16 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State&, PhTree& tree) { - for (int i = 0; i < num_entities_; ++i) { - tree.emplace(points_[i], i); +void IndexBenchmark::Insert(benchmark::State&, PhTree& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], (int)i); } } template -void IndexBenchmark::Remove(benchmark::State& state, PhTree& tree) { - int n = 0; - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Remove(benchmark::State& state, PhTree& tree) { + size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { n += tree.erase(points_[i]); } diff --git a/phtree/benchmark/erase_d_benchmark.cc b/benchmark/erase_d_benchmark.cc similarity index 90% rename from phtree/benchmark/erase_d_benchmark.cc rename to benchmark/erase_d_benchmark.cc index a544a4e0..0500f88c 100644 --- a/phtree/benchmark/erase_d_benchmark.cc +++ b/benchmark/erase_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -26,6 +26,7 @@ using namespace improbable::phtree::phbenchmark; namespace { const int GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; /* * Benchmark for removing entries. @@ -39,11 +40,11 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTreeD& tree); - void Remove(benchmark::State& state, PhTreeD& tree); + void Insert(benchmark::State& state, PhTreeD& tree); + void Remove(benchmark::State& state, PhTreeD& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::default_random_engine random_engine_; std::uniform_real_distribution<> cube_distribution_; @@ -66,7 +67,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTreeD(); + auto* tree = new PhTreeD(); Insert(state, *tree); state.ResumeTiming(); @@ -91,16 +92,16 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { + for (payload_t i = 0; i < num_entities_; ++i) { tree.emplace(points_[i], i); } } template -void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { - int n = 0; - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { + size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { n += tree.erase(points_[i]); } diff --git a/phtree/benchmark/extent_benchmark.cc b/benchmark/extent_benchmark.cc similarity index 96% rename from phtree/benchmark/extent_benchmark.cc rename to benchmark/extent_benchmark.cc index 760a5749..85dba744 100644 --- a/phtree/benchmark/extent_benchmark.cc +++ 
b/benchmark/extent_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -42,7 +42,7 @@ class IndexBenchmark { void QueryWorld(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; PhTree tree_; std::default_random_engine random_engine_; @@ -73,8 +73,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/phtree/benchmark/extent_benchmark_weird.cc b/benchmark/extent_benchmark_weird.cc similarity index 98% rename from phtree/benchmark/extent_benchmark_weird.cc rename to benchmark/extent_benchmark_weird.cc index bee6ecb0..7653bece 100644 --- a/phtree/benchmark/extent_benchmark_weird.cc +++ b/benchmark/extent_benchmark_weird.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -48,7 +48,7 @@ class IndexBenchmark { void QueryWorld(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; PhTree tree_; std::default_random_engine random_engine_; @@ -81,8 +81,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/benchmark/find_benchmark.cc b/benchmark/find_benchmark.cc new file mode 100644 index 00000000..132a3236 --- /dev/null +++ b/benchmark/find_benchmark.cc @@ -0,0 +1,173 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +enum QueryType { + FIND, + COUNT, +}; + +/* + * Benchmark for looking up entries by their key. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double dummy); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + int QueryWorldCount(); + int QueryWorldFind(); + + const TestGenerator data_type_; + const size_t num_entities_; + const QueryType query_type_; + + PhTree tree_; + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, query_type_(QUERY_TYPE) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + int num_inner = 0; + int num_found = 0; + switch (query_type_) { + case COUNT: { + for (auto _ : state) { + num_found += QueryWorldCount(); + ++num_inner; + } + break; + } + case FIND: { + for (auto _ : state) { + num_found += QueryWorldFind(); + ++num_inner; + } + break; + } + } + // Moved outside of the loop because EXPENSIVE + state.counters["total_result_count"] += num_found; + state.counters["query_rate"] += num_inner; + state.counters["result_rate"] += num_found; + state.counters["avg_result_count"] += num_found; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, 
num_entities_, 0, GLOBAL_MAX); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +int IndexBenchmark::QueryWorldCount() { + static int pos = 0; + pos = (pos + 1) % num_entities_; + bool found; + if (pos % 2 == 0) { + found = tree_.count(points_.at(pos)); + assert(found); + } else { + int x = pos % GLOBAL_MAX; + PhPoint p = PhPoint({x, x, x}); + found = tree_.count(p); + } + return found; +} + +template +int IndexBenchmark::QueryWorldFind() { + static int pos = 0; + pos = (pos + 1) % num_entities_; + bool found; + if (pos % 2 == 0) { + // This should always be a match + found = tree_.find(points_.at(pos)) != tree_.end(); + assert(found); + } else { + // This should rarely be a match + int x = pos % GLOBAL_MAX; + PhPoint p = PhPoint({x, x, x}); + found = tree_.find(p) != tree_.end(); + } + return found; +} + +} // namespace + +template +void PhTree3DCount(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, QueryType::COUNT> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DFind(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, QueryType::FIND> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, function_to_call +BENCHMARK_CAPTURE(PhTree3DCount, COUNT, 0.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DFind, FIND, 0.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/hd_erase_d_benchmark.cc b/benchmark/hd_erase_d_benchmark.cc new file mode 100644 index 00000000..f2650c12 --- /dev/null +++ b/benchmark/hd_erase_d_benchmark.cc @@ -0,0 +1,146 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; + +/* + * Benchmark for removing entries. 
+ */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, PhTreeD& tree); + void Remove(benchmark::State& state, PhTreeD& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new PhTreeD(); + Insert(state, *tree); + state.ResumeTiming(); + + Remove(state, *tree); + + state.PauseTiming(); + // avoid measuring deallocation + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_remove_count"] = benchmark::Counter(0); + state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], (int)i); + } +} + +template +void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { + size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { + n += tree.erase(points_[i]); + } + + state.counters["total_remove_count"] += n; + state.counters["remove_rate"] += n; +} + +} // 
namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +BENCHMARK_CAPTURE(PhTree6D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/hd_insert_d_benchmark.cc b/benchmark/hd_insert_d_benchmark.cc new file mode 100644 index 00000000..b2f8d9c7 --- /dev/null +++ b/benchmark/hd_insert_d_benchmark.cc @@ -0,0 +1,132 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +/* + * Benchmark for adding entries to the index. + */ +template +class IndexBenchmark { + using Index = PhTreeD; + + public: + explicit IndexBenchmark(benchmark::State& state); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, Index& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new Index(); + state.ResumeTiming(); + + Insert(state, *tree); + + // we do this to avoid measuring deallocation + state.PauseTiming(); + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_put_count"] = benchmark::Counter(0); + state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + PhPointD& p = points_[i]; + tree.emplace(p, (int)i); + } + + state.counters["total_put_count"] += num_entities_; + state.counters["put_rate"] += num_entities_; +} + +} // namespace + +template +void
PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +BENCHMARK_CAPTURE(PhTree6D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/hd_knn_d_benchmark.cc b/benchmark/hd_knn_d_benchmark.cc new file mode 100644 index 00000000..44ecad2a --- /dev/null +++ b/benchmark/hd_knn_d_benchmark.cc @@ -0,0 +1,152 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; + +/* + * Benchmark for k-nearest-neighbour queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, PhPointD& center); + void CreateQuery(PhPointD& center); + + const TestGenerator data_type_; + const size_t num_entities_; + const size_t knn_result_size_; + + PhTreeD tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(2))} +, num_entities_(state.range(0)) +, knn_result_size_(state.range(1)) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + PhPointD center; + CreateQuery(center); + state.ResumeTiming(); + + QueryWorld(state, center); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); + } + + state.counters["total_query_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, 
benchmark::Counter::kAvgIterations); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, PhPointD& center) { + size_t n = 0; + for (auto q = tree_.begin_knn_query(knn_result_size_, center, DistanceEuclidean()); + q != tree_.end(); + ++q) { + ++n; + } + + state.counters["total_query_count"] += 1; + state.counters["query_rate"] += 1; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(PhPointD& center) { + for (dimension_t d = 0; d < DIM; ++d) { + center[d] = cube_distribution_(random_engine_) * GLOBAL_MAX; + } +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +BENCHMARK_CAPTURE(PhTree6D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/query_d_benchmark.cc b/benchmark/hd_query_d_benchmark.cc similarity index 62% rename from phtree/benchmark/query_d_benchmark.cc rename to benchmark/hd_query_d_benchmark.cc index 57fd2268..25f268ce 100644 --- 
a/phtree/benchmark/query_d_benchmark.cc +++ b/benchmark/hd_query_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -44,27 +44,20 @@ using TreeType = PhTreeD; template class IndexBenchmark { public: - IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - double avg_query_result_size_ = 100); - + IndexBenchmark(benchmark::State& state, double avg_query_result_size_ = 100); void Benchmark(benchmark::State& state); private: void SetupWorld(benchmark::State& state); - void QueryWorld(benchmark::State& state, BoxType& query_box); - void CreateQuery(BoxType& query_box); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const double avg_query_result_size_; - constexpr int query_endge_length() { - return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + constexpr int query_edge_length() { + return (int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM)); }; TreeType tree_; @@ -75,17 +68,14 @@ class IndexBenchmark { template IndexBenchmark::IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - double avg_query_result_size) -: data_type_{data_type} -, num_entities_(num_entities) + benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) , avg_query_result_size_(avg_query_result_size) , tree_{} , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} -, points_(num_entities) { +, points_(state.range(0)) { logging::SetupDefaultLogging(); SetupWorld(state); } @@ -106,8 +96,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); @@ -145,7 +135,7 @@ size_t Count_MMFE(TreeType& tree, BoxType& query_box) { template void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { - int n = 0; + size_t n = 0; switch (QUERY_TYPE) { case MIN_MAX_ITER: n = Count_MMI(tree_, query_box); @@ -163,7 +153,7 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, BoxTyp template void IndexBenchmark::CreateQuery(BoxType& query_box) { - int length = query_endge_length(); + int length = query_edge_length(); // scale to ensure query lies within boundary double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { @@ -177,71 +167,70 @@ void IndexBenchmark::CreateQuery(BoxType& query_box) { } // namespace template -void PhTree3D_MMI(benchmark::State& state, Arguments&&... 
arguments) { - IndexBenchmark<3, MIN_MAX_ITER> benchmark{state, arguments...}; +void PhTree6D_FE(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6, MIN_MAX_FOR_EACH> benchmark{state}; benchmark.Benchmark(state); } template -void PhTree3D_MMFE(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, MIN_MAX_FOR_EACH> benchmark{state, arguments...}; +void PhTree10D_FE(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10, MIN_MAX_FOR_EACH> benchmark{state}; benchmark.Benchmark(state); } -// index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); +template +void PhTree20D_FE(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20, MIN_MAX_FOR_EACH> benchmark{state}; + benchmark.Benchmark(state); +} -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); +template +void PhTree6D_IT(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); +template +void PhTree10D_IT(benchmark::State& state, Arguments&&...) 
{ + IndexBenchmark<10, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); +template +void PhTree20D_IT(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} // index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000) +BENCHMARK_CAPTURE(PhTree6D_FE, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000) +BENCHMARK_CAPTURE(PhTree10D_FE, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); -// index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000) +BENCHMARK_CAPTURE(PhTree20D_FE, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000) +BENCHMARK_CAPTURE(PhTree6D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000) +BENCHMARK_CAPTURE(PhTree10D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, 
{TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) +BENCHMARK_CAPTURE(PhTree20D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/benchmark/insert_benchmark.cc b/benchmark/insert_benchmark.cc new file mode 100644 index 00000000..89c79ba1 --- /dev/null +++ b/benchmark/insert_benchmark.cc @@ -0,0 +1,156 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +enum InsertionType { + INSERT, + EMPLACE, + SQUARE_BR, +}; + +/* + * Benchmark for adding entries to the index. 
+ */ +template +class IndexBenchmark { + using Index = PhTree; + + public: + explicit IndexBenchmark(benchmark::State& state); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + + void Insert(benchmark::State& state, Index& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new Index(); + state.ResumeTiming(); + + Insert(state, *tree); + + // we do this top avoid measuring deallocation + state.PauseTiming(); + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_put_count"] = benchmark::Counter(0); + state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + switch (TYPE) { + case INSERT: { + for (size_t i = 0; i < num_entities_; ++i) { + tree.insert(points_[i], (int)i); + } + break; + } + case EMPLACE: { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], (int)i); + } + break; + } + case SQUARE_BR: { + for (size_t i = 0; i < num_entities_; ++i) { + tree[points_[i]] = (int)i; + } + break; + } + } + + state.counters["total_put_count"] += num_entities_; + state.counters["put_rate"] += num_entities_; +} + +} // namespace + +template +void PhTree3D_INS(benchmark::State& state, 
Arguments&&...) { + IndexBenchmark<3, INSERT> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_EMP(benchmark::State& state, Arguments&&...) { + IndexBenchmark<3, EMPLACE> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_SQB(benchmark::State& state, Arguments&&...) { + IndexBenchmark<3, SQUARE_BR> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, function_to_call +BENCHMARK_CAPTURE(PhTree3D_INS, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 10 * 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_EMP, EMPLACE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 10 * 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_SQB, SQUARE_BR, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 10 * 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/insert_box_d_benchmark.cc b/benchmark/insert_box_d_benchmark.cc similarity index 57% rename from phtree/benchmark/insert_box_d_benchmark.cc rename to benchmark/insert_box_d_benchmark.cc index 817e848d..66e7b83c 100644 --- a/phtree/benchmark/insert_box_d_benchmark.cc +++ b/benchmark/insert_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include @@ -32,25 +32,28 @@ const double BOX_LEN = 10; */ template class IndexBenchmark { + using Index = PhTreeBoxD; + public: - IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); + explicit IndexBenchmark(benchmark::State& state); void Benchmark(benchmark::State& state); private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTreeBoxD& tree); + void Insert(benchmark::State& state, Index& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::vector> boxes_; }; template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, TestGenerator data_type, int num_entities) -: data_type_{data_type}, num_entities_(num_entities), boxes_(num_entities) { +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, boxes_(state.range(0)) { logging::SetupDefaultLogging(); SetupWorld(state); } @@ -59,7 +62,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTreeBoxD(); + auto* tree = new Index(); state.ResumeTiming(); Insert(state, *tree); @@ -83,10 +86,10 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State& state, PhTreeBoxD& tree) { - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + for (size_t i = 0; i < num_entities_; ++i) { PhBoxD& p = boxes_[i]; - tree.emplace(p, i); + tree.emplace(p, (int)i); } state.counters["total_put_count"] += num_entities_; @@ -96,37 +99,15 @@ void IndexBenchmark::Insert(benchmark::State& state, PhTreeBoxD& } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; +void PhTree3D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<3> benchmark{state}; benchmark.Benchmark(state); } // index type, scenario name, data_generator, num_entities -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D, INS_CU_1K, TestGenerator::CUBE, 1000)->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_10K, TestGenerator::CUBE, 10000)->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_1M, TestGenerator::CUBE, 1000000)->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_10M, TestGenerator::CUBE, 10000000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CL_1K, TestGenerator::CLUSTER, 1000)->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CL_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CL_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CL_10M, TestGenerator::CLUSTER, 10000000) +BENCHMARK_CAPTURE(PhTree3D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 10 * 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/insert_d_benchmark.cc b/benchmark/insert_d_benchmark.cc similarity index 50% rename from phtree/benchmark/insert_d_benchmark.cc rename to benchmark/insert_d_benchmark.cc index 7ef06a36..eccce1cf 100644 --- a/phtree/benchmark/insert_d_benchmark.cc +++ b/benchmark/insert_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include @@ -26,30 +26,39 @@ namespace { const double GLOBAL_MAX = 10000; +enum InsertionType { + INSERT, + EMPLACE, + SQUARE_BR, +}; + /* * Benchmark for adding entries to the index. */ template class IndexBenchmark { + using Index = PhTreeD; + public: - IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); + explicit IndexBenchmark(benchmark::State& state); void Benchmark(benchmark::State& state); private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTreeD& tree); + void Insert(benchmark::State& state, Index& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::vector> points_; }; template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, TestGenerator data_type, int num_entities) -: data_type_{data_type}, num_entities_(num_entities), points_(num_entities) { +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, points_(state.range(0)) { logging::SetupDefaultLogging(); SetupWorld(state); } @@ -58,7 +67,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTreeD(); + auto* tree = new Index(); state.ResumeTiming(); Insert(state, *tree); @@ -82,10 +91,10 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State& state, PhTreeD& tree) { - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + for (size_t i = 0; i < num_entities_; ++i) { PhPointD& p = points_[i]; - tree.emplace(p, i); + tree.emplace(p, (int)i); } state.counters["total_put_count"] += num_entities_; @@ -95,73 +104,53 @@ void IndexBenchmark::Insert(benchmark::State& state, 
PhTreeD& tre } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; +void PhTree3D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<3> benchmark{state}; benchmark.Benchmark(state); } template -void PhTree6D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<6> benchmark{state, arguments...}; +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; benchmark.Benchmark(state); } template -void PhTree10D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<10> benchmark{state, arguments...}; +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; benchmark.Benchmark(state); } template -void PhTree20D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<20> benchmark{state, arguments...}; +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; benchmark.Benchmark(state); } // index type, scenario name, data_generator, num_entities -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D, INS_CU_1K, TestGenerator::CUBE, 1000)->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_10K, TestGenerator::CUBE, 10000)->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_1M, TestGenerator::CUBE, 1000000)->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_10M, TestGenerator::CUBE, 10000000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CL_1K, TestGenerator::CLUSTER, 1000)->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CL_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CL_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - 
-BENCHMARK_CAPTURE(PhTree3D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CL_10M, TestGenerator::CLUSTER, 10000000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree6D, INS_CL_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree6D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree10D, INS_CL_100K, TestGenerator::CLUSTER, 100000) +// BENCHMARK_CAPTURE(PhTree3D, INSERT, 0) +//->RangeMultiplier(10) +//->Ranges({{1000, 10 * 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) +//->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 10 * 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree10D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) +BENCHMARK_CAPTURE(PhTree6D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{100 * 1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree20D, INS_CL_100K, TestGenerator::CLUSTER, 100000) +BENCHMARK_CAPTURE(PhTree10D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{100 * 1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree20D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) +BENCHMARK_CAPTURE(PhTree20D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{100 * 1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/knn_d_benchmark.cc b/benchmark/knn_d_benchmark.cc similarity index 96% rename from phtree/benchmark/knn_d_benchmark.cc rename to benchmark/knn_d_benchmark.cc index 7c56b852..dcf5abf1 100644 --- a/phtree/benchmark/knn_d_benchmark.cc +++ b/benchmark/knn_d_benchmark.cc @@ -13,8 
+13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -44,8 +44,8 @@ class IndexBenchmark { void CreateQuery(PhPointD& center); const TestGenerator data_type_; - const int num_entities_; - const double knn_result_size_; + const size_t num_entities_; + const size_t knn_result_size_; PhTreeD tree_; std::default_random_engine random_engine_; @@ -82,8 +82,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/phtree/benchmark/logging.h b/benchmark/logging.h similarity index 75% rename from phtree/benchmark/logging.h rename to benchmark/logging.h index 14b7ae68..64573099 100644 --- a/phtree/benchmark/logging.h +++ b/benchmark/logging.h @@ -22,11 +22,34 @@ constexpr auto kInternalLoggerName = "internal"; // Sets up spdlog for internal and external. If you need to do some logging before doing this // call, use instead CaptureLogMessagesToBufferSink()/SetupLoggingAndFlushBuffer. -void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level); +void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level) { + auto& console_sink = sinks.emplace_back(std::make_shared()); + console_sink->set_level(log_level); + + // Find the minimum log level, in case one of the sinks passed to us has a lower log level. 
+ const auto& sink_with_lowest_log_level = *std::min_element( + sinks.begin(), + sinks.end(), + [](const spdlog::sink_ptr& a, const spdlog::sink_ptr& b) -> bool { + return a->level() < b->level(); + }); + spdlog::level::level_enum min_log_level = + std::min(sink_with_lowest_log_level->level(), log_level); + + // Create the external logger, worker logger and the internal (default) logger from the same log + // sinks. Each logsink can use `GetLoggerTypeFromMessage` to determine which logger a message + // was logged to. + spdlog::set_default_logger( + std::make_shared(kInternalLoggerName, sinks.begin(), sinks.end())); + spdlog::set_level(min_log_level); + spdlog::flush_on(min_log_level); +} // Sets up default logging typically used for tests/benchmarks. Also used for default // initialization if the logging hasn't been initialized before the first logging line. -void SetupDefaultLogging(); +void SetupDefaultLogging() { + SetupLogging({}, spdlog::level::warn); +} template inline void log( diff --git a/phtree/benchmark/query_benchmark.cc b/benchmark/query_benchmark.cc similarity index 62% rename from phtree/benchmark/query_benchmark.cc rename to benchmark/query_benchmark.cc index b0f50f39..5fa89813 100644 --- a/phtree/benchmark/query_benchmark.cc +++ b/benchmark/query_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -33,27 +33,22 @@ const int GLOBAL_MAX = 10000; template class IndexBenchmark { public: - IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - double avg_query_result_size_); + IndexBenchmark(benchmark::State& state, double avg_query_result_size_); void Benchmark(benchmark::State& state); private: void SetupWorld(benchmark::State& state); - void QueryWorld(benchmark::State& state, PhBox& query); - void CreateQuery(PhBox& query); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const double avg_query_result_size_; - constexpr int query_endge_length() { - return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + constexpr int query_edge_length() { + return ( + int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; PhTree tree_; @@ -63,17 +58,14 @@ class IndexBenchmark { }; template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - double avg_query_result_size) -: data_type_{data_type} -, num_entities_(num_entities) +IndexBenchmark::IndexBenchmark(benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) , avg_query_result_size_(avg_query_result_size) +, tree_{} , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} -, points_(num_entities) { +, points_(state.range(0)) { logging::SetupDefaultLogging(); SetupWorld(state); } @@ -94,15 +86,13 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for 
(size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } - state.counters["total_result_count"] = benchmark::Counter(0); state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - logging::info("World setup complete."); } @@ -113,7 +103,6 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, PhBox& query_ ++n; } - state.counters["total_result_count"] += n; state.counters["query_rate"] += 1; state.counters["result_rate"] += n; state.counters["avg_result_count"] += n; @@ -121,12 +110,11 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, PhBox& query_ template void IndexBenchmark::CreateQuery(PhBox& query_box) { - int length = query_endge_length(); - // scale to ensure query lies within boundary - double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + int length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - s = s * scale; + scalar_64_t s = (scalar_64_t)(shift * cube_distribution_(random_engine_)); query_box.min()[d] = s; query_box.max()[d] = s + length; } @@ -141,31 +129,9 @@ void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { } // index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000, 100.0) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000, 100.0) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000, 100.0) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100.0) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000, 100.0) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100.0) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000, 100.0) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100.0) +BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/query_box_d_benchmark.cc b/benchmark/query_box_d_benchmark.cc similarity index 61% rename from phtree/benchmark/query_box_d_benchmark.cc rename to benchmark/query_box_d_benchmark.cc index ecd736a8..e88bf00f 100644 --- a/phtree/benchmark/query_box_d_benchmark.cc +++ b/benchmark/query_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -45,27 +45,22 @@ using TreeType = PhTreeBoxD; template class IndexBenchmark { public: - IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - double avg_query_result_size_ = 100); + IndexBenchmark(benchmark::State& state, double avg_query_result_size_ = 100); void Benchmark(benchmark::State& state); private: void SetupWorld(benchmark::State& state); - void QueryWorld(benchmark::State& state, BoxType& query_box); - void CreateQuery(BoxType& query_box); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const double avg_query_result_size_; - constexpr int query_endge_length() { - return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + constexpr int query_edge_length() { + return ( + int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM)); }; TreeType tree_; @@ -76,16 +71,14 @@ class IndexBenchmark { template IndexBenchmark::IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - double avg_query_result_size) -: data_type_{data_type} -, num_entities_(num_entities) + benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) , avg_query_result_size_(avg_query_result_size) +, tree_{} , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} -, boxes_(num_entities) { +, boxes_(state.range(0)) { logging::SetupDefaultLogging(); SetupWorld(state); } @@ -106,15 +99,13 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(boxes_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(boxes_[i], (int)i); } - state.counters["total_result_count"] = benchmark::Counter(0); state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - logging::info("World setup complete."); } @@ -145,7 +136,7 @@ size_t Count_MMFE(TreeType& tree, BoxType& query_box) { template void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { - int n = 0; + size_t n = 0; switch (QUERY_TYPE) { case MIN_MAX_ITER: n = Count_MMI(tree_, query_box); @@ -155,7 +146,6 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, BoxTyp break; } - state.counters["total_result_count"] += n; state.counters["query_rate"] += 1; state.counters["result_rate"] += n; state.counters["avg_result_count"] += n; @@ -163,12 +153,12 @@ void 
IndexBenchmark::QueryWorld(benchmark::State& state, BoxTyp template void IndexBenchmark::CreateQuery(BoxType& query_box) { - int length = query_endge_length(); + int length = query_edge_length(); // scale to ensure query lies within boundary - double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - s = s * scale; + // apply the shift exactly once; scaling twice would skew all queries towards the origin + auto s = shift * cube_distribution_(random_engine_); query_box.min()[d] = s; query_box.max()[d] = s + length; } @@ -189,59 +179,16 @@ void PhTree3D_MMFE(benchmark::State& state, Arguments&&... arguments) { } // index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_1K, TestGenerator::CUBE, 
1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, query_result_size -// PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000) +// PhTree +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) +// PhTree +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/benchmark/query_d_benchmark.cc b/benchmark/query_d_benchmark.cc new file mode 100644 index 00000000..2c6ad3ec --- /dev/null +++ b/benchmark/query_d_benchmark.cc @@ -0,0 +1,190 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +enum QueryType { MIN_MAX_ITER, MIN_MAX_FOR_EACH }; + +template +using BoxType = PhBoxD; + +template +using PointType = PhPointD; + +template +using TreeType = PhTreeD; + +/* + * Benchmark for window queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_ = 100); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, BoxType& query_box); + void CreateQuery(BoxType& query_box); + + const TestGenerator data_type_; + const size_t num_entities_; + const double avg_query_result_size_; + + constexpr int query_edge_length() { + return ( + int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM)); + }; + + TreeType tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + BoxType query_box; + CreateQuery(query_box); + state.ResumeTiming(); + + QueryWorld(state, query_box); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); + } + + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +struct Counter { + void operator()(PointType, T&) { + ++n_; + } + + size_t n_ = 0; +}; + +template +size_t Count_MMI(TreeType& tree, BoxType& query_box) { + size_t n = 0; + for (auto q = tree.begin_query(query_box); q != tree.end(); ++q) { + ++n; + } + return n; +} + +template +size_t Count_MMFE(TreeType& tree, BoxType& query_box) { + Counter callback; + tree.for_each(query_box, callback); + return callback.n_; +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { + size_t n = 0; + switch (QUERY_TYPE) { + case 
MIN_MAX_ITER: + n = Count_MMI(tree_, query_box); + break; + case MIN_MAX_FOR_EACH: + n = Count_MMFE(tree_, query_box); + break; + } + + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(BoxType& query_box) { + int length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + for (dimension_t d = 0; d < DIM; ++d) { + auto s = shift * cube_distribution_(random_engine_); + query_box.min()[d] = s; + query_box.max()[d] = s + length; + } +} + +} // namespace + +template +void PhTree3D_MMI(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MIN_MAX_ITER> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D_MMFE(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, MIN_MAX_FOR_EACH> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +BENCHMARK_CAPTURE(PhTree3D_MMFE, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D_MMI, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/query_mm_box_d_benchmark.cc b/benchmark/query_mm_box_d_benchmark.cc similarity index 92% rename from phtree/benchmark/query_mm_box_d_benchmark.cc rename to benchmark/query_mm_box_d_benchmark.cc index 538e73d9..51dd6c72 100644 --- a/phtree/benchmark/query_mm_box_d_benchmark.cc +++ b/benchmark/query_mm_box_d_benchmark.cc @@ -68,7 +68,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + 
constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -144,7 +144,7 @@ struct CounterMultiMap { }; template -typename std::enable_if::type CountEntries( +typename std::enable_if::type CountEntries( TestMap& tree, const Query& query) { CounterTreeWithMap counter{query.box_, 0}; tree.for_each(query.box_, counter); @@ -152,7 +152,7 @@ typename std::enable_if::type CountEnt } template -int CountEntries(TestMap& tree, const Query& query) { +size_t CountEntries(TestMap& tree, const Query& query) { CounterMultiMap counter{query.box_, 0}; tree.for_each(query.box_, counter); return counter.n_; @@ -175,7 +175,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { template void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { - int n = CountEntries(tree_, query); + size_t n = CountEntries(tree_, query); state.counters["query_rate"] += 1; state.counters["result_rate"] += n; @@ -184,11 +184,13 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto x = cube_distribution_(random_engine_); - query.box_.min()[d] = x - radius; - query.box_.max()[d] = x + radius; + auto x = shift * cube_distribution_(random_engine_); + query.box_.min()[d] = x; + query.box_.max()[d] = x + length; } } diff --git a/phtree/benchmark/query_mm_d_benchmark.cc b/benchmark/query_mm_d_benchmark.cc similarity index 73% rename from phtree/benchmark/query_mm_d_benchmark.cc rename to benchmark/query_mm_d_benchmark.cc index 9e819450..ab0b4054 100644 --- a/phtree/benchmark/query_mm_d_benchmark.cc +++ b/benchmark/query_mm_d_benchmark.cc @@ -32,7 +32,7 @@ namespace { const double GLOBAL_MAX = 
10000; -enum Scenario { TREE_WITH_MAP, MULTI_MAP }; +enum Scenario { TREE_WITH_MAP, MULTI_MAP, MULTI_MAP_STD }; using TestPoint = PhPointD<3>; using QueryBox = PhBoxD<3>; @@ -41,8 +41,6 @@ using BucketType = std::set; struct Query { QueryBox box{}; - TestPoint center{}; - double radius{}; }; template @@ -52,7 +50,10 @@ template using TestMap = typename std::conditional_t< SCENARIO == TREE_WITH_MAP, PhTreeD>, - PhTreeMultiMapD>>; + typename std::conditional_t< + SCENARIO == MULTI_MAP, + PhTreeMultiMapD, b_plus_tree_hash_set>, + PhTreeMultiMapD, std::unordered_set>>>; template class IndexBenchmark { @@ -70,7 +71,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -88,7 +89,7 @@ IndexBenchmark::IndexBenchmark(benchmark::State& state, double av , tree_{} , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} -, points_(num_entities_) { +, points_(state.range(0)) { logging::SetupDefaultLogging(); SetupWorld(state); } @@ -120,46 +121,49 @@ void InsertEntry( tree.emplace(point, data); } -bool CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { - const auto& point = entity; - double dx = center[0] - point[0]; - double dy = center[1] - point[1]; - double dz = center[2] - point[2]; - return dx * dx + dy * dy + dz * dz <= radius * radius; +template +void InsertEntry( + TestMap& tree, + const PhPointD& point, + const payload_t& data) { + tree.emplace(point, data); } struct CounterTreeWithMap { void operator()(const PhPointD<3>&, const BucketType& value) { for (auto& x : value) { - // n_ += (x.entity_id_ >= 0); - n_ += CheckPosition(x, center_, radius_); + (void)x; + ++n_; } } - const TestPoint& center_; - double radius_; size_t n_; }; struct CounterMultiMap { - void operator()(const PhPointD<3>&, const payload_t& value) { - n_ += 
CheckPosition(value, center_, radius_); + void operator()(const PhPointD<3>&, const payload_t&) { + ++n_; } - const TestPoint& center_; - double radius_; size_t n_; }; template -typename std::enable_if::type CountEntries( +typename std::enable_if::type CountEntries( TestMap& tree, const Query& query) { - CounterTreeWithMap counter{query.center, query.radius, 0}; + CounterTreeWithMap counter{0}; + tree.for_each(query.box, counter); + return counter.n_; +} + +template +size_t CountEntries(TestMap& tree, const Query& query) { + CounterMultiMap counter{0}; tree.for_each(query.box, counter); return counter.n_; } template -int CountEntries(TestMap& tree, const Query& query) { - CounterMultiMap counter{query.center, query.radius, 0}; +size_t CountEntries(TestMap& tree, const Query& query) { + CounterMultiMap counter{0}; tree.for_each(query.box, counter); return counter.n_; } @@ -181,7 +185,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { template void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { - int n = CountEntries(tree_, query); + size_t n = CountEntries(tree_, query); state.counters["query_rate"] += 1; state.counters["result_rate"] += n; @@ -190,39 +194,51 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto x = cube_distribution_(random_engine_); - query.box.min()[d] = x - radius; - query.box.max()[d] = x + radius; - query.center[d] = x; + auto x = shift * cube_distribution_(random_engine_); + query.box.min()[d] = x; + query.box.max()[d] = x + length; } - query.radius = radius; } } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { - IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; +void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; benchmark.Benchmark(state); } template -void PhTreeMultiMapM3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; +void PhTreeMultiMapStd3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MULTI_MAP_STD> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, avg_query_result_size -// PhTree -BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMap3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); // PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMapM3D, WQ_100, 100.0) +BENCHMARK_CAPTURE(PhTreeMultiMapStd3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTree with manual BUCKET management +BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/benchmark/query_mm_d_filter_benchmark.cc b/benchmark/query_mm_d_filter_benchmark.cc new file mode 100644 index 00000000..28939d08 --- /dev/null +++ b/benchmark/query_mm_d_filter_benchmark.cc @@ -0,0 +1,352 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +/* + * Benchmark for querying entries in multi-map implementations. + * This benchmark uses a SPHERE-shaped query! + */ +namespace { + +const double GLOBAL_MAX = 10000; + +enum Scenario { SPHERE_WQ, SPHERE, WQ, SPHERE_IT_WQ, LEGACY_WQ }; + +using TestPoint = PhPointD<3>; +using QueryBox = PhBoxD<3>; +using payload_t = TestPoint; +using BucketType = std::set; + +struct Query { + QueryBox box{}; + TestPoint center{}; + double radius{}; +}; + +template +using CONVERTER = ConverterIEEE; + +template +using DistanceFn = DistanceEuclidean; + +template +using TestMap = PhTreeMultiMapD>; + +template < + typename CONVERTER = ConverterIEEE<3>, + typename DISTANCE = DistanceEuclidean> +class FilterSphereLegacy { + using KeyExternal = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + using ScalarExternal = typename CONVERTER::ScalarExternal; + + static constexpr auto DIM = CONVERTER::DimInternal; + + public: + FilterSphereLegacy( + const KeyExternal& center, + const ScalarExternal& radius, + CONVERTER converter = CONVERTER(), + DISTANCE distance_function = DISTANCE()) + : center_external_{center} + , center_internal_{converter.pre(center)} + , radius_{radius} + 
, converter_{converter} + , distance_function_{distance_function} {}; + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal&, const BucketT&) const { + // We simulate a legacy filter by returning 'true' for all buckets + return true; + } + + template + [[nodiscard]] bool IsBucketEntryValid(const KeyInternal& key, const T&) const { + KeyExternal point = converter_.post(key); + return distance_function_(center_external_, point) <= radius_; + } + + /* + * Calculate whether AABB encompassing all possible points in the node intersects with the + * sphere. + */ + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // we always want to traverse the root node (bits_to_ignore == 64) + + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + KeyInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // calculate lower and upper bound for dimension for given node + ScalarInternal lo = prefix[i] & node_min_bits; + ScalarInternal hi = prefix[i] | node_max_bits; + + // choose value closest to center for dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); + } + + KeyExternal closest_point = converter_.post(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + private: + const KeyExternal center_external_; + const KeyInternal center_internal_; + const ScalarExternal radius_; + const CONVERTER converter_; + const DISTANCE distance_function_; +}; + +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, const Query& query); + void CreateQuery(Query& query); + + const TestGenerator data_type_; + const size_t 
num_entities_; + const double avg_query_result_size_; + + constexpr double query_edge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + }; + + TestMap tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities_) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + Query query{}; + for (auto _ : state) { + state.PauseTiming(); + CreateQuery(query); + state.ResumeTiming(); + + QueryWorld(state, query); + } +} + +template +void InsertEntry(TestMap& tree, const PhPointD& point, const payload_t& data) { + tree.emplace(point, data); +} + +bool CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { + const auto& point = entity; + double dx = center[0] - point[0]; + double dy = center[1] - point[1]; + double dz = center[2] - point[2]; + return dx * dx + dy * dy + dz * dz <= radius * radius; +} + +struct CounterCheckPosition { + template + void operator()(const PhPointD<3>& p, const T&) { + n_ += CheckPosition(p, center_, radius_); + } + const TestPoint& center_; + double radius_; + size_t n_; +}; + +struct Counter { + void operator()(const PhPointD<3>&, const payload_t&) { + ++n_; + } + size_t n_; +}; + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + FilterMultiMapSphere filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + tree.for_each(query.box, counter, filter); + return counter.n_; +} + +template +typename std::enable_if::type 
CountEntries( + TestMap& tree, const Query& query) { + FilterMultiMapSphere filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + tree.for_each(counter, filter); + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + CounterCheckPosition counter{query.center, query.radius, 0}; + tree.for_each(query.box, counter); + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + FilterMultiMapSphere filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + for (auto it = tree.begin_query(query.box, filter); it != tree.end(); ++it) { + ++counter.n_; + } + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + // Legacy: use non-multi-map filter + FilterSphereLegacy filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + tree.for_each(query.box, counter, filter); + return counter.n_; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + // create data with about 10% duplicate coordinates + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX, 0.8); + for (size_t i = 0; i < num_entities_; ++i) { + InsertEntry(tree_, points_[i], points_[i]); + } + + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { + size_t n = CountEntries(tree_, query); + + state.counters["query_rate"] += 1; + 
state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(Query& query) { + double radius = query_edge_length() * 0.5; + // shrink the center range by the query diameter; the center is then offset by the radius so that the whole box lies within [0, GLOBAL_MAX] + double shift = (GLOBAL_MAX - (double)radius * 2) / GLOBAL_MAX; + for (dimension_t d = 0; d < DIM; ++d) { + auto x = radius + shift * cube_distribution_(random_engine_); + query.box.min()[d] = x - radius; + query.box.max()[d] = x + radius; + query.center[d] = x; + } + query.radius = radius; +} + +} // namespace + +template +void PhTree3DSphereWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::SPHERE_WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DSphere(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::SPHERE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DSphereITWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::SPHERE_IT_WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DLegacyWQ(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::LEGACY_WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, avg_query_result_size +BENCHMARK_CAPTURE(PhTree3DSphereWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DSphere, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DSphereITWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DLegacyWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_box_d_benchmark.cc b/benchmark/update_box_d_benchmark.cc similarity index 62% rename from phtree/benchmark/update_box_d_benchmark.cc rename to benchmark/update_box_d_benchmark.cc index ab825e26..918bbc4b 100644 --- a/phtree/benchmark/update_box_d_benchmark.cc +++ b/benchmark/update_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include @@ -24,12 +24,14 @@ using namespace improbable::phtree::phbenchmark; namespace { -constexpr int UPDATES_PER_ROUND = 1000; +constexpr size_t UPDATES_PER_ROUND = 1000; constexpr double MOVE_DISTANCE = 10; const double GLOBAL_MAX = 10000; const double BOX_LEN = 10; +enum UpdateType { RELOCATE, ERASE_BY_KEY }; + template using BoxType = PhBoxD; @@ -46,14 +48,12 @@ struct UpdateOp { /* * Benchmark for updating the position of entries. */ -template +template class IndexBenchmark { public: IndexBenchmark( benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round = UPDATES_PER_ROUND, + size_t updates_per_round = UPDATES_PER_ROUND, double move_distance = MOVE_DISTANCE); void Benchmark(benchmark::State& state); @@ -75,27 +75,23 @@ class IndexBenchmark { std::uniform_int_distribution<> entity_id_distribution_; }; -template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round, - double move_distance) -: data_type_{data_type} -, num_entities_(num_entities) +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, size_t updates_per_round, double move_distance) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) , updates_per_round_(updates_per_round) , move_distance_(move_distance) -, boxes_(num_entities) +, boxes_(num_entities_) , updates_(updates_per_round) , random_engine_{0} -, entity_id_distribution_{0, num_entities - 1} { +, entity_id_distribution_{0, static_cast(num_entities_ - 1)} { logging::SetupDefaultLogging(); SetupWorld(state); } -template -void IndexBenchmark::Benchmark(benchmark::State& state) { +template +void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); BuildUpdates(); @@ -105,12 +101,12 @@ void 
IndexBenchmark::Benchmark(benchmark::State& state) { } } -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); for (size_t i = 0; i < num_entities_; ++i) { - tree_.emplace(boxes_[i], i); + tree_.emplace(boxes_[i], (int)i); } state.counters["total_upd_count"] = benchmark::Counter(0); @@ -118,8 +114,8 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("World setup complete."); } -template -void IndexBenchmark::BuildUpdates() { +template +void IndexBenchmark::BuildUpdates() { for (auto& update : updates_) { int box_id = entity_id_distribution_(random_engine_); update.id_ = box_id; @@ -134,14 +130,37 @@ void IndexBenchmark::BuildUpdates() { } template -void IndexBenchmark::UpdateWorld(benchmark::State& state) { - size_t initial_tree_size = tree_.size(); +size_t UpdateByRelocate(TreeType& tree, std::vector>& updates) { size_t n = 0; - for (auto& update : updates_) { - size_t result_erase = tree_.erase(update.old_); - auto result_emplace = tree_.emplace(update.new_, update.id_); + for (auto& update : updates) { + n += tree.relocate(update.old_, update.new_); + } + return n; +} + +template +size_t UpdateByKey(TreeType& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + size_t result_erase = tree.erase(update.old_); + auto result_emplace = tree.emplace(update.new_, update.id_); n += result_erase == 1 && result_emplace.second; } + return n; +} + +template +void IndexBenchmark::UpdateWorld(benchmark::State& state) { + size_t initial_tree_size = tree_.size(); + size_t n = 0; + switch (UPDATE_TYPE) { + case UpdateType::ERASE_BY_KEY: + n = UpdateByKey(tree_, updates_); + break; + case UpdateType::RELOCATE: + n = UpdateByRelocate(tree_, updates_); + break; + } if (n != 
updates_.size()) { logging::error("Invalid update count: {}/{}", updates_.size(), n); @@ -159,37 +178,29 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace +template +void PhTreeRelocate3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, UpdateType::RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + template void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; + IndexBenchmark<3, UpdateType::ERASE_BY_KEY> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) +// PhTree with relocate() +BENCHMARK_CAPTURE(PhTreeRelocate3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) +// PhTree with erase()/emplace() +BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000},
{TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_d_benchmark.cc b/benchmark/update_d_benchmark.cc similarity index 66% rename from phtree/benchmark/update_d_benchmark.cc rename to benchmark/update_d_benchmark.cc index f358c564..c790c6a9 100644 --- a/phtree/benchmark/update_d_benchmark.cc +++ b/benchmark/update_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include @@ -29,7 +29,7 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; -enum UpdateType { ERASE_BY_KEY, ERASE_BY_ITER, EMPLACE_HINT }; +enum UpdateType { RELOCATE, ERASE_BY_KEY, ERASE_BY_ITER, EMPLACE_HINT }; template using PointType = PhPointD; @@ -52,9 +52,7 @@ class IndexBenchmark { public: IndexBenchmark( benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round = UPDATES_PER_ROUND, + size_t updates_per_round = UPDATES_PER_ROUND, std::vector move_distance = MOVE_DISTANCE); void Benchmark(benchmark::State& state); @@ -78,19 +76,15 @@ class IndexBenchmark { template IndexBenchmark::IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round, - std::vector move_distance) -: data_type_{data_type} -, num_entities_(num_entities) + benchmark::State& state, size_t updates_per_round, std::vector move_distance) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) , updates_per_round_(updates_per_round) , move_distance_(std::move(move_distance)) -, points_(num_entities) +, points_(num_entities_) , updates_(updates_per_round) , random_engine_{0} -, entity_id_distribution_{0, num_entities - 1} { +, entity_id_distribution_{0, static_cast(num_entities_ - 1)} { 
logging::SetupDefaultLogging(); SetupWorld(state); } @@ -136,6 +130,15 @@ void IndexBenchmark::BuildUpdates() { } } +template +size_t UpdateByRelocate(TreeType& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + n += tree.relocate(update.old_, update.new_); + } + return n; +} + template size_t UpdateByKey(TreeType& tree, std::vector>& updates) { size_t n = 0; @@ -190,6 +193,9 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { case UpdateType::EMPLACE_HINT: n = UpdateByIterHint(tree_, updates_); break; + case UpdateType::RELOCATE: + n = UpdateByRelocate(tree_, updates_); + break; } if (n != updates_.size()) { @@ -208,6 +214,12 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace +template +void PhTreeRelocate3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, UpdateType::RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + template void PhTreeEraseKey3D(benchmark::State& state, Arguments&&... arguments) { IndexBenchmark<3, UpdateType::ERASE_BY_KEY> benchmark{state, arguments...}; @@ -227,83 +239,28 @@ void PhTreeEmplaceHint3D(benchmark::State& state, Arguments&&... 
arguments) { } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) +// PhTree with relocate() +BENCHMARK_CAPTURE(PhTreeRelocate3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) +// PhTree with erase()/emplace +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) +// PhTree with erase(iter) +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + 
->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) +// PhTree with emplace_hint() +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - 
-BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_mm_box_d_benchmark.cc b/benchmark/update_mm_box_d_benchmark.cc similarity index 80% rename from phtree/benchmark/update_mm_box_d_benchmark.cc rename to benchmark/update_mm_box_d_benchmark.cc index 13f58b5e..271637ba 100644 --- a/phtree/benchmark/update_mm_box_d_benchmark.cc +++ b/benchmark/update_mm_box_d_benchmark.cc @@ -35,7 +35,7 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; const double BOX_LEN = 100; -enum Scenario { TREE_WITH_MAP, MULTI_MAP }; +enum Scenario { ERASE_EMPLACE, MM_BPT_RELOCATE, MM_SET_RELOCATE }; using payload_t = scalar_64_t; @@ -46,9 +46,16 @@ using CONVERTER = ConverterBoxIEEE; template using TestMap = typename std::conditional_t< - SCENARIO == TREE_WITH_MAP, + SCENARIO == ERASE_EMPLACE, PhTreeBoxD>, - PhTreeMultiMapBoxD>>; + typename std::conditional_t< + SCENARIO == MM_BPT_RELOCATE, + PhTreeMultiMapBoxD< + DIM, + payload_t, + CONVERTER, + b_plus_tree_hash_set>, + PhTreeMultiMapBoxD, std::set>>>; template struct UpdateOp { @@ -112,19 +119,25 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void InsertEntry( - TestMap& tree, const PhBoxD& point, payload_t data) { + TestMap& tree, const PhBoxD& point, payload_t data) { BucketType& bucket = tree.emplace(point).first; bucket.emplace(data); } template void InsertEntry( - TestMap& tree, const PhBoxD& point, payload_t data) { + TestMap& tree, const PhBoxD& point, payload_t data) { + tree.emplace(point, data); +} + +template +void InsertEntry( + TestMap& tree, const PhBoxD& point, payload_t data) { tree.emplace(point, data); } template -typename std::enable_if::type UpdateEntry( +typename std::enable_if::type UpdateEntry( TestMap& tree, 
std::vector>& updates) { size_t n = 0; for (auto& update : updates) { @@ -151,7 +164,7 @@ typename std::enable_if::type Updat } template -typename std::enable_if::type UpdateEntry( +typename std::enable_if::type UpdateEntry( TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { @@ -202,7 +215,7 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { logging::error("Invalid update count: {}/{}", updates_.size(), n); } - if constexpr (SCENARIO == MULTI_MAP) { + if constexpr (SCENARIO == MM_BPT_RELOCATE) { (void)initial_tree_size; if (tree_.size() != num_entities_) { logging::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); @@ -222,26 +235,38 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; +void PhTreeBox3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::ERASE_EMPLACE> benchmark{state, arguments...}; benchmark.Benchmark(state); } template -void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; +void PhTreeMultiMapBox3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_BPT_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMultiMapStdBox3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::MM_SET_RELOCATE> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance // PhTree -BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) +BENCHMARK_CAPTURE(PhTreeBox3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); // PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMap3D, UPDATE_1000, UPDATES_PER_ROUND) +BENCHMARK_CAPTURE(PhTreeMultiMapBox3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap with std::set +BENCHMARK_CAPTURE(PhTreeMultiMapStdBox3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/phtree/benchmark/update_mm_d_benchmark.cc b/benchmark/update_mm_d_benchmark.cc similarity index 71% rename from phtree/benchmark/update_mm_d_benchmark.cc rename to benchmark/update_mm_d_benchmark.cc index f3149403..6c5cfa57 100644 --- a/phtree/benchmark/update_mm_d_benchmark.cc +++ b/benchmark/update_mm_d_benchmark.cc @@ -34,7 +34,7 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; -enum Scenario { TREE_WITH_MAP, MULTI_MAP }; +enum Scenario { ERASE_EMPLACE, MM_BPT_RELOCATE, MM_SET_RELOCATE, MM_SET_RELOCATE_IF }; using payload_t = scalar_64_t; @@ -48,9 +48,12 @@ using CONVERTER = ConverterIEEE; template using TestMap = typename std::conditional_t< - SCENARIO == TREE_WITH_MAP, + SCENARIO == ERASE_EMPLACE, PhTreeD>, - PhTreeMultiMapD>>; + typename std::conditional_t< + SCENARIO == MM_BPT_RELOCATE, + PhTreeMultiMapD, b_plus_tree_hash_set>, + PhTreeMultiMapD, std::set>>>; template struct UpdateOp { @@ -114,19 +117,25 @@ void
IndexBenchmark::Benchmark(benchmark::State& state) { template void InsertEntry( - TestMap& tree, const PointType& point, payload_t data) { + TestMap& tree, const PointType& point, payload_t data) { BucketType& bucket = tree.emplace(point).first; bucket.emplace(data); } template void InsertEntry( - TestMap& tree, const PointType& point, payload_t data) { + TestMap& tree, const PointType& point, payload_t data) { + tree.emplace(point, data); +} + +template +void InsertEntry( + TestMap& tree, const PointType& point, payload_t data) { tree.emplace(point, data); } template -typename std::enable_if::type UpdateEntry( +typename std::enable_if::type UpdateEntry( TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { @@ -138,8 +147,6 @@ typename std::enable_if::type Updat continue; } - // TODO implement erase_hint or find_hint or something? - // Entry is already inserted, now remove old entry. auto iter_old_bucket = tree.find(update.old_); assert(iter_old_bucket != tree.end()); @@ -153,8 +160,10 @@ typename std::enable_if::type Updat } template -typename std::enable_if::type UpdateEntry( - TestMap& tree, std::vector>& updates) { +typename std::enable_if< + SCENARIO == Scenario::MM_BPT_RELOCATE || SCENARIO == Scenario::MM_SET_RELOCATE, + size_t>::type +UpdateEntry(TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { n += tree.relocate(update.old_, update.new_, update.id_); @@ -162,6 +171,17 @@ typename std::enable_if::type UpdateEnt return n; } +template +typename std::enable_if::type UpdateEntry( + TestMap& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + n += tree.relocate_if( + update.old_, update.new_, [&update](const payload_t& v) { return v == update.id_; }); + } + return n; +} + template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); @@ -215,26 +235,50 @@ void 
IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; +void PhTreeMMRelocateIfStdSet3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_SET_RELOCATE_IF> benchmark{state, arguments...}; benchmark.Benchmark(state); } template -void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; +void PhTreeMMRelocateBpt3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_BPT_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMMRelocateStdSet3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_SET_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMMEraseEmplace3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::ERASE_EMPLACE> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree -BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMMRelocateIfStdSet3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -// PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMap3D, UPDATE_1000, UPDATES_PER_ROUND) +// PhTreeMultiMap with b_plus_tree_hash_set +BENCHMARK_CAPTURE(PhTreeMMRelocateBpt3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap with std::set +BENCHMARK_CAPTURE(PhTreeMMRelocateStdSet3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTree (manual bucket handling) +BENCHMARK_CAPTURE(PhTreeMMEraseEmplace3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/ci/includes/bazel.sh b/ci/includes/bazel.sh deleted file mode 100755 index 79a70e5d..00000000 --- a/ci/includes/bazel.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -source ci/includes/os.sh - -# Main function that should be used by scripts sourcing this file.
-function runBazel() { - BAZEL_SUBCOMMAND="$1" - shift - "$(pwd)/tools/bazel" "$BAZEL_SUBCOMMAND" ${BAZEL_CI_CONFIG:-} "$@" -} - -function getBazelVersion() { - echo "4.2.2" -} diff --git a/ci/linting/buildifier.sh b/ci/linting/buildifier.sh index 1be7b1c3..1344e2e3 100755 --- a/ci/linting/buildifier.sh +++ b/ci/linting/buildifier.sh @@ -4,7 +4,6 @@ set -x -e -u -o pipefail cd "$(dirname "$0")/../../" -source ci/includes/bazel.sh source ci/includes/os.sh MAYBEARG='-mode=check' @@ -16,9 +15,9 @@ if [ $# -eq 1 ]; then fi # Ensure Bazel is installed. -runBazel version +bazel version -if runBazel run buildifier -- ${MAYBEARG} -v $(find "$(pwd)/" \( -name BUILD -o -name WORKSPACE \) -type f); then +if bazel run buildifier -- ${MAYBEARG} -v $(find "$(pwd)/" \( -name BUILD -o -name WORKSPACE \) -type f); then echo -e "\033[0;32mAll BUILD and WORKSPACE files passed buildifier linting check.\033[0m" else echo -e "\033[0;31mThe above listed BUILD and WORKSPACE file(s) didn't pass the buildifier linting check!\033[0m" diff --git a/ci/linting/clang-format.sh b/ci/linting/clang-format.sh index cebf4a22..551151be 100755 --- a/ci/linting/clang-format.sh +++ b/ci/linting/clang-format.sh @@ -3,7 +3,6 @@ set -e -u -o pipefail source ci/includes/os.sh -source ci/includes/bazel.sh TARGETS="//..." EXCLUDED_TARGETS="" @@ -66,22 +65,22 @@ function generateAqueryTargetString() { function bazelLintTest() { # Use bazel to create patch files for all eligible source files. # Fail if any of the patch files are non-empty (i.e. lint was detected). - CLANG_FORMAT="$(clangFormatLocation)" runBazel build --config lint --output_groups=clang_format_test -- $(generateBuildTargetString) + CLANG_FORMAT="$(clangFormatLocation)" bazel build --config lint --output_groups=clang_format_test -- $(generateBuildTargetString) } function bazelLintFix() { # Use bazel to create patch files for all eligible source files. 
- CLANG_FORMAT="$(clangFormatLocation)" runBazel build --config lint --output_groups=clang_format_patches_only -- $(generateBuildTargetString) + CLANG_FORMAT="$(clangFormatLocation)" bazel build --config lint --output_groups=clang_format_patches_only -- $(generateBuildTargetString) # Find bazel-bin prefix. - BAZEL_BIN=$(runBazel info bazel-bin) + BAZEL_BIN=$(bazel info bazel-bin) # I.e. on Linux, this is `bazel-out/k8-gcc-opt/bin`. - PREFIX=${BAZEL_BIN#$(runBazel info execution_root)/} + PREFIX=${BAZEL_BIN#$(bazel info execution_root)/} # Use aquery to get the list of output files of the `CreatePatch` action, # Then strip the patch path down to that of its source file, and apply # the patch file generated by Bazel to the original source file. - CLANG_FORMAT="$(clangFormatLocation)" runBazel aquery --config lint --include_aspects --output_groups clang_format_patches_only "mnemonic(\"CreatePatch\", $(generateAqueryTargetString))" --output textproto \ + CLANG_FORMAT="$(clangFormatLocation)" bazel aquery --config lint --include_aspects --output_groups clang_format_patches_only "mnemonic(\"CreatePatch\", $(generateAqueryTargetString))" --output textproto \ `# Get relative paths to source files` \ `# perl used instead of grep --perl-regexp since grep macOS doesnt support it` \ | perl -ne "while(/(?<=exec_path: \"${PREFIX//\//\\/}\/).*\.patch_.+(?=\")/g){print \"\$&\n\";}" \ diff --git a/cmake/phtreeConfig.cmake.in b/cmake/phtreeConfig.cmake.in new file mode 100644 index 00000000..9c15f36a --- /dev/null +++ b/cmake/phtreeConfig.cmake.in @@ -0,0 +1,4 @@ +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") +check_required_components("@PROJECT_NAME@") diff --git a/examples/BUILD b/examples/BUILD index 56f61fe1..376c48d4 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -7,6 +7,6 @@ cc_binary( "//visibility:public", ], deps = [ - "//phtree", + "//:phtree", ], ) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 
370887f6..ce8a6792 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,6 +1,5 @@ cmake_minimum_required(VERSION 3.14) -project(Example) +project(phtree-examples) -set(SOURCE_FILES example.cc) -add_executable(Example ${SOURCE_FILES}) +add_executable(Example example.cc) target_link_libraries(Example phtree) diff --git a/examples/example.cc b/examples/example.cc index b0ceb5e9..aecbb049 100644 --- a/examples/example.cc +++ b/examples/example.cc @@ -14,11 +14,48 @@ * limitations under the License. */ -#include "../phtree/phtree.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include #include +#include using namespace improbable::phtree; +int relocate_example() { + //auto tree = PhTreeMultiMapD<2, int, ConverterIEEE<2>, std::unordered_set>(); + auto tree = PhTreeMultiMapD<2, int, ConverterMultiply<2, 1, 200>, std::unordered_set>(); + std::vector> vecPos; + int dim = 1000; + + int num = 30000; + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double)(rand() % dim), (double)(rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + + long T = 0; + int nT = 0; + while (true) { + auto t1 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {p[0] + 1, p[1] + 1}; + tree.relocate(p, newp, i, false); + p = newp; + } + auto t2 = std::chrono::high_resolution_clock::now(); + auto s = std::chrono::duration_cast(t2 - t1); + ++nT; + T += (long)s.count() / 1000; + std::cout << s.count() << " " << (T / nT) + << " msec/num= " << (s.count() / (double)num) << std::endl; + } + + return 0; +} + int main() { std::cout << "PH-Tree example with 3D `double` coordinates." << std::endl; PhPointD<3> p1({1, 1, 1}); @@ -55,4 +92,8 @@ int main() { std::cout << "ID at " << p4b << ": " << tree.find(p4b).second() << std::endl; std::cout << "Done." 
<< std::endl; -} \ No newline at end of file + + //relocate_example(); + + return 0; +} diff --git a/fuzzer/BUILD b/fuzzer/BUILD new file mode 100644 index 00000000..e4a7f162 --- /dev/null +++ b/fuzzer/BUILD @@ -0,0 +1,12 @@ +package(default_visibility = ["//visibility:private"]) + +#cc_binary( +# name = "b_plus_multimap_fuzzer", +# srcs = [ +# "b_plus_multimap_fuzzer.cpp", +# ], +# linkstatic = True, +# deps = [ +# "//:phtree", +# ], +#) diff --git a/fuzzer/README.md b/fuzzer/README.md new file mode 100644 index 00000000..482ef200 --- /dev/null +++ b/fuzzer/README.md @@ -0,0 +1,33 @@ +# Fuzzing + + +Requirements: + * `clang`. + * libFuzzer: https://github.com/google/fuzzing/blob/master/tutorial/libFuzzerTutorial.md + +Compile one of: +* `clang++ -g -std=c++17 -fsanitize=fuzzer fuzzer/b_plus_multimap_fuzzer.cc -I.` +* `clang++ -g -std=c++17 -fsanitize=fuzzer fuzzer/b_plus_map_fuzzer.cc -I.` +* `clang++ -g -std=c++17 -fsanitize=fuzzer fuzzer/b_plus_hash_map_fuzzer.cc -I.` + +Execute: +* `./a.out` +* `./a.out -minimize_crash=1 -runs=10000 /tmp/tmp.b521097a4f49` +* `./a.out /tmp/tmp.12345678/artifacts/minimized-from-185ecf42f208c2a7736a98ba0403f31868bcb681` + +To give an artifact path: +* `-artifact_prefix=/home/my-name/tmp/fuzz/artifacts/` + +## Bazel + +Fuzzing with bazel is possible but is currently disabled because it breaks `bazel build ...`. 
+ +* We would need to set `clang`/`clang++` as compiler (`gcc` would not work anymore) +* We would need to solve the problem that `bazel build ...` fails unless `-fsanitize=fuzzer` is set + +### Using a simple executable +Uncomment build rules in BUILD file, then: +`CC=clang bazel run //fuzzer:b_plus_multimap_fuzzer --config=fuzz` + +### Using the bazel cc_fuzz_test +https://github.com/bazelbuild/rules_fuzzing/blob/master/docs/guide.md diff --git a/fuzzer/b_plus_hash_map_fuzzer.cc b/fuzzer/b_plus_hash_map_fuzzer.cc new file mode 100644 index 00000000..5a1ea2b9 --- /dev/null +++ b/fuzzer/b_plus_hash_map_fuzzer.cc @@ -0,0 +1,122 @@ +/* + * Copyright 2023 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#include "include/phtree/common/b_plus_tree_hash_map.h" + +static volatile int Sink; + +using Instruction = std::uint8_t; +using Key = std::uint8_t; +using Value = std::uint8_t; + +constexpr bool PRINT = !true; + +void print() {} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) { + assert(Data); + + if (PRINT) { + std::cout << "TEST(PhTreeBptMapTest, FuzzTest1) {" << std::endl; + std::cout << " using Key = std::uint8_t;" << std::endl; + std::cout << " using Value = std::uint8_t;" << std::endl; + std::cout << " b_plus_tree_map tree{};" << std::endl; + } + + auto scopeguard = []() { std::cout << "};" << std::endl; }; + + improbable::phtree::b_plus_tree_hash_map tree; + std::map map; + + size_t pos = 0; + + while (pos + 4 < Size) { + Instruction inst = Data[pos++] % 4; + Key key = Data[pos++]; + Value value = Data[pos++]; + switch (inst) { + case 0: { + if (PRINT) + std::cout << " tree.emplace(" << (int)key << ", " << (int)value << ");" + << std::endl; + tree.emplace(key, value); + map.emplace(key, value); + break; + } + case 1: { + if (PRINT) + std::cout << " tree.erase(" << (int)key << ");" << std::endl; + tree.erase(key); + map.erase(key); + break; + } + case 2: { + if (PRINT) + std::cout << " auto it = tree.find(" << (int)key << ");" << std::endl; + auto it = tree.find(key); + if (it != tree.end()) { + if (PRINT) + std::cout << " tree.erase(it);" << std::endl; + tree.erase(it); + } + auto it2 = map.find(key); + if (it2 != map.end()) { + map.erase(it2); + } + break; + } + case 3: { + if (PRINT) + std::cout << " auto it = tree.lower_bound(" << (int)key << ");" << std::endl; + auto it = tree.lower_bound(key); + if (PRINT) + std::cout << " tree.emplace_hint(it, " << (int)key << ", " << (int)value << ");" + << std::endl; + tree.emplace_hint(it, key, value); + auto it2 = map.lower_bound(key); + map.emplace_hint(it2, key, value); + break; + } + default: + std::cout << 
"Unexpected instruction: " << inst << std::endl; + } + } + + tree._check(); + + for (auto& entry : map) { + const Key& vRef = entry.first; + Key vMap = tree.find(vRef)->first; + assert(vMap == vRef); + } + for (auto& entry : tree) { + Key v = entry.first; + const Key& vRef = map.find(v)->first; + Key vMap = tree.find(v)->first; + assert(vMap == vRef); + } + assert(tree.size() == map.size()); + + return 0; +} diff --git a/fuzzer/b_plus_map_fuzzer.cc b/fuzzer/b_plus_map_fuzzer.cc new file mode 100644 index 00000000..eca26a8f --- /dev/null +++ b/fuzzer/b_plus_map_fuzzer.cc @@ -0,0 +1,122 @@ +/* + * Copyright 2023 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#include "include/phtree/common/b_plus_tree_map.h" + +static volatile int Sink; + +using Instruction = std::uint8_t; +using Key = std::uint8_t; +using Value = std::uint8_t; + +constexpr bool PRINT = !true; + +void print() {} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) { + assert(Data); + + if (PRINT) { + std::cout << "TEST(PhTreeBptMapTest, FuzzTest1) {" << std::endl; + std::cout << " using Key = std::uint8_t;" << std::endl; + std::cout << " using Value = std::uint8_t;" << std::endl; + std::cout << " b_plus_tree_map tree{};" << std::endl; + } + + auto scopeguard = []() { std::cout << "};" << std::endl; }; + + improbable::phtree::b_plus_tree_map tree; + std::map map; + + size_t pos = 0; + + while (pos + 4 < Size) { + Instruction inst = Data[pos++] % 4; + Key key = Data[pos++]; + Value value = Data[pos++]; + switch (inst) { + case 0: { + if (PRINT) + std::cout << " tree.emplace(" << (int)key << ", " << (int)value << ");" + << std::endl; + tree.emplace(key, value); + map.emplace(key, value); + break; + } + case 1: { + if (PRINT) + std::cout << " tree.erase(" << (int)key << ");" << std::endl; + tree.erase(key); + map.erase(key); + break; + } + case 2: { + if (PRINT) + std::cout << " auto it = tree.find(" << (int)key << ");" << std::endl; + auto it = tree.find(key); + if (it != tree.end()) { + if (PRINT) + std::cout << " tree.erase(it);" << std::endl; + tree.erase(it); + } + auto it2 = map.find(key); + if (it2 != map.end()) { + map.erase(it2); + } + break; + } + case 3: { + if (PRINT) + std::cout << " auto it = tree.lower_bound(" << (int)key << ");" << std::endl; + auto it = tree.lower_bound(key); + if (PRINT) + std::cout << " tree.emplace_hint(it, " << (int)key << ", " << (int)value << ");" + << std::endl; + tree.emplace_hint(it, key, value); + auto it2 = map.lower_bound(key); + map.emplace_hint(it2, key, value); + break; + } + default: + std::cout << "Unexpected 
instruction: " << inst << std::endl; + } + } + + tree._check(); + + for (auto& entry : map) { + const Key& vRef = entry.first; + Key vMap = tree.find(vRef)->first; + assert(vMap == vRef); + } + for (auto& entry : tree) { + Key v = entry.first; + const Key& vRef = map.find(v)->first; + Key vMap = tree.find(v)->first; + assert(vMap == vRef); + } + assert(tree.size() == map.size()); + + return 0; +} diff --git a/fuzzer/b_plus_multimap_fuzzer.cc b/fuzzer/b_plus_multimap_fuzzer.cc new file mode 100644 index 00000000..0a8e1859 --- /dev/null +++ b/fuzzer/b_plus_multimap_fuzzer.cc @@ -0,0 +1,122 @@ +/* + * Copyright 2023 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#include "include/phtree/common/b_plus_tree_multimap.h" + +static volatile int Sink; + +using Instruction = std::uint8_t; +using Key = std::uint8_t; +using Value = std::uint8_t; + +constexpr bool PRINT = !true; + +void print() {} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) { + assert(Data); + + if (PRINT) { + std::cout << "TEST(PhTreeBptMulitmapTest, FuzzTest1) {" << std::endl; + std::cout << " using Key = std::uint8_t;" << std::endl; + std::cout << " using Value = std::uint8_t;" << std::endl; + std::cout << " b_plus_tree_multimap tree{};" << std::endl; + } + + auto scopeguard = []() { std::cout << "};" << std::endl; }; + + improbable::phtree::b_plus_tree_multimap tree; + std::multimap map; + + size_t pos = 0; + + while (pos + 4 < Size) { + Instruction inst = Data[pos++] % 4; + Key key = Data[pos++]; + Value value = Data[pos++]; + switch (inst) { + case 0: { + if (PRINT) + std::cout << " tree.emplace(" << (int)key << ", " << (int)value << ");" + << std::endl; + tree.emplace(key, value); + map.emplace(key, value); + break; + } + case 1: { + if (PRINT) + std::cout << " tree.erase(" << (int)key << ");" << std::endl; + tree.erase(key); + map.erase(key); + break; + } + case 2: { + if (PRINT) + std::cout << " auto it = tree.find(" << (int)key << ");" << std::endl; + auto it = tree.find(key); + if (it != tree.end()) { + if (PRINT) + std::cout << " tree.erase(it);" << std::endl; + tree.erase(it); + } + auto it2 = map.find(key); + if (it2 != map.end()) { + map.erase(it2); + } + break; + } + case 3: { + if (PRINT) + std::cout << " auto it = tree.lower_bound(" << (int)key << ");" << std::endl; + auto it = tree.lower_bound(key); + if (PRINT) + std::cout << " tree.emplace_hint(it, " << (int)key << ", " << (int)value << ");" + << std::endl; + tree.emplace_hint(it, key, value); + auto it2 = map.lower_bound(key); + map.emplace_hint(it2, key, value); + break; + } + default: + 
std::cout << "Unexpected instruction: " << inst << std::endl; + } + } + + tree._check(); + + for (auto& entry : map) { + const Key& vRef = entry.first; + Key vMap = tree.find(vRef)->first; + assert(vMap == vRef); + } + for (auto& entry : tree) { + Key v = entry.first; + const Key& vRef = map.find(v)->first; + Key vMap = tree.find(v)->first; + assert(vMap == vRef); + } + assert(tree.size() == map.size()); + + return 0; +} diff --git a/fuzzer/phtree_mm_relocate_fuzzer.cc b/fuzzer/phtree_mm_relocate_fuzzer.cc new file mode 100644 index 00000000..b49cfc17 --- /dev/null +++ b/fuzzer/phtree_mm_relocate_fuzzer.cc @@ -0,0 +1,129 @@ +/* +* Copyright 2023 Tilmann Zäschke +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include + +#include "include/phtree/phtree_multimap.h" + +// clang++ -g -std=c++17 -fsanitize=address,fuzzer fuzzer/phtree_mm_relocate_fuzzer.cc -I. 
-I./include + + +using namespace improbable::phtree; + + +constexpr bool PRINT = !true; + +void print() {} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) { + assert(Data); + + const dimension_t DIM = 1; + + if (PRINT) { + std::cout << "TEST(PhTreeMMTest, FuzzTest1) {" << std::endl; + std::cout << " const dimension_t DIM = 1;" << std::endl; + std::cout << " using Key = PhPoint;" << std::endl; + std::cout << " using Value = std::uint8_t;" << std::endl; + std::cout << " PhTreeMultiMap> tree{};" << std::endl; + } + + using Instruction = std::uint8_t; + using Key = PhPoint<1>; + using Value = std::uint8_t; + + PhTreeMultiMap> tree; + std::multimap map; + + size_t pos = 0; + + while (pos + 4 < Size) { + Instruction inst = Data[pos++] % 2; + Key key{Data[pos++]}; + Key key2{Data[pos++]}; + Value value = Data[pos++]; + switch (inst) { + case 0: { + if (PRINT) + std::cout << " tree.emplace({" << key[0] << "}, " << (int)value << ");" + << std::endl; + tree.emplace({key[0]}, value); +// map.emplace(key, value); + break; + } + case 1: { + if (PRINT) + std::cout << " tree.relocate({" << key[0] << "}, {" << key2[0] << "}, " << (int)value << ");" << std::endl; +// tree.erase(key); +// map.erase(key); + tree.relocate({key[0]}, {key2[0]}, value); + break; + } +// case 2: { +// if (PRINT) +// std::cout << " auto it = tree.find(" << (int)key << ");" << std::endl; +// auto it = tree.find(key); +// if (it != tree.end()) { +// if (PRINT) +// std::cout << " tree.erase(it);" << std::endl; +// tree.erase(it); +// } +// auto it2 = map.find(key); +// if (it2 != map.end()) { +// map.erase(it2); +// } +// break; +// } +// case 3: { +// if (PRINT) +// std::cout << " auto it = tree.lower_bound(" << (int)key << ");" << std::endl; +// auto it = tree.lower_bound(key); +// if (PRINT) +// std::cout << " tree.emplace_hint(it, " << (int)key << ", " << (int)value << ");" +// << std::endl; +// tree.emplace_hint(it, key, value); +// auto it2 = map.lower_bound(key); +// 
map.emplace_hint(it2, key, value); +// break; +// } + default: + std::cout << "Unexpected instruction: " << inst << std::endl; + } + } + + //tree._check(); + +// for (auto& entry : map) { +// const Key& vRef = entry.first; +// Key vMap = tree.find(vRef)->first; +// assert(vMap == vRef); +// } +// for (auto& entry : tree) { +// Key v = entry.first; +// const Key& vRef = map.find(v)->first; +// Key vMap = tree.find(v)->first; +// assert(vMap == vRef); +// } +// assert(tree.size() == map.size()); + + return 0; +} diff --git a/include/phtree/common/BUILD b/include/phtree/common/BUILD new file mode 100644 index 00000000..f7b56df4 --- /dev/null +++ b/include/phtree/common/BUILD @@ -0,0 +1,23 @@ +package(default_visibility = ["//visibility:private"]) + +cc_library( + name = "common", + hdrs = [ + "b_plus_tree_base.h", + "b_plus_tree_hash_map.h", + "b_plus_tree_map.h", + "b_plus_tree_multimap.h", + "base_types.h", + "bits.h", + "common.h", + "debug_helper.h", + "flat_array_map.h", + "flat_sparse_map.h", + "tree_stats.h", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + ], +) diff --git a/phtree/common/README.md b/include/phtree/common/README.md similarity index 100% rename from phtree/common/README.md rename to include/phtree/common/README.md diff --git a/include/phtree/common/b_plus_tree_base.h b/include/phtree/common/b_plus_tree_base.h new file mode 100644 index 00000000..ee97ca84 --- /dev/null +++ b/include/phtree/common/b_plus_tree_base.h @@ -0,0 +1,531 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_BASE_H +#define PHTREE_COMMON_B_PLUS_TREE_BASE_H + +#include "bits.h" +#include +#include +#include + +namespace phtree::bptree::detail { + +template +struct bpt_config { + static constexpr size_t MAX = MAX_; + static constexpr size_t MIN = MIN_; + static constexpr size_t INIT = INIT_; +}; + +template +class bpt_node_base { + public: + explicit bpt_node_base(bool is_leaf, NInnerT* parent) noexcept + : is_leaf_{is_leaf}, parent_{parent} {} + + virtual ~bpt_node_base() noexcept = default; + + [[nodiscard]] constexpr bool is_leaf() const noexcept { + return is_leaf_; + } + + [[nodiscard]] constexpr NInnerT* as_inner() noexcept { + assert(!is_leaf_); + return static_cast(this); + } + + [[nodiscard]] constexpr NLeafT* as_leaf() noexcept { + assert(is_leaf_); + return static_cast(this); + } + + virtual void _check(size_t&, NInnerT*, NLeafT*&, KeyT&, KeyT) = 0; + + private: + const bool is_leaf_; + + public: + NInnerT* parent_; +}; + +template < + typename KeyT, + typename NInnerT, + typename NLeafT, + typename ThisT, + typename EntryT, + typename IterT, + typename CFG = bpt_config<16, 2, 2>> +class bpt_node_data : public bpt_node_base { + // TODO This could be improved but requires a code change to move > 1 entry when merging. + static_assert(CFG::MIN == 2 && "M_MIN != 2 is not supported"); + using DataIteratorT = decltype(std::vector().begin()); + friend IterT; + + public: + // MSVC++ requires this to be public, otherwise there it clashes with sub-classes' NodeT!?!?! 
+ using NodeT = bpt_node_base; + explicit bpt_node_data(bool is_leaf, NInnerT* parent, ThisT* prev, ThisT* next) noexcept + : bpt_node_base(is_leaf, parent) + , data_{} + , prev_node_{prev} + , next_node_{next} { + data_.reserve(CFG::INIT); + } + + virtual ~bpt_node_data() noexcept = default; + + [[nodiscard]] auto lower_bound(KeyT key) noexcept { + return std::lower_bound(data_.begin(), data_.end(), key, [](EntryT& left, const KeyT key) { + return left.first < key; + }); + } + + [[nodiscard]] auto lower_bound_as_iter(KeyT key) noexcept { + auto it = lower_bound(key); + return it == data_.end() ? IterT{} : IterT(this->as_leaf(), std::move(it)); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + struct EraseResult { + bpt_node_data* node_ = nullptr; + DataIteratorT iter_; + }; + + auto erase_entry(const DataIteratorT& it_to_erase, NodeT*& root) { + auto max_key = data_.back().first; + auto it_after_erased = data_.erase(it_to_erase); + return check_merge(it_after_erased, max_key, root); + } + + auto check_merge(const DataIteratorT& iter_after_erased, KeyT max_key_old, NodeT*& root) { + using ER = EraseResult; + auto& parent_ = this->parent_; + bool tail_entry_erased = iter_after_erased == data_.end(); + + if (parent_ == nullptr) { + if constexpr (std::is_same_v) { + if (data_.size() < 2) { + auto remaining_node = data_.begin()->second; + data_.begin()->second = nullptr; + remaining_node->parent_ = nullptr; + root = remaining_node; + delete this; + } + } + return tail_entry_erased ? ER{} : ER{this, iter_after_erased}; + } + + if (data_.empty()) { + // Nothing to merge, just remove node. This should be rare, i.e. only happens when + // a rare 1-entry node has its last entry removed. + auto next_node = next_node_; // create copy because (this) will be deleted + remove_from_siblings(); + parent_->remove_node(max_key_old, this, root); + return next_node == nullptr ? 
ER{} : ER{next_node, next_node->data_.begin()}; + } + + if (data_.size() < CFG::MIN) { + // merge + if (prev_node_ != nullptr && prev_node_->data_.size() < CFG::MAX) { + remove_from_siblings(); + auto& prev_data = prev_node_->data_; + if constexpr (std::is_same_v) { + prev_data.emplace_back(std::move(data_[0])); + } else { + data_[0].second->parent_ = prev_node_; + prev_data.emplace_back(std::move(data_[0])); + data_[0].second = nullptr; + } + auto prev_node = prev_node_; // create copy because (this) will be deleted + auto next_node = next_node_; // create copy because (this) will be deleted + parent_->remove_node(max_key_old, this, root); + if (prev_node->parent_ != nullptr) { + KeyT old1 = (prev_data.end() - 2)->first; + KeyT new1 = (prev_data.end() - 1)->first; + prev_node->parent_->update_key(old1, new1, prev_node); + } + if (!tail_entry_erased) { + return ER{prev_node, --prev_data.end()}; + } + return next_node == nullptr ? ER{} : ER{next_node, next_node->data_.begin()}; + } else if (next_node_ != nullptr && next_node_->data_.size() < CFG::MAX) { + remove_from_siblings(); + auto* next_node = next_node_; + auto& next_data = next_node_->data_; + if constexpr (std::is_same_v) { + next_data.emplace(next_data.begin(), std::move(data_[0])); + } else { + data_[0].second->parent_ = next_node_; + next_data.emplace(next_data.begin(), std::move(data_[0])); + data_[0].second = nullptr; + } + parent_->remove_node(max_key_old, this, root); + if (tail_entry_erased) { + return ER{next_node, next_data.begin() + 1}; + } + return next_node == nullptr ? ER() : ER{next_node, next_data.begin()}; + } + // This node is too small but there is nothing we can do. + } + if (tail_entry_erased) { + parent_->update_key(max_key_old, data_.back().first, this); + return next_node_ == nullptr ? ER() : ER{next_node_, next_node_->data_.begin()}; + } + return ER{this, iter_after_erased}; + } + + /* + * Check whether a split is required and, if so, perform it. 
+ */ + bool check_split(NodeT*& root) { + if (data_.size() >= CFG::MAX) { + if (!this->rebalance()) { + this->split_node(root); + } + return true; + } + return false; + } + + auto check_split_and_adjust_iterator(DataIteratorT it, KeyT key, NodeT*& root) { + auto* dest = (ThisT*)this; + bool is_split = this->check_split(root); + if (is_split && key > this->data_.back().first) { + dest = this->next_node_; + it = dest->lower_bound(key); + } + + if (dest->parent_ != nullptr && key > dest->data_.back().first) { + dest->parent_->update_key(dest->data_.back().first, key, dest); + } + + return IterT(dest, it); + } + + void _check_data(NInnerT* parent, KeyT known_max) { + (void)parent; + (void)known_max; + // assert(parent_ == nullptr || data_.size() >= CFG::MIN); + assert(this->parent_ == parent); + if (this->data_.empty()) { + assert(parent == nullptr); + return; + } + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + } + + private: + void split_node(NodeT*& root) { + auto max_key = data_.back().first; + if (this->parent_ == nullptr) { + auto* new_parent = new NInnerT(nullptr, nullptr, nullptr); + new_parent->emplace_back(max_key, this); + root = new_parent; + this->parent_ = new_parent; + } + + // create new node + auto* node2 = new ThisT(this->parent_, static_cast(this), next_node_); + if (next_node_ != nullptr) { + next_node_->prev_node_ = node2; + } + next_node_ = node2; + + // populate new node + // TODO Optimize populating new node: move 1st part, insert new value, move 2nd part...? 
+ auto split_pos = CFG::MAX >> 1; + node2->data_.insert( + node2->data_.end(), + std::make_move_iterator(data_.begin() + split_pos), + std::make_move_iterator(data_.end())); + data_.erase(data_.begin() + split_pos, data_.end()); + + if constexpr (std::is_same_v) { + for (auto& e : node2->data_) { + e.second->parent_ = node2; + } + } + + // Add node to parent + auto split_key = data_.back().first; + this->parent_->update_key_and_add_node(max_key, split_key, this, node2, root); + } + + bool rebalance() { + // We rebalance to "next" if it has at least 50% free space. + // Rebalancing to "prev" is difficult because update_key_and_add_node() relies on "next". + size_t threshold = CFG::MAX >> 1; // 50% + size_t move_amount = CFG::MAX >> 2; + auto& data = this->data_; + if (this->next_node_ != nullptr && CFG::MAX - next_node_->data_.size() > threshold) { + auto& next_data = this->next_node_->data_; + auto old_key = data.back().first; + auto start = data.end() - move_amount; + auto end = data.end(); + next_data.insert( + next_data.begin(), std::make_move_iterator(start), std::make_move_iterator(end)); + data.erase(start, end); + if constexpr (std::is_same_v) { + auto it = next_data.begin(); + for (size_t i = 0; i < move_amount; ++i) { + it->second->parent_ = this->next_node_; + ++it; + } + } + this->parent_->update_key(old_key, data.back().first, this); + return true; + } + return false; + } + + void remove_from_siblings() { + if (next_node_ != nullptr) { + next_node_->prev_node_ = prev_node_; + } + if (prev_node_ != nullptr) { + prev_node_->next_node_ = next_node_; + } + } + + public: + std::vector data_; + ThisT* prev_node_; + ThisT* next_node_; +}; + +template > +class bpt_node_inner +: public bpt_node_data< + KeyT, + bpt_node_inner, + NLeafT, + bpt_node_inner, + std::pair, NLeafT>*>, + IterT, + CFG> { + using NInnerT = bpt_node_inner; + using NodePtrT = bpt_node_base*; + using EntryT = std::pair; + + public: + explicit bpt_node_inner(NInnerT* parent, NInnerT* prev, 
NInnerT* next) noexcept + : bpt_node_data(false, parent, prev, next) { + } + + ~bpt_node_inner() noexcept { + for (auto& e : this->data_) { + if (e.second != nullptr) { + delete e.second; + } + } + } + + [[nodiscard]] auto lower_bound_node(KeyT key, const NodePtrT node) noexcept { + auto it = this->lower_bound(key); + while (it != this->data_.end() && it->first == key) { + if (it->second == node) { + return it; + } + ++it; + } + return this->data_.end(); + } + + void emplace_back(KeyT key, NodePtrT node) { + this->data_.emplace_back(key, node); + } + + void _check( + size_t& count, NInnerT* parent, NLeafT*& prev_leaf, KeyT& known_min, KeyT known_max) { + this->_check_data(parent, known_max); + + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + auto prev_key = this->data_[0].first; + size_t n = 0; + for (auto& e : this->data_) { + assert(n == 0 || e.first >= prev_key); + e.second->_check(count, this, prev_leaf, known_min, e.first); + assert(this->parent_ == nullptr || e.first <= known_max); + prev_key = e.first; + ++n; + } + } + + void update_key(KeyT old_key, KeyT new_key, NodePtrT node) { + if (old_key == new_key) { + return; // This can happen due to multiple entries with same key. + } + auto it = this->lower_bound_node(old_key, node); + assert(it != this->data_.end() && it->first == old_key); + it->first = new_key; + if (this->parent_ != nullptr && ++it == this->data_.end()) { + this->parent_->update_key(old_key, new_key, this); + } + } + + /* + * This method does two things: + * - It changes the key of the node (node 1) at 'key1_old' to 'key1_new'. + * - It inserts a new node (node 2) after 'new_key1' with key='key1_old' (it's max key) + * Invariants: + * - Node1: key1_old >= key1_new + */ + void update_key_and_add_node( + KeyT key1_old, KeyT key1_new, NodePtrT child1, NodePtrT child2, NodePtrT& root) { + bool has_split = this->check_split(root); + + // splits are always "forward", i.e. creating a "next" node. How about rebalance()? 
+ auto* dest = this; + if (has_split && key1_old > this->data_.back().first) { + dest = this->next_node_; + } + + // update child1 + auto it = dest->lower_bound_node(key1_old, child1); + assert(key1_old >= key1_new && it != dest->data_.end()); + it->first = key1_new; + + if (dest == this && this->next_node_ != nullptr) { + assert(this->next_node_->data_.front().first >= key1_new); + } + ++it; + // key_1_old is the max_key of child2 + dest->data_.emplace(it, key1_old, child2); + child2->parent_ = dest; + } + + void remove_node(KeyT key_remove, NodePtrT node, NodePtrT& root) { + auto it_to_erase = this->lower_bound(key_remove); + while (it_to_erase != this->data_.end() && it_to_erase->first == key_remove) { + if (it_to_erase->second == node) { + delete it_to_erase->second; + this->erase_entry(it_to_erase, root); + return; + } + ++it_to_erase; + } + assert(false && "Node not found!"); + } +}; + +template +class bpt_iterator_base { + using IterT = bpt_iterator_base; + + template + friend class bpt_node_data; + friend F1; + friend NLeafT; + + public: + // Arbitrary position iterator + explicit bpt_iterator_base(NLeafT* node, LeafIteratorT it) noexcept : node_{node}, iter_{it} { + assert( + (node == nullptr || node->is_leaf()) && + "for consistency, insist that we iterate leaves only"); + } + + // begin() iterator + explicit bpt_iterator_base(NodeT* node) noexcept { + assert(node->parent_ == nullptr && "must start with root node"); + // move iterator to first value + while (!node->is_leaf()) { + node = node->as_inner()->data_[0].second; + } + node_ = node->as_leaf(); + + if (node_->size() == 0) { + node_ = nullptr; + iter_ = {}; + return; + } + iter_ = node_->data_.begin(); + } + + // end() iterator + bpt_iterator_base() noexcept : node_{nullptr}, iter_{} {} + + auto operator++() noexcept { + assert(!is_end()); + ++iter_; + if (iter_ == node_->data_.end()) { + // this may be a nullptr -> end of data + node_ = node_->next_node_; + iter_ = node_ != nullptr ? 
node_->data_.begin() : LeafIteratorT{}; + } + return *this; + } + + auto operator++(int) const noexcept { + IterT iterator(*this); + ++(*this); + return iterator; + } + + friend bool operator==(const IterT& left, const IterT& right) noexcept { + return left.node_ == right.node_ && left.iter_ == right.iter_; + } + + friend bool operator!=(const IterT& left, const IterT& right) noexcept { + return left.node_ != right.node_ || left.iter_ != right.iter_; + } + + protected: + LeafIteratorT& iter() const noexcept { + return const_cast(iter_); + } + + private: + [[nodiscard]] bool is_end() const noexcept { + return node_ == nullptr; + } + + NLeafT* node_; + LeafIteratorT iter_; +}; + +template +[[nodiscard]] static auto lower_bound_leaf(KeyT key, NodeT* node) noexcept { + using LeafT = decltype(node->as_leaf()); + while (node != nullptr && !node->is_leaf()) { + auto it = node->as_inner()->lower_bound(key); + node = it != node->as_inner()->data_.end() ? it->second : nullptr; + } + return (LeafT)node; +} + +template +[[nodiscard]] static auto lower_bound_or_last_leaf(KeyT key, NodeT* node) noexcept { + using LeafT = decltype(node->as_leaf()); + while (!node->is_leaf()) { + auto it = node->as_inner()->lower_bound(key); + if (it == node->as_inner()->data_.end()) { + node = node->as_inner()->data_.back().second; + } else { + node = it->second; + } + } + return (LeafT)node; +} + +} // namespace phtree::bptree::detail + +#endif // PHTREE_COMMON_B_PLUS_TREE_BASE_H diff --git a/include/phtree/common/b_plus_tree_hash_map.h b/include/phtree/common/b_plus_tree_hash_map.h new file mode 100644 index 00000000..98dcfeea --- /dev/null +++ b/include/phtree/common/b_plus_tree_hash_map.h @@ -0,0 +1,502 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_HASH_MAP_H +#define PHTREE_COMMON_B_PLUS_TREE_HASH_MAP_H + +#include "b_plus_tree_base.h" +#include "bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the B+tree implementation which is used in high-dimensional nodes in + * the PH-Tree. + */ +namespace improbable::phtree { +using namespace ::phtree::bptree::detail; + +/* + * The b_plus_tree_hash_map is a B+tree implementation that uses a hierarchy of horizontally + * connected nodes for fast traversal through all entries. + * + * Behavior + * ======== + * This is a hash set/map. It behaves just like std::unordered_set / std::unordered_map, minus + * some API functions. + * The set/map is ordered by their hash. Entries with identical hash have no specific ordering + * but the order is stable with respect to insertion/removal of other entries. + * + * + * Rationale + * ========= + * This implementations is optimized for small entry count (for the multi-map PH-tree we + * expect small numbers of entries that actually have identical positions), however it should + * scale well with large entry counts (it is a tree, so there is no need for rehashing). + * Benchmarks show 10%-20% performance improvements for relocate() when using this custom set/map. + * + * + * Internals + * ========= + * The individual nodes have at most M entries. + * The tree has O(log n) lookup and O(M log n) insertion/removal time complexity, + * space complexity is O(n). 
+ * + * Tree structure: + * - Inner nodes: have other nodes as children; their key of an entry represents the highest + * key of any subnode in that entry + * - Leaf nodes: have values as children; their key represents the key of a key/value pair + * - Every node is either a leaf (l-node; contains values) or an inner node + * (n-node; contains nodes). + * - "Sibling" nodes refer to the nodes linked by prev_node_ or next_node_. Sibling nodes + * usually have the same parent but may also be children of their parent's siblings. + * + * - Guarantee: All leaf nodes are horizontally connected + * - Inner nodes may or may not be connected. Specifically: + * - New inner nodes will be assigned siblings from the same parent or the parent's sibling + * (if the new node is the first or last node in a parent) + * - There is no guarantee that inner nodes know about their potential sibling (=other inner + * nodes that own bordering values/child-nodes). + * - There is no guarantee that siblings are on the same depth of the tree. + * - The tree is not balanced + * + */ +template , typename PredT = std::equal_to> +class b_plus_tree_hash_set { + using hash_t = std::uint32_t; + + class bpt_node_leaf; + class bpt_iterator; + using LeafEntryT = std::pair; + using IterT = bpt_iterator; + using NLeafT = bpt_node_leaf; + using NInnerT = bpt_node_inner; + using NodeT = bpt_node_base; + using LeafIteratorT = decltype(std::vector().begin()); + using TreeT = b_plus_tree_hash_set; + + public: + using value_compare = PredT; + explicit b_plus_tree_hash_set() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {}; + + b_plus_tree_hash_set(const b_plus_tree_hash_set& other) : size_{other.size_} { + root_ = other.root_->is_leaf() ? 
(NodeT*)new NLeafT(*other.root_->as_leaf()) + : (NodeT*)new NInnerT(*other.root_->as_inner()); + } + + b_plus_tree_hash_set(b_plus_tree_hash_set&& other) noexcept + : root_{other.root_}, size_{other.size_} { + other.root_ = nullptr; + other.size_ = 0; + } + + b_plus_tree_hash_set& operator=(const b_plus_tree_hash_set& other) { + assert(this != &other); + delete root_; + root_ = other.root_->is_leaf() ? (NodeT*)new NLeafT(*other.root_->as_leaf()) + : (NodeT*)new NInnerT(*other.root_->as_inner()); + size_ = other.size_; + return *this; + } + + b_plus_tree_hash_set& operator=(b_plus_tree_hash_set&& other) noexcept { + delete root_; + root_ = other.root_; + other.root_ = nullptr; + size_ = other.size_; + other.size_ = 0; + return *this; + } + + ~b_plus_tree_hash_set() noexcept { + delete root_; + } + + [[nodiscard]] auto find(const T& value) { + auto hash = (hash_t)HashT{}(value); + auto leaf = lower_bound_leaf(hash, root_); + return leaf != nullptr ? leaf->find(hash, value) : IterT{}; + } + + [[nodiscard]] auto find(const T& value) const { + return const_cast(*this).find(value); + } + + [[nodiscard]] auto lower_bound(const T& value) { + auto hash = (hash_t)HashT{}(value); + auto leaf = lower_bound_leaf(hash, root_); + return leaf != nullptr ? leaf->lower_bound_value(hash, value) : IterT{}; + } + + [[nodiscard]] auto lower_bound(const T& value) const { + return const_cast(*this).lower_bound(value); + } + + [[nodiscard]] size_t count(const T& value) const { + return const_cast(*this).find(value) != end(); + } + + [[nodiscard]] auto begin() noexcept { + return IterT(root_); + } + + [[nodiscard]] auto begin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto cbegin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto end() noexcept { + return IterT(); + } + + [[nodiscard]] auto end() const noexcept { + return IterT(); + } + + template + auto emplace(Args&&... 
args) { + T t(std::forward(args)...); + hash_t hash = (hash_t)HashT{}(t); + auto leaf = lower_bound_or_last_leaf(hash, root_); + return leaf->try_emplace(hash, root_, size_, std::move(t)); + } + + template + auto emplace_hint(const IterT& hint, Args&&... args) { + if (empty() || hint.is_end()) { + return emplace(std::forward(args)...).first; + } + + T t(std::forward(args)...); + auto hash = (hash_t)HashT{}(t); + auto node = hint.node_->as_leaf(); + + // The following may drop a valid hint but is easy to check. + if (node->data_.begin()->first > hash || (node->data_.end() - 1)->first < hash) { + return emplace(std::move(t)).first; + } + + return node->try_emplace(hash, root_, size_, std::move(t)).first; + } + + size_t erase(const T& value) { + auto hash = (hash_t)HashT{}(value); + auto leaf = lower_bound_leaf(hash, root_); + if (leaf == nullptr) { + return 0; + } + + auto iter = leaf->lower_bound_value(hash, value); + if (!iter.is_end() && PredT{}(*iter, value)) { + iter.node_->erase_entry(iter.iter_, root_); + --size_; + return 1; + } + return 0; + } + + auto erase(const IterT& iterator) { + assert(iterator != end()); + --size_; + auto result = iterator.node_->erase_entry(iterator.iter_, root_); + if (result.node_) { + return IterT(static_cast(result.node_), result.iter_); + } + return IterT(); + } + + [[nodiscard]] size_t size() const noexcept { + return size_; + } + + [[nodiscard]] bool empty() const noexcept { + return size_ == 0; + } + + void _check() { + size_t count = 0; + NLeafT* prev_leaf = nullptr; + hash_t known_min = std::numeric_limits::max(); + root_->_check(count, nullptr, prev_leaf, known_min, 0); + assert(count == size()); + } + + private: + using bpt_leaf_super = bpt_node_data; + class bpt_node_leaf : public bpt_leaf_super { + public: + explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept + : bpt_leaf_super(true, parent, prev, next) {} + + ~bpt_node_leaf() noexcept = default; + + [[nodiscard]] IterT find(hash_t hash, const 
T& value) noexcept { + PredT equals{}; + IterT iter_full = this->lower_bound_as_iter(hash); + while (!iter_full.is_end() && iter_full.hash() == hash) { + if (equals(*iter_full, value)) { + return iter_full; + } + ++iter_full; + } + return IterT(); + } + + [[nodiscard]] auto lower_bound_value(hash_t hash, const T& value) noexcept { + PredT equals{}; + IterT iter_full = this->lower_bound_as_iter(hash); + while (!iter_full.is_end() && iter_full.hash() == hash) { + if (equals(*iter_full, value)) { + break; + } + ++iter_full; + } + return iter_full; + } + + auto try_emplace(hash_t hash, NodeT*& root, size_t& entry_count, T&& t) { + auto it = this->lower_bound(hash); + if (it != this->data_.end() && it->first == hash) { + // Hash collision ! + PredT equals{}; + IterT full_iter(this, it); + while (!full_iter.is_end() && full_iter.hash() == hash) { + if (equals(*full_iter, t)) { + return std::make_pair(full_iter, false); + } + ++full_iter; + } + } + ++entry_count; + + auto full_it = this->check_split_and_adjust_iterator(it, hash, root); + auto it_result = full_it.node_->data_.emplace(full_it.iter_, hash, std::move(t)); + return std::make_pair(IterT(full_it.node_, it_result), true); + } + + void _check( + size_t& count, + NInnerT* parent, + NLeafT*& prev_leaf, + hash_t& known_min, + hash_t known_max) { + this->_check_data(parent, known_max); + + assert(prev_leaf == this->prev_node_); + for (auto& e : this->data_) { + assert(count == 0 || e.first >= known_min); + assert(this->parent_ == nullptr || e.first <= known_max); + ++count; + known_min = e.first; + } + prev_leaf = this; + } + }; + + class bpt_iterator : public bpt_iterator_base { + using SuperT = bpt_iterator_base; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + // Arbitrary position iterator + explicit bpt_iterator(NLeafT* node, LeafIteratorT it) noexcept : SuperT(node, it) {} + + // 
begin() iterator + explicit bpt_iterator(NodeT* node) noexcept : SuperT(node) {} + + // end() iterator + bpt_iterator() noexcept : SuperT() {} + + auto& operator*() const noexcept { + return const_cast(this->iter()->second); + } + + auto* operator->() const noexcept { + return const_cast(&this->iter()->second); + } + + [[nodiscard]] auto hash() const noexcept { + return this->iter()->first; + } + }; + + private: + NodeT* root_; + size_t size_; +}; + +template < + typename KeyT, + typename ValueT, + typename HashT = std::hash, + typename PredT = std::equal_to> +class b_plus_tree_hash_map { + class iterator; + using IterT = iterator; + using EntryT = std::pair; + + public: + using value_compare = PredT; + b_plus_tree_hash_map() : map_{} {}; + + b_plus_tree_hash_map(const b_plus_tree_hash_map&) = default; + b_plus_tree_hash_map(b_plus_tree_hash_map&&) noexcept = default; + b_plus_tree_hash_map& operator=(const b_plus_tree_hash_map&) = default; + b_plus_tree_hash_map& operator=(b_plus_tree_hash_map&&) noexcept = default; + ~b_plus_tree_hash_map() = default; + + auto begin() const { + return IterT(map_.begin()); + } + + auto end() const { + return IterT(map_.end()); + } + + auto find(const KeyT& key) const { + return IterT(map_.find(EntryT{key, {}})); + } + + [[nodiscard]] auto lower_bound(const KeyT& key) const { + return IterT(map_.lower_bound(EntryT{key, {}})); + } + + auto count(const KeyT& key) const { + return map_.count(EntryT{key, {}}); + } + + template + auto emplace(Args&&... args) { + return try_emplace(std::forward(args)...); + } + + template + auto emplace_hint(const IterT& hint, Args&&... args) { + return try_emplace(hint, std::forward(args)...); + } + + template + auto try_emplace(const KeyT& key, Args&&... args) { + auto result = map_.emplace(key, std::forward(args)...); + return std::make_pair(iterator(result.first), result.second); + } + + template + auto try_emplace(const IterT& hint, const KeyT& key, Args&&... 
args) { + auto result = map_.emplace_hint(hint.map_iter_, key, std::forward(args)...); + return IterT(result); + } + + auto erase(const KeyT& key) { + return map_.erase({key, {}}); + } + + auto erase(const IterT& iterator) { + return IterT(map_.erase(iterator.map_iter_)); + } + + auto size() const { + return map_.size(); + } + + auto empty() const { + return map_.empty(); + } + + void _check() { + map_._check(); + } + + private: + struct EntryHashT { + size_t operator()(const EntryT& x) const { + return HashT{}(x.first); + } + }; + + struct EntryEqualsT { + bool operator()(const EntryT& x, const EntryT& y) const { + return PredT{}(x.first, y.first); + } + }; + + class iterator { + using T = EntryT; + using MapIterType = + decltype(std::declval>() + .begin()); + friend b_plus_tree_hash_map; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + explicit iterator(MapIterType map_iter) noexcept : map_iter_{map_iter} {} + + // end() iterator + iterator() noexcept : map_iter_{} {} + + auto& operator*() const noexcept { + return *map_iter_; + } + + auto* operator->() const noexcept { + return &*map_iter_; + } + + auto& operator++() noexcept { + ++map_iter_; + return *this; + } + + auto operator++(int) noexcept { + IterT iterator(*this); + ++(*this); + return iterator; + } + + friend bool operator==(const IterT& left, const IterT& right) noexcept { + return left.map_iter_ == right.map_iter_; + } + + friend bool operator!=(const IterT& left, const IterT& right) noexcept { + return left.map_iter_ != right.map_iter_; + } + + private: + MapIterType map_iter_; + }; + + b_plus_tree_hash_set map_; +}; + +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_B_PLUS_TREE_HASH_MAP_H diff --git a/include/phtree/common/b_plus_tree_map.h b/include/phtree/common/b_plus_tree_map.h new file mode 100644 index 00000000..17bbdbc1 --- /dev/null +++ 
b/include/phtree/common/b_plus_tree_map.h @@ -0,0 +1,326 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_H +#define PHTREE_COMMON_B_PLUS_TREE_H + +#include "b_plus_tree_base.h" +#include "bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the B+tree implementation which is used in high-dimensional nodes in + * the PH-Tree. + */ +namespace improbable::phtree { +using namespace ::phtree::bptree::detail; + +/* + * The b_plus_tree_map is a B+tree implementation that uses a hierarchy of horizontally + * connected nodes for fast traversal through all entries. + * + * Behavior: + * This is a key-value map. Keys are unique, so for every key there is at most one entry. + * + * The individual nodes have at most M entries. + * The tree has O(log n) lookup and O(M log n) insertion/removal time complexity, + * space complexity is O(n). + * + * Tree structure: + * - Inner nodes: have other nodes as children; their key of an entry represents the highest + * key of any subnode in that entry + * - Leaf nodes: have values as children; their key represents the key of a key/value pair + * - Every node is either a leaf (l-node; contains values) or an inner node + * (n-node; contains nodes). + * - "Sibling" nodes refer to the nodes linked by prev_node_ or next_node_. 
Sibling nodes + * usually have the same parent but may also be children of their parent's siblings. + * + * - Guarantee: All leaf nodes are horizontally connected + * - Inner nodes may or may not be connected. Specifically: + * - New inner nodes will be assigned siblings from the same parent or the parent's sibling + * (if the new node is the first or last node in a parent) + * - There is no guarantee that inner nodes know about their potential sibling (=other inner + * nodes that own bordering values/child-nodes). + * - There is no guarantee that siblings are on the same depth of the tree. + * - The tree is not balanced + * + * TODO since this is a "map" (with 1:1 mapping of key:value), we could optimize splitting and + * merging by trying to reduce `dead space` + * (space between key1 and key2 that exceeds (key2 - key1)). + */ +template +class b_plus_tree_map { + static_assert(std::is_integral() && "Key type must be integer"); + static_assert(std::is_unsigned() && "Key type must unsigned"); + + // COUNT_MAX indicates that a tree will never have to hold more than COUNT_MAX entries. + // We can use this to optimize node sizes for small trees. + constexpr static size_t LEAF_MAX = std::min(std::uint64_t(16), COUNT_MAX); + // Special case for small COUNT with smaller inner leaf or + // trees with a single inner leaf. '*2' is added because leaf filling is not compact. + constexpr static size_t INNER_MAX = std::min(std::uint64_t(16), COUNT_MAX / LEAF_MAX * 2); + static_assert(LEAF_MAX > 2 && LEAF_MAX < 1000); + static_assert(COUNT_MAX <= (16 * 16) || (INNER_MAX > 2 && INNER_MAX < 1000)); + // TODO This could be improved but requires a code change to move > 1 entry when merging. 
+ constexpr static size_t LEAF_MIN = 2; // std::max((size_t)2, M_leaf >> 2); + constexpr static size_t INNER_MIN = 2; // std::max((size_t)2, M_inner >> 2); + constexpr static size_t LEAF_INIT = std::min(size_t(2), LEAF_MAX); + constexpr static size_t INNER_INIT = std::min(size_t(4), INNER_MAX); + using LEAF_CFG = bpt_config; + using INNER_CFG = bpt_config; + + class bpt_node_leaf; + class bpt_iterator; + using LeafEntryT = std::pair; + using IterT = bpt_iterator; + using NLeafT = bpt_node_leaf; + using NInnerT = bpt_node_inner; + using NodeT = bpt_node_base; + using LeafIteratorT = decltype(std::vector().begin()); + using TreeT = b_plus_tree_map; + + public: + explicit b_plus_tree_map() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {}; + + b_plus_tree_map(const b_plus_tree_map& other) : size_{other.size_} { + root_ = other.root_->is_leaf() ? (NodeT*)new NLeafT(*other.root_->as_leaf()) + : (NodeT*)new NInnerT(*other.root_->as_inner()); + } + + b_plus_tree_map(b_plus_tree_map&& other) noexcept : root_{other.root_}, size_{other.size_} { + other.root_ = nullptr; + other.size_ = 0; + } + + b_plus_tree_map& operator=(const b_plus_tree_map& other) { + assert(this != &other); + delete root_; + root_ = other.root_->is_leaf() ? (NodeT*)new NLeafT(*other.root_->as_leaf()) + : (NodeT*)new NInnerT(*other.root_->as_inner()); + size_ = other.size_; + return *this; + } + + b_plus_tree_map& operator=(b_plus_tree_map&& other) noexcept { + delete root_; + root_ = other.root_; + other.root_ = nullptr; + size_ = other.size_; + other.size_ = 0; + return *this; + } + + ~b_plus_tree_map() noexcept { + delete root_; + } + + [[nodiscard]] auto find(KeyT key) noexcept { + auto leaf = lower_bound_leaf(key, root_); + return leaf != nullptr ? 
leaf->find(key) : IterT{}; + } + + [[nodiscard]] auto find(KeyT key) const noexcept { + return const_cast(*this).find(key); + } + + [[nodiscard]] auto lower_bound(KeyT key) noexcept { + auto leaf = lower_bound_leaf(key, root_); + return leaf != nullptr ? leaf->lower_bound_as_iter(key) : IterT{}; + } + + [[nodiscard]] auto lower_bound(KeyT key) const noexcept { + return const_cast(*this).lower_bound(key); + } + + [[nodiscard]] auto begin() noexcept { + return IterT(root_); + } + + [[nodiscard]] auto begin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto cbegin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto end() noexcept { + return IterT(); + } + + [[nodiscard]] auto end() const noexcept { + return IterT(); + } + + template + auto emplace(Args&&... args) { + return try_emplace(std::forward(args)...); + } + + template + auto emplace_hint(const IterT& hint, KeyT key, Args&&... args) { + if (empty() || hint.is_end()) { + return emplace(key, std::forward(args)...); + } + assert(hint.node_->is_leaf()); + + auto node = hint.node_->as_leaf(); + + // The following may drop a valid hint but is easy to check. + if (node->data_.begin()->first > key || (node->data_.end() - 1)->first < key) { + return emplace(key, std::forward(args)...); + } + return node->try_emplace(key, root_, size_, std::forward(args)...); + } + + template + auto try_emplace(KeyT key, Args&&... args) { + auto leaf = lower_bound_or_last_leaf(key, root_); + return leaf->try_emplace(key, root_, size_, std::forward(args)...); + } + + template + auto try_emplace(IterT iter, KeyT key, Args&&... 
args) { + return emplace_hint(iter, key, std::forward(args)...).first; + } + + void erase(KeyT key) { + auto leaf = lower_bound_leaf(key, root_); + if (leaf != nullptr) { + size_ -= leaf->erase_key(key, root_); + } + } + + void erase(const IterT& iterator) { + assert(iterator != end()); + --size_; + iterator.node_->erase_entry(iterator.iter_, root_); + } + + [[nodiscard]] size_t size() const noexcept { + return size_; + } + + [[nodiscard]] bool empty() const noexcept { + return size_ == 0; + } + + void _check() { + size_t count = 0; + NLeafT* prev_leaf = nullptr; + KeyT known_min = std::numeric_limits::max(); + root_->_check(count, nullptr, prev_leaf, known_min, 0); + assert(count == size()); + } + + private: + using bpt_leaf_super = + bpt_node_data; + class bpt_node_leaf : public bpt_leaf_super { + public: + explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept + : bpt_leaf_super(true, parent, prev, next) {} + + ~bpt_node_leaf() noexcept = default; + + [[nodiscard]] IterT find(KeyT key) noexcept { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + return IterT(this, it); + } + return IterT(); + } + + template + auto try_emplace(KeyT key, NodeT*& root, size_t& entry_count, Args&&... 
args) { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + return std::make_pair(IterT(this, it), false); + } + ++entry_count; + + auto full_it = this->check_split_and_adjust_iterator(it, key, root); + auto it_result = full_it.node_->data_.emplace( + full_it.iter_, + std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(std::forward(args)...)); + return std::make_pair(IterT(full_it.node_, it_result), true); + } + + bool erase_key(KeyT key, NodeT*& root) { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + this->erase_entry(it, root); + return true; + } + return false; + } + + void _check( + size_t& count, NInnerT* parent, NLeafT*& prev_leaf, KeyT& known_min, KeyT known_max) { + this->_check_data(parent, known_max); + + assert(prev_leaf == this->prev_node_); + for (auto& e : this->data_) { + assert(count == 0 || e.first > known_min); + assert(this->parent_ == nullptr || e.first <= known_max); + ++count; + known_min = e.first; + } + prev_leaf = this; + } + }; + + class bpt_iterator : public bpt_iterator_base { + using SuperT = bpt_iterator_base; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = ValueT; + using difference_type = std::ptrdiff_t; + using pointer = ValueT*; + using reference = ValueT&; + + // Arbitrary position iterator + explicit bpt_iterator(NLeafT* node, LeafIteratorT it) noexcept : SuperT(node, it) {} + + // begin() iterator + explicit bpt_iterator(NodeT* node) noexcept : SuperT(node) {} + + // end() iterator + bpt_iterator() noexcept : SuperT() {} + + auto& operator*() const noexcept { + return const_cast(*this->iter()); + } + + auto* operator->() const noexcept { + return const_cast(&*this->iter()); + } + }; + + private: + NodeT* root_; + size_t size_; +}; +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_B_PLUS_TREE_H diff --git a/include/phtree/common/b_plus_tree_multimap.h 
b/include/phtree/common/b_plus_tree_multimap.h new file mode 100644 index 00000000..7d642fb5 --- /dev/null +++ b/include/phtree/common/b_plus_tree_multimap.h @@ -0,0 +1,353 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_MULTIMAP_H +#define PHTREE_COMMON_B_PLUS_TREE_MULTIMAP_H + +#include "b_plus_tree_base.h" +#include "bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the B+tree multimap implementation which is used in high-dimensional nodes in + * the PH-Tree. + */ +namespace improbable::phtree { +using namespace ::phtree::bptree::detail; + +/* + * The b_plus_tree_multimap is a B+tree implementation that uses a hierarchy of horizontally + * connected nodes for fast traversal through all entries. + * + * Behavior + * ======== + * This is a multimap. It behaves just like std::multimap, minus some API functions. + * The set/map is ordered by their key. Entries with identical keys have no specific ordering + * but the order is stable with respect to insertion/removal of other entries. + * + * + * Rationale + * ========= + * This implementations is optimized for small entry count, however it should + * scale well with large entry counts. + * + * + * Internals + * ========= + * The individual nodes have at most M entries. 
+ * The tree has O(log n) lookup and O(M log n) insertion/removal time complexity, + * space complexity is O(n). + * + * Tree structure: + * - Inner nodes: have other nodes as children; their key of an entry represents the highest + * key of any subnode in that entry + * - Leaf nodes: have values as children; their key represents the key of a key/value pair + * - Every node is either a leaf (l-node; contains values) or an inner node + * (n-node; contains nodes). + * - "Sibling" nodes refer to the nodes linked by prev_node_ or next_node_. Sibling nodes + * usually have the same parent but may also be children of their parent's siblings. + * + * - Guarantee: All leaf nodes are horizontally connected + * - Inner nodes may or may not be connected. Specifically: + * - New inner nodes will be assigned siblings from the same parent or the parent's sibling + * (if the new node is the first or last node in a parent) + * - There is no guarantee that inner nodes know about their potential sibling (=other inner + * nodes that own bordering values/child-nodes). + * - There is no guarantee that siblings are on the same depth of the tree. + * - The tree is not balanced + * + */ +template +class b_plus_tree_multimap { + static_assert(std::is_integral() && "Key type must be integer"); + static_assert(std::is_unsigned() && "Key type must unsigned"); + + class bpt_node_leaf; + class bpt_iterator; + using LeafEntryT = std::pair; + using IterT = bpt_iterator; + using NLeafT = bpt_node_leaf; + using NInnerT = bpt_node_inner; + using NodeT = bpt_node_base; + using LeafIteratorT = decltype(std::vector().begin()); + using TreeT = b_plus_tree_multimap; + + public: + explicit b_plus_tree_multimap() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {}; + + b_plus_tree_multimap(const b_plus_tree_multimap& other) : size_{other.size_} { + root_ = other.root_->is_leaf() ? 
(NodeT*)new NLeafT(*other.root_->as_leaf()) + : (NodeT*)new NInnerT(*other.root_->as_inner()); + } + + b_plus_tree_multimap(b_plus_tree_multimap&& other) noexcept + : root_{other.root_}, size_{other.size_} { + other.root_ = nullptr; + other.size_ = 0; + } + + b_plus_tree_multimap& operator=(const b_plus_tree_multimap& other) { + assert(this != &other); + delete root_; + root_ = other.root_->is_leaf() ? (NodeT*)new NLeafT(*other.root_->as_leaf()) + : (NodeT*)new NInnerT(*other.root_->as_inner()); + size_ = other.size_; + return *this; + } + + b_plus_tree_multimap& operator=(b_plus_tree_multimap&& other) noexcept { + delete root_; + root_ = other.root_; + other.root_ = nullptr; + size_ = other.size_; + other.size_ = 0; + return *this; + } + + ~b_plus_tree_multimap() noexcept { + delete root_; + } + + [[nodiscard]] auto find(const KeyT key) { + auto leaf = lower_bound_leaf(key, root_); + return leaf != nullptr ? leaf->find(key) : IterT{}; + } + + [[nodiscard]] auto find(const KeyT key) const { + return const_cast(*this).find(key); + } + + [[nodiscard]] size_t count(const KeyT key) const { + return const_cast(*this).find(key) != end(); + } + + [[nodiscard]] auto lower_bound(const KeyT key) { + auto leaf = lower_bound_leaf(key, root_); + return leaf != nullptr ? leaf->lower_bound_as_iter(key) : IterT{}; + } + + [[nodiscard]] auto lower_bound(const KeyT key) const { + return const_cast(*this).lower_bound(key); + } + + [[nodiscard]] auto begin() noexcept { + return IterT(root_); + } + + [[nodiscard]] auto begin() const noexcept { + return IterT(const_cast(root_)); + } + + [[nodiscard]] auto cbegin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto end() noexcept { + return IterT(); + } + + [[nodiscard]] auto end() const noexcept { + return IterT(); + } + + template + auto emplace(KeyT key, Args&&... 
args) { + auto leaf = lower_bound_or_last_leaf(key, root_); + return leaf->try_emplace(key, root_, size_, std::forward(args)...); + } + + template + auto try_emplace(KeyT key, Args&&... args) { + return emplace(key, std::forward(args)...); + } + + template + auto emplace_hint(const IterT& hint, KeyT key, Args&&... args) { + if (empty() || hint.is_end()) { + return emplace(key, std::forward(args)...); + } + assert(hint.node_->is_leaf()); + + auto node = hint.node_->as_leaf(); + + // The following may drop a valid hint but is easy to check. + if (node->data_.begin()->first > key || (node->data_.end() - 1)->first < key) { + return emplace(key, std::forward(args)...); + } + return node->try_emplace(key, root_, size_, std::forward(args)...); + } + + template + auto try_emplace(const IterT& hint, KeyT key, Args&&... args) { + return emplace_hint(hint, key, std::forward(args)...); + } + + size_t erase(const KeyT key) { + auto begin = lower_bound(key); + auto end = key == std::numeric_limits::max() ? IterT() : lower_bound(key + 1); + if (begin == end) { + return 0; + } + auto size_before = size_; + erase(begin, end); + return size_before - size_; + } + + auto erase(const IterT& iterator) { + assert(iterator != end()); + --size_; + auto result = iterator.node_->erase_entry(iterator.iter_, root_); + if (result.node_) { + return IterT(static_cast(result.node_), result.iter_); + } + return IterT(); + } + + auto erase(const IterT& begin, const IterT& end) { + assert(begin != this->end()); + NLeafT* current = begin.node_; + auto current_begin = begin.iter_; + size_t end_offset = 0; + if (!end.is_end()) { + if (begin.node_ == end.node_) { + // No page merge, but end_offset depends on "begin" iterator + end_offset = end.iter_ - begin.iter_; + } else { + // The end iterator may be invalidated by page merges! 
+ end_offset = end.iter_ - end.node_->data_.begin(); + } + } + size_t n_erased = 0; + while (current != end.node_ && current->next_node_ != nullptr) { + auto old_size = current->data_.size(); + KeyT max_key_old = current->data_.back().first; + current->data_.erase(current_begin, current->data_.end()); + n_erased += (old_size - current->data_.size()); + auto result = current->check_merge(current->data_.end(), max_key_old, root_); + current = result.node_->as_leaf(); + assert(current != nullptr); + current_begin = result.iter_; + } + auto old_size = current->data_.size(); + KeyT max_key_old = current->data_.back().first; + auto current_end = end.is_end() ? current->data_.end() : current_begin + end_offset; + auto next_entry = current->data_.erase(current_begin, current_end); + n_erased += (old_size - current->data_.size()); + auto result = current->check_merge(next_entry, max_key_old, root_); + size_ -= n_erased; + if (result.node_) { + return IterT(result.node_->as_leaf(), result.iter_); + } + return IterT(); + } + + [[nodiscard]] size_t size() const noexcept { + return size_; + } + + [[nodiscard]] bool empty() const noexcept { + return size_ == 0; + } + + void _check() { + size_t count = 0; + NLeafT* prev_leaf = nullptr; + KeyT known_min = std::numeric_limits::max(); + root_->_check(count, nullptr, prev_leaf, known_min, 0); + assert(count == size()); + } + + private: + using bpt_leaf_super = bpt_node_data; + class bpt_node_leaf : public bpt_leaf_super { + public: + explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept + : bpt_leaf_super(true, parent, prev, next) {} + + ~bpt_node_leaf() noexcept = default; + + [[nodiscard]] IterT find(KeyT key) noexcept { + IterT iter_full = this->lower_bound_as_iter(key); + if (!iter_full.is_end() && iter_full.iter_->first == key) { + return iter_full; + } + return IterT(); + } + + template + auto try_emplace(KeyT key, NodeT*& root, size_t& entry_count, Args&&... 
args) { + auto it = this->lower_bound(key); + ++entry_count; + auto full_it = this->check_split_and_adjust_iterator(it, key, root); + auto it_result = + full_it.node_->data_.emplace(full_it.iter_, key, std::forward(args)...); + return IterT(full_it.node_, it_result); + } + + void _check( + size_t& count, NInnerT* parent, NLeafT*& prev_leaf, KeyT& known_min, KeyT known_max) { + this->_check_data(parent, known_max); + + assert(prev_leaf == this->prev_node_); + for (auto& e : this->data_) { + assert(count == 0 || e.first >= known_min); + assert(this->parent_ == nullptr || e.first <= known_max); + ++count; + known_min = e.first; + } + prev_leaf = this; + } + }; + + class bpt_iterator : public bpt_iterator_base { + using SuperT = bpt_iterator_base; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = ValueT; + using difference_type = std::ptrdiff_t; + using pointer = ValueT*; + using reference = ValueT&; + + // Arbitrary position iterator + explicit bpt_iterator(NLeafT* node, LeafIteratorT it) noexcept : SuperT(node, it) {} + + // begin() iterator + explicit bpt_iterator(NodeT* node) noexcept : SuperT(node) {} + + // end() iterator + bpt_iterator() noexcept : SuperT() {} + + auto& operator*() const noexcept { + return const_cast(*this->iter()); + } + + auto* operator->() const noexcept { + return const_cast(&*this->iter()); + } + }; + + private: + NodeT* root_; + size_t size_; +}; +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_B_PLUS_TREE_MULTIMAP_H diff --git a/phtree/common/base_types.h b/include/phtree/common/base_types.h similarity index 84% rename from phtree/common/base_types.h rename to include/phtree/common/base_types.h index 5ad77ea2..dcb91dac 100644 --- a/phtree/common/base_types.h +++ b/include/phtree/common/base_types.h @@ -40,8 +40,10 @@ using scalar_64_t = int64_t; using scalar_32_t = int32_t; using scalar_16_t = int16_t; -// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices) 
-using bit_width_t = uint16_t; +// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices). +// However, uint32_t turned out to be faster, probably due to fewer cycles required for 32bit +// instructions (8bit/16bit tend to require more cycles, see CPU tables available on the web). +using bit_width_t = uint32_t; // Number of bit for 'scalar_64_t' or 'scalar_32_t'. Note that 'digits' does _not_ include sign bit, // so e.g. int64_t has 63 `digits`, however we need all bits, i.e. 64. template @@ -53,7 +55,15 @@ using bit_mask_t = typename std::make_unsigned::type; template static constexpr bit_mask_t MAX_MASK = std::numeric_limits>::max(); using dimension_t = size_t; // Number of dimensions -using hc_pos_t = uint64_t; +// We have two types that represent hypercube addresses (HC position). +// The hc_pos_dim_t uses a template parameter to determine how many bits are needed, this is either +// 32bit or 64bit. This parameter is used where HC positions are stored because benchmarks show a +// difference in performance when this is used. +// The hc_pos_64_t type is always set to 64. It is used where computations play a role that appear +// to prefer being in always 64bit, mainly in CalcPosInArray() and in Node. 
+template +using hc_pos_dim_t = std::conditional_t<(DIM < 32), uint32_t, uint64_t>; +using hc_pos_64_t = uint64_t; // ************************************************************************ // Basic structs and classes @@ -109,6 +119,10 @@ class PhBox { return min_ == other.min_ && max_ == other.max_; } + auto operator!=(const PhBox& other) const -> bool { + return !(*this == other); + } + private: Point min_; Point max_; diff --git a/phtree/common/bits.h b/include/phtree/common/bits.h similarity index 100% rename from phtree/common/bits.h rename to include/phtree/common/bits.h diff --git a/phtree/common/common.h b/include/phtree/common/common.h similarity index 89% rename from phtree/common/common.h rename to include/phtree/common/common.h index 2912c8ec..ad09013d 100644 --- a/phtree/common/common.h +++ b/include/phtree/common/common.h @@ -17,11 +17,9 @@ #ifndef PHTREE_COMMON_COMMON_H #define PHTREE_COMMON_COMMON_H +#include "b_plus_tree_map.h" #include "base_types.h" #include "bits.h" -#include "converter.h" -#include "distance.h" -#include "filter.h" #include "flat_array_map.h" #include "flat_sparse_map.h" #include "tree_stats.h" @@ -51,7 +49,8 @@ namespace improbable::phtree { * an array. */ template -static hc_pos_t CalcPosInArray(const PhPoint& valSet, bit_width_t postfix_len) { +static hc_pos_dim_t CalcPosInArray( + const PhPoint& valSet, bit_width_t postfix_len) { // n=DIM, i={0..n-1} // i = 0 : |0|1|0|1|0|1|0|1| // i = 1 : | 0 | 1 | 0 | 1 | @@ -60,13 +59,13 @@ static hc_pos_t CalcPosInArray(const PhPoint& valSet, bit_width_t p // Following formula was for inverse ordering of current ordering... 
// pos = sum (i=1..n, len/2^i) = sum (..., 2^(n-i)) bit_mask_t valMask = bit_mask_t(1) << postfix_len; - hc_pos_t pos = 0; + hc_pos_64_t pos = 0; for (dimension_t i = 0; i < DIM; ++i) { pos <<= 1; // set pos-bit if bit is set in value pos |= (valMask & valSet[i]) >> postfix_len; } - return pos; + return static_cast>(pos); } template @@ -96,23 +95,23 @@ template static bit_width_t NumberOfDivergingBits( const PhPoint& v1, const PhPoint& v2) { // write all differences to diff, we just check diff afterwards - bit_mask_t diff = 0; + SCALAR diff = 0; for (dimension_t i = 0; i < DIM; ++i) { diff |= (v1[i] ^ v2[i]); } - assert(CountLeadingZeros(diff) <= MAX_BIT_WIDTH); - return MAX_BIT_WIDTH - CountLeadingZeros(diff); + auto diff2 = reinterpret_cast&>(diff); + assert(CountLeadingZeros(diff2) <= MAX_BIT_WIDTH); + return MAX_BIT_WIDTH - CountLeadingZeros(diff2); } template static bool KeyEquals( - const PhPoint& key_a, const PhPoint& key_b, bit_mask_t mask) { + const PhPoint& key_a, const PhPoint& key_b, bit_width_t ignore_bits) { + SCALAR diff{0}; for (dimension_t i = 0; i < DIM; ++i) { - if (((key_a[i] ^ key_b[i]) & mask) != 0) { - return false; - } + diff |= key_a[i] ^ key_b[i]; } - return true; + return diff >> ignore_bits == 0; } // ************************************************************************ diff --git a/phtree/common/debug_helper.h b/include/phtree/common/debug_helper.h similarity index 98% rename from phtree/common/debug_helper.h rename to include/phtree/common/debug_helper.h index ede89586..e3dc136e 100644 --- a/phtree/common/debug_helper.h +++ b/include/phtree/common/debug_helper.h @@ -39,6 +39,7 @@ class PhTreeDebugHelper { template static void CheckConsistency(const TREE& tree) { tree.GetInternalTree().GetDebugHelper().CheckConsistency(); + tree.CheckConsistencyExternal(); } /* diff --git a/include/phtree/common/flat_array_map.h b/include/phtree/common/flat_array_map.h new file mode 100644 index 00000000..9e55f514 --- /dev/null +++ 
b/include/phtree/common/flat_array_map.h @@ -0,0 +1,302 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_FLAT_ARRAY_MAP_H +#define PHTREE_COMMON_FLAT_ARRAY_MAP_H + +#include "bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the array_map implementation, which is used in low-dimensional nodes in the + * PH-Tree. + */ +namespace improbable::phtree { + +template +class flat_array_map; + +namespace detail { + +template +using flat_map_pair = std::pair; + +template +class flat_map_iterator { + friend flat_array_map; + + public: + flat_map_iterator() : first{0}, map_{nullptr} {}; + + explicit flat_map_iterator(Key index, const flat_array_map* map) + : first{index}, map_{map} { + assert(index <= SIZE); + } + + auto& operator*() const { + assert(first < SIZE && map_->occupied(first)); + return const_cast&>(map_->data(first)); + } + + auto* operator->() const { + assert(first < SIZE && map_->occupied(first)); + return const_cast*>(&map_->data(first)); + } + + auto& operator++() noexcept { + first = (first + 1) >= SIZE ? 
SIZE : map_->lower_bound_index(first + 1); + return *this; + } + + auto operator++(int) noexcept { + flat_map_iterator it(first, map_); + ++(*this); + return it; + } + + friend bool operator==(const flat_map_iterator& left, const flat_map_iterator& right) { + return left.first == right.first; + } + + friend bool operator!=(const flat_map_iterator& left, const flat_map_iterator& right) { + return left.first != right.first; + } + + private: + Key first; + const flat_array_map* map_; +}; +} // namespace detail + +/* + * The array_map is a flat map implementation that uses an array of SIZE=2^DIM. The key is + * effectively the position in the array. + * + * It has O(1) insertion/removal time complexity, but O(2^DIM) space complexity, so it is best used + * when DIM is low and/or the map is known to have a high fill ratio. + */ +template +class flat_array_map { + static_assert(std::is_integral() && "Key type must be integer"); + static_assert(std::is_unsigned() && "Key type must unsigned"); + using map_pair = detail::flat_map_pair; + using iterator = detail::flat_map_iterator; + friend iterator; + + public: + [[nodiscard]] auto find(Key index) noexcept { + return iterator{occupied(index) ? 
index : SIZE, this}; + } + + [[nodiscard]] auto lower_bound(Key index) const noexcept { + return iterator{lower_bound_index(index), this}; + } + + [[nodiscard]] auto begin() const noexcept { + return iterator{lower_bound_index(0), this}; + } + + [[nodiscard]] auto cbegin() const noexcept { + return iterator{lower_bound_index(0), this}; + } + + [[nodiscard]] auto end() const noexcept { + return iterator{SIZE, this}; + } + + ~flat_array_map() noexcept { + if (occupancy != 0) { + for (Key i = 0; i < SIZE; ++i) { + if (occupied(i)) { + data(i).~pair(); + } + } + } + } + + [[nodiscard]] size_t size() const noexcept { + constexpr size_t BITS = + std::numeric_limits::digits + std::numeric_limits::is_signed; + return std::bitset(occupancy).count(); + } + + template + std::pair try_emplace(Key index, Args&&... args) { + if (!occupied(index)) { + new (reinterpret_cast(&data_[index])) map_pair( + std::piecewise_construct, + std::forward_as_tuple(index), + std::forward_as_tuple(std::forward(args)...)); + occupy(index); + return {&data(index), true}; + } + return {&data(index), false}; + } + + bool erase(Key index) noexcept { + if (occupied(index)) { + data(index).~pair(); + unoccupy(index); + return true; + } + return false; + } + + bool erase(const iterator& iterator) noexcept { + return erase(iterator.first); + } + + private: + /* + * This returns the element at the given index, which is _not_ the n'th element (for n = index). 
+ */ + map_pair& data(Key index) noexcept { + assert(occupied(index)); + return *std::launder(reinterpret_cast(&data_[index])); + } + + const map_pair& data(Key index) const noexcept { + assert(occupied(index)); + return *std::launder(reinterpret_cast(&data_[index])); + } + + [[nodiscard]] Key lower_bound_index(Key index) const noexcept { + assert(index < SIZE); + Key num_zeros = CountTrailingZeros(occupancy >> index); + // num_zeros may be equal to SIZE if no bits remain + return std::min(SIZE, index + num_zeros); + } + + void occupy(Key index) noexcept { + assert(index < SIZE); + assert(!occupied(index)); + // flip the bit + occupancy ^= (Key{1} << index); + } + + void unoccupy(Key index) noexcept { + assert(index < SIZE); + assert(occupied(index)); + // flip the bit + occupancy ^= (Key{1} << index); + } + + [[nodiscard]] bool occupied(Key index) const noexcept { + return (occupancy >> index) & Key{1}; + } + + Key occupancy = 0; + // We use an untyped array to avoid implicit calls to constructors and destructors of entries. + std::aligned_storage_t data_[SIZE]; +}; + +/* + * array_map is a wrapper around flat_array_map. It introduces one layer of indirection. + * This is useful to decouple instantiation of a node from instantiation of its descendants + * (the flat_array_map directly instantiates an array of descendants). 
+ */ +template +class array_map { + static_assert(SIZE <= 64); // or else we need to adapt 'occupancy' + static_assert(SIZE > 0); + using iterator = improbable::phtree::detail::flat_map_iterator; + + public: + array_map() { + data_ = new flat_array_map(); + } + + array_map(const array_map& other) = delete; + array_map& operator=(const array_map& other) = delete; + + array_map(array_map&& other) noexcept : data_{other.data_} { + other.data_ = nullptr; + } + + array_map& operator=(array_map&& other) noexcept { + data_ = other.data_; + other.data_ = nullptr; + return *this; + } + + ~array_map() { + delete data_; + } + + [[nodiscard]] auto find(Key index) noexcept { + return data_->find(index); + } + + [[nodiscard]] auto find(Key key) const noexcept { + return const_cast(*this).find(key); + } + + [[nodiscard]] auto lower_bound(Key index) const { + return data_->lower_bound(index); + } + + [[nodiscard]] auto begin() const { + return data_->begin(); + } + + [[nodiscard]] iterator cbegin() const { + return data_->cbegin(); + } + + [[nodiscard]] auto end() const { + return data_->end(); + } + + template + auto emplace(Args&&... args) { + return data_->try_emplace(std::forward(args)...); + } + + template + auto try_emplace(Key index, Args&&... args) { + return data_->try_emplace(index, std::forward(args)...); + } + + template + auto try_emplace(const iterator&, Key index, Args&&... args) { + // We ignore the iterator, this is an array based collection, so access is ~O(1). 
+ return data_->try_emplace(index, std::forward(args)...).first; + } + + bool erase(Key index) { + return data_->erase(index); + } + + bool erase(const iterator& iterator) { + return data_->erase(iterator); + } + + [[nodiscard]] size_t size() const { + return data_->size(); + } + + private: + flat_array_map* data_; +}; + +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_FLAT_ARRAY_MAP_H diff --git a/phtree/common/flat_sparse_map.h b/include/phtree/common/flat_sparse_map.h similarity index 66% rename from phtree/common/flat_sparse_map.h rename to include/phtree/common/flat_sparse_map.h index 3c264223..8fadff03 100644 --- a/phtree/common/flat_sparse_map.h +++ b/include/phtree/common/flat_sparse_map.h @@ -30,25 +30,23 @@ */ namespace improbable::phtree { -namespace { -template -using PhFlatMapPair = std::pair; - -using index_t = std::int32_t; -} // namespace - /* * The sparse_map is a flat map implementation that uses an array of *at* *most* SIZE=2^DIM. * The array contains a list sorted by key. * * It has O(log n) lookup and O(n) insertion/removal time complexity, space complexity is O(n). 
*/ -template +template class sparse_map { + using Entry = std::pair; + using iterator = typename std::vector::iterator; + public: - explicit sparse_map() : data_{} {}; + explicit sparse_map() : data_{} { + data_.reserve(4); + } - [[nodiscard]] auto find(size_t key) { + [[nodiscard]] auto find(KeyT key) { auto it = lower_bound(key); if (it != data_.end() && it->first == key) { return it; @@ -56,7 +54,7 @@ class sparse_map { return data_.end(); } - [[nodiscard]] auto find(size_t key) const { + [[nodiscard]] auto find(KeyT key) const { auto it = lower_bound(key); if (it != data_.end() && it->first == key) { return it; @@ -64,16 +62,15 @@ class sparse_map { return data_.end(); } - [[nodiscard]] auto lower_bound(size_t key) { - return std::lower_bound( - data_.begin(), data_.end(), key, [](PhFlatMapPair& left, const size_t key) { - return left.first < key; - }); + [[nodiscard]] auto lower_bound(KeyT key) { + return std::lower_bound(data_.begin(), data_.end(), key, [](Entry& left, const KeyT key) { + return left.first < key; + }); } - [[nodiscard]] auto lower_bound(size_t key) const { + [[nodiscard]] auto lower_bound(KeyT key) const { return std::lower_bound( - data_.cbegin(), data_.cend(), key, [](const PhFlatMapPair& left, const size_t key) { + data_.cbegin(), data_.cend(), key, [](const Entry& left, const KeyT key) { return left.first < key; }); } @@ -99,24 +96,31 @@ class sparse_map { } template - auto emplace(Args&&... args) { - return try_emplace_base(std::forward(args)...); + auto emplace(KeyT key, Args&&... args) { + auto iter = lower_bound(key); + return try_emplace_base(iter, key, std::forward(args)...); + } + + template + auto try_emplace(KeyT key, Args&&... args) { + auto iter = lower_bound(key); + return try_emplace_base(iter, key, std::forward(args)...); } template - auto try_emplace(size_t key, Args&&... args) { - return try_emplace_base(key, std::forward(args)...); + auto try_emplace(iterator iter, KeyT key, Args&&... 
args) { + return try_emplace_base(iter, key, std::forward(args)...).first; } - void erase(size_t key) { + void erase(KeyT key) { auto it = lower_bound(key); if (it != end() && it->first == key) { data_.erase(it); } } - void erase(const typename std::vector>::iterator& iterator) { - data_.erase(iterator); + void erase(const iterator& iter) { + data_.erase(iter); } [[nodiscard]] size_t size() const { @@ -125,18 +129,7 @@ class sparse_map { private: template - auto emplace_base(size_t key, Args&&... args) { - auto it = lower_bound(key); - if (it != end() && it->first == key) { - return std::make_pair(it, false); - } else { - return std::make_pair(data_.emplace(it, key, std::forward(args)...), true); - } - } - - template - auto try_emplace_base(size_t key, Args&&... args) { - auto it = lower_bound(key); + auto try_emplace_base(const iterator& it, KeyT key, Args&&... args) { if (it != end() && it->first == key) { return std::make_pair(it, false); } else { @@ -149,7 +142,7 @@ class sparse_map { } } - std::vector> data_; + std::vector data_; }; } // namespace improbable::phtree diff --git a/phtree/common/tree_stats.h b/include/phtree/common/tree_stats.h similarity index 100% rename from phtree/common/tree_stats.h rename to include/phtree/common/tree_stats.h diff --git a/phtree/common/converter.h b/include/phtree/converter.h similarity index 93% rename from phtree/common/converter.h rename to include/phtree/converter.h index 012c0454..9781d39b 100644 --- a/phtree/common/converter.h +++ b/include/phtree/converter.h @@ -17,7 +17,7 @@ #ifndef PHTREE_COMMON_CONVERTER_H #define PHTREE_COMMON_CONVERTER_H -#include "base_types.h" +#include "common/common.h" #include /* @@ -90,7 +90,7 @@ class ScalarConverterMultiply { public: static scalar_64_t pre(double value) { - return value * MULTIPLY; + return static_cast(value * MULTIPLY); } static double post(scalar_64_t value) { @@ -98,7 +98,7 @@ class ScalarConverterMultiply { } static scalar_32_t pre(float value) { - return value * 
MULTIPLY; + return static_cast(value * MULTIPLY); } static float post(scalar_32_t value) { @@ -126,7 +126,9 @@ class ConverterBase { using KeyExternal = KEY_EXTERNAL; using KeyInternal = PhPoint; using QueryBoxExternal = QUERY_POINT_EXTERNAL; - using QueryBoxInternal = PhBox; + using QueryBoxInternal = PhBox; + using QueryPointExternal = PhPoint; + using QueryPointInternal = PhPoint; }; /* @@ -174,6 +176,8 @@ template < typename CONVERT = ScalarConverterIEEE> class SimplePointConverter : public ConverterPointBase { using BASE = ConverterPointBase; + + public: using Point = typename BASE::KeyExternal; using PointInternal = typename BASE::KeyInternal; using QueryBox = typename BASE::QueryBoxExternal; @@ -215,9 +219,14 @@ template < typename CONVERT = ScalarConverterIEEE> class SimpleBoxConverter : public ConverterBoxBase { using BASE = ConverterBoxBase; + + public: using Box = typename BASE::KeyExternal; using PointInternal = typename BASE::KeyInternal; using QueryBox = typename BASE::QueryBoxExternal; + using QueryBoxInternal = typename BASE::QueryBoxInternal; + using QueryPoint = typename BASE::QueryPointExternal; + using QueryPointInternal = typename BASE::QueryPointInternal; static_assert(std::is_same>::value); static_assert(std::is_same>::value); @@ -243,7 +252,7 @@ class SimpleBoxConverter : public ConverterBoxBase out; + QueryBoxInternal out; auto& min = out.min(); auto& max = out.max(); for (dimension_t i = 0; i < DIM; ++i) { @@ -253,6 +262,22 @@ class SimpleBoxConverter : public ConverterBoxBase #include #include diff --git a/include/phtree/filter.h b/include/phtree/filter.h new file mode 100644 index 00000000..5e57a3dd --- /dev/null +++ b/include/phtree/filter.h @@ -0,0 +1,419 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_FILTERS_H +#define PHTREE_COMMON_FILTERS_H + +#include "converter.h" +#include "distance.h" +#include +#include +#include +#include +#include + +namespace improbable::phtree { + +/* + * Any iterator that has a filter defined will traverse nodes or return values if and only if the + * filter function returns 'true'. The filter functions are called for every node and every entry + * (note: internally, nodes are also stored in entries, but these entries will be passed to the + * filter for nodes) that the iterator encounters. By implication, it will never call the filter + * function for nodes or entries if their respective parent node has already been rejected. + * + * There are separate filter functions for nodes and for key/value entries. + * + * Every filter needs to provide two functions: + * - bool IsEntryValid(const PhPoint& key, const T& value); + * This function is called for every key/value pair that the query encounters. The function + * should return 'true' iff the key/value should be added to the query result. + * The parameters are the key and value of the key/value pair. + * NOTE: When using a MultiMap, 'T' becomes the type of the 'bucket', i.e. the type of the + * container that holds multiple entries for a given coordinate. + * - bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore); + * This function is called for every node that the query encounters. The function should + * return 'true' if the node should be traversed and searched for potential results. 
+ * The parameters are the prefix of the node and the number of least significant bits of the + * prefix that can (and should) be ignored. The bits of the prefix that should be ignored can + * have any value. + * + * - bool IsBucketEntryValid(const KeyT& key, const ValueT& value); + * This is only used/required for MultiMaps, implementations for a normal PhTree are ignored. + * In case of a MultiMap, this method is called for every entry in a bucket (see above). + */ + +/* + * The no-op filter is the default filter for the PH-Tree. It always returns 'true'. + */ +struct FilterNoOp { + /* + * @param key The key/coordinate of the entry. + * @param value The value of the entry. For MultiMaps, this is a container of values. + * @returns This default implementation always returns `true`. + */ + template + constexpr bool IsEntryValid(const KeyT& /*key*/, const ValueT& /*value*/) const noexcept { + return true; + } + + /* + * @param prefix The prefix of node. Any coordinate in the nodes shares this prefix. + * @param bits_to_ignore The number of bits of the prefix that should be ignored because they + * are NOT the same for all coordinates in the node. For example, assuming 64bit values, if the + * node represents coordinates that all share the first 10 bits of the prefix, then the value of + * bits_to_ignore is 64-10=54. + * @returns This default implementation always returns `true`. + */ + template + constexpr bool IsNodeValid(const KeyT& /*prefix*/, int /*bits_to_ignore*/) const noexcept { + return true; + } + + /* + * This is checked once for every entry in a bucket. The method is called once a call to + * 'IsEntryValid` for the same bucket has returned 'true'. A typical implementation + * simply returns `true` or checks some values of the entry. + * @param key The key/coordinate of the bucket entry. + * @param value The value of the entry. + * @returns This default implementation always returns `true`. 
+ */ + template + constexpr bool IsBucketEntryValid(const KeyT& /*key*/, const ValueT& /*value*/) const noexcept { + return true; + } +}; + +/* + * The AABB filter can be used to query a point tree for an axis aligned bounding box (AABB). + * The result is equivalent to that of the 'begin_query(...)' function. + */ +template +class FilterAABB { + using KeyExternal = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + static constexpr auto DIM = CONVERTER::DimInternal; + + public: + FilterAABB( + const KeyExternal& min_include, const KeyExternal& max_include, const CONVERTER& converter) + : min_external_{min_include} + , max_external_{max_include} + , min_internal_{converter.pre(min_include)} + , max_internal_{converter.pre(max_include)} + , converter_{converter} {}; + + /* + * This function allows resizing/shifting the AABB while iterating over the tree. + */ + void set(const KeyExternal& min_include, const KeyExternal& max_include) { + min_external_ = min_include; + max_external_ = max_include; + min_internal_ = converter_.get().pre(min_include); + max_internal_ = converter_.get().pre(max_include); + } + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T& /*value*/) const { + auto point = converter_.get().post(key); + for (dimension_t i = 0; i < DIM; ++i) { + if (point[i] < min_external_[i] || point[i] > max_external_[i]) { + return false; + } + } + return true; + } + + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + for (dimension_t i = 0; i < DIM; ++i) { + if ((prefix[i] | node_max_bits) < min_internal_[i] || + (prefix[i] & 
node_min_bits) > max_internal_[i]) { + return false; + } + } + return true; + } + + private: + KeyExternal min_external_; + KeyExternal max_external_; + KeyInternal min_internal_; + KeyInternal max_internal_; + std::reference_wrapper converter_; +}; + +/* + * The sphere filter can be used to query a point tree for a sphere. + */ +template +class FilterSphere { + using KeyExternal = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + static constexpr auto DIM = CONVERTER::DimInternal; + + public: + template > + FilterSphere( + const KeyExternal& center, + const double radius, + const CONVERTER& converter, + DIST&& distance_function = DIST()) + : center_external_{center} + , center_internal_{converter.pre(center)} + , radius_{radius} + , converter_{converter} + , distance_function_(std::forward(distance_function)){}; + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T&) const { + KeyExternal point = converter_.get().post(key); + return distance_function_(center_external_, point) <= radius_; + } + + /* + * Calculate whether AABB encompassing all possible points in the node intersects with the + * sphere. 
+ */ + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // we always want to traverse the root node (bits_to_ignore == 64) + + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + KeyInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // calculate lower and upper bound for dimension for given node + ScalarInternal lo = prefix[i] & node_min_bits; + ScalarInternal hi = prefix[i] | node_max_bits; + + // choose value closest to center for dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); + } + + KeyExternal closest_point = converter_.get().post(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + private: + KeyExternal center_external_; + KeyInternal center_internal_; + double radius_; + std::reference_wrapper converter_; + DISTANCE distance_function_; +}; +// deduction guide +template < + typename CONV, + typename DIST = DistanceEuclidean, + typename P = typename CONV::KeyExternal> +FilterSphere(const P&, double, const CONV&, DIST&& fn = DIST()) -> FilterSphere; + +/* + * AABB filter for box keys. + * It detects all boxes that overlap partially or fully with the query box. 
+ */ +template +class FilterBoxAABB { + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + using QueryPoint = typename CONVERTER::QueryPointExternal; + using QueryPointInternal = typename CONVERTER::QueryPointInternal; + static constexpr auto DIM = CONVERTER::DimExternal; + + public: + FilterBoxAABB( + const QueryPoint& min_include, const QueryPoint& max_include, const CONVERTER& converter) + : min_internal_{converter.pre_query(min_include)} + , max_internal_{converter.pre_query(max_include)} + , converter_{converter} {}; + + /* + * This function allows resizing/shifting the AABB while iterating over the tree. + */ + void set(const QueryPoint& min_include, const QueryPoint& max_include) { + min_internal_ = converter_.get().pre_query(min_include); + max_internal_ = converter_.get().pre_query(max_include); + } + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T& /*value*/) const { + for (dimension_t i = 0; i < DIM; ++i) { + if (key[i + DIM] < min_internal_[i] || key[i] > max_internal_[i]) { + return false; + } + } + return true; + } + + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + for (dimension_t i = 0; i < DIM; ++i) { + if ((prefix[i] | node_max_bits) < min_internal_[i] || + (prefix[i + DIM] & node_min_bits) > max_internal_[i]) { + return false; + } + } + return true; + } + + private: + QueryPointInternal min_internal_; + QueryPointInternal max_internal_; + std::reference_wrapper converter_; +}; + +/* + * The box sphere filter can be used to query a PH-Tree for boxes that intersect with a sphere. 
+ */ +template +class FilterBoxSphere { + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + using QueryPoint = typename CONVERTER::QueryPointExternal; + using QueryPointInternal = typename CONVERTER::QueryPointInternal; + static constexpr auto DIM = CONVERTER::DimExternal; + + public: + template > + FilterBoxSphere( + const QueryPoint& center, + const double radius, + const CONVERTER& converter, + DIST&& distance_function = DIST()) + : center_external_{center} + , center_internal_{converter.pre_query(center)} + , radius_{radius} + , converter_{converter} + , distance_function_(std::forward(distance_function)){}; + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T&) const { + QueryPointInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // choose value closest to center for each dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], key[i], key[i + DIM]); + } + QueryPoint closest_point = converter_.get().post_query(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + /* + * Calculate whether AABB of all possible points in the node intersects with the sphere. 
+ */ + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // we always want to traverse the root node (bits_to_ignore == 64) + + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + QueryPointInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // calculate lower and upper bound for dimension for given node + ScalarInternal lo = prefix[i] & node_min_bits; + ScalarInternal hi = prefix[i + DIM] | node_max_bits; + + // choose value closest to center for dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); + } + + QueryPoint closest_point = converter_.get().post_query(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + private: + QueryPoint center_external_; + QueryPointInternal center_internal_; + double radius_; + std::reference_wrapper converter_; + DISTANCE distance_function_; +}; +// deduction guide +template < + typename CONV, + typename DIST = DistanceEuclidean, + typename P = typename CONV::KeyExternal> +FilterBoxSphere(const P&, double, const CONV&, DIST&& fn = DIST()) -> FilterBoxSphere; + +/* + * AABB filter for MultiMaps. + */ +template +class FilterMultiMapAABB : public FilterAABB { + using Key = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + + public: + FilterMultiMapAABB(const Key& min_include, const Key& max_include, CONVERTER& converter) + : FilterAABB(min_include, max_include, converter){}; + + template + [[nodiscard]] inline bool IsBucketEntryValid(const KeyInternal&, const ValueT&) const noexcept { + return true; + } +}; + +/* + * Sphere filter for MultiMaps. 
+ */ +template +class FilterMultiMapSphere : public FilterSphere { + using Key = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + + public: + template > + FilterMultiMapSphere( + const Key& center, double radius, const CONVERTER& converter, DIST&& dist_fn = DIST()) + : FilterSphere(center, radius, converter, std::forward(dist_fn)){}; + + template + [[nodiscard]] inline bool IsBucketEntryValid(const KeyInternal&, const ValueT&) const noexcept { + return true; + } +}; +// deduction guide +template < + typename CONV, + typename DIST = DistanceEuclidean, + typename P = typename CONV::KeyExternal> +FilterMultiMapSphere(const P&, double, const CONV&, DIST&& fn = DIST()) + -> FilterMultiMapSphere; + +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_FILTERS_H diff --git a/phtree/phtree.h b/include/phtree/phtree.h similarity index 74% rename from phtree/phtree.h rename to include/phtree/phtree.h index 54dfd2dd..57417f27 100644 --- a/phtree/phtree.h +++ b/include/phtree/phtree.h @@ -32,8 +32,6 @@ namespace improbable::phtree { template > class PhTree { friend PhTreeDebugHelper; - using KeyInternal = typename CONVERTER::KeyInternal; - using QueryBox = typename CONVERTER::QueryBoxExternal; using Key = typename CONVERTER::KeyExternal; static constexpr dimension_t DimInternal = CONVERTER::DimInternal; @@ -42,7 +40,17 @@ class PhTree { typename std::conditional<(DIM == DimInternal), QueryPoint, QueryIntersect>::type; public: - explicit PhTree(CONVERTER converter = CONVERTER()) : tree_{converter}, converter_{converter} {} + // Unless specified otherwise this is just PhBox + using QueryBox = typename CONVERTER::QueryBoxExternal; + + template + explicit PhTree(CONV&& converter = CONV()) : tree_{&converter_}, converter_{converter} {} + + PhTree(const PhTree& other) = delete; + PhTree& operator=(const PhTree& other) = delete; + PhTree(PhTree&& other) noexcept = default; + PhTree& operator=(PhTree&& other) noexcept = default; + 
~PhTree() noexcept = default; /* * Attempts to build and insert a key and a value into the tree. @@ -60,7 +68,7 @@ class PhTree { */ template std::pair emplace(const Key& key, Args&&... args) { - return tree_.emplace(converter_.pre(key), std::forward(args)...); + return tree_.try_emplace(converter_.pre(key), std::forward(args)...); } /* @@ -80,7 +88,7 @@ class PhTree { */ template std::pair emplace_hint(const ITERATOR& iterator, const Key& key, Args&&... args) { - return tree_.emplace_hint(iterator, converter_.pre(key), std::forward(args)...); + return tree_.try_emplace(iterator, converter_.pre(key), std::forward(args)...); } /* @@ -93,6 +101,22 @@ class PhTree { return tree_.insert(converter_.pre(key), value); } + /* + * See emplace(). + */ + template + std::pair try_emplace(const Key& key, Args&&... args) { + return tree_.try_emplace(converter_.pre(key), std::forward(args)...); + } + + /* + * See emplace_hint(). + */ + template + std::pair try_emplace(const ITERATOR& iterator, const Key& key, Args&&... args) { + return tree_.try_emplace(iterator, converter_.pre(key), std::forward(args)...); + } + /* * @return the value stored at position 'key'. If no such value exists, one is added to the tree * and returned. @@ -147,6 +171,44 @@ class PhTree { return tree_.erase(iterator); } + /* + * This function attempts to remove a 'value' from 'old_key' and reinsert it for 'new_key'. + * + * The function will report _success_ in the following cases: + * - the value was removed from the old position and reinserted at the new position + * - the position and new position refer to the same bucket. + * + * The function will report _failure_ in the following cases: + * - The value was already present in the new position + * - The value was not present in the old position + * + * This method will _not_ remove the value from the old position if it is already present at the + * new position. 
+ * + * @param old_key The old position + * @param new_key The new position + * @return '1' if the 'value' was moved, otherwise '0'. + */ + auto relocate(const Key& old_key, const Key& new_key) { + return tree_.relocate_if( + converter_.pre(old_key), converter_.pre(new_key), [](const T&) { return true; }); + } + + /* + * Relocate (move) an entry from one position to another, subject to a predicate. + * + * @param old_key The old position + * @param new_key The new position + * @param predicate The predicate is called for every value before it is relocated. + * If the predicate returns 'false', the relocation is aborted. + * @return '1' if the 'value' was moved, otherwise '0'. + */ + template + auto relocate_if(const Key& old_key, const Key& new_key, PRED&& predicate) { + return tree_.relocate_if( + converter_.pre(old_key), converter_.pre(new_key), std::forward(predicate)); + } + /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter @@ -158,9 +220,9 @@ class PhTree { * sub-nodes before they are returned or traversed. Any filter function must follow the * signature of the default 'FilterNoOp`. */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - tree_.for_each(callback, filter); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + tree_.for_each(std::forward(callback), std::forward(filter)); } /* @@ -175,15 +237,18 @@ class PhTree { * signature of the default 'FilterNoOp`. 
*/ template < - typename CALLBACK_FN, + typename CALLBACK, typename FILTER = FilterNoOp, typename QUERY_TYPE = DEFAULT_QUERY_TYPE> void for_each( QueryBox query_box, - CALLBACK_FN& callback, - FILTER filter = FILTER(), + CALLBACK&& callback, + FILTER&& filter = FILTER(), QUERY_TYPE query_type = QUERY_TYPE()) const { - tree_.for_each(query_type(converter_.pre_query(query_box)), callback, filter); + tree_.for_each( + query_type(converter_.pre_query(query_box)), + std::forward(callback), + std::forward(filter)); } /* @@ -194,8 +259,8 @@ class PhTree { * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - return tree_.begin(filter); + auto begin(FILTER&& filter = FILTER()) const { + return tree_.begin(std::forward(filter)); } /* @@ -211,9 +276,10 @@ class PhTree { template auto begin_query( const QueryBox& query_box, - FILTER filter = FILTER(), + FILTER&& filter = FILTER(), QUERY_TYPE query_type = DEFAULT_QUERY_TYPE()) const { - return tree_.begin_query(query_type(converter_.pre_query(query_box)), filter); + return tree_.begin_query( + query_type(converter_.pre_query(query_box)), std::forward(filter)); } /* @@ -238,18 +304,21 @@ class PhTree { auto begin_knn_query( size_t min_results, const Key& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { // We use pre() instead of pre_query() here because, strictly speaking, we want to // find the nearest neighbors of a (fictional) key, which may as well be a box. return tree_.begin_knn_query( - min_results, converter_.pre(center), distance_function, filter); + min_results, + converter_.pre(center), + std::forward(distance_function), + std::forward(filter)); } /* * @return An iterator representing the tree's 'end'. 
*/ - const auto& end() const { + auto end() const { return tree_.end(); } @@ -287,11 +356,19 @@ class PhTree { return tree_; } + void CheckConsistencyExternal() const { + [[maybe_unused]] size_t n = 0; + for ([[maybe_unused]] const auto& entry : tree_) { + ++n; + } + assert(n == size()); + } + v16::PhTreeV16 tree_; CONVERTER converter_; }; -/* +/** * Floating-point `double` version of the PH-Tree. * This version of the tree accepts multi-dimensional keys with floating point (`double`) * coordinates. @@ -306,23 +383,25 @@ class PhTree { template > using PhTreeD = PhTree; -/* +/** * Floating-point `float` version of the PH-Tree. * This version of the tree accepts multi-dimensional keys with floating point (`float`) * coordinates. - * * See 'PhTreeD' for details. */ template > using PhTreeF = PhTree; +/** + * A PH-Tree that uses (axis aligned) boxes as keys. + * See 'PhTreeD' for details. + */ template using PhTreeBox = PhTree; /** * A PH-Tree that uses (axis aligned) boxes as keys. * The boxes are defined with 64bit 'double' floating point coordinates. - * * See 'PhTreeD' for details. */ template > @@ -331,7 +410,6 @@ using PhTreeBoxD = PhTreeBox; /** * A PH-Tree that uses (axis aligned) boxes as keys. * The boxes are defined with 32bit 'float' coordinates. - * * See 'PhTreeD' for details. */ template > diff --git a/phtree/phtree_multimap.h b/include/phtree/phtree_multimap.h similarity index 54% rename from phtree/phtree_multimap.h rename to include/phtree/phtree_multimap.h index 75540f9f..a5de53f8 100644 --- a/phtree/phtree_multimap.h +++ b/include/phtree/phtree_multimap.h @@ -1,5 +1,6 @@ /* * Copyright 2020 Improbable Worlds Limited + * Copyright 2022 Tilmann Zäschke * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +18,7 @@ #ifndef PHTREE_PHTREE_MULTIMAP_H #define PHTREE_PHTREE_MULTIMAP_H +#include "common/b_plus_tree_hash_map.h" #include "common/common.h" #include "v16/phtree_v16.h" #include @@ -56,8 +58,11 @@ class IteratorBase { friend PHTREE; using T = typename PHTREE::ValueType; + protected: + using BucketIterType = typename PHTREE::BucketIterType; + public: - explicit IteratorBase() noexcept : current_value_ptr_{nullptr}, is_finished_{false} {} + explicit IteratorBase() noexcept : current_value_ptr_{nullptr} {} T& operator*() const noexcept { assert(current_value_ptr_); @@ -71,26 +76,16 @@ class IteratorBase { friend bool operator==( const IteratorBase& left, const IteratorBase& right) noexcept { - // Note: The following compares pointers to Entry objects (actually: their values T) - // so it should be _fast_ and return 'true' only for identical entries. - static_assert(std::is_pointer_v); - return (left.is_finished_ && right.Finished()) || - (!left.is_finished_ && !right.Finished() && - left.current_value_ptr_ == right.current_value_ptr_); + return left.current_value_ptr_ == right.current_value_ptr_; } friend bool operator!=( const IteratorBase& left, const IteratorBase& right) noexcept { - return !(left == right); + return left.current_value_ptr_ != right.current_value_ptr_; } protected: - [[nodiscard]] bool Finished() const noexcept { - return is_finished_; - } - void SetFinished() noexcept { - is_finished_ = true; current_value_ptr_ = nullptr; } @@ -100,41 +95,21 @@ class IteratorBase { private: const T* current_value_ptr_; - bool is_finished_; }; -template +template class IteratorNormal : public IteratorBase { friend PHTREE; - using BucketIterType = typename PHTREE::BucketIterType; - using PhTreeIterEndType = typename PHTREE::EndType; + using BucketIterType = typename IteratorBase::BucketIterType; public: - explicit IteratorNormal(const PhTreeIterEndType& iter_ph_end) noexcept - : IteratorBase() - , iter_ph_end_{iter_ph_end} - , iter_ph_{iter_ph_end} - , 
iter_bucket_{} - , filter_{} { - this->SetFinished(); - } + explicit IteratorNormal() noexcept : IteratorBase(), iter_ph_{}, iter_bucket_{} {} - // Why are we passing two iterators by reference + std::move? - // See: https://abseil.io/tips/117 - IteratorNormal( - const PhTreeIterEndType& iter_ph_end, - ITERATOR_PH iter_ph, - BucketIterType iter_bucket, - const FILTER filter = FILTER()) noexcept + template + IteratorNormal(ITER_PH&& iter_ph, BucketIterType&& iter_bucket) noexcept : IteratorBase() - , iter_ph_end_{iter_ph_end} - , iter_ph_{std::move(iter_ph)} - , iter_bucket_{std::move(iter_bucket)} - , filter_{filter} { - if (iter_ph == iter_ph_end) { - this->SetFinished(); - return; - } + , iter_ph_{std::forward(iter_ph)} + , iter_bucket_{std::forward(iter_bucket)} { FindNextElement(); } @@ -168,17 +143,18 @@ class IteratorNormal : public IteratorBase { private: void FindNextElement() { - while (iter_ph_ != iter_ph_end_) { + while (!iter_ph_.IsEnd()) { while (iter_bucket_ != iter_ph_->end()) { // We filter only entries here, nodes are filtered elsewhere - if (filter_.IsEntryValid(iter_ph_.GetCurrentResult()->GetKey(), *iter_bucket_)) { + if (iter_ph_.__Filter().IsBucketEntryValid( + iter_ph_.GetEntry()->GetKey(), *iter_bucket_)) { this->SetCurrentValue(&(*iter_bucket_)); return; } ++iter_bucket_; } ++iter_ph_; - if (iter_ph_ != iter_ph_end_) { + if (!iter_ph_.IsEnd()) { iter_bucket_ = iter_ph_->begin(); } } @@ -186,24 +162,17 @@ class IteratorNormal : public IteratorBase { this->SetFinished(); } - PhTreeIterEndType& iter_ph_end_; ITERATOR_PH iter_ph_; BucketIterType iter_bucket_; - FILTER filter_; }; -template -class IteratorKnn : public IteratorNormal { - using BucketIterType = typename PHTREE::BucketIterType; - using PhTreeIterEndType = typename PHTREE::EndType; - +template +class IteratorKnn : public IteratorNormal { public: - IteratorKnn( - const PhTreeIterEndType& iter_ph_end, - const ITERATOR_PH iter_ph, - BucketIterType iter_bucket, - const FILTER filter) 
noexcept - : IteratorNormal(iter_ph_end, iter_ph, iter_bucket, filter) {} + template + IteratorKnn(ITER_PH&& iter_ph, BucketIterType&& iter_bucket) noexcept + : IteratorNormal( + std::forward(iter_ph), std::forward(iter_bucket)) {} [[nodiscard]] double distance() const noexcept { return this->GetIteratorOfPhTree().distance(); @@ -219,24 +188,32 @@ template < dimension_t DIM, typename T, typename CONVERTER = ConverterNoOp, - typename BUCKET = std::unordered_set, + typename BUCKET = b_plus_tree_hash_set, bool POINT_KEYS = true, typename DEFAULT_QUERY_TYPE = QueryPoint> class PhTreeMultiMap { - friend PhTreeDebugHelper; using KeyInternal = typename CONVERTER::KeyInternal; - using QueryBox = typename CONVERTER::QueryBoxExternal; using Key = typename CONVERTER::KeyExternal; static constexpr dimension_t DimInternal = CONVERTER::DimInternal; using PHTREE = PhTreeMultiMap; - - public: using ValueType = T; using BucketIterType = decltype(std::declval().begin()); - using EndType = decltype(std::declval>().end()); + using EndType = decltype(std::declval>().end()); + + friend PhTreeDebugHelper; + friend IteratorBase; + + public: + using QueryBox = typename CONVERTER::QueryBoxExternal; explicit PhTreeMultiMap(CONVERTER converter = CONVERTER()) - : tree_{converter}, converter_{converter}, size_{0} {} + : tree_{&converter_}, converter_{converter}, size_{0} {} + + PhTreeMultiMap(const PhTreeMultiMap& other) = delete; + PhTreeMultiMap& operator=(const PhTreeMultiMap& other) = delete; + PhTreeMultiMap(PhTreeMultiMap&& other) noexcept = default; + PhTreeMultiMap& operator=(PhTreeMultiMap&& other) noexcept = default; + ~PhTreeMultiMap() noexcept = default; /* * Attempts to build and insert a key and a value into the tree. @@ -254,7 +231,7 @@ class PhTreeMultiMap { */ template std::pair emplace(const Key& key, Args&&... 
args) { - auto& outer_iter = tree_.emplace(converter_.pre(key)).first; + auto& outer_iter = tree_.try_emplace(converter_.pre(key)).first; auto bucket_iter = outer_iter.emplace(std::forward(args)...); size_ += bucket_iter.second ? 1 : 0; return {const_cast(*bucket_iter.first), bucket_iter.second}; @@ -269,7 +246,7 @@ class PhTreeMultiMap { * to erase() and if no other modifications occurred. * The following is valid: * - * // Move value from key1 to key2 + * // Move value from key1 to key2 (if you don't want to use relocate() ). * auto iter = tree.find(key1); * auto value = iter.second(); // The value may become invalid in erase() * erase(iter); @@ -277,7 +254,7 @@ class PhTreeMultiMap { */ template std::pair emplace_hint(const ITERATOR& iterator, const Key& key, Args&&... args) { - auto result_ph = tree_.emplace_hint(iterator.GetIteratorOfPhTree(), converter_.pre(key)); + auto result_ph = tree_.try_emplace(iterator.GetIteratorOfPhTree(), converter_.pre(key)); auto& bucket = result_ph.first; if (result_ph.second) { // new bucket @@ -306,6 +283,22 @@ class PhTreeMultiMap { return emplace(key, value); } + /* + * See emplace(). + */ + template + std::pair try_emplace(const Key& key, Args&&... args) { + return emplace(key, std::forward(args)...); + } + + /* + * See emplace_hint(). + */ + template + std::pair try_emplace(const ITERATOR& iterator, const Key& key, Args&&... args) { + return emplace_hint(iterator, key, std::forward(args)...); + } + /* * @return '1', if a value is associated with the provided key, otherwise '0'. */ @@ -337,16 +330,11 @@ class PhTreeMultiMap { * See std::unordered_multimap::find(). 
* * @param key the key to look up - * @return an iterator that points either to the the first value associated with the key or + * @return an iterator that points either to the first value associated with the key or * to {@code end()} if no value was found */ auto find(const Key& key) const { - auto outer_iter = tree_.find(converter_.pre(key)); - if (outer_iter == tree_.end()) { - return CreateIterator(tree_.end(), bucket_dummy_end_); - } - auto bucket_iter = outer_iter.second().begin(); - return CreateIterator(outer_iter, bucket_iter); + return CreateIterator(tree_.find(converter_.pre(key))); } /* @@ -358,12 +346,7 @@ class PhTreeMultiMap { * or to {@code end()} if the key/value pair was found */ auto find(const Key& key, const T& value) const { - auto outer_iter = tree_.find(converter_.pre(key)); - if (outer_iter == tree_.end()) { - return CreateIterator(tree_.end(), bucket_dummy_end_); - } - auto bucket_iter = outer_iter.second().find(value); - return CreateIterator(outer_iter, bucket_iter); + return CreateIteratorFind(tree_.find(converter_.pre(key)), value); } /* @@ -388,7 +371,7 @@ class PhTreeMultiMap { /* * See std::map::erase(). Removes any entry located at the provided iterator. * - * This function uses the iterator to directly erase the entry so it is usually faster than + * This function uses the iterator to directly erase the entry, so it is usually faster than * erase(key, value). * * @return '1' if a value was found, otherwise '0'. @@ -416,66 +399,192 @@ class PhTreeMultiMap { /* * This function attempts to remove the 'value' from 'old_key' and reinsert it for 'new_key'. * - * The relocate will report _success_ in the following cases: + * The relocate function will report _success_ in the following cases: * - the value was removed from the old position and reinserted at the new position - * - the position and new position refer to the same bucket. + * - the old position and new position are identical. 
* * - The relocate will report_failure_ in the following cases: + * The relocate function will report _failure_ in the following cases: * - The value was already present in the new position * - The value was not present in the old position * - * This method will _always_ attempt to insert the value at the new position even if the value - * was not found at the old position. - * This method will _not_ remove the value from the old position if it is already present at the - * new position. + * In case of _failure_, this function guarantees that the tree remains unchanged + * or is returned to its original state (i.e. before the function was called). * * @param old_key The old position * @param new_key The new position - * @param always_erase Setting this flag to 'true' ensures that the value is removed from - * the old position even if it is already present at the new position. This may double the - * execution cost of this method. The default is 'false'. + * @param value The value that needs to be relocated. The relocate() method uses the value's + * '==' operator to identify the entry that should be moved. + * @param verify_exists This setting toggles whether a relocate() between two identical keys + * should verify whether the keys actually exist before returning '1'. + * If set to 'false', this function will return '1' if the keys are identical, + * without checking whether the keys actually exist. Avoiding this check can + * considerably speed up relocate() calls, especially when using a + * ConverterMultiply. + * * @return '1' if a value was found and reinserted, otherwise '0'. 
*/ - size_t relocate( - const Key& old_key, const Key& new_key, const T& value, bool always_erase = false) { - // Be smart: insert first, if the target-map already contains the entry we can avoid erase() - auto new_key_pre = converter_.pre(new_key); - auto& new_bucket = tree_.emplace(new_key_pre).first; - auto new_result = new_bucket.emplace(value); - if (!new_result.second) { - // Entry is already in correct place -> abort - // Return '1' if old/new refer to the same bucket, otherwise '0' - if (converter_.pre(old_key) == new_key_pre) { + template + size_t relocate(const Key& old_key, const Key& new_key, T2&& value, bool verify_exists = true) { + auto fn = [&value](BUCKET& src, BUCKET& dst) -> size_t { + auto it = src.find(value); + if (it != src.end() && dst.emplace(std::move(*it)).second) { + src.erase(it); return 1; } - if (!always_erase) { - // Abort, unless we insist on erase() - return 0; + return 0; + }; + auto count_fn = [&value](BUCKET& src) -> size_t { return src.find(value) != src.end(); }; + return tree_._relocate_mm( + converter_.pre(old_key), converter_.pre(new_key), verify_exists, fn, count_fn); + } + + template + [[deprecated]] size_t relocate2( + const Key& old_key, const Key& new_key, T2&& value, bool count_equals = true) { + auto pair = tree_._find_or_create_two_mm( + converter_.pre(old_key), converter_.pre(new_key), count_equals); + auto& iter_old = pair.first; + auto& iter_new = pair.second; + + if (iter_old.IsEnd()) { + return 0; + } + auto iter_old_value = iter_old->find(value); + if (iter_old_value == iter_old->end()) { + if (iter_new->empty()) { + tree_.erase(iter_new); } + return 0; } - auto old_outer_iter = tree_.find(converter_.pre(old_key)); - if (old_outer_iter == tree_.end()) { - // No entry for old_key -> fail - return 0; + // Are we inserting in same node and same quadrant? Or are the keys equal? 
+ if (iter_old == iter_new) { + assert(old_key == new_key); + return 1; } - auto old_bucket_iter = old_outer_iter->find(value); - if (old_bucket_iter == old_outer_iter->end()) { + assert(iter_old_value != iter_old->end()); + if (!iter_new->emplace(std::move(*iter_old_value)).second) { return 0; } - old_outer_iter->erase(old_bucket_iter); - // clean up - if (old_outer_iter->empty()) { - tree_.erase(old_outer_iter); + iter_old->erase(iter_old_value); + if (iter_old->empty()) { + [[maybe_unused]] auto found = tree_.erase(iter_old); + assert(found); } return 1; } + /* + * This function attempts to remove the 'value' from 'old_key' and reinsert it for 'new_key'. + * + * The relocate function will report _success_ in the following cases: + * - the value was removed from the old position and reinserted at the new position + * - the old position and new position are identical. + * + * The relocate function will report _failure_ in the following cases: + * - The value was already present in the new position + * - The value was not present in the old position + * + * In case of _failure_, this function guarantees that the tree remains unchanged + * or is returned to its original state (i.e. before the function was called). + * + * @param old_key The old position + * @param new_key The new position + * @param pred_fn The predicate that is used for every value at position old_key to evaluate + * whether it should be relocated to new_key. + * @param verify_exists This setting toggles whether a relocate() between two identical keys + * should verify whether the keys actually exist before returning '1'. + * If set to 'false', this function will return '1' if the keys are identical, + * without checking whether the keys actually exist. Avoiding this check can + * considerably speed up relocate() calls, especially when using a + * ConverterMultiply. + * + * @return the number of values that were relocated. 
+ */ + template + size_t relocate_if( + const Key& old_key, const Key& new_key, PREDICATE&& pred_fn, bool verify_exists = true) { + auto fn = [&pred_fn](BUCKET& src, BUCKET& dst) -> size_t { + size_t result = 0; + auto iter_src = src.begin(); + while (iter_src != src.end()) { + if (pred_fn(*iter_src) && dst.emplace(std::move(*iter_src)).second) { + iter_src = src.erase(iter_src); + ++result; + } else { + ++iter_src; + } + } + return result; + }; + auto count_fn = [&pred_fn](BUCKET& src) -> size_t { + size_t result = 0; + auto iter_src = src.begin(); + while (iter_src != src.end()) { + if (pred_fn(*iter_src)) { + ++result; + } + ++iter_src; + } + return result; + }; + return tree_._relocate_mm( + converter_.pre(old_key), converter_.pre(new_key), verify_exists, fn, count_fn); + } + + template + [[deprecated]] size_t relocate_if2( + const Key& old_key, const Key& new_key, PREDICATE&& predicate, bool count_equals = true) { + auto pair = tree_._find_or_create_two_mm( + converter_.pre(old_key), converter_.pre(new_key), count_equals); + auto& iter_old = pair.first; + auto& iter_new = pair.second; + + if (iter_old.IsEnd()) { + assert(iter_new.IsEnd() || !iter_new->empty()); // Otherwise remove iter_new + return 0; + } + + // Are we inserting in same node and same quadrant? Or are the keys equal? + if (iter_old == iter_new) { + assert(old_key == new_key); + return 1; + } + + size_t n = 0; + auto it = iter_old->begin(); + while (it != iter_old->end()) { + if (predicate(*it) && iter_new->emplace(std::move(*it)).second) { + it = iter_old->erase(it); + ++n; + } else { + ++it; + } + } + + if (iter_old->empty()) { + [[maybe_unused]] auto found = tree_.erase(iter_old); + assert(found); + } else if (iter_new->empty()) { + [[maybe_unused]] auto found = tree_.erase(iter_new); + assert(found); + } + return n; + } + + /* + * Relocates all values from one coordinate to another. + * Returns an iterator pointing to the relocated data (or end(), if the relocation failed). 
+ */ + auto relocate_all(const Key& old_key, const Key& new_key) { + return tree_.relocate(old_key, new_key); + } + /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter * functions must implement the same signature as the default 'FilterNoOp'. * * @param callback The callback function to be called for every entry that matches the filter. @@ -485,10 +594,12 @@ class PhTreeMultiMap { * follow the signature of the default 'FilterNoOp`. * The default 'FilterNoOp` filter matches all entries. */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - CallbackWrapper inner_callback{callback, filter, converter_}; - tree_.for_each(inner_callback, WrapFilter(filter)); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + tree_.for_each( + NoOpCallback{}, + WrapCallbackFilter{ + std::forward(callback), std::forward(filter), converter_}); } /* @@ -505,35 +616,30 @@ class PhTreeMultiMap { * The default 'FilterNoOp` filter matches all entries. */ template < - typename CALLBACK_FN, + typename CALLBACK, typename FILTER = FilterNoOp, typename QUERY_TYPE = DEFAULT_QUERY_TYPE> void for_each( QueryBox query_box, - CALLBACK_FN& callback, - const FILTER& filter = FILTER(), + CALLBACK&& callback, + FILTER&& filter = FILTER(), QUERY_TYPE query_type = QUERY_TYPE()) const { - CallbackWrapper inner_callback{callback, filter, converter_}; - tree_.for_each( - query_type(converter_.pre_query(query_box)), inner_callback, WrapFilter(filter)); + tree_.template for_each>( + query_type(converter_.pre_query(query_box)), + {}, + {std::forward(callback), std::forward(filter), converter_}); } /* * Iterates over all entries in the tree. 
The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter * functions must implement the same signature as the default 'FilterNoOp'. * * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - auto outer_iter = tree_.begin(WrapFilter(filter)); - if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, bucket_dummy_end_, filter); - } - auto bucket_iter = outer_iter.second().begin(); - assert(bucket_iter != outer_iter.second().end()); - return CreateIterator(outer_iter, bucket_iter, filter); + auto begin(FILTER&& filter = FILTER()) const { + return CreateIterator(tree_.begin(std::forward(filter))); } /* @@ -549,16 +655,10 @@ class PhTreeMultiMap { template auto begin_query( const QueryBox& query_box, - FILTER filter = FILTER(), - QUERY_TYPE query_type = QUERY_TYPE()) const { - auto outer_iter = - tree_.begin_query(query_type(converter_.pre_query(query_box)), WrapFilter(filter)); - if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, bucket_dummy_end_, filter); - } - auto bucket_iter = outer_iter.second().begin(); - assert(bucket_iter != outer_iter.second().end()); - return CreateIterator(outer_iter, bucket_iter, filter); + FILTER&& filter = FILTER(), + QUERY_TYPE&& query_type = QUERY_TYPE()) const { + return CreateIterator(tree_.begin_query( + query_type(converter_.pre_query(query_box)), std::forward(filter))); } /* @@ -583,25 +683,22 @@ class PhTreeMultiMap { auto begin_knn_query( size_t min_results, const Key& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { // We use pre() instead of pre_query() here because, strictly speaking, we want to // find the nearest 
neighbors of a (fictional) key, which may as well be a box. - auto outer_iter = tree_.begin_knn_query( - min_results, converter_.pre(center), distance_function, WrapFilter(filter)); - if (outer_iter == tree_.end()) { - return CreateIteratorKnn(outer_iter, bucket_dummy_end_, filter); - } - auto bucket_iter = outer_iter.second().begin(); - assert(bucket_iter != outer_iter.second().end()); - return CreateIteratorKnn(outer_iter, bucket_iter, filter); + return CreateIteratorKnn(tree_.begin_knn_query( + min_results, + converter_.pre(center), + std::forward(distance_function), + std::forward(filter))); } /* * @return An iterator representing the tree's 'end'. */ - const auto& end() const { - return the_end_; + auto end() const { + return IteratorNormal{}; } /* @@ -639,100 +736,149 @@ class PhTreeMultiMap { return tree_; } - template - auto CreateIterator( - OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { - return IteratorNormal( - tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + void CheckConsistencyExternal() const { + size_t n = 0; + for (const auto& bucket : tree_) { + assert(!bucket.empty()); + n += bucket.size(); + } + assert(n == size_); } - template - auto CreateIteratorKnn( - OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { - return IteratorKnn( - tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + template + auto CreateIteratorFind(OUTER_ITER&& outer_iter, const T& value) const { + auto bucket_iter = + outer_iter == tree_.end() ? BucketIterType{} : outer_iter.second().find(value); + return IteratorNormal( + std::forward(outer_iter), std::move(bucket_iter)); } - template - static auto WrapFilter(FILTER filter) { - // We always have two iterators, one that traverses the PH-Tree and one that traverses the - // bucket. Using the FilterWrapper we create a new Filter for the PH-Tree iterator. This new - // filter checks only if nodes are valid. 
It cannot check whether buckets are valid. - // The original filter is then used when we iterate over the entries of a bucket. At this - // point, we do not need to check IsNodeValid anymore for each entry (see `IteratorNormal`). - struct FilterWrapper { - [[nodiscard]] constexpr bool IsEntryValid(const KeyInternal&, const BUCKET&) const { - // This filter is checked in the Iterator. - return true; - } - [[nodiscard]] constexpr bool IsNodeValid( - const KeyInternal& prefix, int bits_to_ignore) const { - return filter_.IsNodeValid(prefix, bits_to_ignore); - } - FILTER filter_; - }; - return FilterWrapper{filter}; + template + auto CreateIterator(OUTER_ITER&& outer_iter) const { + auto bucket_iter = + outer_iter == tree_.end() ? BucketIterType{} : outer_iter.second().begin(); + return IteratorNormal( + std::forward(outer_iter), std::move(bucket_iter)); + } + + template + auto CreateIteratorKnn(OUTER_ITER&& outer_iter) const { + auto bucket_iter = + outer_iter == tree_.end() ? BucketIterType{} : outer_iter.second().begin(); + return IteratorKnn( + std::forward(outer_iter), std::move(bucket_iter)); } - template - struct CallbackWrapper { + /* + * This wrapper wraps the Filter and Callback such that the callback is called for every + * entry in any bucket that matches the user defined IsEntryValid(). + */ + template + class WrapCallbackFilter { + public: /* - * The CallbackWrapper ensures that we call the callback on each entry of the bucket. - * The vanilla PH-Tree call it only on the bucket itself. + * We always have two iterators, one that traverses the PH-Tree and returns 'buckets', the + * other iterator traverses the returned buckets. + * The wrapper ensures that the callback is called for every entry in a bucket.. 
*/ - void operator()(const Key& key, const BUCKET& bucket) const { - auto internal_key = converter_.pre(key); - for (auto& entry : bucket) { - if (filter_.IsEntryValid(internal_key, entry)) { - callback_(key, entry); + template + WrapCallbackFilter(CB&& callback, F&& filter, const CONVERTER& converter) + : callback_{std::forward(callback)} + , filter_{std::forward(filter)} + , converter_{converter} {} + + [[nodiscard]] inline bool IsEntryValid( + const KeyInternal& internal_key, const BUCKET& bucket) { + if (filter_.IsEntryValid(internal_key, bucket)) { + auto key = converter_.post(internal_key); + for (auto& entry : bucket) { + if (filter_.IsBucketEntryValid(internal_key, entry)) { + callback_(key, entry); + } } } + // Return false. We already called the callback. + return false; + } + + [[nodiscard]] inline bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) { + return filter_.IsNodeValid(prefix, bits_to_ignore); } - CALLBACK_FN& callback_; - const FILTER filter_; + + private: + CALLBACK callback_; + FILTER filter_; const CONVERTER& converter_; }; + struct NoOpCallback { + constexpr void operator()(const Key&, const BUCKET&) const noexcept {} + }; + v16::PhTreeV16 tree_; CONVERTER converter_; - IteratorNormal the_end_{tree_.end()}; - BucketIterType bucket_dummy_end_; size_t size_; }; /** * A PH-Tree multi-map that uses (axis aligned) points as keys. * The points are defined with 64bit 'double' floating point coordinates. - * * See 'PhTreeD' for details. */ template < dimension_t DIM, typename T, typename CONVERTER = ConverterIEEE, - typename BUCKET = std::unordered_set> + typename BUCKET = b_plus_tree_hash_set> using PhTreeMultiMapD = PhTreeMultiMap; +/** + * A PH-Tree multi-map that uses (axis aligned) points as keys. + * The points are defined with 32bit 'float' floating point coordinates. + * See 'PhTreeD' for details. 
+ */ +template < + dimension_t DIM, + typename T, + typename CONVERTER = ConverterFloatIEEE, + typename BUCKET = b_plus_tree_hash_set> +using PhTreeMultiMapF = PhTreeMultiMap; + +/** + * A PH-Tree that uses (axis aligned) boxes as keys. + * See 'PhTreeD' for details. + */ template < dimension_t DIM, typename T, typename CONVERTER_BOX, - typename BUCKET = std::unordered_set> + typename BUCKET = b_plus_tree_hash_set> using PhTreeMultiMapBox = PhTreeMultiMap; /** * A PH-Tree multi-map that uses (axis aligned) boxes as keys. * The boxes are defined with 64bit 'double' floating point coordinates. - * * See 'PhTreeD' for details. */ template < dimension_t DIM, typename T, typename CONVERTER_BOX = ConverterBoxIEEE, - typename BUCKET = std::unordered_set> + typename BUCKET = b_plus_tree_hash_set> using PhTreeMultiMapBoxD = PhTreeMultiMapBox; +/** + * A PH-Tree multi-map that uses (axis aligned) boxes as keys. + * The boxes are defined with 32bit 'float' floating point coordinates. + * See 'PhTreeD' for details. 
+ */ +template < + dimension_t DIM, + typename T, + typename CONVERTER_BOX = ConverterBoxFloatIEEE, + typename BUCKET = b_plus_tree_hash_set> +using PhTreeMultiMapBoxF = PhTreeMultiMapBox; + } // namespace improbable::phtree #endif // PHTREE_PHTREE_MULTIMAP_H diff --git a/phtree/v16/BUILD b/include/phtree/v16/BUILD similarity index 86% rename from phtree/v16/BUILD rename to include/phtree/v16/BUILD index b44b14a1..f8bfe515 100644 --- a/phtree/v16/BUILD +++ b/include/phtree/v16/BUILD @@ -13,7 +13,7 @@ cc_library( "iterator_full.h", "iterator_hc.h", "iterator_knn_hs.h", - "iterator_simple.h", + "iterator_with_parent.h", "node.h", "phtree_v16.h", ], @@ -21,6 +21,6 @@ cc_library( "//visibility:public", ], deps = [ - "//phtree/common", + "//include/phtree/common", ], ) diff --git a/phtree/v16/debug_helper_v16.h b/include/phtree/v16/debug_helper_v16.h similarity index 61% rename from phtree/v16/debug_helper_v16.h rename to include/phtree/v16/debug_helper_v16.h index 85ef92d9..017c8a54 100644 --- a/phtree/v16/debug_helper_v16.h +++ b/include/phtree/v16/debug_helper_v16.h @@ -17,9 +17,9 @@ #ifndef PHTREE_V16_DEBUG_HELPER_H #define PHTREE_V16_DEBUG_HELPER_H -#include "../common/common.h" -#include "../common/debug_helper.h" #include "node.h" +#include "phtree/common/common.h" +#include "phtree/common/debug_helper.h" #include "phtree_v16.h" #include @@ -30,11 +30,10 @@ class PhTreeV16; template class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { - using KeyT = PhPoint; - using NodeT = Node; + using EntryT = Entry; public: - DebugHelperV16(const NodeT& root, size_t size) : root_{root}, size_{size} {} + DebugHelperV16(const EntryT& root, size_t size) : root_{root}, size_{size} {} /* * Depending on the detail parameter this returns: @@ -57,7 +56,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { ToStringPlain(os, root_); break; case Enum::tree: - ToStringTree(os, 0, root_, KeyT{}, true); + ToStringTree(os, 0, root_, MAX_BIT_WIDTH, true); break; } 
return os.str(); @@ -70,7 +69,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { */ [[nodiscard]] PhTreeStats GetStats() const override { PhTreeStats stats; - root_.GetStats(stats); + root_.GetNode().GetStats(stats, root_); return stats; } @@ -78,19 +77,19 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { * Checks the consistency of the tree. This function requires assertions to be enabled. */ void CheckConsistency() const override { - assert(size_ == root_.CheckConsistency()); + assert(size_ == root_.GetNode().CheckConsistency(root_)); } private: - void ToStringPlain(std::ostringstream& os, const NodeT& node) const { - for (auto& it : node.Entries()) { - const auto& o = it.second; + void ToStringPlain(std::ostringstream& os, const EntryT& entry) const { + for (auto& it : entry.GetNode().Entries()) { + const auto& child = it.second; // inner node? - if (o.IsNode()) { - ToStringPlain(os, o.GetNode()); + if (child.IsNode()) { + ToStringPlain(os, child); } else { - os << o.GetKey(); - os << " v=" << (o.IsValue() ? "T" : "null") << std::endl; + os << child.GetKey(); + os << " v=" << (child.IsValue() ? 
"T" : "null") << std::endl; } } } @@ -98,50 +97,53 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { void ToStringTree( std::ostringstream& sb, bit_width_t current_depth, - const NodeT& node, - const KeyT& prefix, - bool printValue) const { + const EntryT& entry, + const bit_width_t parent_postfix_len, + bool print_value) const { std::string ind = "*"; for (bit_width_t i = 0; i < current_depth; ++i) { ind += "-"; } - sb << ind << "il=" << node.GetInfixLen() << " pl=" << node.GetPostfixLen() - << " ec=" << node.GetEntryCount() << " inf=["; + const auto& node = entry.GetNode(); + const auto infix_len = entry.GetNodeInfixLen(parent_postfix_len); + const auto postfix_len = entry.GetNodePostfixLen(); + sb << ind << "il=" << infix_len << " pl=" << postfix_len << " ec=" << node.GetEntryCount() + << " inf=["; // for a leaf node, the existence of a sub just indicates that the value exists. - if (node.GetInfixLen() > 0) { - bit_mask_t mask = MAX_MASK << node.GetInfixLen(); + if (infix_len > 0) { + bit_mask_t mask = MAX_MASK << infix_len; mask = ~mask; - mask <<= node.GetPostfixLen() + 1; + mask <<= (std::uint64_t)postfix_len + 1; for (dimension_t i = 0; i < DIM; ++i) { - sb << ToBinary(prefix[i] & mask) << ","; + sb << ToBinary(entry.GetKey()[i] & mask) << ","; } } - current_depth += node.GetInfixLen(); + current_depth += infix_len; sb << "] " - << "Node___il=" << node.GetInfixLen() << ";pl=" << node.GetPostfixLen() + << "Node___il=" << infix_len << ";pl=" << postfix_len << ";size=" << node.Entries().size() << std::endl; // To clean previous postfixes. 
for (auto& it : node.Entries()) { - const auto& o = it.second; - hc_pos_t hcPos = it.first; - if (o.IsNode()) { - sb << ind << "# " << hcPos << " Node: " << std::endl; - ToStringTree(sb, current_depth + 1, o.GetNode(), o.GetKey(), printValue); + const auto& child = it.second; + auto hc_pos = it.first; + if (child.IsNode()) { + sb << ind << "# " << hc_pos << " Node: " << std::endl; + ToStringTree(sb, current_depth + 1, child, postfix_len, print_value); } else { // post-fix - sb << ind << ToBinary(o.GetKey()); - sb << " hcPos=" << hcPos; - if (printValue) { - sb << " v=" << (o.IsValue() ? "T" : "null"); + sb << ind << ToBinary(child.GetKey()); + sb << " hcPos=" << hc_pos; + if (print_value) { + sb << " v=" << (child.IsValue() ? "T" : "null"); } sb << std::endl; } } } - const NodeT& root_; + const EntryT& root_; const size_t size_; }; } // namespace improbable::phtree::v16 diff --git a/include/phtree/v16/entry.h b/include/phtree/v16/entry.h new file mode 100644 index 00000000..8ab8c488 --- /dev/null +++ b/include/phtree/v16/entry.h @@ -0,0 +1,272 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_ENTRY_H +#define PHTREE_V16_ENTRY_H + +#include "node.h" +#include "phtree/common/common.h" +#include +#include + +namespace improbable::phtree::v16 { + +template +class Node; + +/* + * Nodes in the PH-Tree contain up to 2^DIM Entries, one in each geometric quadrant. 
+ * Entries can contain two types of data: + * - A key/value pair (value of type T) + * - A prefix/child-node pair, where prefix is the prefix of the child node and the + * child node is contained in a unique_ptr. + */ +template +class Entry { + using KeyT = PhPoint; + using ValueT = std::remove_const_t; + using NodeT = Node; + + enum { + VALUE = 0, + NODE = 1, + EMPTY = 2, + }; + + public: + /* + * Construct entry with existing node. + */ + Entry(const KeyT& k, NodeT&& node, bit_width_t postfix_len) noexcept + : kd_key_{k} + , node_{std::move(node)} + , union_type_{NODE} + , postfix_len_{static_cast(postfix_len)} {} + + /* + * Construct entry with existing T (T is not movable). + */ + template + Entry( + const KeyT& k, + ValueT2&& value, + typename std::enable_if_t, int>::type = 0) noexcept + : kd_key_{k}, value_(value), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with existing T (T must be movable). + */ + template + Entry( + const KeyT& k, + ValueT2&& value, + typename std::enable_if_t, int>::type = 0) noexcept + : kd_key_{k}, value_(std::forward(value)), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with new T or copied T (T is not movable). + */ + template < + typename ValueT2 = ValueT, + typename = std::enable_if_t>> + explicit Entry(const KeyT& k, const ValueT& value) noexcept + : kd_key_{k}, value_(value), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with new T or copied T (T is not movable, using T's default constructor). + */ + template < + typename ValueT2 = ValueT, + typename = std::enable_if_t>> + explicit Entry(const KeyT& k) noexcept + : kd_key_{k}, value_(), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with new T or moved T (T must be movable). + */ + template < + typename... Args, + typename ValueT2 = ValueT, + typename = std::enable_if_t>> + explicit Entry(const KeyT& k, Args&&... 
args) noexcept + : kd_key_{k}, value_(std::forward(args)...), union_type_{VALUE}, postfix_len_{0} {} + + Entry(const Entry& other) = delete; + Entry& operator=(const Entry& other) = delete; + + Entry(Entry&& other) noexcept + : kd_key_{std::move(other.kd_key_)}, union_type_{std::move(other.union_type_)} { + postfix_len_ = std::move(other.postfix_len_); + AssignUnion(std::move(other)); + } + + Entry& operator=(Entry&& other) noexcept { + kd_key_ = std::move(other.kd_key_); + postfix_len_ = std::move(other.postfix_len_); + DestroyUnion(); + AssignUnion(std::move(other)); + return *this; + } + + ~Entry() noexcept { + DestroyUnion(); + } + + void SetNodeCenter() { + // The node center is defined as the prefix + a '1' bit after the prefix. The remaining + // bits, i.e. all post_len bits must be '0'. + // This is required for window queries which would otherwise need to calculate the + // center each time they traverse a node. + assert(union_type_ == NODE); + bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len_; + bit_mask_t maskVT = MAX_MASK << postfix_len_; + // to prevent problems with signed long when using 64 bit + if (postfix_len_ < MAX_BIT_WIDTH - 1) { + for (dimension_t i = 0; i < DIM; ++i) { + kd_key_[i] = (kd_key_[i] | maskHcBit) & maskVT; + } + } else { + for (dimension_t i = 0; i < DIM; ++i) { + kd_key_[i] = 0; + } + } + } + + [[nodiscard]] const KeyT& GetKey() const { + return kd_key_; + } + + [[nodiscard]] bool IsValue() const { + return union_type_ == VALUE; + } + + [[nodiscard]] bool IsNode() const { + return union_type_ == NODE; + } + + [[nodiscard]] T& GetValue() const { + assert(union_type_ == VALUE); + return const_cast(value_); + } + + [[nodiscard]] const NodeT& GetNode() const { + assert(union_type_ == NODE); + return node_; + } + + [[nodiscard]] NodeT& GetNode() { + assert(union_type_ == NODE); + return node_; + } + + void SetKey(const KeyT& key) noexcept { + assert(union_type_ == VALUE); // Do we have any other use? 
+ kd_key_ = key; + } + + void SetNode(NodeT&& node, bit_width_t postfix_len) noexcept { + postfix_len_ = static_cast(postfix_len); + DestroyUnion(); + union_type_ = NODE; + new (&node_) NodeT{std::move(node)}; + SetNodeCenter(); + } + + [[nodiscard]] bit_width_t GetNodePostfixLen() const noexcept { + assert(IsNode()); + return postfix_len_; + } + + [[nodiscard]] bit_width_t GetNodeInfixLen(bit_width_t parent_postfix_len) const noexcept { + assert(IsNode()); + return parent_postfix_len - GetNodePostfixLen() - 1; + } + + [[nodiscard]] bool HasNodeInfix(bit_width_t parent_postfix_len) const noexcept { + assert(IsNode()); + return parent_postfix_len - GetNodePostfixLen() - 1 > 0; + } + + [[nodiscard]] ValueT&& ExtractValue() noexcept { + assert(IsValue()); + return std::move(value_); + } + + [[nodiscard]] NodeT&& ExtractNode() noexcept { + assert(IsNode()); + // Moving the node somewhere else means we should remove it here: + union_type_ = EMPTY; + return std::move(node_); + } + + void ReplaceNodeWithDataFromEntry(Entry&& other) { + assert(IsNode()); + // 'other' may be referenced from the local node, so we need to do move(other) + // before destructing the local node. + auto node = std::move(node_); + union_type_ = EMPTY; + *this = std::move(other); + if (IsNode()) { + SetNodeCenter(); + } + // The 'node' is destructed automatically at the end of this function. 
+ } + + private: + void AssignUnion(Entry&& other) noexcept { + union_type_ = std::move(other.union_type_); + if (union_type_ == NODE) { + new (&node_) NodeT{std::move(other.node_)}; + } else if (union_type_ == VALUE) { + if constexpr (std::is_move_constructible_v) { + new (&value_) ValueT{std::move(other.value_)}; + } else { + new (&value_) ValueT{other.value_}; + } + } else { + assert(false && "Assigning from an EMPTY variant is a waste of time."); + } + } + + void DestroyUnion() noexcept { + if (union_type_ == VALUE) { + value_.~ValueT(); + } else if (union_type_ == NODE) { + node_.~NodeT(); + } else { + assert(union_type_ == EMPTY); + } + union_type_ = EMPTY; + } + + KeyT kd_key_; + union { + NodeT node_; + ValueT value_; + }; + std::uint16_t union_type_; + // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the + // current node). If a variable prefix_len would refer to the number of bits in this node's + // prefix, and if we assume 64 bit values, the following would always hold: + // prefix_len + 1 + postfix_len = 64. + // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, + // i.e. the same bit that is used to create the lookup keys in entries_. + std::uint16_t postfix_len_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_ENTRY_H diff --git a/phtree/v16/for_each.h b/include/phtree/v16/for_each.h similarity index 58% rename from phtree/v16/for_each.h rename to include/phtree/v16/for_each.h index aee3d157..e61e24fd 100644 --- a/phtree/v16/for_each.h +++ b/include/phtree/v16/for_each.h @@ -17,8 +17,8 @@ #ifndef PHTREE_V16_FOR_EACH_H #define PHTREE_V16_FOR_EACH_H -#include "../common/common.h" -#include "iterator_simple.h" +#include "phtree/common/common.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { @@ -26,47 +26,43 @@ namespace improbable::phtree::v16 { * Iterates over the whole tree. 
Entries and child nodes that are rejected by the Filter are not * traversed or returned. */ -template +template class ForEach { static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; using EntryT = Entry; - using NodeT = Node; public: - ForEach(const CONVERT& converter, CALLBACK_FN& callback, FILTER filter) - : converter_{converter}, callback_{callback}, filter_(std::move(filter)) {} + template + ForEach(const CONVERT* converter, CB&& callback, F&& filter) + : converter_{converter} + , callback_{std::forward(callback)} + , filter_(std::forward(filter)) {} - void run(const EntryT& root) { - assert(root.IsNode()); - TraverseNode(root.GetKey(), root.GetNode()); - } - - private: - void TraverseNode(const KeyInternal& key, const NodeT& node) { - auto iter = node.Entries().begin(); - auto end = node.Entries().end(); + void Traverse(const EntryT& entry) { + assert(entry.IsNode()); + auto& entries = entry.GetNode().Entries(); + auto iter = entries.begin(); + auto end = entries.end(); for (; iter != end; ++iter) { const auto& child = iter->second; const auto& child_key = child.GetKey(); if (child.IsNode()) { - const auto& child_node = child.GetNode(); - if (filter_.IsNodeValid(key, node.GetPostfixLen() + 1)) { - TraverseNode(child_key, child_node); + if (filter_.IsNodeValid(child_key, child.GetNodePostfixLen() + 1)) { + Traverse(child); } } else { T& value = child.GetValue(); - if (filter_.IsEntryValid(key, value)) { - callback_(converter_.post(child_key), value); + if (filter_.IsEntryValid(child_key, value)) { + callback_(converter_->post(child_key), value); } } } } - CONVERT converter_; - CALLBACK_FN& callback_; + const CONVERT* converter_; + CALLBACK callback_; FILTER filter_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/for_each_hc.h b/include/phtree/v16/for_each_hc.h similarity index 61% rename 
from phtree/v16/for_each_hc.h rename to include/phtree/v16/for_each_hc.h index d870debc..ef2a7c6d 100644 --- a/phtree/v16/for_each_hc.h +++ b/include/phtree/v16/for_each_hc.h @@ -1,5 +1,6 @@ /* * Copyright 2020 Improbable Worlds Limited + * Copyright 2022 Tilmann Zäschke * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +18,8 @@ #ifndef PHTREE_V16_FOR_EACH_HC_H #define PHTREE_V16_FOR_EACH_HC_H -#include "../common/common.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" +#include "phtree/common/common.h" namespace improbable::phtree::v16 { @@ -33,40 +34,37 @@ namespace improbable::phtree::v16 { * For details see "Efficient Z-Ordered Traversal of Hypercube Indexes" by T. Zäschke, M.C. Norrie, * 2017. */ -template +template class ForEachHC { static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; using EntryT = Entry; - using NodeT = Node; + using hc_pos_t = hc_pos_dim_t; public: + template ForEachHC( const KeyInternal& range_min, const KeyInternal& range_max, - const CONVERT& converter, - CALLBACK_FN& callback, - FILTER filter) + const CONVERT* converter, + CB&& callback, + F&& filter) : range_min_{range_min} , range_max_{range_max} , converter_{converter} - , callback_{callback} - , filter_(std::move(filter)) {} + , callback_{std::forward(callback)} + , filter_(std::forward(filter)) {} - void run(const EntryT& root) { - assert(root.IsNode()); - TraverseNode(root.GetKey(), root.GetNode()); - } - - private: - void TraverseNode(const KeyInternal& key, const NodeT& node) { + void Traverse(const EntryT& entry, const EntryIteratorC* opt_it = nullptr) { + assert(entry.IsNode()); hc_pos_t mask_lower = 0; hc_pos_t mask_upper = 0; - CalcLimits(node.GetPostfixLen(), key, mask_lower, mask_upper); - auto iter = 
node.Entries().lower_bound(mask_lower); - auto end = node.Entries().end(); + CalcLimits(entry.GetNodePostfixLen(), entry.GetKey(), mask_lower, mask_upper); + auto& entries = entry.GetNode().Entries(); + auto postfix_len = entry.GetNodePostfixLen(); + auto end = entries.end(); + auto iter = opt_it != nullptr && *opt_it != end ? *opt_it : entries.lower_bound(mask_lower); for (; iter != end && iter->first <= mask_upper; ++iter) { auto child_hc_pos = iter->first; // Use bit-mask magic to check whether we are in a valid quadrant. @@ -75,45 +73,38 @@ class ForEachHC { const auto& child = iter->second; const auto& child_key = child.GetKey(); if (child.IsNode()) { - const auto& child_node = child.GetNode(); - if (CheckNode(child_key, child_node)) { - TraverseNode(child_key, child_node); + if (CheckNode(child, postfix_len)) { + Traverse(child); } } else { T& value = child.GetValue(); if (IsInRange(child_key, range_min_, range_max_) && - ApplyFilter(child_key, value)) { - callback_(converter_.post(child_key), value); + filter_.IsEntryValid(child_key, value)) { + callback_(converter_->post(child_key), value); } } } } } - bool CheckNode(const KeyInternal& key, const NodeT& node) const { + private: + bool CheckNode(const EntryT& entry, bit_width_t parent_postfix_len) { + const KeyInternal& key = entry.GetKey(); // Check if the node overlaps with the query box. // An infix with len=0 implies that at least part of the child node overlaps with the query, // otherwise the bit mask checking would have returned 'false'. - if (node.GetInfixLen() > 0) { + // Putting it differently, if the infix has len=0, then there is no point in validating it. + bool mismatch = false; + if (entry.HasNodeInfix(parent_postfix_len)) { // Mask for comparing the prefix with the query boundaries. 
- assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); - SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + assert(entry.GetNodePostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (entry.GetNodePostfixLen() + 1); for (dimension_t dim = 0; dim < DIM; ++dim) { SCALAR prefix = key[dim] & comparison_mask; - if (prefix > range_max_[dim] || prefix < (range_min_[dim] & comparison_mask)) { - return false; - } + mismatch |= (prefix > range_max_[dim] || prefix < (range_min_[dim] & comparison_mask)); } } - return ApplyFilter(key, node); - } - - [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const NodeT& node) const { - return filter_.IsNodeValid(key, node.GetPostfixLen() + 1); - } - - [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const T& value) const { - return filter_.IsEntryValid(key, value); + return mismatch ? false : filter_.IsNodeValid(key, entry.GetNodePostfixLen() + 1); } void CalcLimits( @@ -135,23 +126,17 @@ class ForEachHC { // query higher || NO YES // assert(postfix_len < MAX_BIT_WIDTH); - bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len; - bit_mask_t maskVT = MAX_MASK << postfix_len; - constexpr hc_pos_t ONE = 1; // to prevent problems with signed long when using 64 bit if (postfix_len < MAX_BIT_WIDTH - 1) { for (dimension_t i = 0; i < DIM; ++i) { lower_limit <<= 1; + //==> set to 1 if lower value should not be queried + lower_limit |= range_min_[i] >= prefix[i]; + } + for (dimension_t i = 0; i < DIM; ++i) { upper_limit <<= 1; - SCALAR nodeBisection = (prefix[i] | maskHcBit) & maskVT; - if (range_min_[i] >= nodeBisection) { - //==> set to 1 if lower value should not be queried - lower_limit |= ONE; - } - if (range_max_[i] >= nodeBisection) { - // Leave 0 if higher value should not be queried. - upper_limit |= ONE; - } + // Leave 0 if higher value should not be queried. 
+ upper_limit |= range_max_[i] >= prefix[i]; } } else { // special treatment for signed longs @@ -160,28 +145,26 @@ class ForEachHC { // The hypercube assumes that a leading '0' indicates a lower value. // Solution: We leave HC as it is. for (dimension_t i = 0; i < DIM; ++i) { - lower_limit <<= 1; upper_limit <<= 1; - if (range_min_[i] < 0) { - // If minimum is positive, we don't need the search negative values - //==> set upper_limit to 0, prevent searching values starting with '1'. - upper_limit |= ONE; - } - if (range_max_[i] < 0) { - // Leave 0 if higher value should not be queried - // If maximum is negative, we do not need to search positive values - //(starting with '0'). - //--> lower_limit = '1' - lower_limit |= ONE; - } + // If minimum is positive, we don't need the search negative values + //==> set upper_limit to 0, prevent searching values starting with '1'. + upper_limit |= range_min_[i] < 0; + } + for (dimension_t i = 0; i < DIM; ++i) { + lower_limit <<= 1; + // Leave 0 if higher value should not be queried + // If maximum is negative, we do not need to search positive values + //(starting with '0'). + //--> lower_limit = '1' + lower_limit |= range_max_[i] < 0; } } } const KeyInternal range_min_; const KeyInternal range_max_; - CONVERT converter_; - CALLBACK_FN& callback_; + const CONVERT* converter_; + CALLBACK callback_; FILTER filter_; }; } // namespace improbable::phtree::v16 diff --git a/include/phtree/v16/iterator_base.h b/include/phtree/v16/iterator_base.h new file mode 100644 index 00000000..d5152dfe --- /dev/null +++ b/include/phtree/v16/iterator_base.h @@ -0,0 +1,125 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_ITERATOR_BASE_H +#define PHTREE_V16_ITERATOR_BASE_H + +#include "phtree/common/common.h" +#include "phtree/filter.h" +#include "entry.h" + +namespace improbable::phtree::v16 { + +/* + * Base class for all PH-Tree iterators. + */ +template +class IteratorBase { + public: + explicit IteratorBase() noexcept : current_entry_{nullptr} {} + explicit IteratorBase(const EntryT* current_entry) noexcept : current_entry_{current_entry} {} + + inline auto& operator*() const noexcept { + assert(current_entry_); + return current_entry_->GetValue(); + } + + inline auto* operator->() const noexcept { + assert(current_entry_); + return ¤t_entry_->GetValue(); + } + + inline friend bool operator==( + const IteratorBase& left, const IteratorBase& right) noexcept { + return left.current_entry_ == right.current_entry_; + } + + inline friend bool operator!=( + const IteratorBase& left, const IteratorBase& right) noexcept { + return left.current_entry_ != right.current_entry_; + } + + auto& second() const { + return current_entry_->GetValue(); + } + + [[nodiscard]] inline bool IsEnd() const noexcept { + return current_entry_ == nullptr; + } + + inline EntryT* GetEntry() const noexcept { + return const_cast(current_entry_); + } + + protected: + void SetFinished() { + current_entry_ = nullptr; + } + + void SetCurrentResult(const EntryT* current_entry) { + current_entry_ = current_entry; + } + + protected: + const EntryT* current_entry_; +}; + +template +using IteratorEnd = IteratorBase; + +template +class IteratorWithFilter +: public IteratorBase> 
{ + protected: + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using EntryT = Entry; + + public: + template + explicit IteratorWithFilter(const CONVERT* converter, F&& filter) noexcept + : IteratorBase(nullptr), converter_{converter}, filter_{std::forward(filter)} {} + + explicit IteratorWithFilter(const EntryT* current_entry, const CONVERT* converter) noexcept + : IteratorBase(current_entry), converter_{converter}, filter_{FILTER()} {} + + auto first() const { + return converter_->post(this->current_entry_->GetKey()); + } + + auto& __Filter() { + return filter_; + } + + protected: + [[nodiscard]] bool ApplyFilter(const EntryT& entry) { + return entry.IsNode() ? filter_.IsNodeValid(entry.GetKey(), entry.GetNodePostfixLen() + 1) + : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); + } + + auto post(const KeyInternal& point) { + return converter_->post(point); + } + + private: + const CONVERT* converter_; + FILTER filter_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_ITERATOR_BASE_H diff --git a/phtree/v16/iterator_full.h b/include/phtree/v16/iterator_full.h similarity index 81% rename from phtree/v16/iterator_full.h rename to include/phtree/v16/iterator_full.h index b60be035..fbd9bb60 100644 --- a/phtree/v16/iterator_full.h +++ b/include/phtree/v16/iterator_full.h @@ -17,7 +17,7 @@ #ifndef PHTREE_V16_ITERATOR_FULL_H #define PHTREE_V16_ITERATOR_FULL_H -#include "../common/common.h" +#include "phtree/common/common.h" #include "iterator_base.h" namespace improbable::phtree::v16 { @@ -26,32 +26,35 @@ template class Node; template -class IteratorFull : public IteratorBase { +class IteratorFull : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using SCALAR = typename CONVERT::ScalarInternal; using NodeT = Node; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename 
IteratorWithFilter::EntryT; public: - IteratorFull(const EntryT& root, const CONVERT& converter, FILTER filter) - : IteratorBase(converter, filter), stack_{}, stack_size_{0} { + template + IteratorFull(const EntryT& root, const CONVERT* converter, F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) + , stack_{} + , stack_size_{0} { PrepareAndPush(root.GetNode()); FindNextElement(); } - IteratorFull& operator++() { + IteratorFull& operator++() noexcept { FindNextElement(); return *this; } - IteratorFull operator++(int) { + IteratorFull operator++(int) noexcept { IteratorFull iterator(*this); ++(*this); return iterator; } private: - void FindNextElement() { + void FindNextElement() noexcept { while (!IsEmpty()) { auto* p = &Peek(); while (*p != PeekEnd()) { @@ -82,22 +85,22 @@ class IteratorFull : public IteratorBase { return stack_[stack_size_ - 1].first; } - auto& Peek() { + auto& Peek() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1].first; } - auto& PeekEnd() { + auto& PeekEnd() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1].second; } - auto& Pop() { + auto& Pop() noexcept { assert(stack_size_ > 0); return stack_[--stack_size_].first; } - bool IsEmpty() { + bool IsEmpty() noexcept { return stack_size_ == 0; } diff --git a/phtree/v16/iterator_hc.h b/include/phtree/v16/iterator_hc.h similarity index 71% rename from phtree/v16/iterator_hc.h rename to include/phtree/v16/iterator_hc.h index 2485550c..bcc072c3 100644 --- a/phtree/v16/iterator_hc.h +++ b/include/phtree/v16/iterator_hc.h @@ -17,8 +17,8 @@ #ifndef PHTREE_V16_ITERATOR_HC_H #define PHTREE_V16_ITERATOR_HC_H -#include "../common/common.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" +#include "phtree/common/common.h" namespace improbable::phtree::v16 { @@ -42,44 +42,45 @@ class NodeIterator; * 2017. 
*/ template -class IteratorHC : public IteratorBase { +class IteratorHC : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; public: + template IteratorHC( const EntryT& root, const KeyInternal& range_min, const KeyInternal& range_max, - const CONVERT& converter, - FILTER filter) - : IteratorBase(converter, filter) + const CONVERT* converter, + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) , stack_size_{0} , range_min_{range_min} , range_max_{range_max} { + stack_.reserve(8); PrepareAndPush(root); FindNextElement(); } - IteratorHC& operator++() { + IteratorHC& operator++() noexcept { FindNextElement(); return *this; } - IteratorHC operator++(int) { + IteratorHC operator++(int) noexcept { IteratorHC iterator(*this); ++(*this); return iterator; } private: - void FindNextElement() { - assert(!this->Finished()); + void FindNextElement() noexcept { while (!IsEmpty()) { auto* p = &Peek(); - const EntryT* current_result = nullptr; + const EntryT* current_result; while ((current_result = p->Increment(range_min_, range_max_))) { if (this->ApplyFilter(*current_result)) { if (current_result->IsNode()) { @@ -97,28 +98,31 @@ class IteratorHC : public IteratorBase { this->SetFinished(); } - auto& PrepareAndPush(const EntryT& entry) { - assert(stack_size_ < stack_.size() - 1); + auto& PrepareAndPush(const EntryT& entry) noexcept { + if (stack_.size() < stack_size_ + 1) { + stack_.emplace_back(); + } + assert(stack_size_ < stack_.size()); auto& ni = stack_[stack_size_++]; - ni.init(range_min_, range_max_, entry.GetNode(), entry.GetKey()); + ni.Init(range_min_, range_max_, entry); return ni; } - auto& Peek() { + auto& Peek() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1]; } - auto& Pop() { + auto& Pop() 
noexcept { assert(stack_size_ > 0); return stack_[--stack_size_]; } - bool IsEmpty() { + bool IsEmpty() noexcept { return stack_size_ == 0; } - std::array, MAX_BIT_WIDTH> stack_; + std::vector> stack_; size_t stack_size_; const KeyInternal range_min_; const KeyInternal range_max_; @@ -129,15 +133,18 @@ template class NodeIterator { using KeyT = PhPoint; using EntryT = Entry; - using NodeT = Node; + using EntriesT = const EntryMap; + using hc_pos_t = hc_pos_dim_t; public: - NodeIterator() : iter_{}, node_{nullptr}, mask_lower_{0}, mask_upper_(0) {} + NodeIterator() : iter_{}, entries_{nullptr}, mask_lower_{0}, mask_upper_{0}, postfix_len_{0} {} - void init(const KeyT& range_min, const KeyT& range_max, const NodeT& node, const KeyT& prefix) { - node_ = &node; - CalcLimits(node.GetPostfixLen(), range_min, range_max, prefix); + void Init(const KeyT& range_min, const KeyT& range_max, const EntryT& entry) { + auto& node = entry.GetNode(); + CalcLimits(entry.GetNodePostfixLen(), range_min, range_max, entry.GetKey()); iter_ = node.Entries().lower_bound(mask_lower_); + entries_ = &node.Entries(); + postfix_len_ = entry.GetNodePostfixLen(); } /* @@ -145,7 +152,7 @@ class NodeIterator { * @return TRUE iff a matching element was found. */ const EntryT* Increment(const KeyT& range_min, const KeyT& range_max) { - while (iter_ != node_->Entries().end() && iter_->first <= mask_upper_) { + while (iter_ != entries_->end() && iter_->first <= mask_upper_) { if (IsPosValid(iter_->first)) { const auto* be = &iter_->second; if (CheckEntry(*be, range_min, range_max)) { @@ -163,16 +170,16 @@ class NodeIterator { return IsInRange(candidate.GetKey(), range_min, range_max); } - auto& node = candidate.GetNode(); // Check if node-prefix allows sub-node to contain any useful values. // An infix with len=0 implies that at least part of the child node overlaps with the query. 
- if (node.GetInfixLen() == 0) { + // Putting it differently, if the infix has len=0, then there is no point in validating it. + if (!candidate.HasNodeInfix(postfix_len_)) { return true; } // Mask for comparing the prefix with the query boundaries. - assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); - SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + assert(candidate.GetNodePostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (candidate.GetNodePostfixLen() + 1); auto& key = candidate.GetKey(); for (dimension_t dim = 0; dim < DIM; ++dim) { SCALAR in = key[dim] & comparison_mask; @@ -184,7 +191,7 @@ class NodeIterator { } private: - [[nodiscard]] bool IsPosValid(hc_pos_t key) const { + [[nodiscard]] inline bool IsPosValid(hc_pos_t key) const noexcept { return ((key | mask_lower_) & mask_upper_) == key; } @@ -204,25 +211,19 @@ class NodeIterator { // query higher || NO YES // assert(postfix_len < MAX_BIT_WIDTH); - bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len; - bit_mask_t maskVT = MAX_MASK << postfix_len; hc_pos_t lower_limit = 0; hc_pos_t upper_limit = 0; - constexpr hc_pos_t ONE = 1; // to prevent problems with signed long when using 64 bit if (postfix_len < MAX_BIT_WIDTH - 1) { for (dimension_t i = 0; i < DIM; ++i) { lower_limit <<= 1; + //==> set to 1 if lower value should not be queried + lower_limit |= range_min[i] >= prefix[i]; + } + for (dimension_t i = 0; i < DIM; ++i) { upper_limit <<= 1; - SCALAR nodeBisection = (prefix[i] | maskHcBit) & maskVT; - if (range_min[i] >= nodeBisection) { - //==> set to 1 if lower value should not be queried - lower_limit |= ONE; - } - if (range_max[i] >= nodeBisection) { - // Leave 0 if higher value should not be queried. - upper_limit |= ONE; - } + // Leave 0 if higher value should not be queried. 
+ upper_limit |= range_max[i] >= prefix[i]; } } else { // special treatment for signed longs @@ -230,22 +231,19 @@ class NodeIterator { // LOWER value, opposed to indicating a HIGHER value as in the remaining 63 bits. // The hypercube assumes that a leading '0' indicates a lower value. // Solution: We leave HC as it is. - for (dimension_t i = 0; i < DIM; ++i) { - lower_limit <<= 1; upper_limit <<= 1; - if (range_min[i] < 0) { - // If minimum is positive, we don't need the search negative values - //==> set upper_limit to 0, prevent searching values starting with '1'. - upper_limit |= ONE; - } - if (range_max[i] < 0) { - // Leave 0 if higher value should not be queried - // If maximum is negative, we do not need to search positive values - //(starting with '0'). - //--> lower_limit = '1' - lower_limit |= ONE; - } + // If minimum is positive, we don't need the search negative values + //==> set upper_limit to 0, prevent searching values starting with '1'. + upper_limit |= range_min[i] < 0; + } + for (dimension_t i = 0; i < DIM; ++i) { + lower_limit <<= 1; + // Leave 0 if higher value should not be queried + // If maximum is negative, we do not need to search positive values + //(starting with '0'). 
+ //--> lower_limit = '1' + lower_limit |= range_max[i] < 0; } } mask_lower_ = lower_limit; @@ -254,9 +252,10 @@ class NodeIterator { private: EntryIteratorC iter_; - const NodeT* node_; + EntriesT* entries_; hc_pos_t mask_lower_; hc_pos_t mask_upper_; + bit_width_t postfix_len_; }; } // namespace } // namespace improbable::phtree::v16 diff --git a/phtree/v16/iterator_knn_hs.h b/include/phtree/v16/iterator_knn_hs.h similarity index 83% rename from phtree/v16/iterator_knn_hs.h rename to include/phtree/v16/iterator_knn_hs.h index 3c30f7d6..7d1b7195 100644 --- a/phtree/v16/iterator_knn_hs.h +++ b/include/phtree/v16/iterator_knn_hs.h @@ -17,8 +17,8 @@ #ifndef PHTREE_V16_QUERY_KNN_HS_H #define PHTREE_V16_QUERY_KNN_HS_H -#include "../common/common.h" #include "iterator_base.h" +#include "phtree/common/common.h" #include namespace improbable::phtree::v16 { @@ -44,29 +44,30 @@ struct CompareEntryDistByDistance { } // namespace template -class IteratorKnnHS : public IteratorBase { +class IteratorKnnHS : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; using EntryDistT = EntryDist; public: + template explicit IteratorKnnHS( const EntryT& root, size_t min_results, const KeyInternal& center, - const CONVERT& converter, - DISTANCE dist, - FILTER filter) - : IteratorBase(converter, filter) + const CONVERT* converter, + DIST&& dist, + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) , center_{center} - , center_post_{converter.post(center)} + , center_post_{converter->post(center)} , current_distance_{std::numeric_limits::max()} , num_found_results_(0) , num_requested_results_(min_results) - , distance_(std::move(dist)) { + , distance_(std::forward(dist)) { if (min_results <= 0 
|| root.GetNode().GetEntryCount() == 0) { this->SetFinished(); return; @@ -81,12 +82,12 @@ class IteratorKnnHS : public IteratorBase { return current_distance_; } - IteratorKnnHS& operator++() { + IteratorKnnHS& operator++() noexcept { FindNextElement(); return *this; } - IteratorKnnHS operator++(int) { + IteratorKnnHS operator++(int) noexcept { IteratorKnnHS iterator(*this); ++(*this); return iterator; @@ -96,7 +97,7 @@ class IteratorKnnHS : public IteratorBase { void FindNextElement() { while (num_found_results_ < num_requested_results_ && !queue_.empty()) { auto& candidate = queue_.top(); - auto o = candidate.second; + auto* o = candidate.second; if (!o->IsNode()) { // data entry ++num_found_results_; @@ -114,8 +115,7 @@ class IteratorKnnHS : public IteratorBase { auto& e2 = entry.second; if (this->ApplyFilter(e2)) { if (e2.IsNode()) { - auto& sub = e2.GetNode(); - double d = DistanceToNode(e2.GetKey(), sub.GetPostfixLen() + 1); + double d = DistanceToNode(e2.GetKey(), e2.GetNodePostfixLen() + 1); queue_.emplace(d, &e2); } else { double d = distance_(center_post_, this->post(e2.GetKey())); @@ -129,16 +129,14 @@ class IteratorKnnHS : public IteratorBase { current_distance_ = std::numeric_limits::max(); } - double DistanceToNode(const KeyInternal& prefix, int bits_to_ignore) { + double DistanceToNode(const KeyInternal& prefix, std::uint32_t bits_to_ignore) { assert(bits_to_ignore < MAX_BIT_WIDTH); SCALAR mask_min = MAX_MASK << bits_to_ignore; SCALAR mask_max = ~mask_min; KeyInternal buf; - // The following calculates the point inside of the node that is closest to center_. - // If center is inside the node this returns center_, otherwise it finds a point on the - // node's surface. + // The following calculates the point inside the node that is closest to center_. 
for (dimension_t i = 0; i < DIM; ++i) { - // if center_[i] is outside the node, return distance to closest edge, + // if center_[i] is outside the node, return distance to the closest edge, // otherwise return center_[i] itself (assume possible distance=0) SCALAR min = prefix[i] & mask_min; SCALAR max = prefix[i] | mask_max; @@ -154,8 +152,8 @@ class IteratorKnnHS : public IteratorBase { double current_distance_; std::priority_queue, CompareEntryDistByDistance> queue_; - int num_found_results_; - int num_requested_results_; + size_t num_found_results_; + size_t num_requested_results_; DISTANCE distance_; }; diff --git a/phtree/v16/iterator_simple.h b/include/phtree/v16/iterator_with_parent.h similarity index 56% rename from phtree/v16/iterator_simple.h rename to include/phtree/v16/iterator_with_parent.h index 815979a7..47216615 100644 --- a/phtree/v16/iterator_simple.h +++ b/include/phtree/v16/iterator_with_parent.h @@ -17,51 +17,55 @@ #ifndef PHTREE_V16_ITERATOR_SIMPLE_H #define PHTREE_V16_ITERATOR_SIMPLE_H -#include "../common/common.h" +#include "phtree/common/common.h" #include "iterator_base.h" namespace improbable::phtree::v16 { template -class IteratorSimple : public IteratorBase { +class IteratorWithParent : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; + friend PhTreeV16; public: - explicit IteratorSimple(const CONVERT& converter) : IteratorBase(converter) { - this->SetFinished(); - } - - explicit IteratorSimple( + explicit IteratorWithParent( const EntryT* current_result, const EntryT* current_node, const EntryT* parent_node, - CONVERT converter) - : IteratorBase(converter) { - if (current_result) { - this->SetCurrentResult(current_result); - this->SetCurrentNodeEntry(current_node); - this->SetParentNodeEntry(parent_node); - } else { - this->SetFinished(); - } - } + 
const CONVERT* converter) noexcept + : IteratorWithFilter(current_result, converter) + , current_node_{current_node} + , parent_node_{parent_node} {} - IteratorSimple& operator++() { + IteratorWithParent& operator++() { this->SetFinished(); return *this; } - IteratorSimple operator++(int) { - IteratorSimple iterator(*this); + IteratorWithParent operator++(int) { + IteratorWithParent iterator(*this); ++(*this); return iterator; } -}; -template -using IteratorEnd = IteratorSimple; + private: + /* + * The parent entry contains the parent node. The parent node is the node ABOVE the current node + * which contains the current entry. + */ + EntryT* GetNodeEntry() const { + return const_cast(current_node_); + } + + EntryT* GetParentNodeEntry() const { + return const_cast(parent_node_); + } + + const EntryT* current_node_; + const EntryT* parent_node_; +}; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/node.h b/include/phtree/v16/node.h similarity index 57% rename from phtree/v16/node.h rename to include/phtree/v16/node.h index 6994bca0..718ec1d8 100644 --- a/phtree/v16/node.h +++ b/include/phtree/v16/node.h @@ -1,5 +1,6 @@ /* * Copyright 2020 Improbable Worlds Limited + * Copyright 2022 Tilmann Zäschke * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,76 +18,42 @@ #ifndef PHTREE_V16_NODE_H #define PHTREE_V16_NODE_H -#include "../common/common.h" -#include "../common/tree_stats.h" #include "entry.h" +#include "phtree/common/common.h" #include "phtree_v16.h" #include namespace improbable::phtree::v16 { /* - * We provide different implementations of the node's internal entry set: + * We provide different implementations of the node's internal entry set. + * All implementations are equivalent to "std::map" which can be used as + * a plugin example for verification. + * * - `array_map` is the fastest, but has O(2^DIM) space complexity. 
This can be very wasteful * because many nodes may have only 2 entries. * Also, iteration depends on some bit operations and is also O(DIM) per step if the CPU/compiler - * does not support CTZ (count trailing bits). + * does not support CTZ (count trailing zeroes). * - `sparse_map` is slower, but requires only O(n) memory (n = number of entries/children). * However, insertion/deletion is O(n), i.e. O(2^DIM) time complexity in the worst case. - * - 'std::map` is the least efficient for small node sizes but scales best with larger nodes and - * dimensionality. Remember that n_max = 2^DIM. + * - 'b_plus_tree_map` is the least efficient for small node sizes but scales best with larger + * nodes and dimensionality. Remember that n_max = 2^DIM. */ template -using EntryMap = typename std::conditional< +// using EntryMap = std::map, Entry>; +using EntryMap = typename std::conditional_t< DIM <= 3, - array_map, - typename std::conditional, std::map>::type>::type; + array_map, Entry, (size_t(1) << DIM)>, + typename std::conditional_t< + DIM <= 8, + sparse_map, Entry>, + b_plus_tree_map, Entry, (uint64_t(1) << DIM)>>>; template -using EntryIterator = decltype(EntryMap().begin()); +using EntryIterator = typename std::remove_const_t().begin())>; template using EntryIteratorC = decltype(EntryMap().cbegin()); -namespace { - -/* - * Takes a construct of parent_node -> child_node, ie the child_node is owned by parent_node. - * This function also assumes that the child_node contains only one entry. - * - * This function takes the remaining entry from the child node and inserts it into the parent_node - * where it replaces (and implicitly deletes) the child_node. - * @param prefix_of_child_in_parent This specifies the position of child_node inside the - * parent_node. We only need the relevant bits at the level of the parent_node. This means we can - * use any key of any node or entry that is, or used to be) inside the child_node, because they all - * share the same prefix. 
This includes the key of the child_node itself. - * @param child_node The node to be removed from the parent node. - * @param parent_node Current owner of the child node. - */ -template -void MergeIntoParent(Node& child_node, Node& parent) { - assert(child_node.GetEntryCount() == 1); - // At this point we have found an entry that needs to be removed. We also know that we need to - // remove the child node because it contains at most one other entry and it is not the root - // node. - auto map_entry = child_node.Entries().begin(); - auto& entry = map_entry->second; - - auto hc_pos_in_parent = CalcPosInArray(entry.GetKey(), parent.GetPostfixLen()); - auto& parent_entry = parent.Entries().find(hc_pos_in_parent)->second; - - if (entry.IsNode()) { - // connect sub to parent - auto& sub2 = entry.GetNode(); - bit_width_t new_infix_len = child_node.GetInfixLen() + 1 + sub2.GetInfixLen(); - sub2.SetInfixLen(new_infix_len); - } - - // Now move the single entry into the parent, the position in the parent is the same as the - // child_node. - parent_entry.ReplaceNodeWithDataFromEntry(std::move(entry)); -} -} // namespace - /* * A node of the PH-Tree. It contains up to 2^DIM entries, each entry being either a leaf with data * of type T or a child node (both are of the variant type Entry). @@ -110,17 +77,14 @@ template class Node { using KeyT = PhPoint; using EntryT = Entry; + using hc_pos_t = hc_pos_dim_t; public: - Node(bit_width_t infix_len, bit_width_t postfix_len) - : postfix_len_(postfix_len), infix_len_(infix_len), entries_{} { - assert(infix_len_ < MAX_BIT_WIDTH); - assert(infix_len >= 0); - } + Node() : entries_{} {} // Nodes should never be copied! 
Node(const Node&) = delete; - Node(Node&&) = delete; + Node(Node&&) noexcept = default; Node& operator=(const Node&) = delete; Node& operator=(Node&&) = delete; @@ -128,14 +92,6 @@ class Node { return entries_.size(); } - [[nodiscard]] bit_width_t GetInfixLen() const { - return infix_len_; - } - - [[nodiscard]] bit_width_t GetPostfixLen() const { - return postfix_len_; - } - /* * Attempts to emplace an entry in this node. * The behavior is analogous to std::map::emplace(), i.e. if there is already a value with the @@ -164,56 +120,113 @@ class Node { * @param args Constructor arguments for creating a value T that can be inserted for the key. */ template - EntryT* Emplace(bool& is_inserted, const KeyT& key, Args&&... args) { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + EntryT& Emplace(bool& is_inserted, const KeyT& key, bit_width_t postfix_len, Args&&... args) { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); auto emplace_result = entries_.try_emplace(hc_pos, key, std::forward(args)...); auto& entry = emplace_result.first->second; // Return if emplace succeed, i.e. there was no entry. if (emplace_result.second) { is_inserted = true; - return &entry; + return entry; + } + return HandleCollision(entry, is_inserted, key, postfix_len, std::forward(args)...); + } + + template + EntryT& Emplace( + IterT iter, bool& is_inserted, const KeyT& key, bit_width_t postfix_len, Args&&... args) { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); // TODO pass in -> should be known! + if (iter == entries_.end() || iter->first != hc_pos) { + auto emplace_result = + entries_.try_emplace(iter, hc_pos, key, std::forward(args)...); + is_inserted = true; + return emplace_result->second; } - return HandleCollision(entry, is_inserted, key, std::forward(args)...); + auto& entry = iter->second; + return HandleCollision(entry, is_inserted, key, postfix_len, std::forward(args)...); } /* * Returns the value (T or Node) if the entry exists and matches the key. 
Child nodes are * _not_ traversed. * @param key The key of the entry - * @param parent parent node + * @param parent The parent node * @return The sub node or null. */ - const EntryT* Find(const KeyT& key) const { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); - const auto& entry = entries_.find(hc_pos); - if (entry != entries_.end() && DoesEntryMatch(entry->second, key)) { - return &entry->second; + EntryT* Find(const KeyT& key, bit_width_t postfix_len) { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); + auto iter = entries_.find(hc_pos); + if (iter != entries_.end() && DoesEntryMatch(iter->second, key, postfix_len)) { + return &iter->second; } return nullptr; } + const EntryT* FindC(const KeyT& key, bit_width_t postfix_len) const { + return const_cast(*this).Find(key, postfix_len); + } + + auto LowerBound(const KeyT& key, bit_width_t postfix_len, bool& found) { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); + auto iter = entries_.lower_bound(hc_pos); + found = + (iter != entries_.end() && iter->first == hc_pos && + DoesEntryMatch(iter->second, key, postfix_len)); + return iter; + } + + auto End() { + return entries_.end(); + } + + auto End() const { + return entries_.end(); + } + + EntryIteratorC FindPrefix( + const KeyT& prefix, bit_width_t prefix_post_len, bit_width_t node_postfix_len) const { + assert(prefix_post_len <= node_postfix_len); + hc_pos_t hc_pos = CalcPosInArray(prefix, node_postfix_len); + const auto iter = entries_.find(hc_pos); + if (iter == entries_.end() || iter->second.IsValue() || + iter->second.GetNodePostfixLen() < prefix_post_len) { + // We compare the infix only if it lies fully within the prefix. + return entries_.end(); + } + + if (DoesEntryMatch(iter->second, prefix, node_postfix_len)) { + return {iter}; + } + return entries_.end(); + } + /* * Attempts to erase a key/value pair. * This function is not recursive, if the 'key' leads to a child node, the child node * is returned and nothing is removed. 
* * @param key The key of the key/value pair to be erased - * @param parent The parent node of the current node (=nullptr) if this is the root node. + * @param parent_entry The parent node of the current node (=nullptr) if this is the root node. + * @param allow_move_into_parent Whether the node can be merged into the parent if only 1 + * entry is left. * @param found This is and output parameter and will be set to 'true' if a value was removed. * @return A child node if the provided key leads to a child node. */ - Node* Erase(const KeyT& key, Node* parent, bool& found) { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + EntryT* Erase(const KeyT& key, EntryT* parent_entry, bool allow_move_into_parent, bool& found) { + auto postfix_len = parent_entry->GetNodePostfixLen(); + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); auto it = entries_.find(hc_pos); - if (it != entries_.end() && DoesEntryMatch(it->second, key)) { + if (it != entries_.end() && DoesEntryMatch(it->second, key, postfix_len)) { if (it->second.IsNode()) { - return &it->second.GetNode(); + return &it->second; } entries_.erase(it); found = true; - if (parent && GetEntryCount() == 1) { - MergeIntoParent(*this, *parent); + if (allow_move_into_parent && GetEntryCount() == 1) { + // We take the remaining entry from the current node and inserts it into the + // parent_entry where it replaces (and implicitly deletes) the current node. + parent_entry->ReplaceNodeWithDataFromEntry(std::move(entries_.begin()->second)); // WARNING: (this) is deleted here, do not refer to it beyond this point. 
} } @@ -228,23 +241,23 @@ class Node { return entries_; } - void GetStats(PhTreeStats& stats, bit_width_t current_depth = 0) const { + void GetStats( + PhTreeStats& stats, const EntryT& current_entry, bit_width_t current_depth = 0) const { size_t num_children = entries_.size(); ++stats.n_nodes_; - ++stats.infix_hist_[GetInfixLen()]; ++stats.node_depth_hist_[current_depth]; ++stats.node_size_log_hist_[32 - CountLeadingZeros(std::uint32_t(num_children))]; stats.n_total_children_ += num_children; - - current_depth += GetInfixLen(); stats.q_total_depth_ += current_depth; for (auto& entry : entries_) { auto& child = entry.second; if (child.IsNode()) { + auto child_infix_len = child.GetNodeInfixLen(current_entry.GetNodePostfixLen()); + ++stats.infix_hist_[child_infix_len]; auto& sub = child.GetNode(); - sub.GetStats(stats, current_depth + 1); + sub.GetStats(stats, child, current_depth + 1 + child_infix_len); } else { ++stats.q_n_post_fix_n_[current_depth]; ++stats.size_; @@ -252,11 +265,9 @@ class Node { } } - size_t CheckConsistency(bit_width_t current_depth = 0) const { + size_t CheckConsistency(const EntryT& current_entry, bit_width_t current_depth = 0) const { // Except for a root node if the tree has <2 entries. 
assert(entries_.size() >= 2 || current_depth == 0); - - current_depth += GetInfixLen(); size_t num_entries_local = 0; size_t num_entries_children = 0; for (auto& entry : entries_) { @@ -264,19 +275,31 @@ class Node { if (child.IsNode()) { auto& sub = child.GetNode(); // Check node consistency - assert(sub.GetInfixLen() + 1 + sub.GetPostfixLen() == GetPostfixLen()); - num_entries_children += sub.CheckConsistency(current_depth + 1); + auto sub_infix_len = child.GetNodeInfixLen(current_entry.GetNodePostfixLen()); + assert( + sub_infix_len + 1 + child.GetNodePostfixLen() == + current_entry.GetNodePostfixLen()); + num_entries_children += + sub.CheckConsistency(child, current_depth + 1 + sub_infix_len); } else { ++num_entries_local; } } - return num_entries_local + num_entries_children; - } - void SetInfixLen(bit_width_t newInfLen) { - assert(newInfLen < MAX_BIT_WIDTH); - assert(newInfLen >= 0); - infix_len_ = newInfLen; + // Check node center + auto post_len = current_entry.GetNodePostfixLen(); + if (post_len == MAX_BIT_WIDTH - 1) { + for (auto d : current_entry.GetKey()) { + assert(d == 0); + } + } else { + for (auto d : current_entry.GetKey()) { + assert(((d >> post_len) & 0x1) == 1 && "Last bit of node center must be `1`"); + assert(((d >> post_len) << post_len) == d && "postlen bits must all be `0`"); + } + } + + return num_entries_local + num_entries_children; } private: @@ -287,10 +310,8 @@ class Node { void WriteEntry(hc_pos_t hc_pos, EntryT& entry) { if (entry.IsNode()) { - auto& node = entry.GetNode(); - bit_width_t new_subnode_infix_len = postfix_len_ - node.postfix_len_ - 1; - node.SetInfixLen(new_subnode_infix_len); - entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractNode()); + auto postfix_len = entry.GetNodePostfixLen(); + entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractNode(), postfix_len); } else { entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractValue()); } @@ -311,59 +332,50 @@ class Node { * an entry with the exact same key 
as new_key, so insertion has failed. */ template - auto* HandleCollision( - EntryT& existing_entry, bool& is_inserted, const KeyT& new_key, Args&&... args) { - assert(!is_inserted); + auto& HandleCollision( + EntryT& entry, + bool& is_inserted, + const KeyT& new_key, + bit_width_t current_postfix_len, + Args&&... args) { // We have two entries in the same location (local pos). - // Now we need to compare the keys. - // If they are identical, we simply return the entry for further traversal. - if (existing_entry.IsNode()) { - auto& sub_node = existing_entry.GetNode(); - if (sub_node.GetInfixLen() > 0) { - bit_width_t max_conflicting_bits = - NumberOfDivergingBits(new_key, existing_entry.GetKey()); - if (max_conflicting_bits > sub_node.GetPostfixLen() + 1) { - is_inserted = true; - return InsertSplit( - existing_entry, new_key, max_conflicting_bits, std::forward(args)...); - } - } - // No infix conflict, just traverse subnode - } else { - bit_width_t max_conflicting_bits = - NumberOfDivergingBits(new_key, existing_entry.GetKey()); - if (max_conflicting_bits > 0) { - is_inserted = true; - return InsertSplit( - existing_entry, new_key, max_conflicting_bits, std::forward(args)...); - } + // Now we need to compare the keys, respectively the prefix of the subnode. + // If they match, we return the entry for further traversal. + bool is_node = entry.IsNode(); + if (is_node && !entry.HasNodeInfix(current_postfix_len)) { + // No infix conflict (because infix has length=0), just traverse subnode + return entry; + } + + bit_width_t max_conflicting_bits = NumberOfDivergingBits(new_key, entry.GetKey()); + auto split_len = is_node ? 
entry.GetNodePostfixLen() + 1 : 0; + if (max_conflicting_bits <= split_len) { // perfect match -> return existing + return entry; } - return &existing_entry; + + is_inserted = true; + return InsertSplit(entry, new_key, max_conflicting_bits, std::forward(args)...); } template - auto* InsertSplit( + auto& InsertSplit( EntryT& current_entry, const KeyT& new_key, bit_width_t max_conflicting_bits, Args&&... args) { - const auto current_key = current_entry.GetKey(); - - // determine length of infix - bit_width_t new_local_infix_len = GetPostfixLen() - max_conflicting_bits; bit_width_t new_postfix_len = max_conflicting_bits - 1; - auto new_sub_node = std::make_unique(new_local_infix_len, new_postfix_len); hc_pos_t pos_sub_1 = CalcPosInArray(new_key, new_postfix_len); - hc_pos_t pos_sub_2 = CalcPosInArray(current_key, new_postfix_len); + hc_pos_t pos_sub_2 = CalcPosInArray(current_entry.GetKey(), new_postfix_len); // Move key/value into subnode - new_sub_node->WriteEntry(pos_sub_2, current_entry); - auto& new_entry = new_sub_node->WriteValue(pos_sub_1, new_key, std::forward(args)...); + Node new_sub_node{}; + new_sub_node.WriteEntry(pos_sub_2, current_entry); + auto& new_entry = new_sub_node.WriteValue(pos_sub_1, new_key, std::forward(args)...); // Insert new node into local node - current_entry.SetNode(std::move(new_sub_node)); - return &new_entry; + current_entry.SetNode(std::move(new_sub_node), new_postfix_len); + return new_entry; } /* @@ -374,28 +386,17 @@ class Node { * @return 'true' iff the relevant part of the key matches (prefix for nodes, whole key for * other entries). 
*/ - bool DoesEntryMatch(const EntryT& entry, const KeyT& key) const { + bool DoesEntryMatch( + const EntryT& entry, const KeyT& key, const bit_width_t parent_postfix_len) const { if (entry.IsNode()) { - const auto& sub = entry.GetNode(); - if (sub.GetInfixLen() > 0) { - const bit_mask_t mask = MAX_MASK << (sub.GetPostfixLen() + 1); - return KeyEquals(entry.GetKey(), key, mask); + if (entry.HasNodeInfix(parent_postfix_len)) { + return KeyEquals(entry.GetKey(), key, entry.GetNodePostfixLen() + 1); } return true; } return entry.GetKey() == key; } - // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the - // current node). If a variable prefix_len would refer to the number of bits in this node's - // prefix, and if we assume 64 bit values, the following would always hold: - // prefix_len + 1 + postfix_len = 64. - // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, - // ie. the same bit that is used to create the lookup keys in entries_. - bit_width_t postfix_len_; - // The number of bits between this node and the parent node. For 64bit keys possible values - // range from 0 to 62. - bit_width_t infix_len_; EntryMap entries_; }; diff --git a/include/phtree/v16/phtree_v16.h b/include/phtree/v16/phtree_v16.h new file mode 100644 index 00000000..6343e634 --- /dev/null +++ b/include/phtree/v16/phtree_v16.h @@ -0,0 +1,800 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_PHTREE_V16_H +#define PHTREE_V16_PHTREE_V16_H + +#include "debug_helper_v16.h" +#include "for_each.h" +#include "for_each_hc.h" +#include "iterator_full.h" +#include "iterator_hc.h" +#include "iterator_knn_hs.h" +#include "iterator_with_parent.h" +#include "node.h" + +namespace improbable::phtree::v16 { + +/* + * The PH-Tree is an ordered index on an n-dimensional space (quad-/oct-/2^n-tree) where each + * dimension is (by default) indexed by a 64 bit integer. The index order follows z-order / Morton + * order. The index is effectively a 'map', i.e. each key is associated with at most one value. + * + * Keys are points in n-dimensional space. + * + * This API behaves similarly to std::map, see function descriptions for details. + * + * Loosely based on PH-Tree Java, V16, see http://www.phtree.org + * + * See also : + * - T. Zaeschke, C. Zimmerli, M.C. Norrie: + * "The PH-Tree -- A Space-Efficient Storage Structure and Multi-Dimensional Index", (SIGMOD 2014) + * - T. Zaeschke: "The PH-Tree Revisited", (2015) + * - T. Zaeschke, M.C. Norrie: "Efficient Z-Ordered Traversal of Hypercube Indexes" (BTW 2017). + * + * @tparam T Value type. + * @tparam DIM Dimensionality. This is the number of dimensions of the space to index. + * @tparam CONVERT A converter class with a 'pre()' and a 'post()' function. 'pre()' translates + * external KEYs into the internal PhPoint type. 'post()' + * translates the PhPoint back to the external KEY type. 
+ */ +template > +class PhTreeV16 { + friend PhTreeDebugHelper; + using ScalarExternal = typename CONVERT::ScalarExternal; + using ScalarInternal = typename CONVERT::ScalarInternal; + using KeyT = typename CONVERT::KeyInternal; + using EntryT = Entry; + using NodeT = Node; + + public: + static_assert(!std::is_reference::value, "Reference type value are not supported."); + static_assert(std::is_signed::value, "ScalarInternal must be a signed type"); + static_assert( + std::is_integral::value, "ScalarInternal must be an integral type"); + static_assert( + std::is_arithmetic::value, "ScalarExternal must be an arithmetic type"); + static_assert(DIM >= 1 && DIM <= 63, "This PH-Tree supports between 1 and 63 dimensions"); + + explicit PhTreeV16(CONVERT* converter) + : num_entries_{0} + , root_{{}, NodeT{}, MAX_BIT_WIDTH - 1} + , converter_{converter} {} + + PhTreeV16(const PhTreeV16& other) = delete; + PhTreeV16& operator=(const PhTreeV16& other) = delete; + PhTreeV16(PhTreeV16&& other) noexcept = default; + PhTreeV16& operator=(PhTreeV16&& other) noexcept = default; + ~PhTreeV16() noexcept = default; + + /* + * Attempts to build and insert a key and a value into the tree. + * + * @param key The key for the new entry. + * + * @param args Arguments used to generate a new value. + * + * @return A pair, whose first element points to the possibly inserted pair, + * and whose second element is a bool that is true if the pair was actually inserted. + * + * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is + * effectively a map, so if an entry with the same key was already in the tree, returns that + * entry instead of inserting a new one. + */ + template + std::pair try_emplace(const KeyT& key, Args&&... 
args) { + auto* current_entry = &root_; + bool is_inserted = false; + while (current_entry->IsNode()) { + current_entry = ¤t_entry->GetNode().Emplace( + is_inserted, key, current_entry->GetNodePostfixLen(), std::forward(args)...); + } + num_entries_ += is_inserted; + return {current_entry->GetValue(), is_inserted}; + } + + /* + * The try_emplace(hint, key, value) method uses an iterator as hint for insertion. + * The hint is ignored if it is not useful or is equal to end(). + * + * Iterators should normally not be used after the tree has been modified. As an exception to + * this rule, an iterator can be used as hint if it was previously used with at most one call + * to erase() and if no other modifications occurred. + * The following is valid: + * + * // Move value from key1 to key2 + * auto iter = tree.find(key1); + * auto value = iter.second(); // The value may become invalid in erase() + * erase(iter); + * try_emplace(iter, key2, value); // the iterator can still be used as hint here + */ + template + std::pair try_emplace(const ITERATOR& iterator, const KeyT& key, Args&&... args) { + if constexpr (!std::is_same_v>) { + return try_emplace(key, std::forward(args)...); + } else { + // This function can be used to insert a value close to a known value + // or close to a recently removed value. The hint can only be used if the new key is + // inside one of the nodes provided by the hint iterator. + // The idea behind using the 'parent' is twofold: + // - The 'parent' node is one level above the iterator position, it is spatially + // larger and has a better probability of containing the new position, allowing for + // fast track try_emplace. + // - Using 'parent' allows a scenario where the iterator was previously used with + // erase(iterator). This is safe because erase() will never erase the 'parent' node. 
+ + if (!iterator.GetParentNodeEntry()) { + // No hint available, use standard try_emplace() + return try_emplace(key, std::forward(args)...); + } + + auto* parent_entry = iterator.GetParentNodeEntry(); + if (NumberOfDivergingBits(key, parent_entry->GetKey()) > + parent_entry->GetNodePostfixLen() + 1) { + // replace higher up in the tree + return try_emplace(key, std::forward(args)...); + } + + // replace in node + auto* entry = parent_entry; + bool is_inserted = false; + while (entry->IsNode()) { + entry = &entry->GetNode().Emplace( + is_inserted, key, entry->GetNodePostfixLen(), std::forward(args)...); + } + num_entries_ += is_inserted; + return {entry->GetValue(), is_inserted}; + } + } + + /* + * See std::map::insert(). + * + * @return a pair consisting of the inserted element (or to the element that prevented the + * insertion) and a bool denoting whether the insertion took place. + */ + std::pair insert(const KeyT& key, const T& value) { + return try_emplace(key, value); + } + + /* + * @return the value stored at position 'key'. If no such value exists, one is added to the tree + * and returned. + */ + T& operator[](const KeyT& key) { + return try_emplace(key).first; + } + + /* + * Analogous to map:count(). + * + * @return '1', if a value is associated with the provided key, otherwise '0'. + */ + size_t count(const KeyT& key) const { + if (empty()) { + return 0; + } + auto* current_entry = &root_; + while (current_entry && current_entry->IsNode()) { + current_entry = current_entry->GetNode().FindC(key, current_entry->GetNodePostfixLen()); + } + return current_entry ? 1 : 0; + } + + /* + * Analogous to map:find(). + * + * Get an entry associated with a k dimensional key. 
+ * @param key the key to look up + * @return an iterator that points either to the associated value or to {@code end()} if the key + * was not found + */ + auto find(const KeyT& key) const { + const EntryT* current_entry = &root_; + const EntryT* current_node = nullptr; + const EntryT* parent_node = nullptr; + while (current_entry && current_entry->IsNode()) { + parent_node = current_node; + current_node = current_entry; + current_entry = current_entry->GetNode().FindC(key, current_entry->GetNodePostfixLen()); + } + + return IteratorWithParent(current_entry, current_node, parent_node, converter_); + } + + /* + * See std::map::erase(). Removes any value associated with the provided key. + * + * @return '1' if a value was found, otherwise '0'. + */ + size_t erase(const KeyT& key) { + auto* entry = &root_; + // We do not want the root entry to be modified. The reason is simply that a lot of the + // code in this class becomes simpler if we can assume the root entry to contain a node. + bool found = false; + while (entry) { + entry = entry->GetNode().Erase(key, entry, entry != &root_, found); + } + num_entries_ -= found; + return found; + } + + /* + * See std::map::erase(). Removes any value at the given iterator location. + * + * WARNING + * While this is guaranteed to work correctly, only iterators returned from find() + * will result in erase(iterator) being faster than erase(key). + * Iterators returned from other functions may be optimized in a future version. + * + * @return '1' if a value was found, otherwise '0'. + */ + template + size_t erase(const ITERATOR& iterator) { + if (iterator.IsEnd()) { + return 0; + } + if constexpr (std::is_same_v>) { + const auto& iter_rich = static_cast&>(iterator); + if (!iter_rich.GetNodeEntry() || iter_rich.GetNodeEntry() == &root_) { + // Do _not_ use the root entry, see erase(key). Start searching from the top. 
+ return erase(iter_rich.GetEntry()->GetKey()); + } + bool found = false; + EntryT* entry = iter_rich.GetNodeEntry(); + // The loop is a safeguard for find_two_mm which may return slightly wrong iterators. + while (entry != nullptr) { + entry = entry->GetNode().Erase(iter_rich.GetEntry()->GetKey(), entry, true, found); + } + num_entries_ -= found; + return found; + } + // There may be no entry because not every iterator sets it. + return erase(iterator.GetEntry()->GetKey()); + } + + /* + * Relocate (move) an entry from one position to another, subject to a predicate. + * + * @param old_key + * @param new_key + * @param predicate + * + * @return A pair, whose first element points to the possibly relocated value, and + * whose second element is a bool that is true if the value was actually relocated. + */ + template + [[deprecated]] size_t relocate_if2(const KeyT& old_key, const KeyT& new_key, PRED pred) { + auto pair = _find_two(old_key, new_key); + auto& iter_old = pair.first; + auto& iter_new = pair.second; + + if (iter_old.IsEnd() || !pred(*iter_old)) { + return 0; + } + // Are we inserting in same node and same quadrant? Or are the keys equal? + if (iter_old == iter_new) { + iter_old.GetEntry()->SetKey(new_key); + return 1; + } + + bool is_inserted = false; + auto* new_parent = iter_new.GetNodeEntry(); + new_parent->GetNode().Emplace( + is_inserted, new_key, new_parent->GetNodePostfixLen(), std::move(*iter_old)); + if (!is_inserted) { + return 0; + } + + // Erase old value. See comments in try_emplace(iterator) for details. + EntryT* old_node_entry = iter_old.GetNodeEntry(); + if (iter_old.GetParentNodeEntry() == iter_new.GetNodeEntry()) { + // In this case the old_node_entry may have been invalidated by the previous insertion. 
+ old_node_entry = iter_old.GetParentNodeEntry(); + } + bool found = false; + while (old_node_entry) { + old_node_entry = old_node_entry->GetNode().Erase( + old_key, old_node_entry, old_node_entry != &root_, found); + } + assert(found); + return 1; + } + + /* + * Relocate (move) an entry from one position to another, subject to a predicate. + * + * @param old_key + * @param new_key + * @param predicate + * + * @return A pair, whose first element points to the possibly relocated value, and + * whose second element is a bool that is true if the value was actually relocated. + */ + template + auto relocate_if(const KeyT& old_key, const KeyT& new_key, PRED&& pred) { + bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key); + + EntryT* current_entry = &root_; // An entry. + EntryT* old_node_entry = nullptr; // Node that contains entry to be removed + EntryT* old_node_entry_parent = nullptr; // Parent of the old_node_entry + EntryT* new_node_entry = nullptr; // Node that will contain new entry + // Find node for removal + while (current_entry && current_entry->IsNode()) { + old_node_entry_parent = old_node_entry; + old_node_entry = current_entry; + auto postfix_len = old_node_entry->GetNodePostfixLen(); + if (postfix_len + 1 >= n_diverging_bits) { + new_node_entry = old_node_entry; + } + // TODO stop earlier, we are going to have to redo this after insert.... + current_entry = current_entry->GetNode().Find(old_key, postfix_len); + } + EntryT* old_entry = current_entry; // Entry to be removed + + // Can we stop already? + if (old_entry == nullptr || !pred(old_entry->GetValue())) { + return 0; // old_key not found! + } + + // Are the keys equal? Or is the quadrant the same? 
-> same entry + if (n_diverging_bits == 0) { + return 1; + } + if (old_node_entry->GetNodePostfixLen() >= n_diverging_bits) { + old_entry->SetKey(new_key); + return 1; + } + + // Find node for insertion + auto new_entry = new_node_entry; + while (new_entry && new_entry->IsNode()) { + new_node_entry = new_entry; + new_entry = new_entry->GetNode().Find(new_key, new_entry->GetNodePostfixLen()); + } + if (new_entry != nullptr) { + return 0; // Entry exists + } + bool is_inserted = false; + new_entry = &new_node_entry->GetNode().Emplace( + is_inserted, + new_key, + new_node_entry->GetNodePostfixLen(), + std::move(old_entry->ExtractValue())); + + // Erase old value. See comments in try_emplace(iterator) for details. + if (old_node_entry_parent == new_node_entry) { + // In this case the old_node_entry may have been invalidated by the previous + // insertion. + old_node_entry = old_node_entry_parent; + } + + bool is_found = false; + while (old_node_entry) { + old_node_entry = old_node_entry->GetNode().Erase( + old_key, old_node_entry, old_node_entry != &root_, is_found); + } + assert(is_found); + return 1; + } + + private: + /* + * Tries to locate two entries that are 'close' to each other. + * + * Special behavior: + * - returns end() if old_key does not exist; + */ + auto _find_two(const KeyT& old_key, const KeyT& new_key) { + using Iter = IteratorWithParent; + bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key); + + EntryT* current_entry = &root_; // An entry. 
+ EntryT* old_node_entry = nullptr; // Node that contains entry to be removed + EntryT* old_node_entry_parent = nullptr; // Parent of the old_node_entry + EntryT* new_node_entry = nullptr; // Node that will contain new entry + // Find node for removal + while (current_entry && current_entry->IsNode()) { + old_node_entry_parent = old_node_entry; + old_node_entry = current_entry; + auto postfix_len = old_node_entry->GetNodePostfixLen(); + if (postfix_len + 1 >= n_diverging_bits) { + new_node_entry = old_node_entry; + } + current_entry = current_entry->GetNode().Find(old_key, postfix_len); + } + EntryT* old_entry = current_entry; // Entry to be removed + + // Can we stop already? + if (old_entry == nullptr) { + auto iter = Iter(nullptr, nullptr, nullptr, converter_); + return std::make_pair(iter, iter); // old_key not found! + } + + // Are we inserting in same node and same quadrant? Or are the keys equal? + assert(old_node_entry != nullptr); + if (n_diverging_bits == 0 || old_node_entry->GetNodePostfixLen() >= n_diverging_bits) { + auto iter = Iter(old_entry, old_node_entry, old_node_entry_parent, converter_); + return std::make_pair(iter, iter); + } + + // Find node for insertion + auto new_entry = new_node_entry; + while (new_entry && new_entry->IsNode()) { + new_node_entry = new_entry; + new_entry = new_entry->GetNode().Find(new_key, new_entry->GetNodePostfixLen()); + } + + auto iter1 = Iter(old_entry, old_node_entry, old_node_entry_parent, converter_); + auto iter2 = Iter(new_entry, new_node_entry, nullptr, converter_); + return std::make_pair(iter1, iter2); + } + + public: + /* + * This function is used (internally) by the PH-tree multimap. + * + * Relocate (move) an entry from one position to another, subject to a predicate. + * + * @param old_key + * @param new_key + * @param verify_exists. If true, verifies that the keys exists, even if the keys are identical. + * If false, identical keys simply return '1', even if the keys don´t actually + * exist. 
This avoids searching the tree. + * @param RELOCATE A function that handles relocation between buckets. + * @param COUNT A function that verifies relocation in the same bucket, e.g. for identical + * keys, or if the whole bucket is relocated. + * @return The number of relocated entries. + */ + template + size_t _relocate_mm( + const KeyT& old_key, + const KeyT& new_key, + bool verify_exists, + RELOCATE&& relocate_fn, + COUNT&& count_fn) { + bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key); + + if (!verify_exists && n_diverging_bits == 0) { + return 1; // We omit calling COUNT because that would require looking up the entry... + } + + EntryT* current_entry = &root_; // An entry. + EntryT* old_node_entry = nullptr; // Node that contains entry to be removed + EntryT* new_node_entry = nullptr; // Node that will contain the new entry + // Find node or entry for removal + while (current_entry && current_entry->IsNode()) { + old_node_entry = current_entry; + auto postfix_len = old_node_entry->GetNodePostfixLen(); + if (postfix_len + 1 >= n_diverging_bits) { + new_node_entry = old_node_entry; + } + current_entry = current_entry->GetNode().Find(old_key, postfix_len); + } + EntryT* old_entry = current_entry; // Entry to be removed + + // Can we stop already? + if (old_entry == nullptr) { + return 0; // old_key not found! + } + + // Are the keys equal? + if (n_diverging_bits == 0) { + return count_fn(old_entry->GetValue()); + } + // Are the keys in the same quadrant? 
-> same entry + if (old_node_entry->GetNodePostfixLen() >= n_diverging_bits) { + if (old_entry->GetValue().size() == 1) { + auto result = count_fn(old_entry->GetValue()); + if (result > 0) { + old_entry->SetKey(new_key); + } + return result; + } + } + + // Find node for insertion + auto new_entry = new_node_entry; + bool same_node = old_node_entry == new_node_entry; + bool is_inserted = false; + while (new_entry && new_entry->IsNode()) { + new_node_entry = new_entry; + is_inserted = false; + new_entry = + &new_entry->GetNode().Emplace(is_inserted, new_key, new_entry->GetNodePostfixLen()); + num_entries_ += is_inserted; + } + + // Adjust old_entry if necessary, it may have been invalidated by emplace() in the same node + if (is_inserted && same_node) { + old_entry = old_node_entry; + while (old_entry && old_entry->IsNode()) { + old_node_entry = old_entry; + old_entry = old_entry->GetNode().Find(old_key, old_entry->GetNodePostfixLen()); + } + } + + // relocate + auto result = relocate_fn(old_entry->GetValue(), new_entry->GetValue()); + + if (result == 0) { + clean_up(new_key, new_entry, new_node_entry); + } else { + clean_up(old_key, old_entry, old_node_entry); + } + return result; + } + + private: + void clean_up(const KeyT& key, EntryT* entry, EntryT* node_entry) { + // It may happen that node_entry is not the immediate parent, but that is okay! 
+ if (entry != nullptr && entry->GetValue().empty()) { + bool found = false; + while (node_entry != nullptr && node_entry->IsNode()) { + found = false; + node_entry = + node_entry->GetNode().Erase(key, node_entry, node_entry != &root_, found); + } + num_entries_ -= found; + } + } + + public: + auto _find_or_create_two_mm(const KeyT& old_key, const KeyT& new_key, bool count_equals) { + using Iter = IteratorWithParent; + bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key); + + if (!count_equals && n_diverging_bits == 0) { + auto iter = Iter(nullptr, nullptr, nullptr, converter_); + return std::make_pair(iter, iter); + } + + EntryT* new_entry = &root_; // An entry. + EntryT* old_node_entry = nullptr; // Node that contains entry to be removed + EntryT* new_node_entry = nullptr; // Node that will contain new entry + // Find the deepest common parent node for removal and insertion + bool is_inserted = false; + while (new_entry && new_entry->IsNode() && + new_entry->GetNodePostfixLen() + 1 >= n_diverging_bits) { + new_node_entry = new_entry; + auto postfix_len = new_entry->GetNodePostfixLen(); + new_entry = &new_entry->GetNode().Emplace(is_inserted, new_key, postfix_len); + } + old_node_entry = new_node_entry; + + // Find node for insertion + while (new_entry->IsNode()) { + new_node_entry = new_entry; + new_entry = + &new_entry->GetNode().Emplace(is_inserted, new_key, new_entry->GetNodePostfixLen()); + } + num_entries_ += is_inserted; + assert(new_entry != nullptr); + + auto* old_entry = old_node_entry; + while (old_entry && old_entry->IsNode()) { + old_node_entry = old_entry; + old_entry = old_entry->GetNode().Find(old_key, old_entry->GetNodePostfixLen()); + } + + // Does old_entry exist? + if (old_entry == nullptr) { + auto iter = Iter(nullptr, nullptr, nullptr, converter_); + return std::make_pair(iter, iter); // old_key not found! + } + + // Are we inserting in same node and same quadrant? Or are the keys equal? 
+ if (n_diverging_bits == 0) { + auto iter = Iter(old_entry, old_node_entry, nullptr, converter_); + return std::make_pair(iter, iter); + } + + auto iter1 = Iter(old_entry, old_node_entry, nullptr, converter_); + // TODO Note: Emplace() may return a sub-child so new_node_entry may be a grandparent! + auto iter2 = Iter(new_entry, new_node_entry, nullptr, converter_); + return std::make_pair(iter1, iter2); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter + * functions must implement the same signature as the default 'FilterNoOp'. + * + * @param callback The callback function to be called for every entry that matches the query. + * The callback requires the following signature: callback(const PhPointD &, const T &) + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp'. + */ + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) { + ForEach( + converter_, std::forward(callback), std::forward(filter)) + .Traverse(root_); + } + + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + ForEach( + converter_, std::forward(callback), std::forward(filter)) + .Traverse(root_); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param query_box The query window. + * @param callback The callback function to be called for every entry that matches the query. + * The callback requires the following signature: callback(const PhPoint &, const T &) + * @param filter An optional filter function. 
The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + */ + template + void for_each( + // TODO check copy elision + const PhBox query_box, + CALLBACK&& callback, + FILTER&& filter = FILTER()) const { + auto pair = find_starting_node(query_box); + ForEachHC( + query_box.min(), + query_box.max(), + converter_, + std::forward(callback), + std::forward(filter)) + .Traverse(*pair.first, &pair.second); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * functions must implement the same signature as the default 'FilterNoOp'. + * + * @return an iterator over all (filtered) entries in the tree, + */ + template + auto begin(FILTER&& filter = FILTER()) const { + return IteratorFull(root_, converter_, std::forward(filter)); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param query_box The query window. + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + * @return Result iterator. + */ + template + auto begin_query( + const PhBox& query_box, FILTER&& filter = FILTER()) const { + auto pair = find_starting_node(query_box); + return IteratorHC( + *pair.first, + query_box.min(), + query_box.max(), + converter_, + std::forward(filter)); + } + + /* + * Locate nearest neighbors for a given point in space. + * + * Example for distance function: auto fn = DistanceEuclidean + * auto iter = tree.begin_knn_query> + * + * @param min_results number of entries to be returned. 
More entries may or may not be returned + * when several entries have the same distance. + * @param center center point + * @param distance_function optional distance function, defaults to euclidean distance + * @param filter optional filter predicate that excludes nodes/entries before their distance is + * calculated. + * @return Result iterator. + */ + template + auto begin_knn_query( + size_t min_results, + const KeyT& center, + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { + return IteratorKnnHS( + root_, + min_results, + center, + converter_, + std::forward(distance_function), + std::forward(filter)); + } + + /* + * @return An iterator representing the tree's 'end'. + */ + auto end() const { + return IteratorEnd(); + } + + /* + * Remove all entries from the tree. + */ + void clear() { + num_entries_ = 0; + root_ = EntryT({}, NodeT{}, MAX_BIT_WIDTH - 1); + } + + /* + * @return the number of entries (key/value pairs) in the tree. + */ + [[nodiscard]] size_t size() const { + return num_entries_; + } + + /* + * @return 'true' if the tree is empty, otherwise 'false'. + */ + [[nodiscard]] bool empty() const { + return num_entries_ == 0; + } + + private: + /* + * This function is only for debugging. + */ + auto GetDebugHelper() const { + return DebugHelperV16(root_, num_entries_); + } + + /* + * Motivation: Point queries a la find() are faster than window queries. + * Since a window query may have a significant common prefix in their min and max coordinates, + * the part with the common prefix can be executed as point query. + * + * This works if there really is a common prefix, e.g. when querying point data or when + * querying box data with QueryInclude. Unfortunately, QueryIntersect queries have +/-0 infinity + * in their coordinates, so there never is an overlap. 
+ */ + std::pair> find_starting_node( + const PhBox& query_box) const { + auto& prefix = query_box.min(); + bit_width_t max_conflicting_bits = NumberOfDivergingBits(query_box.min(), query_box.max()); + const EntryT* parent = &root_; + if (max_conflicting_bits > root_.GetNodePostfixLen()) { + // Abort early if we have no shared prefix in the query + return {&root_, root_.GetNode().Entries().end()}; + } + EntryIteratorC entry_iter = + root_.GetNode().FindPrefix(prefix, max_conflicting_bits, root_.GetNodePostfixLen()); + while (entry_iter != parent->GetNode().Entries().end() && entry_iter->second.IsNode() && + entry_iter->second.GetNodePostfixLen() >= max_conflicting_bits) { + parent = &entry_iter->second; + entry_iter = parent->GetNode().FindPrefix( + prefix, max_conflicting_bits, parent->GetNodePostfixLen()); + } + return {parent, entry_iter}; + } + + size_t num_entries_; + // Contract: root_ contains a Node with 0 or more entries. The root node is the only Node + // that is allowed to have less than two entries. + EntryT root_; + CONVERT* converter_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_PHTREE_V16_H diff --git a/phtree/CMakeLists.txt b/phtree/CMakeLists.txt deleted file mode 100644 index 53761cd5..00000000 --- a/phtree/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -cmake_minimum_required(VERSION 3.14) -project(phtree) - -add_library(phtree STATIC "") -add_subdirectory(common) -add_subdirectory(v16) - -set_target_properties(phtree PROPERTIES LINKER_LANGUAGE CXX) - diff --git a/phtree/benchmark/find_benchmark.cc b/phtree/benchmark/find_benchmark.cc deleted file mode 100644 index 0621dd7b..00000000 --- a/phtree/benchmark/find_benchmark.cc +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree.h" -#include -#include - -using namespace improbable; -using namespace improbable::phtree; -using namespace improbable::phtree::phbenchmark; - -namespace { - -const int GLOBAL_MAX = 10000; - -enum QueryType { - FIND, - COUNT, -}; - -/* - * Benchmark for looking up entries by their key. - */ -template -class IndexBenchmark { - public: - IndexBenchmark( - benchmark::State& state, TestGenerator data_type, int num_entities, QueryType query_type); - - void Benchmark(benchmark::State& state); - - private: - void SetupWorld(benchmark::State& state); - int QueryWorldCount(benchmark::State& state); - int QueryWorldFind(benchmark::State& state); - - const TestGenerator data_type_; - const int num_entities_; - const QueryType query_type_; - - PhTree tree_; - std::default_random_engine random_engine_; - std::uniform_int_distribution<> cube_distribution_; - std::vector> points_; -}; - -template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, TestGenerator data_type, int num_entities, QueryType query_type) -: data_type_{data_type} -, num_entities_(num_entities) -, query_type_(query_type) -, random_engine_{1} -, cube_distribution_{0, GLOBAL_MAX} -, points_(num_entities) { - logging::SetupDefaultLogging(); - SetupWorld(state); -} - -template -void IndexBenchmark::Benchmark(benchmark::State& state) { - int num_inner = 0; - int num_found = 0; - switch (query_type_) { - case COUNT: { - for (auto _ : state) { - num_found += QueryWorldCount(state); - 
++num_inner; - } - break; - } - case FIND: { - for (auto _ : state) { - num_found += QueryWorldFind(state); - ++num_inner; - } - break; - } - } - // Moved outside of the loop because EXPENSIVE - state.counters["total_result_count"] += num_found; - state.counters["query_rate"] += num_inner; - state.counters["result_rate"] += num_found; - state.counters["avg_result_count"] += num_found; -} - -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { - logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); - CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); - } - - state.counters["total_result_count"] = benchmark::Counter(0); - state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - - logging::info("World setup complete."); -} - -template -int IndexBenchmark::QueryWorldCount(benchmark::State&) { - static int pos = 0; - pos = (pos + 1) % num_entities_; - bool found = true; - if (pos % 2 == 0) { - assert(tree_.find(points_.at(pos)) != tree_.end()); - } else { - int x = pos % GLOBAL_MAX; - PhPoint p = PhPoint({x, x, x}); - found = tree_.find(p) != tree_.end(); - } - return found; -} - -template -int IndexBenchmark::QueryWorldFind(benchmark::State&) { - static int pos = 0; - pos = (pos + 1) % num_entities_; - bool found = true; - if (pos % 2 == 0) { - assert(tree_.find(points_.at(pos)) != tree_.end()); - } else { - int x = pos % GLOBAL_MAX; - PhPoint p = PhPoint({x, x, x}); - found = tree_.find(p) != tree_.end(); - } - return found; -} - -} // namespace - -template -void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; - benchmark.Benchmark(state); -} - -// index type, scenario name, data_generator, num_entities, function_to_call -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_1K, TestGenerator::CUBE, 1000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_10K, TestGenerator::CUBE, 10000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_100K, TestGenerator::CUBE, 100000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_1M, TestGenerator::CUBE, 1000000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_1K, TestGenerator::CUBE, 1000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_10K, TestGenerator::CUBE, 10000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_100K, TestGenerator::CUBE, 100000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_1M, TestGenerator::CUBE, 1000000, FIND) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_generator, num_entities, function_to_call -// PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_1K, TestGenerator::CLUSTER, 1000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_10K, TestGenerator::CLUSTER, 10000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_100K, TestGenerator::CLUSTER, 100000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_1M, TestGenerator::CLUSTER, 1000000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_1K, TestGenerator::CLUSTER, 1000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_10K, TestGenerator::CLUSTER, 10000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_100K, TestGenerator::CLUSTER, 100000, FIND) - 
->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_1M, TestGenerator::CLUSTER, 1000000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_MAIN(); diff --git a/phtree/benchmark/insert_benchmark.cc b/phtree/benchmark/insert_benchmark.cc deleted file mode 100644 index c48e7778..00000000 --- a/phtree/benchmark/insert_benchmark.cc +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" -#include "phtree/phtree.h" -#include - -using namespace improbable; -using namespace improbable::phtree; -using namespace improbable::phtree::phbenchmark; - -namespace { - -const int GLOBAL_MAX = 10000; - -enum InsertionType { - INSERT, - EMPLACE, - SQUARE_BR, -}; - -/* - * Benchmark for adding entries to the index. 
- */ -template -class IndexBenchmark { - public: - IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - InsertionType insertionType); - - void Benchmark(benchmark::State& state); - - private: - void SetupWorld(benchmark::State& state); - - void Insert(benchmark::State& state, PhTree& tree); - - const TestGenerator data_type_; - const int num_entities_; - const InsertionType insertion_type_; - std::vector> points_; -}; - -template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, TestGenerator data_type, int num_entities, InsertionType insertionType) -: data_type_{data_type} -, num_entities_(num_entities) -, insertion_type_(insertionType) -, points_(num_entities) { - logging::SetupDefaultLogging(); - SetupWorld(state); -} - -template -void IndexBenchmark::Benchmark(benchmark::State& state) { - for (auto _ : state) { - state.PauseTiming(); - auto* tree = new PhTree(); - state.ResumeTiming(); - - Insert(state, *tree); - - // we do this top avoid measuring deallocation - state.PauseTiming(); - delete tree; - state.ResumeTiming(); - } -} - -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { - logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); - CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - - state.counters["total_put_count"] = benchmark::Counter(0); - state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); - - logging::info("World setup complete."); -} - -template -void IndexBenchmark::Insert(benchmark::State& state, PhTree& tree) { - switch (insertion_type_) { - case INSERT: { - for (int i = 0; i < num_entities_; ++i) { - tree.insert(points_[i], i); - } - break; - } - case EMPLACE: { - for (int i = 0; i < num_entities_; ++i) { - tree.emplace(points_[i], i); - } - break; - } - case SQUARE_BR: { - for (int i = 0; i < num_entities_; ++i) { - tree[points_[i]] = i; - } - break; - } - } - - 
state.counters["total_put_count"] += num_entities_; - state.counters["put_rate"] += num_entities_; -} - -} // namespace - -template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; - benchmark.Benchmark(state); -} - -// index type, scenario name, data_generator, num_entities, function_to_call -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D, INS_CU_1K, TestGenerator::CUBE, 1000, INSERT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_10K, TestGenerator::CUBE, 10000, INSERT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_100K, TestGenerator::CUBE, 100000, INSERT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_1M, TestGenerator::CUBE, 1000000, INSERT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, INS_CU_10M, TestGenerator::CUBE, 10000000, INSERT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CU_1K, TestGenerator::CUBE, 1000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CU_10K, TestGenerator::CUBE, 10000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CU_100K, TestGenerator::CUBE, 100000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CU_1M, TestGenerator::CUBE, 1000000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CU_10M, TestGenerator::CUBE, 10000000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, SQB_CU_1K, TestGenerator::CUBE, 1000, SQUARE_BR) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, SQB_CU_10K, TestGenerator::CUBE, 10000, SQUARE_BR) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, SQB_CU_100K, TestGenerator::CUBE, 100000, SQUARE_BR) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, SQB_CU_1M, TestGenerator::CUBE, 1000000, SQUARE_BR) - 
->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, SQB_CU_10M, TestGenerator::CUBE, 10000000, SQUARE_BR) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CL_1K, TestGenerator::CLUSTER, 1000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CL_10K, TestGenerator::CLUSTER, 10000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CL_100K, TestGenerator::CLUSTER, 100000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CL_1M, TestGenerator::CLUSTER, 1000000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, EMP_CL_10M, TestGenerator::CLUSTER, 10000000, EMPLACE) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_MAIN(); diff --git a/phtree/benchmark/logging.cc b/phtree/benchmark/logging.cc deleted file mode 100644 index 51803f0c..00000000 --- a/phtree/benchmark/logging.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) Improbable Worlds Ltd, All Rights Reserved -#include "logging.h" - -namespace improbable::phtree::phbenchmark::logging { - -void SetupDefaultLogging() { - SetupLogging({}, spdlog::level::warn); -} - -void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level) { - auto& console_sink = sinks.emplace_back(std::make_shared()); - console_sink->set_level(log_level); - - // Find the minimum log level, in case one of the sinks passed to us has a lower log level. - const auto& sink_with_lowest_log_level = *std::min_element( - sinks.begin(), - sinks.end(), - [](const spdlog::sink_ptr& a, const spdlog::sink_ptr& b) -> bool { - return a->level() < b->level(); - }); - spdlog::level::level_enum min_log_level = - std::min(sink_with_lowest_log_level->level(), log_level); - - // Create the external logger, worker logger and the internal (default) logger from the same log - // sinks. Each logsink can use `GetLoggerTypeFromMessage` to determine which logger a message - // was logged to. 
- spdlog::set_default_logger( - std::make_shared(kInternalLoggerName, sinks.begin(), sinks.end())); - spdlog::set_level(min_log_level); - spdlog::flush_on(min_log_level); -} - -} // namespace improbable::phtree::phbenchmark::logging diff --git a/phtree/common/CMakeLists.txt b/phtree/common/CMakeLists.txt deleted file mode 100644 index bb07ca12..00000000 --- a/phtree/common/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -cmake_minimum_required(VERSION 3.14) - -target_sources(phtree - PRIVATE - common.h - base_types.h - bits.h - distance.h - filter.h - flat_array_map.h - flat_sparse_map.h - converter.h - debug_helper.h - tree_stats.h - ) diff --git a/phtree/common/filter.h b/phtree/common/filter.h deleted file mode 100644 index 46eacee3..00000000 --- a/phtree/common/filter.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_COMMON_FILTERS_H -#define PHTREE_COMMON_FILTERS_H - -#include "base_types.h" -#include "bits.h" -#include "converter.h" -#include "flat_array_map.h" -#include "flat_sparse_map.h" -#include "tree_stats.h" -#include -#include -#include -#include -#include -#include - -namespace improbable::phtree { - -/* - * Any iterator that has a filter defined will traverse nodes or return values if and only if the - * filter function returns 'true'. 
The filter functions are called for every node and every entry - * (note: internally, nodes are also stored in entries, but these entries will be passed to the - * filter for nodes) that the iterator encounters. By implication, it will never call the filter - * function for nodes of entries if their respective parent node has already been rejected. - * - * There are separate filter functions for nodes and for key/value entries. - * - * Every filter needs to provide two functions: - * - bool IsEntryValid(const PhPoint& key, const T& value); - * This function is called for every key/value pair that the query encounters. The function - * should return 'true' iff the key/value should be added to the query result. - * The parameters are the key and value of the key/value pair. - * - bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore); - * This function is called for every node that the query encounters. The function should - * return 'true' if the node should be traversed and searched for potential results. - * The parameters are the prefix of the node and the number of least significant bits of the - * prefix that can (and should) be ignored. The bits of the prefix that should be ignored can - * have any value. - */ - -/* - * The no-op filter is the default filter for the PH-Tree. It always returns 'true'. - */ -struct FilterNoOp { - /* - * @param key The key/coordinate of the entry. - * @param value The value of the entry. - * @returns This default implementation always returns `true`. - */ - template - constexpr bool IsEntryValid(const KEY& /*key*/, const T& /*value*/) const { - return true; - } - - /* - * @param prefix The prefix of node. Any coordinate in the nodes shares this prefix. - * @param bits_to_ignore The number of bits of the prefix that should be ignored because they - * are NOT the same for all coordinates in the node. 
For example, assuming 64bit values, if the - * node represents coordinates that all share the first 10 bits of the prefix, then the value of - * bits_to_ignore is 64-10=54. - * @returns This default implementation always returns `true`. - */ - template - constexpr bool IsNodeValid(const KEY& /*prefix*/, int /*bits_to_ignore*/) const { - return true; - } -}; - -/* - * The AABB filter can be used to query a point tree for an axis aligned bounding box (AABB). - * The result is equivalent to that of the 'begin_query(...)' function. - */ -template > -class FilterAABB { - using KeyExternal = typename CONVERTER::KeyExternal; - using KeyInternal = typename CONVERTER::KeyInternal; - using ScalarInternal = typename CONVERTER::ScalarInternal; - - static constexpr auto DIM = CONVERTER::DimInternal; - - public: - FilterAABB( - const KeyExternal& min_include, - const KeyExternal& max_include, - CONVERTER converter = CONVERTER()) - : min_external_{min_include} - , max_external_{max_include} - , min_internal_{converter.pre(min_include)} - , max_internal_{converter.pre(max_include)} - , converter_{converter} {}; - - /* - * This function allows resizing/shifting the AABB while iterating over the tree. 
- */ - void set(const KeyExternal& min_include, const KeyExternal& max_include) { - min_external_ = min_include; - max_external_ = max_include; - min_internal_ = converter_.pre(min_include); - max_internal_ = converter_.pre(max_include); - } - - template - [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T& /*value*/) const { - auto point = converter_.post(key); - for (dimension_t i = 0; i < DIM; ++i) { - if (point[i] < min_external_[i] || point[i] > max_external_[i]) { - return false; - } - } - return true; - } - - [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) const { - // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) - if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { - return true; - } - ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; - ScalarInternal node_max_bits = ~node_min_bits; - - for (dimension_t i = 0; i < DIM; ++i) { - if ((prefix[i] | node_max_bits) < min_internal_[i] || - (prefix[i] & node_min_bits) > max_internal_[i]) { - return false; - } - } - return true; - } - - private: - const KeyExternal min_external_; - const KeyExternal max_external_; - const KeyInternal min_internal_; - const KeyInternal max_internal_; - const CONVERTER converter_; -}; - -/* - * The sphere filter can be used to query a point tree for a sphere. 
- */ -template < - typename CONVERTER = ConverterIEEE<3>, - typename DISTANCE = DistanceEuclidean> -class FilterSphere { - using KeyExternal = typename CONVERTER::KeyExternal; - using KeyInternal = typename CONVERTER::KeyInternal; - using ScalarInternal = typename CONVERTER::ScalarInternal; - using ScalarExternal = typename CONVERTER::ScalarExternal; - - static constexpr auto DIM = CONVERTER::DimInternal; - - public: - FilterSphere( - const KeyExternal& center, - const ScalarExternal& radius, - CONVERTER converter = CONVERTER(), - DISTANCE distance_function = DISTANCE()) - : center_external_{center} - , center_internal_{converter.pre(center)} - , radius_{radius} - , converter_{converter} - , distance_function_{distance_function} {}; - - template - [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T&) const { - KeyExternal point = converter_.post(key); - return distance_function_(center_external_, point) <= radius_; - } - - /* - * Calculate whether AABB encompassing all possible points in the node intersects with the - * sphere. 
- */ - [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) const { - // we always want to traverse the root node (bits_to_ignore == 64) - - if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { - return true; - } - - ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; - ScalarInternal node_max_bits = ~node_min_bits; - - KeyInternal closest_in_bounds; - for (dimension_t i = 0; i < DIM; ++i) { - // calculate lower and upper bound for dimension for given node - ScalarInternal lo = prefix[i] & node_min_bits; - ScalarInternal hi = prefix[i] | node_max_bits; - - // choose value closest to center for dimension - closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); - } - - KeyExternal closest_point = converter_.post(closest_in_bounds); - return distance_function_(center_external_, closest_point) <= radius_; - } - - private: - const KeyExternal center_external_; - const KeyExternal center_internal_; - const ScalarExternal radius_; - const CONVERTER converter_; - const DISTANCE distance_function_; -}; - -} // namespace improbable::phtree - -#endif // PHTREE_COMMON_FILTERS_H diff --git a/phtree/common/flat_array_map.h b/phtree/common/flat_array_map.h deleted file mode 100644 index 4171a3a7..00000000 --- a/phtree/common/flat_array_map.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef PHTREE_COMMON_FLAT_ARRAY_MAP_H -#define PHTREE_COMMON_FLAT_ARRAY_MAP_H - -#include "bits.h" -#include -#include -#include - -/* - * PLEASE do not include this file directly, it is included via common.h. - * - * This file contains the array_map implementation, which is used in low-dimensional nodes in the - * PH-Tree. - */ -namespace improbable::phtree { - -namespace { -template -class PhFlatMapIterator; - -template -using PhFlatMapPair = std::pair; - -using bit_string_t = std::uint64_t; -constexpr bit_string_t U64_ONE = bit_string_t(1); -} // namespace - -/* - * The array_map is a flat map implementation that uses an array of SIZE=2^DIM. The key is - * effectively the position in the array. - * - * It has O(1) insertion/removal time complexity, but O(2^DIM) space complexity, so it is best used - * when DIM is low and/or the map is known to have a high fill ratio. - */ -template -class array_map { - friend PhFlatMapIterator; - static_assert(SIZE <= 64); // or else we need to adapt 'occupancy' - static_assert(SIZE > 0); - - public: - ~array_map() { - if (occupancy != 0) { - for (size_t i = 0; i < SIZE; ++i) { - if (occupied(i)) { - data(i).~pair(); - } - } - } - } - - [[nodiscard]] auto find(size_t index) const { - return occupied(index) ? PhFlatMapIterator{index, *this} : end(); - } - - [[nodiscard]] auto lower_bound(size_t index) const { - size_t index2 = lower_bound_index(index); - if (index2 < SIZE) { - return PhFlatMapIterator{index2, *this}; - } - return end(); - } - - [[nodiscard]] auto begin() const { - size_t index = CountTrailingZeros(occupancy); - // Assert index points to a valid position or outside the map if the map is empty - assert((size() == 0 && index >= SIZE) || occupied(index)); - return PhFlatMapIterator{index < SIZE ? 
index : SIZE, *this}; - } - - [[nodiscard]] auto cbegin() const { - size_t index = CountTrailingZeros(occupancy); - // Assert index points to a valid position or outside the map if the map is empty - assert((size() == 0 && index >= SIZE) || occupied(index)); - return PhFlatMapIterator{index < SIZE ? index : SIZE, *this}; - } - - [[nodiscard]] auto end() const { - return PhFlatMapIterator{SIZE, *this}; - } - - template - auto emplace(Args&&... args) { - return try_emplace_base(std::forward(args)...); - } - - template - auto try_emplace(size_t index, Args&&... args) { - return try_emplace_base(index, std::forward(args)...); - } - - bool erase(size_t index) { - if (occupied(index)) { - data(index).~pair(); - occupied(index, false); - return true; - } - return false; - } - - bool erase(PhFlatMapIterator& iterator) { - return erase(iterator.first); - } - - [[nodiscard]] size_t size() const { - return std::bitset<64>(occupancy).count(); - } - - private: - template - std::pair*, bool> try_emplace_base(size_t index, Args&&... args) { - if (!occupied(index)) { - new (reinterpret_cast(&data_[index])) PhFlatMapPair( - std::piecewise_construct, - std::forward_as_tuple(index), - std::forward_as_tuple(std::forward(args)...)); - occupied(index, true); - return {&data(index), true}; - } - return {&data(index), false}; - } - - /* - * This returns the element at the given index, which is _not_ the n'th element (for n = index). 
- */ - PhFlatMapPair& data(size_t index) { - assert(occupied(index)); - return *std::launder(reinterpret_cast*>(&data_[index])); - } - - const PhFlatMapPair& data(size_t index) const { - assert(occupied(index)); - return *std::launder(reinterpret_cast*>(&data_[index])); - } - - [[nodiscard]] size_t lower_bound_index(size_t index) const { - assert(index < SIZE); - size_t num_zeros = CountTrailingZeros(occupancy >> index); - // num_zeros may be equal to SIZE if no bits remain - return std::min(SIZE, index + num_zeros); - } - - void occupied(size_t index, bool flag) { - (void)flag; - assert(index < SIZE); - assert(occupied(index) != flag); - // flip the bit - occupancy ^= (U64_ONE << index); - assert(occupied(index) == flag); - } - - [[nodiscard]] bool occupied(size_t index) const { - return (occupancy >> index) & U64_ONE; - } - - bit_string_t occupancy = 0; - // We use an untyped array to avoid implicit calls to constructors and destructors of entries. - std::aligned_storage_t), alignof(PhFlatMapPair)> data_[SIZE]; -}; - -namespace { -template -class PhFlatMapIterator { - friend array_map; - - public: - PhFlatMapIterator() : first{0}, map_{nullptr} {}; - - explicit PhFlatMapIterator(size_t index, const array_map& map) - : first{index}, map_{&map} { - assert(index <= SIZE); - } - - auto& operator*() const { - assert(first < SIZE && map_->occupied(first)); - return const_cast&>(map_->data(first)); - } - - auto* operator->() const { - assert(first < SIZE && map_->occupied(first)); - return const_cast*>(&map_->data(first)); - } - - auto& operator++() { - first = (first + 1) >= SIZE ? 
SIZE : map_->lower_bound_index(first + 1); - return *this; - } - - auto operator++(int) { - PhFlatMapIterator iterator(first, *map_); - ++(*this); - return iterator; - } - - friend bool operator==( - const PhFlatMapIterator& left, const PhFlatMapIterator& right) { - return left.first == right.first; - } - - friend bool operator!=( - const PhFlatMapIterator& left, const PhFlatMapIterator& right) { - return !(left == right); - } - - private: - size_t first; - const array_map* map_; -}; - -} // namespace -} // namespace improbable::phtree - -#endif // PHTREE_COMMON_FLAT_ARRAY_MAP_H diff --git a/phtree/phtree_d_test_filter.cc b/phtree/phtree_d_test_filter.cc deleted file mode 100644 index f5470190..00000000 --- a/phtree/phtree_d_test_filter.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "phtree/phtree.h" -#include -#include -#include - -using namespace improbable::phtree; - -template -using TestPoint = PhPointD; - -template -using TestTree = PhTreeD; - -class DoubleRng { - public: - DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} - - double next() { - return rnd(eng); - } - - private: - std::default_random_engine eng; - std::uniform_real_distribution rnd; -}; - -template -void generateCube(std::vector>& points, size_t N) { - DoubleRng rng(-1000, 1000); - auto refTree = std::map, size_t>(); - - points.reserve(N); - for (size_t i = 0; i < N; i++) { - auto point = TestPoint{rng.next(), rng.next(), rng.next()}; - if (refTree.count(point) != 0) { - i--; - continue; - } - - refTree.emplace(point, i); - points.push_back(point); - } - ASSERT_EQ(refTree.size(), N); - ASSERT_EQ(points.size(), N); -} - -template -void populate(TestTree& tree, std::vector>& points, size_t N) { - generateCube(points, N); - for (size_t i = 0; i < N; i++) { - ASSERT_TRUE(tree.insert(points[i], i).second); - } - ASSERT_EQ(N, tree.size()); -} diff --git a/phtree/phtree_test_unique_ptr_values.cc b/phtree/phtree_test_unique_ptr_values.cc deleted file mode 100644 index 7d7d6716..00000000 --- a/phtree/phtree_test_unique_ptr_values.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "phtree/phtree.h" -#include -#include - -using namespace improbable::phtree; - -template -using TestPoint = PhPoint; - -template -using TestTree = PhTree; - -class IntRng { - public: - IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} - - int next() { - return rnd(eng); - } - - private: - std::default_random_engine eng; - std::uniform_int_distribution rnd; -}; - -struct IdObj { - IdObj() = default; - - explicit IdObj(const size_t i) : _i(static_cast(i)){}; - - bool operator==(IdObj& rhs) { - return _i == rhs._i; - } - - IdObj& operator=(IdObj const& rhs) = default; - - int _i; -}; - -using Id = std::unique_ptr; - -struct PointDistance { - PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} - - double _distance; - size_t _id; -}; - -bool comparePointDistance(PointDistance& i1, PointDistance& i2) { - return (i1._distance < i2._distance); -} - -template -double distance(const TestPoint& p1, const TestPoint& p2) { - double sum2 = 0; - for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; - sum2 += d * d; - } - return sqrt(sum2); -} - -template -double distanceL1(const TestPoint& p1, const TestPoint& p2) { - double sum = 0; - for (dimension_t i = 0; i < DIM; i++) { - sum += std::abs(p1[i] - p2[i]); - } - return sum; -} - -template -void generateCube(std::vector>& points, size_t N) { - IntRng rng(-1000, 1000); - auto refTree = std::map, size_t>(); - - points.reserve(N); - for (size_t i = 0; i < N; i++) { - auto point = TestPoint{rng.next(), rng.next(), rng.next()}; - if (refTree.count(point) != 0) { - i--; - continue; - } - - refTree.emplace(point, i); - points.push_back(point); - } - ASSERT_EQ(refTree.size(), N); - ASSERT_EQ(points.size(), N); -} - -template -void SmokeTestBasicOps(size_t N) { - TestTree tree; - std::vector> points; - generateCube(points, N); - - ASSERT_EQ(0, tree.size()); - ASSERT_TRUE(tree.empty()); - PhTreeDebugHelper::CheckConsistency(tree); - - for (size_t i = 0; i < N; i++) { 
- TestPoint& p = points.at(i); - ASSERT_EQ(tree.count(p), 0); - ASSERT_EQ(tree.end(), tree.find(p)); - - if (i % 2 == 0) { - ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); - } else { - Id id = std::make_unique(i); - ASSERT_TRUE(tree.emplace(p, std::move(id)).second); - } - ASSERT_EQ(tree.count(p), 1); - ASSERT_NE(tree.end(), tree.find(p)); - ASSERT_EQ(i, (*tree.find(p))->_i); - ASSERT_EQ(i + 1, tree.size()); - - // try add again - ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); - ASSERT_EQ(tree.count(p), 1); - ASSERT_NE(tree.end(), tree.find(p)); - ASSERT_EQ(i, (*tree.find(p))->_i); - ASSERT_EQ(i + 1, tree.size()); - ASSERT_FALSE(tree.empty()); - } - - for (size_t i = 0; i < N; i++) { - TestPoint& p = points.at(i); - auto q = tree.begin_query({p, p}); - ASSERT_NE(q, tree.end()); - ASSERT_EQ(i, (*q)->_i); - q++; - ASSERT_EQ(q, tree.end()); - } - - PhTreeDebugHelper::CheckConsistency(tree); - - for (size_t i = 0; i < N; i++) { - TestPoint& p = points.at(i); - ASSERT_NE(tree.find(p), tree.end()); - ASSERT_EQ(tree.count(p), 1); - ASSERT_EQ(i, (*tree.find(p))->_i); - ASSERT_EQ(1, tree.erase(p)); - - ASSERT_EQ(tree.count(p), 0); - ASSERT_EQ(tree.end(), tree.find(p)); - ASSERT_EQ(N - i - 1, tree.size()); - - // try remove again - ASSERT_EQ(0, tree.erase(p)); - ASSERT_EQ(tree.count(p), 0); - ASSERT_EQ(tree.end(), tree.find(p)); - ASSERT_EQ(N - i - 1, tree.size()); - if (i < N - 1) { - ASSERT_FALSE(tree.empty()); - } - } - ASSERT_EQ(0, tree.size()); - ASSERT_TRUE(tree.empty()); - PhTreeDebugHelper::CheckConsistency(tree); -} - -TEST(PhTreeTestUniquePtr, SmokeTestBasicOps) { - SmokeTestBasicOps<3>(10000); - SmokeTestBasicOps<6>(10000); - SmokeTestBasicOps<10>(1000); - SmokeTestBasicOps<20>(100); -} diff --git a/phtree/testing/BUILD b/phtree/testing/BUILD deleted file mode 100644 index 2aed744c..00000000 --- a/phtree/testing/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -package(default_visibility = ["//visibility:private"]) - -cc_library( - name = "testing", - 
testonly = True, - srcs = [ - ], - hdrs = [ - ], - visibility = [ - ], - deps = [ - ], -) diff --git a/phtree/testing/gtest_main/BUILD b/phtree/testing/gtest_main/BUILD deleted file mode 100644 index 0d591976..00000000 --- a/phtree/testing/gtest_main/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -package(default_visibility = ["//visibility:private"]) - -cc_library( - name = "gtest_main", - testonly = True, - srcs = ["gtest_main.cc"], - visibility = [ - "//visibility:public", - ], - deps = [ - "@gtest", - ], - alwayslink = 1, -) diff --git a/phtree/testing/gtest_main/gtest_main.cc b/phtree/testing/gtest_main/gtest_main.cc deleted file mode 100644 index 1e11ab41..00000000 --- a/phtree/testing/gtest_main/gtest_main.cc +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -int main(int argc, char** argv) { - testing::InitGoogleMock(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/phtree/v16/CMakeLists.txt b/phtree/v16/CMakeLists.txt deleted file mode 100644 index 1aa65630..00000000 --- a/phtree/v16/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -cmake_minimum_required(VERSION 3.14) - -target_sources(phtree - PRIVATE - debug_helper_v16.h - node.h - entry.h - iterator_base.h - iterator_full.h - iterator_hc.h - iterator_knn_hs.h - iterator_simple.h - phtree_v16.h - ) diff --git a/phtree/v16/entry.h b/phtree/v16/entry.h deleted file mode 100644 index 1c8610fc..00000000 --- a/phtree/v16/entry.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_V16_ENTRY_H -#define PHTREE_V16_ENTRY_H - -#include "../../phtree/common/common.h" -#include "node.h" -#include -#include -#include - -namespace improbable::phtree::v16 { - -template -class Node; - -/* - * Nodes in the PH-Tree contain up to 2^DIM PhEntries, one in each geometric quadrant. - * PhEntries can contain two types of data: - * - A key/value pair (value of type T) - * - A prefix/child-node pair, where prefix is the prefix of the child node and the - * child node is contained in a unique_ptr. 
- */ -template -class Entry { - using KeyT = PhPoint; - using ValueT = std::remove_const_t; - using NodeT = Node; - - public: - /* - * Construct entry with existing node. - */ - Entry(const KeyT& k, std::unique_ptr&& node_ptr) - : kd_key_{k}, node_{std::move(node_ptr)}, value_{std::nullopt} {} - - /* - * Construct entry with a new node. - */ - Entry(bit_width_t infix_len, bit_width_t postfix_len) - : kd_key_(), node_{std::make_unique(infix_len, postfix_len)}, value_{std::nullopt} {} - - /* - * Construct entry with existing T. - */ - Entry(const KeyT& k, std::optional&& value) - : kd_key_{k}, node_{nullptr}, value_{std::move(value)} {} - - /* - * Construct entry with new T or moved T. - */ - template - explicit Entry(const KeyT& k, Args&&... args) - : kd_key_{k}, node_{nullptr}, value_{std::in_place, std::forward(args)...} {} - - [[nodiscard]] const KeyT& GetKey() const { - return kd_key_; - } - - [[nodiscard]] bool IsValue() const { - return value_.has_value(); - } - - [[nodiscard]] bool IsNode() const { - return node_.get() != nullptr; - } - - [[nodiscard]] T& GetValue() const { - assert(IsValue()); - return const_cast(*value_); - } - - [[nodiscard]] NodeT& GetNode() const { - assert(IsNode()); - return *node_; - } - - void SetNode(std::unique_ptr&& node) { - assert(!IsNode()); - node_ = std::move(node); - value_.reset(); - } - - [[nodiscard]] std::optional&& ExtractValue() { - assert(IsValue()); - return std::move(value_); - } - - [[nodiscard]] std::unique_ptr&& ExtractNode() { - assert(IsNode()); - return std::move(node_); - } - - void ReplaceNodeWithDataFromEntry(Entry&& other) { - assert(IsNode()); - kd_key_ = other.GetKey(); - - if (other.IsNode()) { - node_ = std::move(other.node_); - } else { - value_ = std::move(other.value_); - node_.reset(); - } - } - - private: - KeyT kd_key_; - std::unique_ptr node_; - std::optional value_; -}; -} // namespace improbable::phtree::v16 - -#endif // PHTREE_V16_ENTRY_H diff --git a/phtree/v16/iterator_base.h 
b/phtree/v16/iterator_base.h deleted file mode 100644 index 50ac8708..00000000 --- a/phtree/v16/iterator_base.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_V16_ITERATOR_BASE_H -#define PHTREE_V16_ITERATOR_BASE_H - -#include "../common/common.h" -#include "entry.h" - -namespace improbable::phtree::v16 { - -template -class PhTreeV16; - -/* - * Base class for all PH-Tree iterators. 
- */ -template -class IteratorBase { - protected: - static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyInternal = typename CONVERT::KeyInternal; - using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = Entry; - friend PhTreeV16; - - public: - explicit IteratorBase(const CONVERT& converter) - : current_result_{nullptr} - , current_node_{} - , parent_node_{} - , is_finished_{false} - , converter_{converter} - , filter_{FILTER()} {} - - explicit IteratorBase(const CONVERT& converter, FILTER filter) - : current_result_{nullptr} - , current_node_{} - , parent_node_{} - , is_finished_{false} - , converter_{converter} - , filter_(std::move(filter)) {} - - T& operator*() const { - assert(current_result_); - return current_result_->GetValue(); - } - - T* operator->() const { - assert(current_result_); - return ¤t_result_->GetValue(); - } - - template - friend bool operator==( - const IteratorBase& left, - const IteratorBase& right) { - // Note: The following compares pointers to Entry objects so it should be - // a) fast (i.e. not comparing contents of entries) - // b) return `false` when comparing apparently identical entries from different PH-Trees (as - // intended) - return (left.is_finished_ && right.Finished()) || - (!left.is_finished_ && !right.Finished() && - left.current_result_ == right.GetCurrentResult()); - } - - template - friend bool operator!=( - const IteratorBase& left, - const IteratorBase& right) { - return !(left == right); - } - - auto first() const { - return converter_.post(current_result_->GetKey()); - } - - T& second() const { - return current_result_->GetValue(); - } - - [[nodiscard]] bool Finished() const { - return is_finished_; - } - - const EntryT* GetCurrentResult() const { - return current_result_; - } - - protected: - void SetFinished() { - is_finished_ = true; - current_result_ = nullptr; - } - - [[nodiscard]] bool ApplyFilter(const EntryT& entry) const { - return entry.IsNode() - ? 
filter_.IsNodeValid(entry.GetKey(), entry.GetNode().GetPostfixLen() + 1) - : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); - } - - void SetCurrentResult(const EntryT* current_result) { - current_result_ = current_result; - } - - void SetCurrentNodeEntry(const EntryT* current_node) { - assert(!current_node || current_node->IsNode()); - current_node_ = current_node; - } - - void SetParentNodeEntry(const EntryT* parent_node) { - assert(!parent_node || parent_node->IsNode()); - parent_node_ = parent_node; - } - - auto post(const KeyInternal& point) { - return converter_.post(point); - } - - private: - /* - * The parent entry contains the parent node. The parent node is the node ABOVE the current node - * which contains the current entry. - */ - const EntryT* GetCurrentNodeEntry() const { - return current_node_; - } - - const EntryT* GetParentNodeEntry() const { - return parent_node_; - } - - const EntryT* current_result_; - const EntryT* current_node_; - const EntryT* parent_node_; - bool is_finished_; - const CONVERT& converter_; - FILTER filter_; -}; - -} // namespace improbable::phtree::v16 - -#endif // PHTREE_V16_ITERATOR_BASE_H diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h deleted file mode 100644 index 103b7870..00000000 --- a/phtree/v16/phtree_v16.h +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef PHTREE_V16_PHTREE_V16_H -#define PHTREE_V16_PHTREE_V16_H - -#include "debug_helper_v16.h" -#include "for_each.h" -#include "for_each_hc.h" -#include "iterator_full.h" -#include "iterator_hc.h" -#include "iterator_knn_hs.h" -#include "iterator_simple.h" -#include "node.h" - -namespace improbable::phtree::v16 { - -/* - * The PH-Tree is an ordered index on an n-dimensional space (quad-/oct-/2^n-tree) where each - * dimension is (by default) indexed by a 64 bit integer. The index ordered follows z-order / Morton - * order. The index is effectively a 'map', i.e. each key is associated with at most one value. - * - * Keys are points in n-dimensional space. - * - * This API behaves similar to std::map, see function descriptions for details. - * - * Loosely based on PH-Tree Java, V16, see http://www.phtree.org - * - * See also : - * - T. Zaeschke, C. Zimmerli, M.C. Norrie: - * "The PH-Tree -- A Space-Efficient Storage Structure and Multi-Dimensional Index", (SIGMOD 2014) - * - T. Zaeschke: "The PH-Tree Revisited", (2015) - * - T. Zaeschke, M.C. Norrie: "Efficient Z-Ordered Traversal of Hypercube Indexes" (BTW 2017). - * - * @tparam T Value type. - * @tparam DIM Dimensionality. This is the number of dimensions of the space to index. - * @tparam CONVERT A converter class with a 'pre()' and a 'post()' function. 'pre()' translates - * external KEYs into the internal PhPoint type. 'post()' - * translates the PhPoint back to the external KEY type. 
- */ -template > -class PhTreeV16 { - friend PhTreeDebugHelper; - using ScalarExternal = typename CONVERT::ScalarExternal; - using ScalarInternal = typename CONVERT::ScalarInternal; - using KeyT = typename CONVERT::KeyInternal; - using NodeT = Node; - using EntryT = Entry; - - public: - static_assert(!std::is_reference::value, "Reference type value are not supported."); - static_assert(std::is_signed::value, "ScalarInternal must be a signed type"); - static_assert( - std::is_integral::value, "ScalarInternal must be an integral type"); - static_assert( - std::is_arithmetic::value, "ScalarExternal must be an arithmetic type"); - static_assert(DIM >= 1 && DIM <= 63, "This PH-Tree supports between 1 and 63 dimensions"); - - PhTreeV16(CONVERT& converter = ConverterNoOp()) - : num_entries_{0} - , root_{0, MAX_BIT_WIDTH - 1} - , the_end_{converter} - , converter_{converter} {} - - /* - * Attempts to build and insert a key and a value into the tree. - * - * @param key The key for the new entry. - * - * @param args Arguments used to generate a new value. - * - * @return A pair, whose first element points to the possibly inserted pair, - * and whose second element is a bool that is true if the pair was actually inserted. - * - * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is - * effectively a map, so if an entry with the same key was already in the tree, returns that - * entry instead of inserting a new one. - */ - template - std::pair emplace(const KeyT& key, Args&&... args) { - auto* current_entry = &root_; - bool is_inserted = false; - while (current_entry->IsNode()) { - current_entry = - current_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); - } - num_entries_ += is_inserted; - return {current_entry->GetValue(), is_inserted}; - } - - /* - * The emplace_hint() method uses an iterator as hint for insertion. - * The hint is ignored if it is not useful or is equal to end(). 
- * - * Iterators should normally not be used after the tree has been modified. As an exception to - * this rule, an iterator can be used as hint if it was previously used with at most one call - * to erase() and if no other modifications occurred. - * The following is valid: - * - * // Move value from key1 to key2 - * auto iter = tree.find(key1); - * auto value = iter.second(); // The value may become invalid in erase() - * erase(iter); - * emplace_hint(iter, key2, value); // the iterator can still be used as hint here - */ - template - std::pair emplace_hint(const ITERATOR& iterator, const KeyT& key, Args&&... args) { - // This function can be used to insert a value close to a known value - // or close to a recently removed value. The hint can only be used if the new key is - // inside one of the nodes provided by the hint iterator. - // The idea behind using the 'parent' is twofold: - // - The 'parent' node is one level above the iterator position, it therefore is spatially - // larger and has a better probability of containing the new position, allowing for - // fast track emplace. - // - Using 'parent' allows a scenario where the iterator was previously used with - // erase(iterator). This is safe because erase() will never erase the 'parent' node. 
- - if (!iterator.GetParentNodeEntry()) { - // No hint available, use standard emplace() - return emplace(key, std::forward(args)...); - } - - auto* parent_entry = iterator.GetParentNodeEntry(); - if (NumberOfDivergingBits(key, parent_entry->GetKey()) > - parent_entry->GetNode().GetPostfixLen() + 1) { - // replace higher up in the tree - return emplace(key, std::forward(args)...); - } - - // replace in node - auto* current_entry = parent_entry; - bool is_inserted = false; - while (current_entry->IsNode()) { - current_entry = - current_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); - } - num_entries_ += is_inserted; - return {current_entry->GetValue(), is_inserted}; - } - - /* - * See std::map::insert(). - * - * @return a pair consisting of the inserted element (or to the element that prevented the - * insertion) and a bool denoting whether the insertion took place. - */ - std::pair insert(const KeyT& key, const T& value) { - return emplace(key, value); - } - - /* - * @return the value stored at position 'key'. If no such value exists, one is added to the tree - * and returned. - */ - T& operator[](const KeyT& key) { - return emplace(key).first; - } - - /* - * Analogous to map:count(). - * - * @return '1', if a value is associated with the provided key, otherwise '0'. - */ - size_t count(const KeyT& key) const { - if (empty()) { - return 0; - } - auto* current_entry = &root_; - while (current_entry && current_entry->IsNode()) { - current_entry = current_entry->GetNode().Find(key); - } - return current_entry ? 1 : 0; - } - - /* - * Analogous to map:find(). - * - * Get an entry associated with a k dimensional key. 
- * @param key the key to look up - * @return an iterator that points either to the associated value or to {@code end()} if the key - * was found - */ - auto find(const KeyT& key) const { - if (empty()) { - return IteratorSimple(converter_); - } - - const EntryT* current_entry = &root_; - const EntryT* current_node = nullptr; - const EntryT* parent_node = nullptr; - while (current_entry && current_entry->IsNode()) { - parent_node = current_node; - current_node = current_entry; - current_entry = current_entry->GetNode().Find(key); - } - - return IteratorSimple(current_entry, current_node, parent_node, converter_); - } - - /* - * See std::map::erase(). Removes any value associated with the provided key. - * - * @return '1' if a value was found, otherwise '0'. - */ - size_t erase(const KeyT& key) { - auto* current_node = &root_.GetNode(); - NodeT* parent_node = nullptr; - bool found = false; - while (current_node) { - auto* child_node = current_node->Erase(key, parent_node, found); - parent_node = current_node; - current_node = child_node; - } - num_entries_ -= found; - return found; - } - - /* - * See std::map::erase(). Removes any value at the given iterator location. - * - * - * - * WARNING - * While this is guaranteed to work correctly, only iterators returned from find() - * will result in erase(iterator) being faster than erase(key). - * Iterators returned from other functions may be optimized in a future version. - * - * @return '1' if a value was found, otherwise '0'. - */ - template - size_t erase(const ITERATOR& iterator) { - if (iterator.Finished()) { - return 0; - } - if (!iterator.GetParentNodeEntry()) { - // Why may there be no parent? - // - we are in the root node - // - the iterator did not set this value - // In either case, we need to start searching from the top. 
- return erase(iterator.GetCurrentResult()->GetKey()); - } - bool found = false; - assert(iterator.GetCurrentNodeEntry() && iterator.GetCurrentNodeEntry()->IsNode()); - iterator.GetCurrentNodeEntry()->GetNode().Erase( - iterator.GetCurrentResult()->GetKey(), - &iterator.GetParentNodeEntry()->GetNode(), - found); - - num_entries_ -= found; - return found; - } - - /* - * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter - * functions must implement the same signature as the default 'FilterNoOp'. - * - * @param callback The callback function to be called for every entry that matches the query. - * The callback requires the following signature: callback(const PhPointD &, const T &) - * @param filter An optional filter function. The filter function allows filtering entries and - * sub-nodes before they are returned or traversed. Any filter function must follow the - * signature of the default 'FilterNoOp`. - */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - ForEach(converter_, callback, filter).run(root_); - } - - /* - * Performs a rectangular window query. The parameters are the min and max keys which - * contain the minimum respectively the maximum keys in every dimension. - * @param query_box The query window. - * @param callback The callback function to be called for every entry that matches the query. - * The callback requires the following signature: callback(const PhPoint &, const T &) - * @param filter An optional filter function. The filter function allows filtering entries and - * sub-nodes before they are returned or traversed. Any filter function must follow the - * signature of the default 'FilterNoOp`. 
- */ - template - void for_each( - const PhBox& query_box, - CALLBACK_FN& callback, - FILTER filter = FILTER()) const { - ForEachHC( - query_box.min(), query_box.max(), converter_, callback, filter) - .run(root_); - } - - /* - * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter - * functions must implement the same signature as the default 'FilterNoOp'. - * - * @return an iterator over all (filtered) entries in the tree, - */ - template - auto begin(FILTER filter = FILTER()) const { - return IteratorFull(root_, converter_, filter); - } - - /* - * Performs a rectangular window query. The parameters are the min and max keys which - * contain the minimum respectively the maximum keys in every dimension. - * @param query_box The query window. - * @param filter An optional filter function. The filter function allows filtering entries and - * sub-nodes before they are returned or traversed. Any filter function must follow the - * signature of the default 'FilterNoOp`. - * @return Result iterator. - */ - template - auto begin_query(const PhBox& query_box, FILTER filter = FILTER()) const { - return IteratorHC( - root_, query_box.min(), query_box.max(), converter_, filter); - } - - /* - * Locate nearest neighbors for a given point in space. - * - * Example for distance function: auto fn = DistanceEuclidean - * auto iter = tree.begin_knn_query> - * - * @param min_results number of entries to be returned. More entries may or may not be returned - * when several entries have the same distance. - * @param center center point - * @param distance_function optional distance function, defaults to euclidean distance - * @param filter optional filter predicate that excludes nodes/entries before their distance is - * calculated. - * @return Result iterator. 
- */ - template - auto begin_knn_query( - size_t min_results, - const KeyT& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { - return IteratorKnnHS( - root_, min_results, center, converter_, distance_function, filter); - } - - /* - * @return An iterator representing the tree's 'end'. - */ - const auto& end() const { - return the_end_; - } - - /* - * Remove all entries from the tree. - */ - void clear() { - num_entries_ = 0; - root_ = EntryT(0, MAX_BIT_WIDTH - 1); - } - - /* - * @return the number of entries (key/value pairs) in the tree. - */ - [[nodiscard]] size_t size() const { - return num_entries_; - } - - /* - * @return 'true' if the tree is empty, otherwise 'false'. - */ - [[nodiscard]] bool empty() const { - return num_entries_ == 0; - } - - private: - /* - * This function is only for debugging. - */ - auto GetDebugHelper() const { - return DebugHelperV16(root_.GetNode(), num_entries_); - } - - private: - size_t num_entries_; - // Contract: root_ contains a Node with 0 or more entries (the root node is the only Node - // that is allowed to have less than two entries. 
- EntryT root_; - IteratorEnd the_end_; - CONVERT converter_; -}; - -} // namespace improbable::phtree::v16 - -#endif // PHTREE_V16_PHTREE_V16_H diff --git a/phtree/BUILD b/test/BUILD similarity index 50% rename from phtree/BUILD rename to test/BUILD index fe48ccc8..0d8d0d7f 100644 --- a/phtree/BUILD +++ b/test/BUILD @@ -1,19 +1,41 @@ package(default_visibility = ["//visibility:private"]) -cc_library( - name = "phtree", +cc_test( + name = "converter_test", + timeout = "long", srcs = [ + "converter_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", ], - hdrs = [ - "phtree.h", - "phtree_multimap.h", +) + +cc_test( + name = "distance_test", + timeout = "long", + srcs = [ + "distance_test.cc", ], linkstatic = True, - visibility = [ - "//visibility:public", + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "filter_test", + timeout = "long", + srcs = [ + "filter_test.cc", ], + linkstatic = True, deps = [ - "//phtree/v16", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -25,8 +47,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -38,8 +60,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -51,8 +73,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -64,21 +86,21 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) cc_test( - name = "phtree_multimap_test_move_only_values", + name = "phtree_multimap_d_test_unique_ptr_values", timeout = "long", srcs = [ - "phtree_test_unique_ptr_values.cc", + "phtree_multimap_d_test_unique_ptr_values.cc", ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -90,8 
+112,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -103,8 +125,60 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_box_d_test_filter", + timeout = "long", + srcs = [ + "phtree_box_d_test_filter.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_d_test_filter", + timeout = "long", + srcs = [ + "phtree_multimap_d_test_filter.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_d_test_copy_move", + timeout = "long", + srcs = [ + "phtree_d_test_copy_move.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_d_test_copy_move", + timeout = "long", + srcs = [ + "phtree_multimap_d_test_copy_move.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -116,8 +190,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -129,8 +203,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -142,8 +216,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -155,8 +229,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -168,8 +242,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -181,8 +255,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + 
"@gtest//:gtest_main", ], ) @@ -194,8 +268,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -207,7 +281,20 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_test_issues", + timeout = "long", + srcs = [ + "phtree_test_issues.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", ], ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 00000000..e687a839 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,100 @@ +cmake_minimum_required(VERSION 3.14) +project(phtree-tests LANGUAGES CXX) + +include(FetchContent) +include(common/scripts.cmake) + +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.12.1 +) +if (MSVC) + # Avoids LNK2038 Error with MSVC + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +endif () +FetchContent_MakeAvailable(googletest) + +# The next line is optional, but keeps your CACHE cleaner: +mark_as_advanced( + BUILD_GTEST BUILD_SHARED_LIBS + gtest_build_samples gtest_build_tests + gtest_disable_pthreads gtest_force_shared_crt gtest_hide_internal_symbols +) + +# If you are interested in keeping IDEs that support folders clean, I would also add these lines: +set_target_properties(gtest PROPERTIES FOLDER extern) +set_target_properties(gtest_main PROPERTIES FOLDER extern) + +#include(GoogleTest) +#gtest_discover_tests(all_tests_driver) + +if (PHTREE_CODE_COVERAGE) + package_add_test_main(all_tests + all_tests.cc + converter_test.cc + distance_test.cc + filter_test.cc + phtree_test.cc + phtree_test_const_values.cc + phtree_test_issues.cc + phtree_test_ptr_values.cc + phtree_test_unique_ptr_values.cc + phtree_f_test.cc + phtree_d_test.cc + phtree_d_test_copy_move.cc + phtree_d_test_custom_key.cc + phtree_d_test_filter.cc + 
phtree_d_test_preprocessor.cc + phtree_box_f_test.cc + phtree_box_d_test.cc + phtree_box_d_test_filter.cc + phtree_box_d_test_query_types.cc + phtree_multimap_d_test.cc + phtree_multimap_d_test_copy_move.cc + phtree_multimap_d_test_filter.cc + phtree_multimap_d_test_unique_ptr_values.cc + phtree_multimap_box_d_test.cc + common/b_plus_tree_hash_map_test.cc + common/b_plus_tree_map_test.cc + common/base_types_test.cc + common/bits_test.cc + common/common_test.cc + common/flat_array_map_test.cc + common/flat_sparse_map_test.cc) + target_compile_definitions(all_tests PUBLIC SKIP_TEST_MEMORY_LEAKS=ON) +else () + package_add_test(phtree_test phtree_test.cc) + package_add_test(phtree_test_const_values phtree_test_const_values.cc) + package_add_test(phtree_test_issues phtree_test_issues.cc) + target_compile_definitions(phtree_test_issues PUBLIC SKIP_TEST_MEMORY_LEAKS=ON) + package_add_test(phtree_test_ptr_values phtree_test_ptr_values.cc) + package_add_test(phtree_test_unique_ptr_values phtree_test_unique_ptr_values.cc) + + package_add_test(phtree_f_test phtree_f_test.cc) + + package_add_test(phtree_d_test phtree_d_test.cc) + package_add_test(phtree_d_test_copy_move phtree_d_test_copy_move.cc) + package_add_test(phtree_d_test_custom_key phtree_d_test_custom_key.cc) + package_add_test(phtree_d_test_filter phtree_d_test_filter.cc) + package_add_test(phtree_d_test_preprocessor phtree_d_test_preprocessor.cc) + + package_add_test(phtree_box_f_test phtree_box_f_test.cc) + + package_add_test(phtree_box_d_test phtree_box_d_test.cc) + package_add_test(phtree_box_d_test_filter phtree_box_d_test_filter.cc) + package_add_test(phtree_box_d_test_query_types phtree_box_d_test_query_types.cc) + + package_add_test(phtree_multimap_d_test phtree_multimap_d_test.cc) + package_add_test(phtree_multimap_d_test_copy_move phtree_multimap_d_test_copy_move.cc) + package_add_test(phtree_multimap_d_test_filter phtree_multimap_d_test_filter.cc) + 
package_add_test(phtree_multimap_d_test_unique_ptr_values phtree_multimap_d_test_unique_ptr_values.cc) + + package_add_test(phtree_multimap_box_d_test phtree_multimap_box_d_test.cc) + + package_add_test(converter_test converter_test.cc) + package_add_test(distance_test distance_test.cc) + package_add_test(filter_test filter_test.cc) + + add_subdirectory(common) +endif () diff --git a/test/all_tests.cc b/test/all_tests.cc new file mode 100644 index 00000000..ddc6dfc6 --- /dev/null +++ b/test/all_tests.cc @@ -0,0 +1,11 @@ +#include + +// #include "gtest/gtest.h" + +//#include "phtree_f_test.cc" +//#include "phtree_test.cc" + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/phtree/common/BUILD b/test/common/BUILD similarity index 53% rename from phtree/common/BUILD rename to test/common/BUILD index 7ef3b6bf..8a6a2eff 100644 --- a/phtree/common/BUILD +++ b/test/common/BUILD @@ -1,26 +1,5 @@ package(default_visibility = ["//visibility:private"]) -cc_library( - name = "common", - hdrs = [ - "base_types.h", - "bits.h", - "common.h", - "converter.h", - "debug_helper.h", - "distance.h", - "filter.h", - "flat_array_map.h", - "flat_sparse_map.h", - "tree_stats.h", - ], - visibility = [ - "//visibility:public", - ], - deps = [ - ], -) - cc_test( name = "base_types_test", timeout = "long", @@ -29,8 +8,8 @@ cc_test( ], linkstatic = True, deps = [ - ":common", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -42,8 +21,8 @@ cc_test( ], linkstatic = True, deps = [ - ":common", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -55,72 +34,72 @@ cc_test( ], linkstatic = True, deps = [ - ":common", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) cc_test( - name = "distance_test", + name = "flat_array_map_test", timeout = "long", srcs = [ - "distance_test.cc", + "flat_array_map_test.cc", ], 
linkstatic = True, deps = [ - ":common", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) cc_test( - name = "filter_test", + name = "b_plus_tree_hash_map_test", timeout = "long", srcs = [ - "filter_test.cc", + "b_plus_tree_hash_map_test.cc", ], linkstatic = True, deps = [ - ":common", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) cc_test( - name = "flat_array_map_test", + name = "b_plus_tree_map_test", timeout = "long", srcs = [ - "flat_array_map_test.cc", + "b_plus_tree_map_test.cc", ], linkstatic = True, deps = [ - ":common", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) cc_test( - name = "flat_sparse_map_test", + name = "b_plus_tree_multimap_test", timeout = "long", srcs = [ - "flat_sparse_map_test.cc", + "b_plus_tree_multimap_test.cc", ], linkstatic = True, deps = [ - ":common", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) cc_test( - name = "preprocessor_test", + name = "flat_sparse_map_test", timeout = "long", srcs = [ - "converter_test.cc", + "flat_sparse_map_test.cc", ], linkstatic = True, deps = [ - ":common", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) diff --git a/test/common/CMakeLists.txt b/test/common/CMakeLists.txt new file mode 100644 index 00000000..b802ac04 --- /dev/null +++ b/test/common/CMakeLists.txt @@ -0,0 +1,11 @@ +include(scripts.cmake) + +package_add_test(b_plus_tree_hash_map_test b_plus_tree_hash_map_test.cc) +package_add_test(b_plus_tree_map_test b_plus_tree_map_test.cc) +package_add_test(b_plus_tree_multimap_test b_plus_tree_multimap_test.cc) +package_add_test(base_types_test base_types_test.cc) +package_add_test(bits_test bits_test.cc) +package_add_test(common_test common_test.cc) + +package_add_test(flat_array_map_test flat_array_map_test.cc) +package_add_test(flat_sparse_map_test flat_sparse_map_test.cc) diff --git a/test/common/b_plus_tree_hash_map_test.cc 
b/test/common/b_plus_tree_hash_map_test.cc new file mode 100644 index 00000000..98c04a28 --- /dev/null +++ b/test/common/b_plus_tree_hash_map_test.cc @@ -0,0 +1,479 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/common/b_plus_tree_hash_map.h" +#include +#include +#include + +using namespace improbable::phtree; + +static int default_construct_count_ = 0; +static int construct_count_ = 0; +static int copy_construct_count_ = 0; +static int move_construct_count_ = 0; +static int copy_assign_count_ = 0; +static int move_assign_count_ = 0; +static int destruct_count_ = 0; + +[[maybe_unused]] static void reset_id_counters() { + default_construct_count_ = 0; + construct_count_ = 0; + copy_construct_count_ = 0; + move_construct_count_ = 0; + copy_assign_count_ = 0; + move_assign_count_ = 0; + destruct_count_ = 0; +} + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_ + << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_ + << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_ + << " d=" << destruct_count_ << std::endl; +} + +struct Id { + Id() : _i{0} { + ++default_construct_count_; + } + + explicit Id(const size_t i) : _i{static_cast(i)} { + ++construct_count_; + } + + explicit Id(const int i) : _i{i} { + ++construct_count_; + } + + Id(const Id& other) { + ++copy_construct_count_; + _i = 
other._i; + } + + Id(Id&& other) noexcept { + ++move_construct_count_; + _i = other._i; + } + + Id& operator=(const Id& other) noexcept { + ++copy_assign_count_; + _i = other._i; + return *this; + } + Id& operator=(Id&& other) noexcept { + ++move_assign_count_; + _i = other._i; + return *this; + } + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + ~Id() { + ++destruct_count_; + } + + int _i; +}; + +namespace std { +template <> +struct hash { + size_t operator()(const Id& x) const { + return std::hash{}(x._i % 10); + } +}; +}; // namespace std + +template +void CheckMapResult(const R& result, END end, const K& key, const V& val) { + ASSERT_NE(result, end); + ASSERT_EQ(result->first, key); + ASSERT_EQ(result->second, val); +} + +template +void CheckMapResultPair(const R& result, bool expected_success, const K& key, const V& val) { + assert(result.second == expected_success); + ASSERT_EQ(result.second, expected_success); + ASSERT_EQ(result.first->first, key); + ASSERT_EQ(result.first->second, val); +} + +template +void CheckSetResult(const R& result, END end, const K& key) { + ASSERT_NE(result, end); + ASSERT_EQ(*result, key); +} + +template +void CheckSetResultPair(const R& result, bool expected_success, const K& key) { + assert(result.second == expected_success); + ASSERT_EQ(result.second, expected_success); + ASSERT_EQ(*result.first, key); +} + +template +void SmokeTestMap() { + const int N = 300; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + size_t val = 0; + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_map> test_map; + std::unordered_map reference_map; + for (int j = 0; j < N; j++) { + size_t key = cube_distribution(random_engine); + Id id(key); + bool hasVal = test_map.find(id) != test_map.end(); + bool hasValRef = reference_map.find(id) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + auto iter_lb = test_map.lower_bound(id); + bool hasValLB = iter_lb != 
test_map.end() && iter_lb->first == id; + ASSERT_EQ(hasVal, hasValLB); + + if (!hasVal) { + if (key % 6 == 0) { + CheckMapResultPair(test_map.emplace(id, val), true, id, val); + CheckMapResultPair(test_map.emplace(id, val), false, id, val); + } else if (key % 6 == 1) { + CheckMapResultPair(test_map.try_emplace(id, val), true, id, val); + CheckMapResultPair(test_map.try_emplace(id, val), false, id, val); + } else if (key % 6 == 2) { + // Leaf-hint of questionable quality + auto hint = test_map.find(Id(key - 1)); + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + } else if (key % 6 == 3) { + auto hint = j % 2 == 0 ? test_map.begin() : test_map.end(); + // Bad hint + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + } else if (key % 6 == 4) { + // Leaf-hint of questionable quality + auto hint = test_map.find(Id(key - 1)); + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + } else { + auto hint = j % 2 == 0 ? 
test_map.begin() : test_map.end(); + // Bad hint + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + } + test_map._check(); + reference_map.emplace(id, val); + } + + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto& entry : reference_map) { + const Id& kRef = entry.first; + size_t vMap = test_map.find(kRef)->second; + ASSERT_EQ(vMap, entry.second); + ASSERT_TRUE(test_map.count(kRef)); + } + for (auto& entry : test_map) { + Id& k = entry.first; + size_t vRef = reference_map.find(k)->second; + size_t vMap = test_map.find(k)->second; + ASSERT_EQ(vMap, vRef); + } + ++val; + } + } +} + +TEST(PhTreeBptHashMapTest, SmokeTestNonUnique) { + SmokeTestMap>(); +} + +TEST(PhTreeBptHashMapTest, SmokeTestSameHash) { + struct DumbHash { + size_t operator()(const Id&) const { + return 42; + } + }; + SmokeTestMap(); +} + +template +void SmokeTestSet() { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_set test_map; + std::unordered_set reference_map; + for (int j = 0; j < N; j++) { + { + size_t key = cube_distribution(random_engine); + Id id(key); + bool hasVal = test_map.find(id) != test_map.end(); + bool hasValRef = reference_map.find(id) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + auto iter_lb = test_map.lower_bound(id); + bool hasValLB = iter_lb != test_map.end() && *iter_lb == id; + ASSERT_EQ(hasVal, hasValLB); + + if (!hasVal) { + if (key % 3 == 0) { + CheckSetResultPair(test_map.emplace(id), true, id); + CheckSetResultPair(test_map.emplace(key), false, id); + } else if (key % 3 == 1) { + // Leaf-hint of questionable quality + auto hint = test_map.find(Id(key - 1)); + CheckSetResult(test_map.emplace_hint(hint, id), test_map.end(), id); + CheckSetResult(test_map.emplace_hint(hint, key), test_map.end(), id); 
+ } else { + auto hint = j % 2 == 0 ? test_map.begin() : test_map.end(); + // Bad hint + CheckSetResult(test_map.emplace_hint(hint, id), test_map.end(), id); + CheckSetResult(test_map.emplace_hint(hint, key), test_map.end(), id); + } + test_map._check(); + reference_map.emplace(id); + } + } + + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto& id : reference_map) { + Id& idMap = *test_map.find(id); + ASSERT_EQ(idMap, id); + } + for (auto& id : test_map) { + const Id& vRef = *reference_map.find(id); + Id& vMap = *test_map.find(id); + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptHashSetTest, SmokeTestNonUnique) { + SmokeTestSet>(); +} + +TEST(PhTreeBptHashSetTest, SmokeTestSameHash) { + struct DumbHash { + size_t operator()(const Id&) const { + return 42; + } + }; + SmokeTestSet(); +} + +TEST(PhTreeBptHashMapTest, SmokeTestWithTryEmplace) { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_map, std::equal_to<>> test_map; + std::map reference_map; + for (int j = 0; j < N; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto entry : reference_map) { + size_t vRef = entry.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto entry : test_map) { + size_t v = entry.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +template +void SmokeTestWithErase(bool by_iterator) { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, 
N / 2); + + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_map> test_map{}; + std::unordered_map reference_map{}; + std::vector key_list{}; + for (int j = 0; j < N; j++) { + size_t key = cube_distribution(random_engine); + Id id(key); + bool hasVal = test_map.find(id) != test_map.end(); + bool hasValRef = reference_map.find(id) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + reference_map.emplace(id, key); + test_map.try_emplace(id, key); + key_list.emplace_back(key); + } + + int x = 0; + std::shuffle(key_list.begin(), key_list.end(), random_engine); + for (auto key : key_list) { + Id id(key); + // This may try to erase an entry that does not exist! + auto it = test_map.find(id); + if (it == test_map.end()) { + ASSERT_EQ(0u, reference_map.erase(id)); + continue; + } + if (by_iterator) { + auto next = it; + ++next; + auto is_last = next == test_map.end(); + auto next_val = is_last ? Id(-1) : next->first; + auto result = test_map.erase(it); + if (is_last) { + ASSERT_EQ(test_map.end(), result); + } else { + ASSERT_NE(test_map.end(), result); + ASSERT_EQ(next_val, result->first); + } + } else { + test_map.erase(id); + } + test_map._check(); + ASSERT_EQ(1u, reference_map.erase(id)); + for (auto& entry : reference_map) { + const Id& vRef = entry.first; + Id& vMap = test_map.find(vRef)->first; + ASSERT_EQ(vMap, vRef); + } + for (auto& entry : test_map) { + Id& v = entry.first; + const Id& vRef = reference_map.find(v)->first; + Id& vMap = test_map.find(v)->first; + ASSERT_EQ(vMap, vRef); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + ++x; + } + } +} + +TEST(PhTreeBptHashMapTest, SmokeTestWithErase) { + SmokeTestWithErase>(true); + SmokeTestWithErase>(false); +} + +TEST(PhTreeBptHashMapTest, SmokeTestWithEraseSameHash) { + struct DumbHash { + size_t operator()(const Id&) const { + return 42; + } + }; + SmokeTestWithErase(true); + SmokeTestWithErase(false); +} + +template +void test_tree(TREE& tree) { + using Key = size_t; + using Value = size_t; + 
Key p{42}; + + // test various operations + tree.emplace(p, Value{2}); + Value id3{3}; + tree.emplace(p, id3); + ASSERT_EQ(tree.size(), 1u); + + auto q_extent = tree.begin(); + ASSERT_NE(q_extent, tree.end()); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + tree.erase(p); + ASSERT_EQ(0u, tree.size()); + tree.erase(p); + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeBptHashMapTest, TestCopyConstruct) { + using TestTree = b_plus_tree_hash_map; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{tree1}; + test_tree(tree); + // The old tree should still work! + test_tree(tree1); +} + +TEST(PhTreeBptHashMapTest, TestCopyAssign) { + using TestTree = b_plus_tree_hash_map; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{}; + tree = tree1; + test_tree(tree); + // The old tree should still work! + test_tree(tree1); +} + +TEST(PhTreeBptHashMapTest, TestMoveConstruct) { + using TestTree = b_plus_tree_hash_map; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{std::move(tree1)}; + test_tree(tree); +} + +TEST(PhTreeBptHashMapTest, TestMoveAssign) { + using TestTree = b_plus_tree_hash_map; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{}; + tree = std::move(tree1); + test_tree(tree); +} + +TEST(PhTreeBptHashMapTest, TestMovableIterators) { + using Key = size_t; + using Value = size_t; + using TestTree = b_plus_tree_hash_map; + // Test edge case: only one entry in tree + Key p{42}; + auto tree = TestTree(); + tree.emplace(p, Value{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); +} diff --git a/test/common/b_plus_tree_map_test.cc b/test/common/b_plus_tree_map_test.cc new file mode 100644 index 
00000000..8ae8eba9 --- /dev/null +++ b/test/common/b_plus_tree_map_test.cc @@ -0,0 +1,270 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/common/b_plus_tree_map.h" +#include +#include + +using namespace improbable::phtree; + +using KeyT = std::uint64_t; + +TEST(PhTreeBptMapTest, SmokeTest) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.emplace(val, val); + test_map._check(); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptMapTest, SmokeTestWithTryEmplace) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> 
cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptMapTest, SmokeTestWithErase) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map{}; + std::unordered_map reference_map{}; + std::vector key_list{}; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + key_list.emplace_back(val); + } + } + + std::shuffle(key_list.begin(), key_list.end(), random_engine); + for (auto key : key_list) { + if (key % 2 == 0) { + test_map.erase(key); + } else { + auto it = test_map.find(key); + ASSERT_NE(it, test_map.end()); + ASSERT_EQ(it->second, key); + test_map.erase(it); + } + test_map._check(); + reference_map.erase(key); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : 
test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + } + } +} + +TEST(PhTreeBptMapTest, SmokeTestLowerBound) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.lower_bound(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.lower_bound(v)->second; + ASSERT_EQ(vMap, vRef); + } + for (size_t v = 0; v < max_size + 5; ++v) { + auto itRef = reference_map.lower_bound(v); + auto itMap = test_map.lower_bound(v); + if (itRef == reference_map.end()) { + ASSERT_EQ(itMap, test_map.end()); + } else { + ASSERT_NE(itMap, test_map.end()); + // ASSERT_EQ(v, itRef->second); + ASSERT_EQ(itRef->second, itMap->second); + } + } + } + } +} + +template +void test_tree(TREE& tree) { + using Key = size_t; + using Value = size_t; + Key p{42}; + + // test various operations + tree.emplace(p, Value{2}); + Value id3{3}; + tree.emplace(p, id3); + ASSERT_EQ(tree.size(), 1u); + + auto q_extent = tree.begin(); + ASSERT_NE(q_extent, tree.end()); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + tree.erase(p); + ASSERT_EQ(0u, tree.size()); + tree.erase(p); + ASSERT_EQ(0u, tree.size()); + 
ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeBptMapTest, TestCopyConstruct) { + using TestTree = b_plus_tree_map; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{tree1}; + test_tree(tree); + // The old tree should still work! + test_tree(tree1); +} + +TEST(PhTreeBptMapTest, TestCopyAssign) { + using TestTree = b_plus_tree_map; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{}; + tree = tree1; + test_tree(tree); + // The old tree should still work! + test_tree(tree1); +} + +TEST(PhTreeBptMapTest, TestMoveConstruct) { + using TestTree = b_plus_tree_map; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{std::move(tree1)}; + test_tree(tree); +} + +TEST(PhTreeBptMapTest, TestMoveAssign) { + using TestTree = b_plus_tree_map; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{}; + tree = std::move(tree1); + test_tree(tree); +} + +TEST(PhTreeBptMapTest, TestMovableIterators) { + using Key = size_t; + using Value = size_t; + using TestTree = b_plus_tree_map; + // Test edge case: only one entry in tree + Key p{42}; + auto tree = TestTree(); + tree.emplace(p, Value{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); +} diff --git a/test/common/b_plus_tree_multimap_test.cc b/test/common/b_plus_tree_multimap_test.cc new file mode 100644 index 00000000..2725f4dc --- /dev/null +++ b/test/common/b_plus_tree_multimap_test.cc @@ -0,0 +1,539 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/common/b_plus_tree_multimap.h" +#include +#include + +using namespace improbable::phtree; + +static int default_construct_count_ = 0; +static int construct_count_ = 0; +static int copy_construct_count_ = 0; +static int move_construct_count_ = 0; +static int copy_assign_count_ = 0; +static int move_assign_count_ = 0; +static int destruct_count_ = 0; + +[[maybe_unused]] static void reset_id_counters() { + default_construct_count_ = 0; + construct_count_ = 0; + copy_construct_count_ = 0; + move_construct_count_ = 0; + copy_assign_count_ = 0; + move_assign_count_ = 0; + destruct_count_ = 0; +} + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_ + << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_ + << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_ + << " d=" << destruct_count_ << std::endl; +} + +template +void populate( + const size_t N, + b_plus_tree_multimap& test_map, + std::multimap& reference_map, + std::vector>& reverse_map, + std::default_random_engine& random_engine) { + std::uniform_int_distribution<> cube_distribution(0, (int)N / 2); + for (size_t j = 0; j < N; j++) { + Key key = cube_distribution(random_engine); + Value value = j; + bool hasVal = test_map.find(key) != test_map.end(); + bool hasValRef = reference_map.find(key) != reference_map.end(); + assert(hasVal == hasValRef); + reference_map.emplace(key, value); + test_map.try_emplace(key, value); + reverse_map.emplace_back(value, key); + } +} + 
+struct Id { + Id() : _i{0} { + ++default_construct_count_; + } + + explicit Id(const size_t i) : _i{static_cast(i)} { + ++construct_count_; + } + + explicit Id(const int i) : _i{i} { + ++construct_count_; + } + + Id(const Id& other) { + ++copy_construct_count_; + _i = other._i; + } + + Id(Id&& other) noexcept { + ++move_construct_count_; + _i = other._i; + } + + Id& operator=(const Id& other) noexcept { + ++copy_assign_count_; + _i = other._i; + return *this; + } + Id& operator=(Id&& other) noexcept { + ++move_assign_count_; + _i = other._i; + return *this; + } + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + ~Id() { + ++destruct_count_; + } + + int _i; +}; + +template +void CheckMapResult(const R& result, END end, const K& key, const V& val) { + ASSERT_NE(result, end); + ASSERT_EQ(result->first, key); + ASSERT_EQ(result->second, val); +} + +void SmokeTestMap() { + const int N = 300; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + using Key = size_t; + using Value = Id; + for (int i = 0; i < 10; i++) { + b_plus_tree_multimap test_map; + std::unordered_multimap reference_map{}; + std::vector> reverse_map; + for (int j = 0; j < N; j++) { + size_t key = cube_distribution(random_engine); + bool hasVal = test_map.find(key) != test_map.end(); + bool hasValRef = reference_map.find(key) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + + Value val{j}; + if (key % 6 == 0) { + CheckMapResult(test_map.emplace(key, val), test_map.end(), key, val); + } else if (key % 6 == 1) { + CheckMapResult(test_map.try_emplace(key, val), test_map.end(), key, val); + } else if (key % 6 == 2) { + // Leaf-hint of questionable quality + auto hint = test_map.find(key - 1); + CheckMapResult(test_map.try_emplace(hint, key, val), test_map.end(), key, val); + } else if (key % 6 == 3) { + auto hint = j % 2 == 0 ? 
test_map.begin() : test_map.end(); + // Bad hint + CheckMapResult(test_map.try_emplace(hint, key, val), test_map.end(), key, val); + } else if (key % 6 == 4) { + // Leaf-hint of questionable quality + auto hint = test_map.find(key - 1); + CheckMapResult(test_map.emplace_hint(hint, key, val), test_map.end(), key, val); + } else { + auto hint = j % 2 == 0 ? test_map.begin() : test_map.end(); + // Bad hint + CheckMapResult(test_map.emplace_hint(hint, key, val), test_map.end(), key, val); + } + test_map._check(); + reference_map.emplace(key, val); + reverse_map.emplace_back(val, key); + + ASSERT_EQ(test_map.size(), reference_map.size()); + ASSERT_EQ(test_map.size(), j + 1u); + + Key prev_key = 0; + for (auto& entry : test_map) { + auto& eMap = *test_map.find(entry.first); + ASSERT_EQ(entry.first, eMap.first); + ASSERT_LE(prev_key, eMap.first); + prev_key = eMap.first; + auto& eRef = reverse_map[eMap.second._i]; + ASSERT_EQ(eMap.second, eRef.first); + ASSERT_EQ(eMap.first, eRef.second); + } + } + } +} + +TEST(PhTreeBptMulitmapTest, SmokeTestNonUnique) { + SmokeTestMap(); +} + +TEST(PhTreeBptMulitmapTest, SmokeTestWithTryEmplace) { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + using Key = size_t; + using Val = size_t; + for (int i = 0; i < 10; i++) { + b_plus_tree_multimap test_map; + std::map reference_map; + for (int j = 0; j < N; j++) { + Key key = cube_distribution(random_engine); + Val val = key; + bool hasVal = test_map.find(key) != test_map.end(); + bool hasValRef = reference_map.find(key) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(key, val); + test_map.try_emplace(key, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto entry : reference_map) { + size_t vRef = entry.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto entry : test_map) { + size_t v = 
entry.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptMulitmapTest, SmokeTestWithEraseByKey) { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + using Key = size_t; + using Value = size_t; + for (int i = 0; i < 10; i++) { + b_plus_tree_multimap test_map{}; + std::multimap reference_map{}; + std::vector> reverse_map{}; + populate(N, test_map, reference_map, reverse_map, random_engine); + + std::shuffle(reverse_map.begin(), reverse_map.end(), random_engine); + // We iterator over all entries even though every erase() may remove several entries. + // -> This also tests behavior for non-existing keys (that have already been removed). + for (auto reverse_pair : reverse_map) { + auto key = reverse_pair.second; + + auto result_test = test_map.erase(key); + auto result_ref = reference_map.erase(key); + assert(result_test == result_ref); + ASSERT_EQ(result_test, result_ref); + + test_map._check(); + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto& entry : reference_map) { + const Key& vRef = entry.first; + Key vMap = test_map.find(vRef)->first; + ASSERT_EQ(vMap, vRef); + } + for (auto& entry : test_map) { + Key v = entry.first; + const Key& vRef = reference_map.find(v)->first; + Key vMap = test_map.find(v)->first; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptMulitmapTest, SmokeTestWithEraseByIterator) { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + using Key = size_t; + using Value = size_t; + for (int i = 0; i < 10; i++) { + b_plus_tree_multimap test_map{}; + std::multimap reference_map{}; + std::vector> reverse_map{}; + populate(N, test_map, reference_map, reverse_map, random_engine); + + std::shuffle(reverse_map.begin(), reverse_map.end(), random_engine); + for (auto 
reverse_pair : reverse_map) { + auto key = reverse_pair.second; + auto val = reverse_pair.first; + + auto it = test_map.find(key); + while (it->second != val) { + ++it; + } + auto next = it; + ++next; + auto is_last = next == test_map.end(); + auto next_val = is_last ? -1 : next->first; + auto result = test_map.erase(it); + if (is_last) { + ASSERT_EQ(test_map.end(), result); + } else { + ASSERT_NE(test_map.end(), result); + ASSERT_EQ(next_val, result->first); + } + auto ref_iter = reference_map.find(key); + while (ref_iter != reference_map.end() && ref_iter->second != val) { + ++ref_iter; + } + reference_map.erase(ref_iter); + + test_map._check(); + for (auto& entry : reference_map) { + const Key& vRef = entry.first; + Key vMap = test_map.find(vRef)->first; + ASSERT_EQ(vMap, vRef); + } + for (auto& entry : test_map) { + Key v = entry.first; + const Key& vRef = reference_map.find(v)->first; + Key vMap = test_map.find(v)->first; + ASSERT_EQ(vMap, vRef); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + } + } +} + +void SmokeTestWithErase(bool use_begin, bool use_end) { + const int N = 300; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + std::uniform_real_distribution<> real_distribution(0, 1); + + using Key = size_t; + using Value = size_t; + for (int i = 0; i < 500; i++) { + b_plus_tree_multimap test_map{}; + std::multimap reference_map{}; + std::vector> key_list{}; + populate(N, test_map, reference_map, key_list, random_engine); + + // Pick some random keys + auto key1 = std::min(key_list[0].second, key_list[1].second); + auto key2 = std::max(key_list[0].second, key_list[1].second); + + auto it_test_1 = use_begin ? test_map.begin() : test_map.find(key1); + auto it_test_2 = use_end ? test_map.end() : test_map.find(key2); + auto it_ref_1 = use_begin ? reference_map.begin() : reference_map.find(key1); + auto it_ref_2 = use_end ? 
reference_map.end() : reference_map.find(key2); + auto result_test = test_map.erase(it_test_1, it_test_2); + auto result_ref = reference_map.erase(it_ref_1, it_ref_2); + + if (result_ref != reference_map.end()) { + ASSERT_EQ(result_test->first, result_ref->first); + } else { + ASSERT_EQ(result_test, test_map.end()); + } + + // check len: + size_t n = 0; + for (auto& e : test_map) { + (void)e; + ++n; + } + ASSERT_EQ(n, test_map.size()); + + test_map._check(); + auto it_test = test_map.begin(); + auto it_ref = reference_map.begin(); + while (it_test != test_map.end()) { + ASSERT_NE(it_ref, reference_map.end()); + auto& r = *it_ref; + auto& e = *it_test; + ASSERT_EQ(e.first, r.first); + // ASSERT_EQ(e.second, r.second); std::multi_map is inserttion ordered, b_p_t is not. + ++it_test; + ++it_ref; + } + ASSERT_EQ(it_test, test_map.end()); + ASSERT_EQ(it_ref, reference_map.end()); + } +} + +TEST(PhTreeBptMulitmapTest, SmokeTestWithEraseInterval) { + SmokeTestWithErase(false, false); + SmokeTestWithErase(false, true); + SmokeTestWithErase(true, false); + SmokeTestWithErase(true, true); +} + +TEST(PhTreeBptMulitmapTest, SmokeTestUpdateByIterator) { + // This tests repeated erase()/insert() + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 20); + + using Key = size_t; + using Value = size_t; + b_plus_tree_multimap test_map{}; + std::multimap reference_map{}; + std::vector> reverse_map{}; + populate(N, test_map, reference_map, reverse_map, random_engine); + for (int i = 0; i < 100; i++) { + std::shuffle(reverse_map.begin(), reverse_map.end(), random_engine); + for (auto& reverse_pair : reverse_map) { + auto key = reverse_pair.second; + auto val = reverse_pair.first; + + // reference map + auto ref_iter = reference_map.find(key); + while (ref_iter != reference_map.end() && ref_iter->second != val) { + ++ref_iter; + } + ASSERT_NE(ref_iter, reference_map.end()); + reference_map.erase(ref_iter); + + // 
tested map + auto it = test_map.find(key); + ASSERT_NE(it, test_map.end()); + while (it->second != val) { + ++it; + } + auto next = it; + // verify return value + ++next; + auto is_last = next == test_map.end(); + auto next_val = is_last ? -1 : next->first; + auto result = test_map.erase(it); + if (is_last) { + ASSERT_EQ(test_map.end(), result); + } else { + ASSERT_NE(test_map.end(), result); + ASSERT_EQ(next_val, result->first); + } + + test_map._check(); + + // insert again + reverse_pair.second = cube_distribution(random_engine); + test_map.emplace(reverse_pair.second, reverse_pair.first); + reference_map.emplace(reverse_pair.second, reverse_pair.first); + + test_map._check(); + + for (auto& entry : reference_map) { + const Key& vRef = entry.first; + Key vMap = test_map.find(vRef)->first; + ASSERT_EQ(vMap, vRef); + } + for (auto& entry : test_map) { + Key v = entry.first; + const Key& vRef = reference_map.find(v)->first; + Key vMap = test_map.find(v)->first; + ASSERT_EQ(vMap, vRef); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + } + } +} + +template +void test_tree(TREE& tree) { + using Key = size_t; + using Value = size_t; + Key p{42}; + + // test various operations + tree.emplace(p, Value{2}); + Value id3{3}; + tree.emplace(p, id3); + ASSERT_EQ(tree.size(), 3u); + + auto q_extent = tree.begin(); + ASSERT_NE(q_extent, tree.end()); + ++q_extent; + ASSERT_NE(q_extent, tree.end()); + ++q_extent; + ASSERT_NE(q_extent, tree.end()); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + ASSERT_EQ(3u, tree.erase(p)); + ASSERT_EQ(0u, tree.size()); + ASSERT_EQ(0u, tree.erase(p)); + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeBptMulitmapTest, TestCopyConstruct) { + using TestTree = b_plus_tree_multimap; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{tree1}; + test_tree(tree); + // The old tree should still work! 
+ test_tree(tree1); +} + +TEST(PhTreeBptMulitmapTest, TestCopyAssign) { + using TestTree = b_plus_tree_multimap; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{}; + tree = tree1; + test_tree(tree); + // The old tree should still work! + test_tree(tree1); +} + +TEST(PhTreeBptMulitmapTest, TestMoveConstruct) { + using TestTree = b_plus_tree_multimap; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{std::move(tree1)}; + test_tree(tree); +} + +TEST(PhTreeBptMulitmapTest, TestMoveAssign) { + using TestTree = b_plus_tree_multimap; + TestTree tree1; + tree1.emplace(42, 1); + + TestTree tree{}; + tree = std::move(tree1); + test_tree(tree); +} + +TEST(PhTreeBptMulitmapTest, TestMovableIterators) { + using Key = size_t; + using Value = size_t; + using TestTree = b_plus_tree_multimap; + // Test edge case: only one entry in tree + Key p{42}; + auto tree = TestTree(); + tree.emplace(p, Value{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); +} diff --git a/phtree/common/base_types_test.cc b/test/common/base_types_test.cc similarity index 96% rename from phtree/common/base_types_test.cc rename to test/common/base_types_test.cc index 04a45d6a..389dbf74 100644 --- a/phtree/common/base_types_test.cc +++ b/test/common/base_types_test.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "base_types.h" -#include +#include "phtree/common/base_types.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/bits_test.cc b/test/common/bits_test.cc similarity index 95% rename from phtree/common/bits_test.cc rename to test/common/bits_test.cc index e4129bf3..bc64c5cb 100644 --- a/phtree/common/bits_test.cc +++ b/test/common/bits_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "bits.h" -#include +#include "phtree/common/bits.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/common_test.cc b/test/common/common_test.cc similarity index 88% rename from phtree/common/common_test.cc rename to test/common/common_test.cc index 788c9fd2..eff99aad 100644 --- a/phtree/common/common_test.cc +++ b/test/common/common_test.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "common.h" -#include -#include +#include "phtree/common/common.h" +#include "phtree/converter.h" +#include using namespace improbable::phtree; @@ -30,26 +30,26 @@ TEST(PhTreeCommonTest, NumberOfDivergingBits) { scalar_64_t l_max = std::numeric_limits::max(); bit_width_t x = NumberOfDivergingBits(PhPoint<2>({l1, l1}), PhPoint<2>({l2, l2})); - ASSERT_EQ(64, x); + ASSERT_EQ(64u, x); x = NumberOfDivergingBits(PhPoint<2>({-1, -1}), PhPoint<2>({l_min, l_min})); - ASSERT_EQ(63, x); + ASSERT_EQ(63u, x); x = NumberOfDivergingBits(PhPoint<2>({1, 1}), PhPoint<2>({l_max, l_max})); - ASSERT_EQ(63, x); + ASSERT_EQ(63u, x); x = NumberOfDivergingBits(PhPoint<2>({l1, l2}), PhPoint<2>({l1, l2})); - ASSERT_EQ(0, x); + ASSERT_EQ(0u, x); // PhPointD{679.186, 519.897, 519.897} PhPoint<3> p1{0x4085397c9ffc65e8, 0x40803f2cf7158e9a, 0x40803f2cf7158e9a}; // PhPointD{35.5375, 8.69049, 8.69049} PhPoint<3> p2{0x4041c4ce0e8a359e, 0x40216187a0776fd5, 0x40216187a0776fd5}; x = NumberOfDivergingBits(p1, p2); - ASSERT_EQ(56, x); + ASSERT_EQ(56u, x); // PhPointD{132.406, 219.74, 219.74} PhPoint<3> p20{0x40608cffffe5b480, 
0x406b77aff096adc1, 0x406b77aff096adc1}; // PhPointD{679.186, 519.897, 519.897} PhPoint<3> p21{0x4085397c9ffc65e8, 0x40803f2cf7158e9a, 0x40803f2cf7158e9a}; x = NumberOfDivergingBits(p20, p21); - ASSERT_EQ(56, x); + ASSERT_EQ(56u, x); } diff --git a/phtree/common/flat_array_map_test.cc b/test/common/flat_array_map_test.cc similarity index 94% rename from phtree/common/flat_array_map_test.cc rename to test/common/flat_array_map_test.cc index e0250820..a636da00 100644 --- a/phtree/common/flat_array_map_test.cc +++ b/test/common/flat_array_map_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "flat_array_map.h" -#include +#include "phtree/common/flat_array_map.h" +#include #include using namespace improbable::phtree; @@ -27,7 +27,7 @@ TEST(PhTreeFlatArrayMapTest, SmokeTest) { std::uniform_int_distribution<> cube_distribution(0, max_size - 1); for (int i = 0; i < 10; i++) { - array_map test_map; + array_map test_map; std::map reference_map; for (int j = 0; j < 2 * max_size; j++) { size_t val = cube_distribution(random_engine); @@ -61,7 +61,7 @@ TEST(PhTreeFlatArrayMapTest, SmokeTestWithTryEmplace) { std::uniform_int_distribution<> cube_distribution(0, max_size - 1); for (int i = 0; i < 10; i++) { - array_map test_map; + array_map test_map; std::map reference_map; for (int j = 0; j < 2 * max_size; j++) { size_t val = cube_distribution(random_engine); @@ -91,7 +91,7 @@ TEST(PhTreeFlatArrayMapTest, SmokeTestWithTryEmplace) { TEST(PhTreeFlatArrayMapTest, IteratorPostIncrementTest) { const int num_entries = 3; - array_map test_map; + array_map test_map; for (int j = 0; j < num_entries; j++) { size_t val = j * 2; bool hasVal = test_map.find(val) != test_map.end(); diff --git a/phtree/common/flat_sparse_map_test.cc b/test/common/flat_sparse_map_test.cc similarity index 94% rename from phtree/common/flat_sparse_map_test.cc rename to test/common/flat_sparse_map_test.cc index dcb72bba..b4c3adc3 100644 --- a/phtree/common/flat_sparse_map_test.cc +++ 
b/test/common/flat_sparse_map_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "flat_sparse_map.h" -#include +#include "phtree/common/flat_sparse_map.h" +#include #include using namespace improbable::phtree; @@ -27,7 +27,7 @@ TEST(PhTreeFlatSparseMapTest, SmokeTest) { std::uniform_int_distribution<> cube_distribution(0, max_size - 1); for (int i = 0; i < 10; i++) { - sparse_map test_map; + sparse_map test_map; std::map reference_map; for (int j = 0; j < 2 * max_size; j++) { size_t val = cube_distribution(random_engine); @@ -61,7 +61,7 @@ TEST(PhTreeFlatSparseMapTest, SmokeTestWithTryEmplace) { std::uniform_int_distribution<> cube_distribution(0, max_size - 1); for (int i = 0; i < 10; i++) { - sparse_map test_map; + sparse_map test_map; std::map reference_map; for (int j = 0; j < 2 * max_size; j++) { size_t val = cube_distribution(random_engine); diff --git a/test/common/scripts.cmake b/test/common/scripts.cmake new file mode 100644 index 00000000..f317c6f4 --- /dev/null +++ b/test/common/scripts.cmake @@ -0,0 +1,31 @@ +macro(package_add_test TESTNAME) + # create an executable in which the tests will be stored + add_executable(${TESTNAME} ${ARGN}) + # link the Google test infrastructure, mocking library, and a default main function to + # the test executable. Remove g_test_main if writing your own main function. 
+ target_link_libraries(${TESTNAME} GTest::gtest_main phtree) + # gtest_discover_tests replaces gtest_add_tests, + # see https://cmake.org/cmake/help/v3.10/module/GoogleTest.html for more options to pass to it + gtest_discover_tests(${TESTNAME} + # set the working directory to your project root so that you can find test data via paths relative to the project root + WORKING_DIRECTORY ${PROJECT_DIR} + PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_DIR}" + ) + set_target_properties(${TESTNAME} PROPERTIES FOLDER test) +endmacro() + +macro(package_add_test_main TESTNAME) + # create an executable in which the tests will be stored + add_executable(${TESTNAME} ${ARGN}) + # link the Google test infrastructure (plain gtest, not gtest_main) to the test executable. + # The test sources are expected to provide their own main function. + target_link_libraries(${TESTNAME} gtest phtree) + # gtest_discover_tests replaces gtest_add_tests, + # see https://cmake.org/cmake/help/v3.10/module/GoogleTest.html for more options to pass to it + gtest_discover_tests(${TESTNAME} + # set the working directory to your project root so that you can find test data via paths relative to the project root + WORKING_DIRECTORY ${PROJECT_DIR} + PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_DIR}" + ) + set_target_properties(${TESTNAME} PROPERTIES FOLDER test) +endmacro() \ No newline at end of file diff --git a/phtree/common/converter_test.cc b/test/converter_test.cc similarity index 95% rename from phtree/common/converter_test.cc rename to test/converter_test.cc index c9ede115..667cd221 100644 --- a/phtree/common/converter_test.cc +++ b/test/converter_test.cc @@ -14,9 +14,8 @@ * limitations under the License. 
*/ -#include "converter.h" -#include "common.h" -#include +#include "phtree/converter.h" +#include using namespace improbable::phtree; diff --git a/phtree/common/distance_test.cc b/test/distance_test.cc similarity index 95% rename from phtree/common/distance_test.cc rename to test/distance_test.cc index 0038285a..d0652934 100644 --- a/phtree/common/distance_test.cc +++ b/test/distance_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "common.h" -#include +#include "phtree/distance.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/filter_test.cc b/test/filter_test.cc similarity index 50% rename from phtree/common/filter_test.cc rename to test/filter_test.cc index 41905421..abc712cd 100644 --- a/phtree/common/filter_test.cc +++ b/test/filter_test.cc @@ -14,14 +14,15 @@ * limitations under the License. */ -#include "common.h" -#include +#include "phtree/filter.h" +#include #include using namespace improbable::phtree; TEST(PhTreeFilterTest, FilterSphereTest) { - FilterSphere, DistanceEuclidean<2>> filter{{5, 3}, 5}; + ConverterNoOp<2, scalar_64_t> conv{}; + FilterSphere filter{{5, 3}, 5, conv, DistanceEuclidean<2>{}}; // root is always valid ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); // valid because node encompasses the circle @@ -44,8 +45,9 @@ TEST(PhTreeFilterTest, FilterSphereTest) { ASSERT_FALSE(filter.IsEntryValid({3, 8}, nullptr)); } -TEST(PhTreeFilterTest, BoxFilterTest) { - FilterAABB> filter{{3, 3}, {7, 7}}; +TEST(PhTreeFilterTest, FilterAABBTest) { + ConverterNoOp<2, scalar_64_t> conv{}; + FilterAABB filter{{3, 3}, {7, 7}, conv}; // root is always valid ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); // valid because node encompasses the AABB @@ -63,4 +65,62 @@ TEST(PhTreeFilterTest, FilterNoOpSmokeTest) { auto filter = FilterNoOp(); ASSERT_TRUE(filter.IsNodeValid>({3, 7, 2}, 10)); ASSERT_TRUE(filter.IsEntryValid>({3, 7, 2}, 10)); -} \ No newline at end of file +} + +template +void TestAssignability() { 
+ ASSERT_TRUE(std::is_copy_constructible_v); + ASSERT_TRUE(std::is_copy_assignable_v); + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); +} + +TEST(PhTreeFilterTest, FilterAssignableTest) { + using CONV = ConverterIEEE<3>; + using DIST = DistanceEuclidean<3>; + TestAssignability(); + TestAssignability>(); + TestAssignability>(); + TestAssignability>(); + TestAssignability>(); +} + +TEST(PhTreeFilterTest, ConverterAssignableTest) { + TestAssignability>(); + TestAssignability(); +} + +class TestConverter : public ConverterMultiply<2, 1, 1> { + public: + TestConverter() = default; + + TestConverter(const TestConverter&) = delete; + TestConverter(TestConverter&&) = delete; + TestConverter& operator=(const TestConverter&) = delete; + TestConverter& operator=(TestConverter&&) = delete; +}; + +TEST(PhTreeFilterTest, ConstructFilterAABBTest) { + TestConverter conv; + FilterAABB filter1{{3, 3}, {7, 7}, conv}; + ASSERT_TRUE(filter1.IsNodeValid({0, 0}, 63)); + + FilterAABB filter2{{3, 3}, {7, 7}, TestConverter()}; + ASSERT_TRUE(filter2.IsNodeValid({0, 0}, 63)); +} + +TEST(PhTreeFilterTest, ConstructFilterSphereTest) { + DistanceL1<2> dist; + TestConverter conv; + FilterSphere filter1a{{3, 3}, 7, conv}; + ASSERT_TRUE(filter1a.IsNodeValid({0, 0}, 63)); + FilterSphere filter1b{{3, 3}, 7, conv, {}}; + ASSERT_TRUE(filter1b.IsNodeValid({0, 0}, 63)); + FilterSphere filter1c{{3, 3}, 7, conv, dist}; + ASSERT_TRUE(filter1c.IsNodeValid({0, 0}, 63)); + FilterSphere filter1d{{3, 3}, 7, conv, DistanceL1<2>{}}; + ASSERT_TRUE(filter1d.IsNodeValid({0, 0}, 63)); + + FilterSphere filter2{{3, 3}, 7, TestConverter()}; + ASSERT_TRUE(filter2.IsNodeValid({0, 0}, 63)); +} diff --git a/phtree/phtree_box_d_test.cc b/test/phtree_box_d_test.cc similarity index 78% rename from phtree/phtree_box_d_test.cc rename to test/phtree_box_d_test.cc index 8f630be1..9cd9c84b 100644 --- a/phtree/phtree_box_d_test.cc +++ b/test/phtree_box_d_test.cc @@ -15,12 +15,14 @@ */ #include 
"phtree/phtree.h" -#include +#include #include #include using namespace improbable::phtree; +namespace phtree_box_d_test { + class DoubleRng { public: DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} @@ -39,12 +41,10 @@ struct Id { explicit Id(const size_t i) : _i(i){}; - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - size_t _i; }; @@ -172,7 +172,7 @@ void SmokeTestBasicOps(size_t N) { PhTreeDebugHelper::CheckConsistency(tree); } -TEST(PhTreeDTest, SmokeTestBasicOps) { +TEST(PhTreeBoxDTest, SmokeTestBasicOps) { SmokeTestBasicOps<1>(100); SmokeTestBasicOps<3>(10000); SmokeTestBasicOps<6>(10000); @@ -181,7 +181,7 @@ TEST(PhTreeDTest, SmokeTestBasicOps) { SmokeTestBasicOps<31>(100); } -TEST(PhTreeDTest, TestDebug) { +TEST(PhTreeBoxDTest, TestDebug) { const dimension_t dim = 3; TestTree tree; size_t N = 1000; @@ -274,7 +274,7 @@ TEST(PhTreeBoxDTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -432,8 +432,8 @@ TEST(PhTreeBoxDTest, TestUpdateWithEmplace) { PhBoxD pNew( {pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}, {pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -461,8 +461,8 @@ TEST(PhTreeBoxDTest, TestUpdateWithEmplaceHint) { PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; TestPoint pNew{min, max}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42u); ASSERT_EQ(1, 
tree.count(pNew)); if (delta != 0.0) { @@ -475,6 +475,146 @@ TEST(PhTreeBoxDTest, TestUpdateWithEmplaceHint) { tree.clear(); } +TEST(PhTreeBoxDTest, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 1; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; + PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; + TestPoint pNew{min, max}; + if (delta > 0.0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(points[0], 1); + tree.emplace(points[1], 2); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeBoxDTest, TestUpdateWithRelocateCorenerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{{1, 2, 3}, {2, 3, 4}}; + TestPoint point1{{4, 5, 6}, {5, 6, 7}}; + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(point0, point1)); + ASSERT_EQ(0, tree.size()); 
+ + // Check that small tree works + tree.emplace(point0, 1); + ASSERT_EQ(1, tree.relocate(point0, point1)); + ASSERT_EQ(tree.end(), tree.find(point0)); + ASSERT_EQ(Id(1), *tree.find(point1)); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); +} + +TEST(PhTreeBoxDTest, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 1; + size_t done = 0; + auto pred = [](const Id& id) { return id._i % 2 == 0; }; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; + PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; + TestPoint pNew{min, max}; + if ((delta > 0.0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, 
tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); +} + TEST(PhTreeBoxDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -486,8 +626,8 @@ TEST(PhTreeBoxDTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -505,8 +645,8 @@ TEST(PhTreeBoxDTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -651,7 +791,7 @@ TEST(PhTreeBoxDTest, TestWindowQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { PhPointD min{i * 10., i * 9., i * 11.}; - PhPointD max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + PhPointD max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -754,3 +894,5 @@ TEST(PhTreeBoxDTest, SmokeTestTreeAPI) { PhTreeBoxD<3, const Id> treeConst; treeConst.emplace(PhBoxD<3>({1, 2, 3}, {4, 5, 6}), Id(1)); } + +} // namespace phtree_box_d_test diff --git a/test/phtree_box_d_test_filter.cc b/test/phtree_box_d_test_filter.cc new file mode 100644 index 00000000..f457421f --- /dev/null +++ b/test/phtree_box_d_test_filter.cc @@ -0,0 +1,636 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include +#include + +using namespace improbable::phtree; + +namespace phtree_box_d_test_filter { + +template +using TestKey = PhBoxD; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeBoxD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = PhPointD{rng.next(), rng.next(), rng.next()}; + auto box = PhBoxD{point, {point[0] + 1, point[1] + 1, point[2] + 1}}; + if (refTree.count(box) != 0) { + i--; + continue; + } + + refTree.emplace(box, i); + points.push_back(box); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +static int f_default_construct_ 
= 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const T& value) { + last_known = const_cast(value); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<2 * DIM>&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * 
d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(TestKey, Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const T& value) const { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<2 * DIM>&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestKey, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ 
+ f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + 
ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +// TEST(PhTreeTest, TestFilterAPI_KNN) { +// // Test edge case: only one entry in tree +// TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; +// auto tree = TestTree<3, Id>(); +// tree.emplace(p, Id{1}); +// +// FilterCount<3, Id> filter{}; +// DistanceCount<3> dist_fn{}; +// // lvalue +// ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); +// ASSERT_EQ(2, f_construct_ + f_default_construct_); +// ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); +// f_reset_id_counters(); 
+// +// // rvalue +// ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, +// 1); ASSERT_EQ(2, f_construct_ + f_default_construct_); ASSERT_LE(0, f_copy_construct_ + +// f_move_construct_ + f_copy_assign_ + f_move_assign_); f_reset_id_counters(); +// +// // rvalue #2 +// auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; +// ASSERT_EQ(a, 1); +// ASSERT_EQ(2, f_construct_ + f_default_construct_); +// ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); +// f_reset_id_counters(); +// +// // const Tree: just test that it compiles +// const TestTree<3, Id>& treeC = tree; +// // lvalue +// FilterConst<3, Id> filterC; +// ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); +// // rvalue +// ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, +// 1); f_reset_id_counters(); +// } + +template +double distance(const TestPoint& p1, const TestKey& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double closest = std::clamp(p1[i], p2.min()[i], p2.max()[i]); // nearest coordinate inside the box on this axis + double d2 = p1[i] - closest; // per-axis offset from p1 to that nearest coordinate (0 if p1 lies within the box extent) + sum2 += d2 * d2; + } + return sqrt(sum2); +}; + +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +template +void referenceAABBQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool inside = true; + for (dimension_t d = 0; d < DIM; ++d) { // 'd' avoids shadowing the point index 'i' inserted below + inside &= (p.min()[d] <= center[d] + radius) && (p.max()[d] >= center[d] - radius); + } + + if (inside) { + result.insert(i); + } + } +} + +template +PhBoxD QueryBox(PhPointD& center, double radius) { + typename TestTree::QueryBox query_box{ + {center[0] - radius, 
center[1] - radius, center[2] - radius}, + {center[0] + radius, center[1] + radius, center[2] + radius}}; + return query_box; +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. +template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterBoxSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryWithBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterBoxSphere(center, radius, tree.converter()); + for (auto it = tree.begin_query(query_box, filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryForEach(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterBoxSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](TestKey, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} + +template +void 
testSphereQueryForEachQueryBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterBoxSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](TestKey, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(query_box, callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testAABBQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceAABBQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterBoxAABB(query_box.min(), query_box.max(), tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void Query0(QUERY query) { + TestPoint p{-10000, -10000, -10000}; + int n = 0; + query(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +template +void QueryMany(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +template +void QueryManyAABB(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_EQ(n, 1000); +} + +template +void QueryAll(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 10000, 1000, n); + ASSERT_EQ(1000, n); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQuery) { + Query0<3>(&testSphereQuery<3>); + QueryMany<3>(&testSphereQuery<3>); + QueryAll<3>(&testSphereQuery<3>); +} + +TEST(PhTreeBoxDFilterTest, 
TestSphereQueryWithQueryBox) { + Query0<3>(&testSphereQueryWithBox<3>); + QueryMany<3>(&testSphereQueryWithBox<3>); + QueryAll<3>(&testSphereQueryWithBox<3>); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQueryForEach) { + Query0<3>(&testSphereQueryForEach<3>); + QueryMany<3>(&testSphereQueryForEach<3>); + QueryAll<3>(&testSphereQueryForEach<3>); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQueryForEachWithQueryBox) { + Query0<3>(&testSphereQueryForEachQueryBox<3>); + QueryMany<3>(&testSphereQueryForEachQueryBox<3>); + QueryAll<3>(&testSphereQueryForEachQueryBox<3>); +} + +TEST(PhTreeBoxDFilterTest, TestAABBQuery) { + Query0<3>(&testAABBQuery<3>); + QueryManyAABB<3>(&testAABBQuery<3>); + QueryAll<3>(&testAABBQuery<3>); +} + +} // namespace phtree_box_d_test_filter diff --git a/phtree/phtree_box_d_test_query_types.cc b/test/phtree_box_d_test_query_types.cc similarity index 93% rename from phtree/phtree_box_d_test_query_types.cc rename to test/phtree_box_d_test_query_types.cc index c5460665..84a77a83 100644 --- a/phtree/phtree_box_d_test_query_types.cc +++ b/test/phtree_box_d_test_query_types.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_box_d_test_query_types { + template using TestPoint = PhBoxD; @@ -60,3 +62,5 @@ TEST(PhTreeBoxDTestQueryTypes, SmokeTestQuery) { q3++; ASSERT_EQ(q3, tree.end()); } + +} // namespace phtree_box_d_test_query_types diff --git a/phtree/phtree_box_f_test.cc b/test/phtree_box_f_test.cc similarity index 97% rename from phtree/phtree_box_f_test.cc rename to test/phtree_box_f_test.cc index 05cfbe55..e95f8334 100644 --- a/phtree/phtree_box_f_test.cc +++ b/test/phtree_box_f_test.cc @@ -15,12 +15,14 @@ */ #include "phtree/phtree.h" -#include +#include #include #include using namespace improbable::phtree; +namespace phtree_box_f_test { + template using TestPoint = PhBoxF; @@ -46,12 +48,10 @@ struct Id { explicit Id(const size_t i) : _i(i){}; - bool operator==(Id& 
rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - size_t _i; }; @@ -173,7 +173,7 @@ void SmokeTestBasicOps(size_t N) { PhTreeDebugHelper::CheckConsistency(tree); } -TEST(PhTreeDTest, SmokeTestBasicOps) { +TEST(PhTreeBoxFTest, SmokeTestBasicOps) { SmokeTestBasicOps<1>(100); SmokeTestBasicOps<3>(10000); SmokeTestBasicOps<6>(10000); @@ -182,7 +182,7 @@ TEST(PhTreeDTest, SmokeTestBasicOps) { SmokeTestBasicOps<31>(100); } -TEST(PhTreeDTest, TestDebug) { +TEST(PhTreeBoxFTest, TestDebug) { const dimension_t dim = 3; TestTree tree; size_t N = 1000; @@ -275,7 +275,7 @@ TEST(PhTreeBoxFTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -368,7 +368,7 @@ void populate( template void populate( TestTree& tree, std::vector>& points, size_t N, double boxLen = 10) { - generateCube(points, N, boxLen); + generateCube(points, N, (float)boxLen); for (size_t i = 0; i < N; i++) { ASSERT_TRUE(tree.emplace(points[i], i + 1).second); } @@ -436,8 +436,8 @@ TEST(PhTreeBoxFTest, TestUpdateWithEmplace) { TestPoint pNew( {pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}, {pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -452,7 +452,7 @@ TEST(PhTreeBoxFTest, TestUpdateWithEmplaceHint) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; - std::array deltas{0, 0.1, 1, 10}; + std::array deltas{0.f, 0.1f, 1.f, 10.f}; std::vector> points; populate(tree, points, N); @@ -465,8 +465,8 @@ TEST(PhTreeBoxFTest, TestUpdateWithEmplaceHint) { PhPointF max{pOld.max()[0] + delta, pOld.max()[1] + 
delta, pOld.max()[2] + delta}; TestPoint pNew{min, max}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -490,8 +490,8 @@ TEST(PhTreeBoxFTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -509,8 +509,8 @@ TEST(PhTreeBoxFTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -758,3 +758,5 @@ TEST(PhTreeBoxFTest, SmokeTestTreeAPI) { PhTreeBoxF<3, const Id> treeConst; treeConst.emplace(TestPoint<3>({1, 2, 3}, {4, 5, 6}), Id(1)); } + +} // namespace phtree_box_f_test diff --git a/phtree/phtree_d_test.cc b/test/phtree_d_test.cc similarity index 83% rename from phtree/phtree_d_test.cc rename to test/phtree_d_test.cc index 6e966906..c18d559d 100644 --- a/phtree/phtree_d_test.cc +++ b/test/phtree_d_test.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_d_test { + template using TestPoint = PhPointD; @@ -42,13 +44,18 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const int i) : _i{i} {} + + explicit Id(const size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; int _i; }; @@ -284,7 +291,7 @@ TEST(PhTreeDTest, TestEmplace) { 
ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -328,7 +335,7 @@ TEST(PhTreeDTest, TestSquareBrackets) { ASSERT_EQ(0, tree[p]._i); ASSERT_EQ(tree.count(p), 1); if (i % 2 == 0) { - tree[p]._i = i; + tree[p]._i = (int)i; } else { tree[p] = id; } @@ -441,8 +448,8 @@ TEST(PhTreeDTest, TestUpdateWithEmplace) { for (auto& p : points) { auto pOld = p; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -468,8 +475,8 @@ TEST(PhTreeDTest, TestUpdateWithEmplaceHint) { double delta = deltas[d_n]; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -482,6 +489,142 @@ TEST(PhTreeDTest, TestUpdateWithEmplaceHint) { tree.clear(); } +TEST(PhTreeDTest, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if (delta > 0.0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0.0) { + // second time fails because value has already been moved + 
ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(points[0], 1); + tree.emplace(points[1], 2); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeDTest, TestUpdateWithRelocateCorenerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(point0, point1)); + ASSERT_EQ(0, tree.size()); + + // Check that small tree works + tree.emplace(point0, 1); + ASSERT_EQ(1, tree.relocate(point0, point1)); + ASSERT_EQ(tree.end(), tree.find(point0)); + ASSERT_EQ(Id(1), *tree.find(point1)); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); +} + +TEST(PhTreeDTest, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + size_t done = 0; + auto pred = [](const Id& id) { return id._i % 2 == 0; }; + for (auto& p : points) { + auto 
pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if ((delta > 0.0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); +} + TEST(PhTreeDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -493,8 +636,8 @@ TEST(PhTreeDTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -512,8 +655,8 @@ TEST(PhTreeDTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -688,9 +831,9 @@ TEST(PhTreeDTest, TestWindowQueryManyMoving) { double query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (long i = -120; i 
< 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -724,7 +867,7 @@ TEST(PhTreeDTest, TestWindowForEachQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -1011,3 +1154,5 @@ TEST(PhTreeDTest, SmokeTestTreeAPI) { PhTreeD<3, const Id> treeConst; treeConst.emplace(PhPointD<3>{1, 2, 3}, Id(1)); } + +} // namespace phtree_d_test diff --git a/test/phtree_d_test_copy_move.cc b/test/phtree_d_test_copy_move.cc new file mode 100644 index 00000000..d4c6abb8 --- /dev/null +++ b/test/phtree_d_test_copy_move.cc @@ -0,0 +1,301 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +namespace phtree_d_test_copy_move { + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct IdCopyOnly { + explicit IdCopyOnly(const size_t i) : _i{i} {} + + IdCopyOnly() = default; + IdCopyOnly(const IdCopyOnly& other) = default; + IdCopyOnly(IdCopyOnly&& other) = delete; + // IdCopyOnly& operator=(const IdCopyOnly& other) = default; + IdCopyOnly& operator=(const IdCopyOnly& other) { + _i = other._i; + return *this; + } + IdCopyOnly& operator=(IdCopyOnly&& other) = delete; + ~IdCopyOnly() = default; + + bool operator==(const IdCopyOnly& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; +}; + +struct IdMoveOnly { + explicit IdMoveOnly(const size_t i) : _i{i} {} + + IdMoveOnly() = default; + IdMoveOnly(const IdMoveOnly& other) = delete; + IdMoveOnly(IdMoveOnly&& other) = default; + IdMoveOnly& operator=(const IdMoveOnly& other) = delete; + IdMoveOnly& operator=(IdMoveOnly&& other) = default; + ~IdMoveOnly() = default; + + bool operator==(const IdMoveOnly& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; +}; + +// Assert that copy-ctr is not called even when available +struct IdCopyOrMove { + explicit IdCopyOrMove(const size_t i) : _i{i} {} + + IdCopyOrMove() = default; + IdCopyOrMove(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove(IdCopyOrMove&& other) = default; + IdCopyOrMove& operator=(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove& operator=(IdCopyOrMove&& other) = default; + ~IdCopyOrMove() = default; + + bool operator==(const IdCopyOrMove& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; +}; + +template +void generateCube(std::vector>& points, size_t 
N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + TestPoint point{}; + for (dimension_t d = 0; d < DIM; ++d) { + point[d] = rng.next(); + } + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps_QueryAndErase(TestTree& tree, std::vector>& points) { + size_t N = points.size(); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_knn_query(1, p, DistanceEuclidean()); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + TestPoint pOld = p; + for (dimension_t d = 0; d < DIM; ++d) { + p[d] += 10000; + } + auto r = tree.relocate(pOld, p); + ASSERT_EQ(r, 1u); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_EQ(i, tree.find(p)->_i); + if (i % 2 == 0) { + ASSERT_EQ(1u, tree.erase(p)); + } else { + auto iter = tree.find(p); + ASSERT_EQ(1u, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0u, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +template +void SmokeTestBasicOps(size_t N) { + 
TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 4 == 0) { + ASSERT_TRUE(tree.try_emplace(p, id).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else if (i % 4 == 2) { + tree[p] = id; + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeDTestCopyMove, SmokeTestBasicOpsCopyOnly) { + SmokeTestBasicOps<1, IdCopyOnly>(100); + SmokeTestBasicOps<3, IdCopyOnly>(100); + SmokeTestBasicOps<6, IdCopyOnly>(100); + SmokeTestBasicOps<10, IdCopyOnly>(100); + SmokeTestBasicOps<20, IdCopyOnly>(100); + SmokeTestBasicOps<63, IdCopyOnly>(100); +} + +template +void SmokeTestBasicOpsMoveOnly(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + + if (i % 2 == 0) { + ASSERT_TRUE(tree.try_emplace(p, Id(i)).second); + } else if (i % 4 == 1) { + tree[p] = Id(i); + } else { + ASSERT_TRUE(tree.emplace(p, Id(i)).second); + } + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, 
tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.try_emplace(p, Id(i)).second); + ASSERT_FALSE(tree.emplace(p, Id(i)).second); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeDTestCopyMove, SmokeTestBasicOpsMoveOnly) { + SmokeTestBasicOpsMoveOnly<1, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<3, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<6, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<10, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<20, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<63, IdMoveOnly>(100); +} + +TEST(PhTreeDTestCopyMove, SmokeTestBasicOpsCopyFails) { + SmokeTestBasicOpsMoveOnly<1, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<3, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<6, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<10, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<20, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<63, IdCopyOrMove>(100); +} + +} // namespace phtree_d_test_copy_move diff --git a/phtree/phtree_d_test_custom_key.cc b/test/phtree_d_test_custom_key.cc similarity index 97% rename from phtree/phtree_d_test_custom_key.cc rename to test/phtree_d_test_custom_key.cc index aa293f1d..3e2c5368 100644 --- a/phtree/phtree_d_test_custom_key.cc +++ b/test/phtree_d_test_custom_key.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_d_test_custom_key { + static const double MY_MULTIPLIER = 1000000.; /* @@ -106,14 +108,12 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i{static_cast(i)} {} bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -215,3 +215,5 @@ void 
SmokeTestBasicOps() { TEST(PhTreeDTestCustomKey, SmokeTestBasicOps) { SmokeTestBasicOps<3>(); } + +} // namespace phtree_d_test_custom_key diff --git a/test/phtree_d_test_filter.cc b/test/phtree_d_test_filter.cc new file mode 100644 index 00000000..86ef2c3f --- /dev/null +++ b/test/phtree_d_test_filter.cc @@ -0,0 +1,484 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include +#include + +using namespace improbable::phtree; + +namespace phtree_d_test_filter { + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + 
continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) { + last_known = const_cast(value); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const 
DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(TestPoint, Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) const { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestPoint, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +TEST(PhTreeDFilterTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // rvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + 
ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // lvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = 
TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_KNN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + 
tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + DistanceCount<3> dist_fn{}; + // lvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue #2 + auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; + ASSERT_EQ(a, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); +}; + +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
+template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +TEST(PhTreeDFilterTest, TestSphereQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + int n = 0; + testSphereQuery(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +TEST(PhTreeDFilterTest, TestSphereQueryMany) { + const dimension_t dim = 3; + TestPoint p{0, 0, 0}; + int n = 0; + testSphereQuery(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +TEST(PhTreeDFilterTest, TestSphereQueryAll) { + const dimension_t dim = 3; + TestPoint p{0, 0, 0}; + int n = 0; + testSphereQuery(p, 10000, 1000, n); + ASSERT_EQ(1000, n); +} + +} // namespace phtree_d_test_filter diff --git a/phtree/phtree_d_test_preprocessor.cc b/test/phtree_d_test_preprocessor.cc similarity index 94% rename from phtree/phtree_d_test_preprocessor.cc rename to test/phtree_d_test_preprocessor.cc index 7e2e9010..588a2a20 100644 --- a/phtree/phtree_d_test_preprocessor.cc +++ b/test/phtree_d_test_preprocessor.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_d_test_preprocessor { + template using TestPoint = PhPointD; @@ -42,14 +44,12 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -153,3 +153,5 @@ 
TEST(PhTreeDTestPreprocessor, SmokeTestBasicOps) { SmokeTestBasicOps<10>(); SmokeTestBasicOps<20>(); } + +} // namespace phtree_d_test_preprocessor diff --git a/phtree/phtree_f_test.cc b/test/phtree_f_test.cc similarity index 96% rename from phtree/phtree_f_test.cc rename to test/phtree_f_test.cc index 9e2e3a93..5ea94648 100644 --- a/phtree/phtree_f_test.cc +++ b/test/phtree_f_test.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_f_test { + template using TestPoint = PhPointF; @@ -43,14 +45,14 @@ class FloatRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const int i) : _i{i} {} + + explicit Id(const size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -69,7 +71,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)(p1[i]) - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -283,7 +285,7 @@ TEST(PhTreeFTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -327,7 +329,7 @@ TEST(PhTreeFTest, TestSquareBrackets) { ASSERT_EQ(0, tree[p]._i); ASSERT_EQ(tree.count(p), 1); if (i % 2 == 0) { - tree[p]._i = i; + tree[p]._i = (int)i; } else { tree[p] = id; } @@ -443,8 +445,8 @@ TEST(PhTreeFTest, TestUpdateWithEmplace) { static_cast(pOld[0] + delta), static_cast(pOld[1] + delta), static_cast(pOld[2] + delta)}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, 
tree.count(pOld)); @@ -466,8 +468,8 @@ TEST(PhTreeFTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -485,8 +487,8 @@ TEST(PhTreeFTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -945,22 +947,30 @@ TEST(PhTreeFTest, SmokeTestPointInfinity) { // Note that the tree returns result in z-order, however, since the z-order is based on // the (unsigned) bit representation, negative values come _after_ positive values. auto q_window = tree.begin_query({p_neg, p_pos}); - ASSERT_EQ(1, q_window->_i); + std::set result; + result.emplace(q_window->_i); ++q_window; - ASSERT_EQ(10, q_window->_i); + result.emplace(q_window->_i); ++q_window; - ASSERT_EQ(-10, q_window->_i); + result.emplace(q_window->_i); ++q_window; ASSERT_EQ(q_window, tree.end()); + ASSERT_EQ(1, result.count(1)); + ASSERT_EQ(1, result.count(10)); + ASSERT_EQ(1, result.count(-10)); auto q_extent = tree.begin(); - ASSERT_EQ(1, q_extent->_i); + result.clear(); + result.emplace(q_extent->_i); ++q_extent; - ASSERT_EQ(10, q_extent->_i); + result.emplace(q_extent->_i); ++q_extent; - ASSERT_EQ(-10, q_extent->_i); + result.emplace(q_extent->_i); ++q_extent; ASSERT_EQ(q_extent, tree.end()); + ASSERT_EQ(1, result.count(1)); + ASSERT_EQ(1, result.count(10)); + ASSERT_EQ(1, result.count(-10)); auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); ASSERT_EQ(1, q_knn->_i); @@ -993,3 +1003,5 @@ TEST(PhTreeFTest, SmokeTestTreeAPI) { PhTreeF<3, const Id> treeConst; treeConst.emplace(PhPointF<3>{1, 2, 3}, Id(1)); } + +} // namespace phtree_f_test diff --git 
a/phtree/phtree_multimap_box_d_test.cc b/test/phtree_multimap_box_d_test.cc similarity index 85% rename from phtree/phtree_multimap_box_d_test.cc rename to test/phtree_multimap_box_d_test.cc index d1f19a85..abb4ada7 100644 --- a/phtree/phtree_multimap_box_d_test.cc +++ b/test/phtree_multimap_box_d_test.cc @@ -15,12 +15,14 @@ */ #include "phtree/phtree_multimap.h" -#include +#include #include #include using namespace improbable::phtree; +namespace phtree_multimap_box_d_test { + // Number of entries that have the same coordinate static const size_t NUM_DUPL = 4; static const double WORLD_MIN = -1000; @@ -49,29 +51,30 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i), data_{0} {}; + explicit Id(const size_t i) : _i{static_cast(i)}, data_{0} {} bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; int data_; }; +} // namespace phtree_multimap_box_d_test namespace std { template <> -struct hash { - size_t operator()(const Id& x) const { +struct hash { + size_t operator()(const phtree_multimap_box_d_test::Id& x) const { return std::hash{}(x._i); } }; }; // namespace std +namespace phtree_multimap_box_d_test { + struct PointDistance { - PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + PointDistance(double distance, size_t id) : _distance(distance), _id(static_cast(id)) {} double _distance; int _id; @@ -99,7 +102,7 @@ void generateCube(std::vector>& points, size_t N, double box_Len points.reserve(N); for (size_t i = 0; i < N / NUM_DUPL; i++) { - // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional // duplicates. 
TestPoint key{}; for (dimension_t d = 0; d < DIM; ++d) { @@ -123,7 +126,6 @@ void generateCube(std::vector>& points, size_t N, double box_Len template void SmokeTestBasicOps(size_t N) { TestTree tree; - std::vector> points; generateCube(points, N); @@ -139,17 +141,19 @@ void SmokeTestBasicOps(size_t N) { } Id id(i); - if (i % 2 == 0) { + if (i % 4 == 0) { ASSERT_TRUE(tree.emplace(p, id).second); - } else { + } else if (i % 4 == 1) { ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); } ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); ASSERT_NE(tree.end(), tree.find(p)); ASSERT_EQ(id._i, tree.find(p, id)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try adding it again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); @@ -413,6 +417,13 @@ TEST(PhTreeMMBoxDTest, TestFind) { ASSERT_NE(tree.find(p, id), tree.end()); ASSERT_NE(tree.end(), tree.find(p, id)); ASSERT_EQ(tree.find(p, id)->_i, i); + auto iterN = tree.find(points[0]); + size_t n = 0; + while (iterN != tree.end()) { + ++iterN; + ++n; + } + ASSERT_EQ(n, NUM_DUPL); i++; } @@ -488,7 +499,7 @@ TEST(PhTreeMMBoxDTest, TestUpdateWithEmplaceHint) { tree.clear(); } -TEST(PhTreeMMDTest, TestUpdateWithRelocate) { +void TestUpdateWithRelocate(bool relocate_to_existing_coordinate) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; @@ -496,29 +507,91 @@ TEST(PhTreeMMDTest, TestUpdateWithRelocate) { std::vector> points; populate(tree, points, N); - size_t i = 0; - size_t d_n = 0; - for (auto& p : points) { - auto pOld = p; - d_n = (d_n + 1) % deltas.size(); - double delta = deltas[d_n]; - PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; - PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; - TestPoint pNew{min, max}; - ASSERT_EQ(1, tree.relocate(pOld, pNew, Id(i))); - if (delta > 0.0) { - // second time fails because 
value has already been moved - ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); + for (auto delta : deltas) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew; + if (relocate_to_existing_coordinate) { + pNew = delta > 0.0 ? points[(i + 17) % N] : pOld; + } else { + PhPointD min{ + pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; + PhPointD max{ + pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; + pNew = {min, max}; + } + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + if (pOld != pNew) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, Id(i))); + ASSERT_EQ(tree.end(), tree.find(pOld, Id(i))); + } else { + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + } + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + p = pNew; + ++i; } - ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); - p = pNew; - ++i; + PhTreeDebugHelper::CheckConsistency(tree); } ASSERT_EQ(N, tree.size()); tree.clear(); } +TEST(PhTreeMMBoxDTest, TestUpdateWithRelocateDelta) { + TestUpdateWithRelocate(false); +} + +TEST(PhTreeMMBoxDTest, TestUpdateWithRelocateToExisting) { + TestUpdateWithRelocate(true); +} + +TEST(PhTreeMMBoxDTest, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{{1, 2, 3}, {2, 3, 4}}; + TestPoint point1{{2, 3, 4}, {3, 4, 5}}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(42))); + + // Check that small tree works + tree.emplace(point0, Id(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, Id(1))); + ASSERT_EQ(tree.end(), tree.find(point0, Id(1))); + ASSERT_EQ(1, tree.find(point1, Id(1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, Id(1)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(1))); + PhTreeDebugHelper::CheckConsistency(tree); 
+ tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, Id(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + TEST(PhTreeMMBoxDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -583,15 +656,19 @@ TEST(PhTreeMMBoxDTest, TestExtent) { template struct FilterEvenId { - [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const T& value) const { - return value._i % 2 == 0; + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const BucketT&) const { + return true; } [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<2 * DIM>&, int) const { return true; } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint<2 * DIM>&, const T& value) const { + return value._i % 2 == 0; + } }; -TEST(PhTreeMMDTest, TestExtentFilter) { +TEST(PhTreeMMBoxDTest, TestExtentFilter) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; @@ -608,7 +685,7 @@ TEST(PhTreeMMDTest, TestExtentFilter) { ASSERT_EQ(N, num_e * 2); } -TEST(PhTreeMMDTest, TestExtentForEachFilter) { +TEST(PhTreeMMBoxDTest, TestExtentForEachFilter) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; @@ -652,7 +729,7 @@ TEST(PhTreeMMBoxDTest, TestRangeBasedForLoop) { ASSERT_EQ(N, num_e2); } -TEST(PhTreeMMDTest, TestEstimateCountIntersect) { +TEST(PhTreeMMBoxDTest, TestEstimateCountIntersect) { const dimension_t dim = 3; TestTree tree; size_t N = 1000; @@ -680,7 +757,7 @@ TEST(PhTreeMMDTest, 
TestEstimateCountIntersect) { ASSERT_EQ(N, n_all); } -TEST(PhTreeMMDTest, TestEstimateCountInclude) { +TEST(PhTreeMMBoxDTest, TestEstimateCountInclude) { const dimension_t dim = 3; TestTree tree; size_t N = 1000; @@ -809,7 +886,7 @@ TEST(PhTreeMMBoxDTest, TestWindowQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { PhPointD min{i * 10., i * 9., i * 11.}; - PhPointD max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + PhPointD max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -905,3 +982,5 @@ TEST(PhTreeMMBoxDTest, SmokeTestTreeAPI) { treePtr.clear(); delete idPtr; } + +} // namespace phtree_multimap_box_d_test diff --git a/phtree/phtree_multimap_d_test.cc b/test/phtree_multimap_d_test.cc similarity index 78% rename from phtree/phtree_multimap_d_test.cc rename to test/phtree_multimap_d_test.cc index d695ec91..bc31a504 100644 --- a/phtree/phtree_multimap_d_test.cc +++ b/test/phtree_multimap_d_test.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree_multimap.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_multimap_d_test { + // Number of entries that have the same coordinate static const size_t NUM_DUPL = 4; static const double WORLD_MIN = -1000; @@ -29,7 +31,7 @@ template using TestPoint = PhPointD; template -using TestTree = PhTreeMultiMap>; +using TestTree = PhTreeMultiMapD; class DoubleRng { public: @@ -47,36 +49,31 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i), data_{0} {}; + explicit Id(const int i) : _i{i}, data_{0} {} + explicit Id(const size_t i) : _i{static_cast(i)}, data_{0} {} bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; int data_; }; +} namespace std { template <> -struct hash { - size_t operator()(const Id& x) const { +struct hash { + size_t operator()(const 
phtree_multimap_d_test::Id& x) const { return std::hash{}(x._i); } }; }; // namespace std -struct IdHash { - template - std::size_t operator()(std::pair const& v) const { - return std::hash()(v.size()); - } -}; +namespace phtree_multimap_d_test { struct PointDistance { - PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + PointDistance(double distance, size_t id) : _distance(distance), _id(static_cast(id)) {} double _distance; int _id; @@ -113,7 +110,7 @@ void generateCube(std::vector>& points, size_t N) { points.reserve(N); for (size_t i = 0; i < N / NUM_DUPL; i++) { - // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional // duplicates. TestPoint key{}; for (dimension_t d = 0; d < DIM; ++d) { @@ -136,7 +133,6 @@ void generateCube(std::vector>& points, size_t N) { template void SmokeTestBasicOps(size_t N) { TestTree tree; - std::vector> points; generateCube(points, N); @@ -152,17 +148,19 @@ void SmokeTestBasicOps(size_t N) { } Id id(i); - if (i % 2 == 0) { + if (i % 4 == 0) { ASSERT_TRUE(tree.emplace(p, id).second); - } else { + } else if (i % 4 == 1) { ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); } ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); ASSERT_NE(tree.end(), tree.find(p)); ASSERT_EQ(id._i, tree.find(p, id)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try adding it again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); @@ -421,6 +419,13 @@ TEST(PhTreeMMDTest, TestFind) { ASSERT_NE(tree.find(p, id), tree.end()); ASSERT_NE(tree.end(), tree.find(p, id)); ASSERT_EQ(tree.find(p, id)->_i, i); + auto iterN = tree.find(points[0]); + size_t n = 0; + while (iterN != tree.end()) { + ++iterN; + ++n; + } + ASSERT_EQ(n, NUM_DUPL); i++; } @@ -490,9 +495,13 @@ TEST(PhTreeMMDTest, 
TestUpdateWithEmplaceHint) { ASSERT_EQ(N, tree.size()); tree.clear(); + + tree.emplace_hint(tree.end(), {11, 21, 31}, 421); + tree.emplace_hint(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); } -TEST(PhTreeMMDTest, TestUpdateWithRelocate) { +void TestUpdateWithRelocate(bool relocate_to_existing_coordinate) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; @@ -500,27 +509,87 @@ TEST(PhTreeMMDTest, TestUpdateWithRelocate) { std::vector> points; populate(tree, points, N); - size_t i = 0; - size_t d_n = 0; - for (auto& p : points) { - auto pOld = p; - d_n = (d_n + 1) % deltas.size(); - double delta = deltas[d_n]; - TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - ASSERT_EQ(1, tree.relocate(pOld, pNew, Id(i))); - if (delta > 0.0) { - // second time fails because value has already been moved - ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); + for (auto delta : deltas) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew; + if (relocate_to_existing_coordinate) { + pNew = delta > 0.0 ? 
points[(i + 17) % N] : pOld; + } else { + pNew = {pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + } + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + if (pOld != pNew) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, Id(i))); + ASSERT_EQ(tree.end(), tree.find(pOld, Id(i))); + } else { + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + } + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + p = pNew; + ++i; } - ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); - p = pNew; - ++i; + PhTreeDebugHelper::CheckConsistency(tree); } ASSERT_EQ(N, tree.size()); tree.clear(); } +TEST(PhTreeMMDTest, TestUpdateWithRelocateDelta) { + TestUpdateWithRelocate(false); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateToExisting) { + TestUpdateWithRelocate(true); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(42))); + + // Check that small tree works + tree.emplace(point0, Id(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, Id(1))); + ASSERT_EQ(tree.end(), tree.find(point0, Id(1))); + ASSERT_EQ(1, tree.find(point1, Id(1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, Id(1)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(1))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + 
PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, Id(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + TEST(PhTreeMMDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -585,12 +654,16 @@ TEST(PhTreeMMDTest, TestExtent) { template struct FilterEvenId { - [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) const { - return value._i % 2 == 0; + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const BucketT&) const { + return true; } [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { return true; } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint&, const T& value) const { + return value._i % 2 == 0; + } }; TEST(PhTreeMMDTest, TestExtentFilter) { @@ -782,7 +855,7 @@ TEST(PhTreeMMDTest, TestWindowQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -813,7 +886,7 @@ TEST(PhTreeMMDTest, TestWindowForEachQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -1058,22 +1131,30 @@ TEST(PhTreeMMDTest, SmokeTestPointInfinity) { // Note that the tree returns result in z-order, however, since the z-order is based on // the (unsigned) bit representation, negative values come _after_ positive values. 
auto q_window = tree.begin_query({p_neg, p_pos}); - ASSERT_EQ(1, q_window->_i); + std::set result; + result.emplace(q_window->_i); ++q_window; - ASSERT_EQ(10, q_window->_i); + result.emplace(q_window->_i); ++q_window; - ASSERT_EQ(-10, q_window->_i); + result.emplace(q_window->_i); ++q_window; ASSERT_EQ(q_window, tree.end()); + ASSERT_EQ(1, result.count(1)); + ASSERT_EQ(1, result.count(10)); + ASSERT_EQ(1, result.count(-10)); auto q_extent = tree.begin(); - ASSERT_EQ(1, q_extent->_i); + result.clear(); + result.emplace(q_extent->_i); ++q_extent; - ASSERT_EQ(10, q_extent->_i); + result.emplace(q_extent->_i); ++q_extent; - ASSERT_EQ(-10, q_extent->_i); + result.emplace(q_extent->_i); ++q_extent; ASSERT_EQ(q_extent, tree.end()); + ASSERT_EQ(1, result.count(1)); + ASSERT_EQ(1, result.count(10)); + ASSERT_EQ(1, result.count(-10)); auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); ASSERT_EQ(1, q_knn->_i); @@ -1102,3 +1183,123 @@ TEST(PhTreeMMDTest, SmokeTestTreeAPI) { treePtr.clear(); delete idPtr; } + +template +void test_tree(TREE& tree) { + PhPointD<3> p{1, 2, 3}; + + // test various operations + tree.emplace(p, Id{2}); + Id id3{3}; + tree.insert(p, id3); + ASSERT_EQ(tree.size(), 3); + ASSERT_EQ(tree.count(p), 3); + ASSERT_EQ(tree.find(p, Id(1))->_i, 1); + ASSERT_EQ(tree.find(p, Id(2))->_i, 2); + ASSERT_EQ(tree.find(p, Id(3))->_i, 3); + + auto q_window = tree.begin_query({p, p}); + std::set wq_result; + wq_result.emplace(q_window->_i); + ++q_window; + wq_result.emplace(q_window->_i); + ++q_window; + wq_result.emplace(q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + ASSERT_EQ(3, wq_result.size()); + + auto q_extent = tree.begin(); + std::set eq_result; + eq_result.emplace(q_extent->_i); + ++q_extent; + eq_result.emplace(q_extent->_i); + ++q_extent; + eq_result.emplace(q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + ASSERT_EQ(3, eq_result.size()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + 
std::set knn_result; + knn_result.emplace(q_knn->_i); + ++q_knn; + knn_result.emplace(q_knn->_i); + ++q_knn; + knn_result.emplace(q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + ASSERT_EQ(3, knn_result.size()); + + ASSERT_EQ(1, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(0, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(1, tree.erase(p, Id{2})); + ASSERT_EQ(1, tree.erase(p, Id{3})); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeMMDTest, TestMoveConstruct) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + PhTreeMultiMapD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{std::move(tree1)}; + test_tree(tree); +} + +TEST(PhTreeMMDTest, TestMoveAssign) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + PhTreeMultiMapD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{}; + tree = std::move(tree1); + test_tree(tree); +} + +TEST(PhTreeMMDTest, TestMovableIterators) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterMultiMapAABB filter(p, p, tree.converter()); + ASSERT_TRUE(std::is_move_constructible_v); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v()))>); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v()))>); +} + +TEST(PhTreeMMTest, FuzzTest1) { + // See issue #115 + const dimension_t DIM = 1; + // using Key = PhPoint; + using Value = std::uint8_t; + 
PhTreeMultiMap> tree{}; + tree.emplace({0}, 63); + tree.emplace({0}, 214); + tree.relocate({0}, {17}, 0); +} + +} // namespace phtree_multimap_d_test diff --git a/test/phtree_multimap_d_test_copy_move.cc b/test/phtree_multimap_d_test_copy_move.cc new file mode 100644 index 00000000..ed3c652a --- /dev/null +++ b/test/phtree_multimap_d_test_copy_move.cc @@ -0,0 +1,330 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable::phtree; + +namespace phtree_multimap_d_test_copy_move { + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +static const double WORLD_MIN = -1000; +static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeMultiMap>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct IdCopyOnly { + explicit IdCopyOnly(const size_t i) : _i{static_cast(i)} {} + + IdCopyOnly() = default; + IdCopyOnly(const IdCopyOnly& other) = default; + IdCopyOnly(IdCopyOnly&& other) = delete; + IdCopyOnly& operator=(const IdCopyOnly& other) = default; + IdCopyOnly& operator=(IdCopyOnly&& other) = delete; + ~IdCopyOnly() = default; + + bool 
operator==(const IdCopyOnly& rhs) const { + return _i == rhs._i; + } + + int _i{}; + int _data{}; +}; + +struct IdMoveOnly { + explicit IdMoveOnly(const size_t i) : _i{i} {} + + IdMoveOnly() = default; + IdMoveOnly(const IdMoveOnly& other) = delete; + IdMoveOnly(IdMoveOnly&& other) = default; + IdMoveOnly& operator=(const IdMoveOnly& other) = delete; + IdMoveOnly& operator=(IdMoveOnly&& other) = default; + ~IdMoveOnly() = default; + + bool operator==(const IdMoveOnly& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; + int _data{}; +}; + +// Assert that copy-ctr is not called even when available +struct IdCopyOrMove { + explicit IdCopyOrMove(const size_t i) : _i{i} {} + + IdCopyOrMove() = default; + IdCopyOrMove(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove(IdCopyOrMove&& other) = default; + IdCopyOrMove& operator=(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove& operator=(IdCopyOrMove&& other) = default; + ~IdCopyOrMove() = default; + + bool operator==(const IdCopyOrMove& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; + int _data{}; +}; +} + +namespace std { +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_copy_move::IdCopyOnly& x) const { + return std::hash{}(x._i); + } +}; +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_copy_move::IdMoveOnly& x) const { + return std::hash{}(x._i); + } +}; +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_copy_move::IdCopyOrMove& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +namespace phtree_multimap_d_test_copy_move { + +struct IdHash { + template + std::size_t operator()(std::pair const& v) const { + return std::hash()(v.size()); + } +}; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / 
NUM_DUPL; i++) { + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps_QueryAndErase(TestTree& tree, std::vector>& points) { + size_t N = points.size(); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_NE(tree.find(p), tree.end()); + size_t expected_remaining = (N - i - 1) % NUM_DUPL + 1; + ASSERT_EQ(tree.count(p), expected_remaining); + ASSERT_EQ(i, tree.find(p, id)->_i); + if (i % 2 == 0) { + ASSERT_EQ(1, tree.erase(p, id)); + } else { + auto iter = tree.find(p, id); + ASSERT_EQ(1, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p, id)); + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; + 
generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + Id id(i); + if (i % 4 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); + } + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeMMDTestCopyMove, SmokeTestBasicOps) { + SmokeTestBasicOps<1, IdCopyOnly>(100); + SmokeTestBasicOps<3, IdCopyOnly>(100); + SmokeTestBasicOps<6, IdCopyOnly>(100); + SmokeTestBasicOps<10, IdCopyOnly>(100); + SmokeTestBasicOps<20, IdCopyOnly>(100); + SmokeTestBasicOps<63, IdCopyOnly>(100); +} + +template +void SmokeTestBasicOpsMoveOnly(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, Id(i)).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, Id(i)).second); + } + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, tree.find(p, 
Id(i))->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.try_emplace(p, Id(i)).second); + ASSERT_FALSE(tree.emplace(p, Id(i)).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, tree.find(p, Id(i))->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeMMDTestCopyMove, SmokeTestBasicOpsMoveOnly) { + SmokeTestBasicOpsMoveOnly<1, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<3, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<6, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<10, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<20, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<63, IdMoveOnly>(100); +} + +TEST(PhTreeMMDTestCopyMove, SmokeTestBasicOpsCopyFails) { + SmokeTestBasicOpsMoveOnly<1, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<3, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<6, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<10, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<20, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<63, IdCopyOrMove>(100); +} + +} // namespace phtree_multimap_d_test_copy_move diff --git a/test/phtree_multimap_d_test_filter.cc b/test/phtree_multimap_d_test_filter.cc new file mode 100644 index 00000000..89b04057 --- /dev/null +++ b/test/phtree_multimap_d_test_filter.cc @@ -0,0 +1,692 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_multimap.h" +#include +#include +#include + +using namespace improbable::phtree; + +namespace phtree_multimap_d_test_filter { + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +[[maybe_unused]] static const double WORLD_MIN = -1000; +[[maybe_unused]] static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeMultiMap>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; +} // namespace phtree_multimap_d_test_filter + +namespace std { +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_filter::Id& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +namespace phtree_multimap_d_test_filter { + +struct IdHash { + template + std::size_t operator()(std::pair const& v) const { + return std::hash()(v.size()); + } +}; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional + // duplicates. 
+ TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const BucketT& bucket) { + assert(!bucket.empty()); + return true; + } + + template + [[nodiscard]] bool IsBucketEntryValid(const PhPoint&, const T2& value) { + last_known = value; + return 
true; + } + + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(const TestPoint, const Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const BucketT&) const { + return true; + } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint&, const T& value) { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestPoint, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" 
<< f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +/* + * General comment: We are testing several things here. + * - If we pass lvalue filters/callbacks/... we want to ensure that they do not get copied or + * moved at all. We need to ensure that the lvalue argument is the same instance that is + * used internally by the iterator. + * - If we pass rvalue filters/callbacks/..., preventing copies/moves is harder. We are testing + * somewhat arbitrarily for a limit of 3 moves/copies per argument. + * - We want to ensure that both rvalue/lvalue arguments work. + * - We also do some limited testing that it works with 'const' trees. + * - Finally, we test separately that the old legacy filters still work + */ + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(4, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only 
one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(4, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_GE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_GE(2, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + 
ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_GE(2, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_KNN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + DistanceCount<3> dist_fn{}; + // lvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(2 * 3, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue #2 + auto a = 
tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; + ASSERT_EQ(a, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(2 * 3, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); +}; + +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +template +void referenceAABBQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool inside = true; + for (dimension_t i = 0; i < DIM; ++i) { + inside &= std::abs(p[i] - center[i]) <= radius; + } + + if (inside) { + result.insert(i); + } + } +} + +template +PhBoxD QueryBox(PhPointD& center, double radius) { + typename TestTree::QueryBox query_box{ + {center[0] - radius, center[1] - radius, center[2] - radius}, + {center[0] + radius, center[1] + radius, center[2] + radius}}; + return query_box; +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
+template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryWithBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + for (auto it = tree.begin_query(query_box, filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryForEach(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](PhPointD, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} +template +void testSphereQueryForEachQueryBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, 
radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](PhPointD, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(query_box, callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testAABBQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceAABBQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterMultiMapAABB(query_box.min(), query_box.max(), tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void Query0(QUERY query) { + TestPoint p{-10000, -10000, -10000}; + int n = 0; + query(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +template +void QueryMany(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +template +void QueryManyAABB(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_EQ(n, 1000); +} + +template +void QueryAll(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 10000, 1000, n); + ASSERT_EQ(1000, n); +} + +TEST(PhTreeMMDFilterTest, TestSphereQuery) { + Query0<3>(&testSphereQuery<3>); + QueryMany<3>(&testSphereQuery<3>); + QueryAll<3>(&testSphereQuery<3>); +} + +TEST(PhTreeMMDFilterTest, TestSphereQueryWithQueryBox) { + Query0<3>(&testSphereQueryWithBox<3>); + QueryMany<3>(&testSphereQueryWithBox<3>); + QueryAll<3>(&testSphereQueryWithBox<3>); +} + +TEST(PhTreeMMDFilterTest, TestSphereQueryForEach) { + 
Query0<3>(&testSphereQueryForEach<3>); + QueryMany<3>(&testSphereQueryForEach<3>); + QueryAll<3>(&testSphereQueryForEach<3>); +} + +TEST(PhTreeMMDFilterTest, TestSphereQueryForEachWithQueryBox) { + Query0<3>(&testSphereQueryForEachQueryBox<3>); + QueryMany<3>(&testSphereQueryForEachQueryBox<3>); + QueryAll<3>(&testSphereQueryForEachQueryBox<3>); +} + +TEST(PhTreeMMDFilterTest, TestAABBQuery) { + Query0<3>(&testAABBQuery<3>); + QueryManyAABB<3>(&testAABBQuery<3>); + QueryAll<3>(&testAABBQuery<3>); +} + +} // namespace phtree_multimap_d_test_filter diff --git a/test/phtree_multimap_d_test_unique_ptr_values.cc b/test/phtree_multimap_d_test_unique_ptr_values.cc new file mode 100644 index 00000000..5364804a --- /dev/null +++ b/test/phtree_multimap_d_test_unique_ptr_values.cc @@ -0,0 +1,388 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable::phtree; + +namespace phtree_multimap_d_test_unique_ptr_values { + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +static const double WORLD_MIN = -1000; +static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct IdObj { + IdObj() = default; + + explicit IdObj(const int i) : _i(i), data_{0} {}; + explicit IdObj(const size_t i) : _i(static_cast(i)), data_{0} {}; + + bool operator==(const IdObj& rhs) const noexcept { + return _i == rhs._i; + } + + int _i; + int data_; +}; + +using Id = std::unique_ptr; +} // namespace phtree_multimap_d_test_unique_ptr_values + +namespace std { +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_unique_ptr_values::Id& x) const { + return std::hash{}(x->_i); + } +}; +}; // namespace std +struct equal_to_content { + bool operator()( + const phtree_multimap_d_test_unique_ptr_values::Id& x1, + const phtree_multimap_d_test_unique_ptr_values::Id& x2) const { + return (*x1) == (*x2); + } +}; +struct less_content { + bool operator()( + const phtree_multimap_d_test_unique_ptr_values::Id& x1, + const phtree_multimap_d_test_unique_ptr_values::Id& x2) const { + return (*x1)._i < (*x2)._i; + } +}; + +namespace phtree_multimap_d_test_unique_ptr_values { + +template +using TestTree = PhTreeMultiMap< + DIM, + T, + ConverterIEEE, + b_plus_tree_hash_set, equal_to_content>>; +// using TestTree = PhTreeMultiMap, std::unordered_set, +// equal_to_content>>; using TestTree = PhTreeMultiMap, std::set>; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + 
auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.emplace_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(int N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + Id id2(new IdObj{i}); + // Id id3(new IdObj{i}); + // ASSERT_EQ(id2.get(), id3.get()); + // ASSERT_TRUE(id2 == id3); + // ASSERT_EQ(id2, id3); + if (i % 4 == 0) { + ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.emplace(p, new IdObj{i}).second); + } else if (i % 4 == 2) { + ASSERT_TRUE(tree.try_emplace(p, new IdObj{i}).second); + } else { + Id id = std::make_unique(i); + ASSERT_TRUE(tree.emplace(p, std::move(id)).second); + } + Id id = std::make_unique(i); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p, id))->_i); + ASSERT_EQ(i + 1u, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.try_emplace(p, std::make_unique(i)).second); + ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p, std::make_unique(i)))->_i); + 
ASSERT_EQ(i + 1u, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)->_i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + size_t expected_remaining = (N - i - 1) % NUM_DUPL + 1; + ASSERT_EQ(tree.count(p), expected_remaining); + ASSERT_EQ(i, (*tree.find(p, std::make_unique(i)))->_i); + if (i % 3 == 0) { + ASSERT_EQ(1u, tree.erase(p, std::make_unique(i))); + } else { + auto iter = tree.find(p, std::make_unique(i)); + ASSERT_EQ(1u, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1u, tree.size()); + + // try remove again + ASSERT_EQ(0u, tree.erase(p, std::make_unique(i))); + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1u, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMDTestUniquePtr, SmokeTestBasicOps) { + SmokeTestBasicOps<1>(10000); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(100); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], std::make_unique(i)).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + std::array deltas{0, 
1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + for (auto delta : deltas) { + int i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + ASSERT_EQ(1u, tree.relocate(pOld, pNew, std::make_unique(i))); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, std::make_unique(i))); + } + ASSERT_EQ(i, (*tree.find(pNew, std::make_unique(i)))->_i); + p = pNew; + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(42))); + + // Check that small tree works + tree.emplace(point0, std::make_unique(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, std::make_unique(1))); + ASSERT_EQ(tree.end(), tree.find(point0, std::make_unique(1))); + ASSERT_EQ(1, (*tree.find(point1, std::make_unique(1)))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, std::make_unique(1)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(1))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, std::make_unique(0)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(2))); + 
PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, std::make_unique(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + for (auto delta : deltas) { + size_t done = 0; + for (int i = 0; size_t(i) < N; ++i) { + auto pred = [&i](const Id& id) { return id->_i == i; }; + auto pOld = points[i]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + ASSERT_EQ(1u, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(i, (*tree.find(pNew, std::make_unique(i)))->_i); + ++done; + points[i] = pNew; + } + ASSERT_EQ(done, N); + PhTreeDebugHelper::CheckConsistency(tree); + } + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocateIfCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + auto TRUE = [](const Id&) { return true; }; + auto TWO = [](const Id& id) { return id->_i == 2; }; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TRUE)); + + // Check that small tree works + tree.emplace(point0, std::make_unique(1)); + ASSERT_EQ(1u, tree.relocate_if(point0, point1, TRUE)); + ASSERT_EQ(tree.end(), tree.find(point0)); + ASSERT_EQ(1, (*tree.find(point1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, std::make_unique(1)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, 
tree.relocate_if(point0, point1, TRUE)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TRUE)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, std::make_unique(0)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TWO)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, std::make_unique(0)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TWO)); + PhTreeDebugHelper::CheckConsistency(tree); +} + +} // namespace phtree_multimap_d_test_unique_ptr_values diff --git a/phtree/phtree_test.cc b/test/phtree_test.cc similarity index 70% rename from phtree/phtree_test.cc rename to test/phtree_test.cc index fe323c39..f471a52a 100644 --- a/phtree/phtree_test.cc +++ b/test/phtree_test.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_test { + template using TestPoint = PhPoint; @@ -57,6 +59,13 @@ static void reset_id_counters() { destruct_count_ = 0; } +static void print_id_counters() { + std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_ + << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_ + << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_ + << " d=" << destruct_count_ << std::endl; +} + struct Id { Id() : _i{0} { ++default_construct_count_; @@ -64,7 +73,7 @@ struct Id { explicit Id(const size_t i) : _i{static_cast(i)} { ++construct_count_; - }; + } Id(const Id& other) { ++copy_construct_count_; @@ -76,13 +85,18 @@ struct Id { _i = other._i; } - bool operator==(const Id& rhs) const { + Id& operator=(const Id& other) noexcept { 
++copy_assign_count_; - return _i == rhs._i; + _i = other._i; + return *this; } - - bool operator==(Id&& rhs) const { + Id& operator=(Id&& other) noexcept { ++move_assign_count_; + _i = other._i; + return *this; + } + + bool operator==(const Id& rhs) const { return _i == rhs._i; } @@ -90,8 +104,6 @@ struct Id { ++destruct_count_; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -110,7 +122,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)p1[i] - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -166,17 +178,19 @@ void SmokeTestBasicOps(size_t N) { ASSERT_EQ(tree.end(), tree.find(p)); Id id(i); - if (i % 2 == 0) { + if (i % 4 == 0) { ASSERT_TRUE(tree.emplace(p, i).second); - } else { + } else if (i % 4 == 1) { ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, i).second); } ASSERT_EQ(tree.count(p), 1); ASSERT_NE(tree.end(), tree.find(p)); ASSERT_EQ(id._i, tree.find(p)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try insert/emplace again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), 1); @@ -221,7 +235,9 @@ void SmokeTestBasicOps(size_t N) { ASSERT_TRUE(tree.empty()); PhTreeDebugHelper::CheckConsistency(tree); - ASSERT_EQ(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + // Normal construction and destruction should be symmetric. Move-construction is ignored. + ASSERT_GE(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + ASSERT_LE(construct_count_ + copy_construct_count_, destruct_count_); // The following assertions exist only as sanity checks and may need adjusting. // There is nothing fundamentally wrong if a change in the implementation violates // any of the following assertions, as long as performance/memory impact is observed. 
@@ -237,7 +253,10 @@ void SmokeTestBasicOps(size_t N) { // small node require a lot of copying/moving ASSERT_GE(construct_count_ * 3, move_construct_count_); } else { - ASSERT_GE(construct_count_ * 2, move_construct_count_); + if (construct_count_ * 15 < move_construct_count_) { + print_id_counters(); + } + ASSERT_GE(construct_count_ * 15, move_construct_count_); } } @@ -247,6 +266,7 @@ TEST(PhTreeTest, SmokeTestBasicOps) { SmokeTestBasicOps<6>(10000); SmokeTestBasicOps<10>(10000); SmokeTestBasicOps<20>(10000); + SmokeTestBasicOps<32>(1000); SmokeTestBasicOps<63>(100); } @@ -342,7 +362,7 @@ TEST(PhTreeTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -499,8 +519,8 @@ TEST(PhTreeTest, TestUpdateWithEmplace) { for (auto& p : points) { auto pOld = p; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -526,8 +546,8 @@ TEST(PhTreeTest, TestUpdateWithEmplaceHint) { int delta = deltas[d_n]; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -538,6 +558,147 @@ TEST(PhTreeTest, TestUpdateWithEmplaceHint) { ASSERT_EQ(N, tree.size()); tree.clear(); + + tree.emplace_hint(tree.end(), {11, 21, 31}, 421); + tree.emplace_hint(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); +} + +TEST(PhTreeTest, TestUpdateWithTryEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 
100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + int delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + auto iter = tree.find(pOld); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); + tree.try_emplace(iter, pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + if (delta != 0.0) { + ASSERT_EQ(0, tree.count(pOld)); + } + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + tree.try_emplace(tree.end(), {11, 21, 31}, 421); + tree.try_emplace(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); +} + +TEST(PhTreeTest, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if (delta > 0.0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + 
tree.emplace(points[0], 1); + tree.emplace(points[1], 2); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeTest, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + size_t done = 0; + auto pred = [](const Id& id) { return id._i % 2 == 0; }; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if ((delta > 0.0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); } TEST(PhTreeTest, TestEraseByIterator) { @@ -551,10 +712,13 @@ TEST(PhTreeTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; + if (i % 100 == 0 || 
tree.size() < 10) { + PhTreeDebugHelper::CheckConsistency(tree); + } } ASSERT_EQ(0, tree.erase(tree.end())); @@ -570,8 +734,11 @@ TEST(PhTreeTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); + if (i % 100 == 0 || tree.size() < 10) { + PhTreeDebugHelper::CheckConsistency(tree); + } } ASSERT_EQ(0, tree.erase(tree.end())); @@ -717,6 +884,32 @@ TEST(PhTreeTest, TestWindowQuery1) { ASSERT_EQ(N, n); } +TEST(PhTreeTest, TestWindowQuery1_WithFilter) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + struct Counter { + void operator()(TestPoint, Id& t) { + ++n_; + id_ = t; + } + Id id_{}; + size_t n_ = 0; + }; + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Counter callback{}; + FilterAABB filter(p, p, tree.converter()); + tree.for_each(callback, filter); + ASSERT_EQ(i, callback.id_._i); + ASSERT_EQ(1, callback.n_); + } +} + TEST(PhTreeTest, TestWindowQueryMany) { const dimension_t dim = 3; TestPoint min{-100, -100, -100}; @@ -746,7 +939,7 @@ TEST(PhTreeTest, TestWindowQueryManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (std::int64_t i = -120; i < 120; i++) { TestPoint min{i * 10, i * 9, i * 11}; TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; std::set referenceResult; @@ -780,7 +973,7 @@ TEST(PhTreeTest, TestWindowForEachManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (std::int64_t i = -120; i < 120; i++) { TestPoint min{i * 10, i * 9, i * 11}; TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; std::set referenceResult; @@ -812,6 +1005,25 @@ TEST(PhTreeTest, TestWindowForEachManyMoving) { ASSERT_GE(5000, nn); } +TEST(PhTreeTest, 
TestWindowForEachExact) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + size_t nn = 0; + for (size_t i = 0; i < N; i++) { + size_t n = 0; + tree.for_each({points[i], points[i]}, [&](TestPoint, Id&) { + ++n; + ++nn; + }); + ASSERT_EQ(1, n); + } + ASSERT_EQ(N, nn); +} + TEST(PhTreeTest, TestWindowQueryIterators) { size_t N = 1000; const dimension_t dim = 3; @@ -1030,3 +1242,143 @@ TEST(PhTreeTest, SmokeTestPoint1) { ASSERT_EQ(0, tree.size()); ASSERT_TRUE(tree.empty()); } + +template +void test_tree(TREE& tree) { + PhPoint<3> p{1, 2, 3}; + + // test various operations + tree.emplace(p, Id{2}); // already exists + Id id3{3}; + tree.insert(p, id3); // already exists + ASSERT_EQ(tree.size(), 1); + ASSERT_EQ(tree.find(p).second()._i, 1); + ASSERT_EQ(tree[p]._i, 1); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeTest, TestMoveConstruct) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{std::move(tree1)}; + test_tree(tree); +} + +TEST(PhTreeTest, TestMoveAssign) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{}; + tree = std::move(tree1); + test_tree(tree); +} + +size_t count_pre{0}; +size_t count_post{0}; +size_t count_query{0}; + +template +struct DebugConverterNoOp : public ConverterPointBase { + using BASE = 
ConverterPointBase; + using Point = typename BASE::KeyExternal; + using PointInternal = typename BASE::KeyInternal; + + constexpr const PointInternal& pre(const Point& point) const { + ++count_pre; + ++const_cast(count_pre_local); + return point; + } + + constexpr const Point& post(const PointInternal& point) const { + ++count_post; + ++const_cast(count_post_local); + return point; + } + + constexpr const PhBox& pre_query(const PhBox& box) const { + ++count_query; + ++const_cast(count_query_local); + return box; + } + + size_t count_pre_local{0}; + size_t count_post_local{0}; + size_t count_query_local{0}; +}; + +TEST(PhTreeTest, TestMoveAssignCustomConverter) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + auto converter = DebugConverterNoOp<3>(); + auto tree1 = PhTree<3, Id, DebugConverterNoOp<3>>(converter); + tree1.emplace(p, Id{1}); + ASSERT_GE(tree1.converter().count_pre_local, 1); + ASSERT_EQ(tree1.converter().count_pre_local, count_pre); + + PhTree<3, Id, DebugConverterNoOp<3>> tree{}; + tree = std::move(tree1); + // Assert that converter got moved (or copied?): + ASSERT_GE(tree.converter().count_pre_local, 1); + ASSERT_EQ(tree.converter().count_pre_local, count_pre); + + test_tree(tree); + ASSERT_GE(tree.converter().count_pre_local, 2); + ASSERT_EQ(tree.converter().count_pre_local, count_pre); +} + +TEST(PhTreeTest, TestMovableIterators) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterEvenId<3, Id> filter{}; + 
ASSERT_TRUE(std::is_move_constructible_v); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v()))>); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v()))>); +} + +} // namespace phtree_test diff --git a/phtree/phtree_test_const_values.cc b/test/phtree_test_const_values.cc similarity index 97% rename from phtree/phtree_test_const_values.cc rename to test/phtree_test_const_values.cc index 2fcb123e..64dd432d 100644 --- a/phtree/phtree_test_const_values.cc +++ b/test/phtree_test_const_values.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_test_const_values { + template using TestPoint = PhPoint; @@ -42,14 +44,12 @@ class IntRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -68,7 +68,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)p1[i] - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -276,7 +276,7 @@ TEST(PhTreeTestConst, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -409,8 +409,8 @@ TEST(PhTreeTestConst, TestUpdateWithEmplace) { for (auto& p : points) { auto pOld = p; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, 
tree.count(pOld)); @@ -590,7 +590,7 @@ TEST(PhTreeTestConst, TestWindowQueryManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (std::int64_t i = -120; i < 120; i++) { TestPoint min{i * 10, i * 9, i * 11}; TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; std::set referenceResult; @@ -699,3 +699,5 @@ TEST(PhTreeTestConst, TestKnnQuery) { ASSERT_EQ(Nq, n); } } + +} // namespace phtree_test_const_values diff --git a/test/phtree_test_issues.cc b/test/phtree_test_issues.cc new file mode 100644 index 00000000..de9c67f1 --- /dev/null +++ b/test/phtree_test_issues.cc @@ -0,0 +1,208 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include +#include +#include + +using namespace improbable::phtree; + +using namespace std; + +namespace phtree_test_issues { + +#if defined(__clang__) || defined(__GNUC__) + +void mem_usage(double& vm_usage, double& resident_set) { + vm_usage = 0.0; + resident_set = 0.0; + ifstream stat_stream("/proc/self/stat", ios_base::in); // read this process' stats from /proc + // placeholders for the stat fields that are read before vsize and rss + string pid, comm, state, ppid, pgrp, session, tty_nr; + string tpgid, flags, minflt, cminflt, majflt, cmajflt; + string utime, stime, cutime, cstime, priority, nice; + string O, itrealvalue, starttime; + unsigned long vsize; + long rss; + stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr >> tpgid >> flags >> + minflt >> cminflt >> majflt >> cmajflt >> utime >> stime >> cutime >> cstime >> priority >> + nice >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest + stat_stream.close(); + long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // KB per page; x86-64 may use 2MB pages + vm_usage = vsize / 1024.0; + resident_set = rss * page_size_kb; +} + +int get_resident_mem_kb() { + double vm, rss; + mem_usage(vm, rss); + return rss; +} + +void print_mem() { + double vm, rss; + mem_usage(vm, rss); + cout << " Virtual Memory: " << vm << " KB" << std::endl + << " Resident set size: " << rss << " KB" << endl; +} + +#elif defined(_MSC_VER) +int get_resident_mem_kb() { + return 0; +} + +void print_mem() { + double vm = 0, rss = 0; + // mem_usage(vm, rss); + cout << " Virtual Memory: " << vm << " KB" << std::endl + << " Resident set size: " << rss << " KB" << endl; +} +#endif + +auto start_timer() { + return std::chrono::steady_clock::now(); +} + +template +void end_timer(T start, const char* prefix) { + auto end = std::chrono::steady_clock::now(); + std::chrono::duration elapsed_seconds1 = end - start; + std::cout << "elapsed time " << prefix << " = " 
<< elapsed_seconds1.count() << " s" + << std::endl; +} + +// Disabled for cmake CI builds because it always fails +#if !defined(SKIP_TEST_MEMORY_LEAKS) +TEST(PhTreeTestIssues, TestIssue60) { + // auto tree = PhTreeMultiMapD<2, int>(); + auto tree = PhTreeMultiMapD<2, int, ConverterIEEE<2>, std::set>(); + std::vector> vecPos; + int dim = 1000; + int num = 1000; + + auto start1 = start_timer(); + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double)(rand() % dim), (double)(rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + end_timer(start1, "1"); + + // "warm up": relocate() will inevitably allocate a little bit of memory (new nodes etc). + // This warm up allocates this memory before we proceed to leak testing which ensures that the + // memory does not grow. + for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {(double)(rand() % dim), (double)(rand() % dim)}; + tree.relocate(p, newp, i); + p = newp; + } + } + + // Leak testing + print_mem(); + auto start2 = start_timer(); + auto mem_start_2 = get_resident_mem_kb(); + for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {(double)(rand() % dim), (double)(rand() % dim)}; + tree.relocate(p, newp, i); + p = newp; + } + } + end_timer(start2, "2"); + + auto mem_end_2 = get_resident_mem_kb(); + ASSERT_LT(abs(mem_end_2 - mem_start_2), 1); + print_mem(); +} +#endif + +// Disabled for cmake CI builds because it always fails +#if !defined(SKIP_TEST_MEMORY_LEAKS) +TEST(PhTreeTestIssues, TestIssue60_minimal) { + // auto tree = PhTreeMultiMapD<2, int>(); + auto tree = PhTreeMultiMapD<2, int, ConverterIEEE<2>, std::set>(); + std::vector> vecPos; + int dim = 1000; + int num = 1000; + + auto start1 = start_timer(); + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double)(rand() % dim), (double)(rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + end_timer(start1, 
"1"); + + // "warm up": relocate() will inevitably allocate a little bit of memory (new nodes etc). + // This warm up allocates this memory before we proceed to leak testing which ensures that the + // memory does not grow. + for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {(double)(rand() % dim), (double)(rand() % dim)}; + tree.relocate(p, newp, i); + p = newp; + } + } + + // Leak testing + print_mem(); + auto start2 = start_timer(); + auto mem_start_2 = get_resident_mem_kb(); + for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {p[0] + 1, p[1] + 1}; + tree.relocate(p, newp, i); + p = newp; + } + } + end_timer(start2, "2"); + + auto mem_end_2 = get_resident_mem_kb(); + ASSERT_LT(abs(mem_end_2 - mem_start_2), 1); + print_mem(); +} +#endif + +TEST(PhTreeTestIssues, TestIssue6_3_MAP) { + auto tree = PhTreeD<2, int>(); + std::vector> vecPos; + int dim = 10000; + + int num = 100000; + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double)(rand() % dim), (double)(rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + + print_mem(); + for (int i = 0; i < num; ++i) { + PhPointD<2> p = vecPos[i]; + PhPointD<2> newp = {(double)(rand() % dim), (double)(rand() % dim)}; + tree.relocate(p, newp); + } + print_mem(); +} + +} // namespace phtree_test_issues diff --git a/phtree/phtree_test_ptr_values.cc b/test/phtree_test_ptr_values.cc similarity index 97% rename from phtree/phtree_test_ptr_values.cc rename to test/phtree_test_ptr_values.cc index a120ad1b..6368b477 100644 --- a/phtree/phtree_test_ptr_values.cc +++ b/test/phtree_test_ptr_values.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_test_ptr_values { + template using TestPoint = PhPoint; @@ -44,12 +46,10 @@ struct Id { explicit Id(const size_t i) : _i((int)i){}; - bool operator==(Id& rhs) 
const { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -68,7 +68,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)p1[i] - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -286,7 +286,7 @@ TEST(PhTreeTestPtr, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id* id2 = new Id(-i); + Id* id2 = new Id(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first->_i); ASSERT_EQ(tree.count(p), 1); @@ -296,11 +296,11 @@ TEST(PhTreeTestPtr, TestEmplace) { tree.emplace(p, id2).first->_i++; ASSERT_EQ(i + 1, tree.emplace(p, id).first->_i); tree.emplace(p, id2).first = id2; - ASSERT_EQ(-i, tree.emplace(p, id).first->_i); + ASSERT_EQ(i + N, tree.emplace(p, id).first->_i); // Replace it with previous value tree.emplace(p, id2).first = id; ASSERT_EQ(i + 1, tree.emplace(p, id).first->_i); - id->_i = i; + id->_i = (int)i; ASSERT_EQ(i, tree.emplace(p, id).first->_i); delete id2; } @@ -334,13 +334,13 @@ TEST(PhTreeTestPtr, TestSquareBrackets) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); Id* id = new Id(i); - Id* id2 = new Id(-i); + Id* id2 = new Id(i + N); ASSERT_EQ(nullptr, tree[p]); tree[p] = id2; - ASSERT_EQ(-i, tree[p]->_i); + ASSERT_EQ(i + N, tree[p]->_i); ASSERT_EQ(tree.count(p), 1); if (i % 2 == 0) { - tree[p]->_i = i; + tree[p]->_i = (int)i; ASSERT_EQ(i, id2->_i); delete id; } else { @@ -666,9 +666,9 @@ TEST(PhTreeTestPtr, TestWindowQueryManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { - TestPoint min{i * 10, i * 9, i * 11}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + for (std::int64_t i = -120; i < 120; i++) { + TestPoint min{i * 10l, i * 9l, i * 11l}; + TestPoint max{i * 
10l + query_length, i * 9l + query_length, i * 11l + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -782,3 +782,5 @@ TEST(PhTreeTestPtr, TestKnnQuery) { } depopulate(values); } + +} // namespace phtree_test_ptr_values diff --git a/test/phtree_test_unique_ptr_values.cc b/test/phtree_test_unique_ptr_values.cc new file mode 100644 index 00000000..1be2bc0a --- /dev/null +++ b/test/phtree_test_unique_ptr_values.cc @@ -0,0 +1,301 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +namespace phtree_test_unique_ptr_values { + +template +using TestPoint = PhPoint; + +template +using TestTree = PhTree; + +class IntRng { + public: + IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} + + int next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_int_distribution rnd; +}; + +struct IdObj { + IdObj() = default; + + explicit IdObj(const size_t i) : _i(static_cast(i)){}; + + bool operator==(const IdObj& rhs) const { + return _i == rhs._i; + } + + IdObj& operator=(IdObj const& rhs) = default; + + int _i; +}; + +using Id = std::unique_ptr; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = (double)p1[i] - (double)p2[i]; // convert each coordinate before subtracting + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + IntRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(int N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + 
PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); + } else { + Id id = std::make_unique(i); + ASSERT_TRUE(tree.emplace(p, std::move(id)).second); + } + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1u, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1u, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)->_i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(1u, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1u, tree.size()); + + // try remove again + ASSERT_EQ(0u, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1u, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeTestUniquePtr, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(100); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + 
ASSERT_TRUE(tree.emplace(points[i], std::make_unique(i)).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeTestUniquePtr, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + int i = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if (delta > 0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(i, (*tree.find(pNew))->_i); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], std::make_unique(1)); + ASSERT_EQ(1u, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(1, (*tree.find(points[1]))->_i); + ASSERT_EQ(1u, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(points[0], std::make_unique(1)); + tree.emplace(points[1], std::make_unique(2)); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeTestUniquePtr, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + int i = 0; + size_t done = 0; + auto pred = [](const Id& id) { return id->_i % 2 == 0; }; + for (auto& p : points) { + auto pOld = 
p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if ((delta > 0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(i, (*tree.find(pNew))->_i); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], std::make_unique(1)); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(1, (*tree.find(points[1]))->_i); + ASSERT_EQ(1u, tree.size()); +} + +} // namespace phtree_test_unique_ptr_values diff --git a/tools/bazel b/tools/bazel deleted file mode 100755 index 03324532..00000000 --- a/tools/bazel +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env bash - -TOOLS_DIR="$(dirname "$0")" - -source "${TOOLS_DIR}"/../ci/includes/os.sh -source "${TOOLS_DIR}"/../ci/includes/bazel.sh - -# All information required for the script to select or, if necessary, install bazel is contained -# in this code block. -# If a higher version of bazel is required, update `REQUIRED_BAZEL_VERSION` and the -# `REQUIRED_BAZEL_SHA256` values for each platform. 
-REQUIRED_BAZEL_VERSION="$(getBazelVersion)" -BAZEL_INSTALLATION_DIR="${HOME}/.bazel_installations/${REQUIRED_BAZEL_VERSION}" -if isLinux; then - DOWNLOAD_CMD="wget -q --no-clobber -O bazel" - BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-linux-x86_64" - - if which clang-10 1>/dev/null; then - # We follow the symlink of clang-10 here to avoid a bug with the LLVM package when combined with -no-canonical-prefixes. - export CC="$(readlink -f "$(which clang-10)")" - else - echo -e "\033[0;33mWarning: You don't seem to have clang-9 correctly installed. Please check README.md to ensure your compiler is set up correctly. Continuing with whatever compiler bazel detects, your mileage might vary.\033[0m" - fi -elif isMacOS; then - DOWNLOAD_CMD="wget -q --no-clobber -O bazel" - BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-darwin-x86_64" -else - DOWNLOAD_CMD="curl -L -s -o bazel.exe" - # Windows does not have an installer but retrieves the executable directly. - BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-windows-x86_64.exe" - - export BAZEL_VC="C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC" - if [[ ! -d "$BAZEL_VC" ]]; then - export BAZEL_VC="C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC" - fi - if [[ ! -d "$BAZEL_VC" ]]; then - echo -e "\033[0;33mWarning: You don't seem to have Visual Studio 2019 installed correctly. Continuing with whatever compiler bazel detects, your mileage might vary.\033[0m" - fi -fi - -BAZEL_TARGET_PATH="${BAZEL_INSTALLATION_DIR}/bin/bazel" - -# Check if correct version is already installed. -if [[ -f "${BAZEL_TARGET_PATH}" ]]; then - if [[ ! 
-x "${BAZEL_TARGET_PATH}" ]]; then - echo "ERROR: Bazel executable at '${BAZEL_TARGET_PATH}' does not have execute permission" - stat "${BAZEL_TARGET_PATH}" - exit 1 - fi - BAZEL_SUBCOMMAND="$1" - shift - exec -a "$0" "${BAZEL_TARGET_PATH}" "$BAZEL_SUBCOMMAND" "$@" -fi - -cat << EOM -================================================= -Bazel version ${REQUIRED_BAZEL_VERSION} is not -installed under ~/.bazel_installations - -Installing bazel ${REQUIRED_BAZEL_VERSION} now... -================================================= -EOM - -# Create root directory if needed. -if [[ ! -d "${BAZEL_INSTALLATION_DIR}" ]]; then - echo "Installation directory created." - mkdir -p "${BAZEL_INSTALLATION_DIR}" -fi - -# Install correct bazel version. -# If we don't have a local Bazel install at this point we need to retrieve the right version from GitHub. -mkdir -p "${BAZEL_INSTALLATION_DIR}/bin/tmp" -pushd "${BAZEL_INSTALLATION_DIR}/bin/tmp" -rm bazel 2>/dev/null || true # Remove bazel binary if already present in tmp dir - indicates previous failed download. -echo "Starting download of bazel ${REQUIRED_BAZEL_VERSION}..." -${DOWNLOAD_CMD} "https://github.com/bazelbuild/bazel/releases/download/${REQUIRED_BAZEL_VERSION}/${BAZEL_EXE}" -echo "Download finished." -# Mark downloaded file executable and move out of tmp directory. -chmod a+x "bazel" -mv bazel .. -popd - -echo "Executing downloaded bazel..." -BAZEL_SUBCOMMAND="$1" -shift -exec -a "$0" "${BAZEL_TARGET_PATH}" "$BAZEL_SUBCOMMAND" "$@" diff --git a/tools/build_rules/http.bzl b/tools/build_rules/http.bzl deleted file mode 100644 index 26e5ba2f..00000000 --- a/tools/build_rules/http.bzl +++ /dev/null @@ -1,463 +0,0 @@ -# Copyright 2016 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Rules for downloading files and archives over HTTP. -### Setup -To use these rules, load them in your `WORKSPACE` file as follows: -```python -load( - "@bazel_tools//tools/build_defs/repo:http.bzl", - "http_archive", - "http_file", - "http_jar", -) -``` -These rules are improved versions of the native http rules and will eventually -replace the native rules. -""" - -load( - ":utils.bzl", - "patch", - "read_netrc", - "update_attrs", - "use_netrc", - "workspace_and_buildfile", -) - -# Shared between http_jar, http_file and http_archive. -_AUTH_PATTERN_DOC = """An optional dict mapping host names to custom authorization patterns. -If a URL's host name is present in this dict the value will be used as a pattern when -generating the authorization header for the http request. This enables the use of custom -authorization schemes used in a lot of common cloud storage providers. -The pattern currently supports 2 tokens: <login> and -<password>, which are replaced with their equivalent value -in the netrc file for the same host name. After formatting, the result is set -as the value for the Authorization field of the HTTP request. -Example attribute and netrc for a http download to an oauth2 enabled API using a bearer token: -
-auth_patterns = {
-    "storage.cloudprovider.com": "Bearer <password>"
-}
-
-netrc: -
-machine storage.cloudprovider.com
-        password RANDOM-TOKEN
-
-The final HTTP request would have the following header: -
-Authorization: Bearer RANDOM-TOKEN
-
-""" - -def _get_auth(ctx, urls): - """Given the list of URLs obtain the correct auth dict.""" - if ctx.attr.netrc: - netrc = read_netrc(ctx, ctx.attr.netrc) - return use_netrc(netrc, urls, ctx.attr.auth_patterns) - - if "HOME" in ctx.os.environ and not ctx.os.name.startswith("windows"): - netrcfile = "%s/.netrc" % (ctx.os.environ["HOME"]) - if ctx.execute(["test", "-f", netrcfile]).return_code == 0: - netrc = read_netrc(ctx, netrcfile) - return use_netrc(netrc, urls, ctx.attr.auth_patterns) - - if "USERPROFILE" in ctx.os.environ and ctx.os.name.startswith("windows"): - netrcfile = "%s/.netrc" % (ctx.os.environ["USERPROFILE"]) - if ctx.path(netrcfile).exists: - netrc = read_netrc(ctx, netrcfile) - return use_netrc(netrc, urls, ctx.attr.auth_patterns) - - return {} - -def _http_archive_impl(ctx): - """Implementation of the http_archive rule.""" - if not ctx.attr.url and not ctx.attr.urls: - fail("At least one of url and urls must be provided") - if ctx.attr.build_file and ctx.attr.build_file_content: - fail("Only one of build_file and build_file_content can be provided.") - - all_urls = [] - if ctx.attr.urls: - all_urls = ctx.attr.urls - if ctx.attr.url: - all_urls = [ctx.attr.url] + all_urls - - auth = _get_auth(ctx, all_urls) - - download_info = ctx.download_and_extract( - all_urls, - "", - ctx.attr.sha256, - ctx.attr.type, - ctx.attr.strip_prefix, - canonical_id = ctx.attr.canonical_id, - auth = auth, - ) - workspace_and_buildfile(ctx) - patch(ctx) - - return update_attrs(ctx.attr, _http_archive_attrs.keys(), {"sha256": download_info.sha256}) - -_HTTP_FILE_BUILD = """ -package(default_visibility = ["//visibility:public"]) -filegroup( - name = "file", - srcs = ["{}"], -) -""" - -def _http_file_impl(ctx): - """Implementation of the http_file rule.""" - repo_root = ctx.path(".") - forbidden_files = [ - repo_root, - ctx.path("WORKSPACE"), - ctx.path("BUILD"), - ctx.path("BUILD.bazel"), - ctx.path("file/BUILD"), - ctx.path("file/BUILD.bazel"), - ] - 
downloaded_file_path = ctx.attr.downloaded_file_path - download_path = ctx.path("file/" + downloaded_file_path) - if download_path in forbidden_files or not str(download_path).startswith(str(repo_root)): - fail("'%s' cannot be used as downloaded_file_path in http_file" % ctx.attr.downloaded_file_path) - auth = _get_auth(ctx, ctx.attr.urls) - download_info = ctx.download( - ctx.attr.urls, - "file/" + downloaded_file_path, - ctx.attr.sha256, - ctx.attr.executable, - canonical_id = ctx.attr.canonical_id, - auth = auth, - ) - ctx.file("WORKSPACE", "workspace(name = \"{name}\")".format(name = ctx.name)) - ctx.file("file/BUILD", _HTTP_FILE_BUILD.format(downloaded_file_path)) - - return update_attrs(ctx.attr, _http_file_attrs.keys(), {"sha256": download_info.sha256}) - -_HTTP_JAR_BUILD = """ -load("@rules_java//java:defs.bzl", "java_import") -package(default_visibility = ["//visibility:public"]) -java_import( - name = 'jar', - jars = ['downloaded.jar'], - visibility = ['//visibility:public'], -) -filegroup( - name = 'file', - srcs = ['downloaded.jar'], - visibility = ['//visibility:public'], -) -""" - -def _http_jar_impl(ctx): - """Implementation of the http_jar rule.""" - all_urls = [] - if ctx.attr.urls: - all_urls = ctx.attr.urls - if ctx.attr.url: - all_urls = [ctx.attr.url] + all_urls - auth = _get_auth(ctx, all_urls) - download_info = ctx.download( - all_urls, - "jar/downloaded.jar", - ctx.attr.sha256, - canonical_id = ctx.attr.canonical_id, - auth = auth, - ) - ctx.file("WORKSPACE", "workspace(name = \"{name}\")".format(name = ctx.name)) - ctx.file("jar/BUILD", _HTTP_JAR_BUILD) - return update_attrs(ctx.attr, _http_jar_attrs.keys(), {"sha256": download_info.sha256}) - -_http_archive_attrs = { - "url": attr.string( - doc = - """A URL to a file that will be made available to Bazel. -This must be a file, http or https URL. Redirections are followed. -Authentication is not supported. -This parameter is to simplify the transition from the native http_archive -rule. 
More flexibility can be achieved by the urls parameter that allows -to specify alternative URLs to fetch from. -""", - ), - "urls": attr.string_list( - doc = - """A list of URLs to a file that will be made available to Bazel. -Each entry must be a file, http or https URL. Redirections are followed. -Authentication is not supported.""", - ), - "sha256": attr.string( - doc = """The expected SHA-256 of the file downloaded. -This must match the SHA-256 of the file downloaded. _It is a security risk -to omit the SHA-256 as remote files can change._ At best omitting this -field will make your build non-hermetic. It is optional to make development -easier but should be set before shipping.""", - ), - "netrc": attr.string( - doc = "Location of the .netrc file to use for authentication", - ), - "auth_patterns": attr.string_dict( - doc = _AUTH_PATTERN_DOC, - ), - "canonical_id": attr.string( - doc = """A canonical id of the archive downloaded. -If specified and non-empty, bazel will not take the archive from cache, -unless it was added to the cache by a request with the same canonical id. -""", - ), - "strip_prefix": attr.string( - doc = """A directory prefix to strip from the extracted files. -Many archives contain a top-level directory that contains all of the useful -files in archive. Instead of needing to specify this prefix over and over -in the `build_file`, this field can be used to strip it from all of the -extracted files. -For example, suppose you are using `foo-lib-latest.zip`, which contains the -directory `foo-lib-1.2.3/` under which there is a `WORKSPACE` file and are -`src/`, `lib/`, and `test/` directories that contain the actual code you -wish to build. Specify `strip_prefix = "foo-lib-1.2.3"` to use the -`foo-lib-1.2.3` directory as your top-level directory. -Note that if there are files outside of this directory, they will be -discarded and inaccessible (e.g., a top-level license file). 
This includes -files/directories that start with the prefix but are not in the directory -(e.g., `foo-lib-1.2.3.release-notes`). If the specified prefix does not -match a directory in the archive, Bazel will return an error.""", - ), - "type": attr.string( - doc = """The archive type of the downloaded file. -By default, the archive type is determined from the file extension of the -URL. If the file has no extension, you can explicitly specify one of the -following: `"zip"`, `"jar"`, `"war"`, `"tar"`, `"tar.gz"`, `"tgz"`, -`"tar.xz"`, or `tar.bz2`.""", - ), - "patches": attr.label_list( - default = [], - doc = - "A list of files that are to be applied as patches after " + - "extracting the archive. By default, it uses the Bazel-native patch implementation " + - "which doesn't support fuzz match and binary patch, but Bazel will fall back to use " + - "patch command line tool if `patch_tool` attribute is specified or there are " + - "arguments other than `-p` in `patch_args` attribute.", - ), - "patch_tool": attr.string( - default = "", - doc = "The patch(1) utility to use. If this is specified, Bazel will use the specifed " + - "patch tool instead of the Bazel-native patch implementation.", - ), - "patch_args": attr.string_list( - default = ["-p0"], - doc = - "The arguments given to the patch tool. Defaults to -p0, " + - "however -p1 will usually be needed for patches generated by " + - "git. If multiple -p arguments are specified, the last one will take effect." + - "If arguments other than -p are specified, Bazel will fall back to use patch " + - "command line tool instead of the Bazel-native patch implementation. 
When falling " + - "back to patch command line tool and patch_tool attribute is not specified, " + - "`patch` will be used.", - ), - "patch_cmds": attr.string_list( - default = [], - doc = "Sequence of Bash commands to be applied on Linux/Macos after patches are applied.", - ), - "patch_cmds_win": attr.string_list( - default = [], - doc = "Sequence of Powershell commands to be applied on Windows after patches are " + - "applied. If this attribute is not set, patch_cmds will be executed on Windows, " + - "which requires Bash binary to exist.", - ), - "build_file": attr.label( - allow_single_file = True, - doc = - "The file to use as the BUILD file for this repository." + - "This attribute is an absolute label (use '@//' for the main " + - "repo). The file does not need to be named BUILD, but can " + - "be (something like BUILD.new-repo-name may work well for " + - "distinguishing it from the repository's actual BUILD files. " + - "Either build_file or build_file_content can be specified, but " + - "not both.", - ), - "build_file_content": attr.string( - doc = - "The content for the BUILD file for this repository. " + - "Either build_file or build_file_content can be specified, but " + - "not both.", - ), - "workspace_file": attr.label( - doc = - "The file to use as the `WORKSPACE` file for this repository. " + - "Either `workspace_file` or `workspace_file_content` can be " + - "specified, or neither, but not both.", - ), - "workspace_file_content": attr.string( - doc = - "The content for the WORKSPACE file for this repository. " + - "Either `workspace_file` or `workspace_file_content` can be " + - "specified, or neither, but not both.", - ), -} - -http_archive = repository_rule( - implementation = _http_archive_impl, - attrs = _http_archive_attrs, - doc = - """Downloads a Bazel repository as a compressed archive file, decompresses it, -and makes its targets available for binding. 
-It supports the following file extensions: `"zip"`, `"jar"`, `"war"`, `"tar"`, -`"tar.gz"`, `"tgz"`, `"tar.xz"`, and `tar.bz2`. -Examples: - Suppose the current repository contains the source code for a chat program, - rooted at the directory `~/chat-app`. It needs to depend on an SSL library - which is available from http://example.com/openssl.zip. This `.zip` file - contains the following directory structure: - ``` - WORKSPACE - src/ - openssl.cc - openssl.h - ``` - In the local repository, the user creates a `openssl.BUILD` file which - contains the following target definition: - ```python - cc_library( - name = "openssl-lib", - srcs = ["src/openssl.cc"], - hdrs = ["src/openssl.h"], - ) - ``` - Targets in the `~/chat-app` repository can depend on this target if the - following lines are added to `~/chat-app/WORKSPACE`: - ```python - load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - http_archive( - name = "my_ssl", - urls = ["http://example.com/openssl.zip"], - sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", - build_file = "@//:openssl.BUILD", - ) - ``` - Then targets would specify `@my_ssl//:openssl-lib` as a dependency. -""", -) - -_http_file_attrs = { - "executable": attr.bool( - doc = "If the downloaded file should be made executable.", - ), - "downloaded_file_path": attr.string( - default = "downloaded", - doc = "Path assigned to the file downloaded", - ), - "sha256": attr.string( - doc = """The expected SHA-256 of the file downloaded. -This must match the SHA-256 of the file downloaded. _It is a security risk -to omit the SHA-256 as remote files can change._ At best omitting this -field will make your build non-hermetic. It is optional to make development -easier but should be set before shipping.""", - ), - "canonical_id": attr.string( - doc = """A canonical id of the archive downloaded. 
-If specified and non-empty, bazel will not take the archive from cache, -unless it was added to the cache by a request with the same canonical id. -""", - ), - "urls": attr.string_list( - mandatory = True, - doc = """A list of URLs to a file that will be made available to Bazel. -Each entry must be a file, http or https URL. Redirections are followed. -Authentication is not supported.""", - ), - "netrc": attr.string( - doc = "Location of the .netrc file to use for authentication", - ), - "auth_patterns": attr.string_dict( - doc = _AUTH_PATTERN_DOC, - ), -} - -http_file = repository_rule( - implementation = _http_file_impl, - attrs = _http_file_attrs, - doc = - """Downloads a file from a URL and makes it available to be used as a file -group. -Examples: - Suppose you need to have a debian package for your custom rules. This package - is available from http://example.com/package.deb. Then you can add to your - WORKSPACE file: - ```python - load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") - http_file( - name = "my_deb", - urls = ["http://example.com/package.deb"], - sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", - ) - ``` - Targets would specify `@my_deb//file` as a dependency to depend on this file. -""", -) - -_http_jar_attrs = { - "sha256": attr.string( - doc = "The expected SHA-256 of the file downloaded.", - ), - "canonical_id": attr.string( - doc = """A canonical id of the archive downloaded. -If specified and non-empty, bazel will not take the archive from cache, -unless it was added to the cache by a request with the same canonical id. -""", - ), - "url": attr.string( - doc = - "The URL to fetch the jar from. It must end in `.jar`.", - ), - "urls": attr.string_list( - doc = - "A list of URLS the jar can be fetched from. 
They have to end " + - "in `.jar`.", - ), - "netrc": attr.string( - doc = "Location of the .netrc file to use for authentication", - ), - "auth_patterns": attr.string_dict( - doc = _AUTH_PATTERN_DOC, - ), -} - -http_jar = repository_rule( - implementation = _http_jar_impl, - attrs = _http_jar_attrs, - doc = - """Downloads a jar from a URL and makes it available as java_import -Downloaded files must have a .jar extension. -Examples: - Suppose the current repository contains the source code for a chat program, rooted at the - directory `~/chat-app`. It needs to depend on an SSL library which is available from - `http://example.com/openssl-0.2.jar`. - Targets in the `~/chat-app` repository can depend on this target if the following lines are - added to `~/chat-app/WORKSPACE`: - ```python - load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_jar") - http_jar( - name = "my_ssl", - url = "http://example.com/openssl-0.2.jar", - sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", - ) - ``` - Targets would specify @my_ssl//jar as a dependency to depend on this jar. - You may also reference files on the current system (localhost) by using "file:///path/to/file" - if you are on Unix-based systems. If you're on Windows, use "file:///c:/path/to/file". In both - examples, note the three slashes (`/`) -- the first two slashes belong to `file://` and the third - one belongs to the absolute path to the file. -""", -) diff --git a/tools/build_rules/utils.bzl b/tools/build_rules/utils.bzl deleted file mode 100644 index b2a70051..00000000 --- a/tools/build_rules/utils.bzl +++ /dev/null @@ -1,322 +0,0 @@ -# Copyright 2018 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utils for manipulating external repositories, once fetched. -### Setup -These utilities are intended to be used by other repository rules. They -can be loaded as follows. -```python -load( - "@bazel_tools//tools/build_defs/repo:utils.bzl", - "workspace_and_buildfile", - "patch", - "update_attrs", -) -``` -""" - -def workspace_and_buildfile(ctx): - """Utility function for writing WORKSPACE and, if requested, a BUILD file. - This rule is intended to be used in the implementation function of a - repository rule. - It assumes the parameters `name`, `build_file`, `build_file_content`, - `workspace_file`, and `workspace_file_content` to be - present in `ctx.attr`; the latter four possibly with value None. - Args: - ctx: The repository context of the repository rule calling this utility - function. 
- """ - if ctx.attr.build_file and ctx.attr.build_file_content: - ctx.fail("Only one of build_file and build_file_content can be provided.") - - if ctx.attr.workspace_file and ctx.attr.workspace_file_content: - ctx.fail("Only one of workspace_file and workspace_file_content can be provided.") - - if ctx.attr.workspace_file: - ctx.file("WORKSPACE", ctx.read(ctx.attr.workspace_file)) - elif ctx.attr.workspace_file_content: - ctx.file("WORKSPACE", ctx.attr.workspace_file_content) - else: - ctx.file("WORKSPACE", "workspace(name = \"{name}\")\n".format(name = ctx.name)) - - if ctx.attr.build_file: - ctx.file("BUILD.bazel", ctx.read(ctx.attr.build_file)) - elif ctx.attr.build_file_content: - ctx.file("BUILD.bazel", ctx.attr.build_file_content) - -def _is_windows(ctx): - return ctx.os.name.lower().find("windows") != -1 - -def _use_native_patch(patch_args): - """If patch_args only contains -p options, we can use the native patch implementation.""" - for arg in patch_args: - if not arg.startswith("-p"): - return False - return True - -def patch(ctx, patches = None, patch_cmds = None, patch_cmds_win = None, patch_tool = None, patch_args = None): - """Implementation of patching an already extracted repository. - This rule is intended to be used in the implementation function of - a repository rule. If the parameters `patches`, `patch_tool`, - `patch_args`, `patch_cmds` and `patch_cmds_win` are not specified - then they are taken from `ctx.attr`. - Args: - ctx: The repository context of the repository rule calling this utility - function. - patches: The patch files to apply. List of strings, Labels, or paths. - patch_cmds: Bash commands to run for patching, passed one at a - time to bash -c. List of strings - patch_cmds_win: Powershell commands to run for patching, passed - one at a time to powershell /c. List of strings. If the - boolean value of this parameter is false, patch_cmds will be - used and this parameter will be ignored. 
- patch_tool: Path of the patch tool to execute for applying - patches. String. - patch_args: Arguments to pass to the patch tool. List of strings. - """ - bash_exe = ctx.os.environ["BAZEL_SH"] if "BAZEL_SH" in ctx.os.environ else "bash" - powershell_exe = ctx.os.environ["BAZEL_POWERSHELL"] if "BAZEL_POWERSHELL" in ctx.os.environ else "powershell.exe" - - if patches == None and hasattr(ctx.attr, "patches"): - patches = ctx.attr.patches - if patches == None: - patches = [] - - if patch_cmds == None and hasattr(ctx.attr, "patch_cmds"): - patch_cmds = ctx.attr.patch_cmds - if patch_cmds == None: - patch_cmds = [] - - if patch_cmds_win == None and hasattr(ctx.attr, "patch_cmds_win"): - patch_cmds_win = ctx.attr.patch_cmds_win - if patch_cmds_win == None: - patch_cmds_win = [] - - if patch_tool == None and hasattr(ctx.attr, "patch_tool"): - patch_tool = ctx.attr.patch_tool - if not patch_tool: - patch_tool = "patch" - native_patch = True - else: - native_patch = False - - if patch_args == None and hasattr(ctx.attr, "patch_args"): - patch_args = ctx.attr.patch_args - if patch_args == None: - patch_args = [] - - if len(patches) > 0 or len(patch_cmds) > 0: - ctx.report_progress("Patching repository") - - if native_patch and _use_native_patch(patch_args): - if patch_args: - strip = int(patch_args[-1][2:]) - else: - strip = 0 - for patchfile in patches: - ctx.patch(patchfile, strip) - else: - for patchfile in patches: - command = "{patchtool} {patch_args} < {patchfile}".format( - patchtool = patch_tool, - patchfile = ctx.path(patchfile), - patch_args = " ".join([ - "'%s'" % arg - for arg in patch_args - ]), - ) - st = ctx.execute([bash_exe, "-c", command]) - if st.return_code: - fail("Error applying patch %s:\n%s%s" % - (str(patchfile), st.stderr, st.stdout)) - - if _is_windows(ctx) and patch_cmds_win: - for cmd in patch_cmds_win: - st = ctx.execute([powershell_exe, "/c", cmd]) - if st.return_code: - fail("Error applying patch command %s:\n%s%s" % - (cmd, st.stdout, 
st.stderr)) - else: - for cmd in patch_cmds: - st = ctx.execute([bash_exe, "-c", cmd]) - if st.return_code: - fail("Error applying patch command %s:\n%s%s" % - (cmd, st.stdout, st.stderr)) - -def update_attrs(orig, keys, override): - """Utility function for altering and adding the specified attributes to a particular repository rule invocation. - This is used to make a rule reproducible. - Args: - orig: dict of actually set attributes (either explicitly or implicitly) - by a particular rule invocation - keys: complete set of attributes defined on this rule - override: dict of attributes to override or add to orig - Returns: - dict of attributes with the keys from override inserted/updated - """ - result = {} - for key in keys: - if getattr(orig, key) != None: - result[key] = getattr(orig, key) - result["name"] = orig.name - result.update(override) - return result - -def maybe(repo_rule, name, **kwargs): - """Utility function for only adding a repository if it's not already present. - This is to implement safe repositories.bzl macro documented in - https://docs.bazel.build/versions/master/skylark/deploying.html#dependencies. - Args: - repo_rule: repository rule function. - name: name of the repository to create. - **kwargs: remaining arguments that are passed to the repo_rule function. - Returns: - Nothing, defines the repository when needed as a side-effect. - """ - if not native.existing_rule(name): - repo_rule(name = name, **kwargs) - -def read_netrc(ctx, filename): - """Utility function to parse at least a basic .netrc file. - Args: - ctx: The repository context of the repository rule calling this utility - function. - filename: the name of the .netrc file to read - Returns: - dict mapping a machine names to a dict with the information provided - about them - """ - contents = ctx.read(filename) - - # Parse the file. 
This is mainly a token-based update of a simple state - # machine, but we need to keep the line structure to correctly determine - # the end of a `macdef` command. - netrc = {} - currentmachinename = None - currentmachine = {} - macdef = None - currentmacro = "" - cmd = None - for line in contents.splitlines(): - if line.startswith("#"): - # Comments start with #. Ignore these lines. - continue - elif macdef: - # as we're in a macro, just determine if we reached the end. - if line: - currentmacro += line + "\n" - else: - # reached end of macro, add it - currentmachine[macdef] = currentmacro - macdef = None - currentmacro = "" - else: - # Essentially line.split(None) which starlark does not support. - tokens = [ - w.strip() - for w in line.split(" ") - if len(w.strip()) > 0 - ] - for token in tokens: - if cmd: - # we have a command that expects another argument - if cmd == "machine": - # a new machine definition was provided, so save the - # old one, if present - if not currentmachinename == None: - netrc[currentmachinename] = currentmachine - currentmachine = {} - currentmachinename = token - elif cmd == "macdef": - macdef = "macdef %s" % (token,) - # a new macro definition; the documentation says - # "its contents begin with the next .netrc line [...]", - # so should there really be tokens left in the current - # line, they're not part of the macro. - - else: - currentmachine[cmd] = token - cmd = None - elif token in [ - "machine", - "login", - "password", - "account", - "macdef", - ]: - # command takes one argument - cmd = token - elif token == "default": - # defines the default machine; again, store old machine - if not currentmachinename == None: - netrc[currentmachinename] = currentmachine - - # We use the empty string for the default machine, as that - # can never be a valid hostname ("default" could be, in the - # default search domain). 
- currentmachinename = "" - currentmachine = {} - else: - fail("Unexpected token '%s' while reading %s" % - (token, filename)) - if not currentmachinename == None: - netrc[currentmachinename] = currentmachine - return netrc - -def use_netrc(netrc, urls, patterns): - """Compute an auth dict from a parsed netrc file and a list of URLs. - Args: - netrc: a netrc file already parsed to a dict, e.g., as obtained from - read_netrc - urls: a list of URLs. - patterns: optional dict of url to authorization patterns - Returns: - dict suitable as auth argument for ctx.download; more precisely, the dict - will map all URLs where the netrc file provides login and password to a - dict containing the corresponding login, password and optional authorization pattern, - as well as the mapping of "type" to "basic" or "pattern". - """ - auth = {} - for url in urls: - schemerest = url.split("://", 1) - if len(schemerest) < 2: - continue - if not (schemerest[0] in ["http", "https"]): - # For other protocols, bazel currently does not support - # authentication. So ignore them. 
- continue - host = schemerest[1].split("/")[0].split(":")[0] - if not host in netrc: - continue - authforhost = netrc[host] - if host in patterns: - auth_dict = { - "type": "pattern", - "pattern": patterns[host], - } - - if "login" in authforhost: - auth_dict["login"] = authforhost["login"] - - if "password" in authforhost: - auth_dict["password"] = authforhost["password"] - - auth[url] = auth_dict - elif "login" in authforhost and "password" in authforhost: - auth[url] = { - "type": "basic", - "login": authforhost["login"], - "password": authforhost["password"], - } - - return auth diff --git a/tools/runners/sanitizers/msan/BUILD b/tools/runners/sanitizers/msan/BUILD new file mode 100644 index 00000000..bc7d5f6f --- /dev/null +++ b/tools/runners/sanitizers/msan/BUILD @@ -0,0 +1,9 @@ +package(default_visibility = ["//visibility:private"]) + +sh_binary( + name = "msan", + srcs = ["msan.sh"], + data = [ + "msan-suppressions.txt", + ], +) diff --git a/tools/build_rules/BUILD b/tools/runners/sanitizers/msan/msan-suppressions.txt similarity index 100% rename from tools/build_rules/BUILD rename to tools/runners/sanitizers/msan/msan-suppressions.txt diff --git a/tools/runners/sanitizers/msan/msan.sh b/tools/runners/sanitizers/msan/msan.sh new file mode 100755 index 00000000..c796ac7a --- /dev/null +++ b/tools/runners/sanitizers/msan/msan.sh @@ -0,0 +1 @@ +MSAN_OPTIONS=suppressions="tools/runners/sanitizers/msan/msan-suppressions.txt ${MSAN_OPTIONS}" "${@}"