diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 00000000..54d31d36 --- /dev/null +++ b/.bazelrc @@ -0,0 +1,102 @@ +# general build options +build --compilation_mode=dbg +build --verbose_failures +build --experimental_strict_action_env +build --experimental_guard_against_concurrent_changes +# This causes bazel to pass --config=windows/macos/linux automatically for us. +build --enable_platform_specific_config +# We enable the nocopts flag because it's used in the OpenSSL rules +build --noincompatible_disable_nocopts +# Enable symlinking of runfiles on all platforms (including Windows, where it's disable by default). +build --enable_runfiles --build_runfile_links +# Avoids downloading any remote build outputs to the local machine, except the ones required by local actions. +# Commenting out for now as it may be causing builds to flake. +#build --remote_download_minimal + +# general test options - note that test inherits from build, so these are in addition to the ones +# above +test --test_output=errors + +build:release --compilation_mode=opt + +build:ci --keep_going +build:ci --announce_rc + +#build:linux --copt="-O1" +#build:linux --copt="-march=skylake" +build:linux --copt="-fvisibility=hidden" +build:linux --copt="-fno-omit-frame-pointer" # for friendlier stack traces +build:linux --copt="-Wno-error" +build:linux --copt="-mavx" +build:linux --cxxopt="-std=c++17" +build:linux --linkopt="-lm" +build:linux --linkopt="-latomic" +build:linux --linkopt="-ldl" + +build:linux-release --config=release +build:linux-release --config=linux +build:linux-release --copt="-O3" + +build:macos --copt="-fvisibility=hidden" +build:macos --copt="-Wno-error" +build:macos --cxxopt="-std=c++17" + +build:windows --cxxopt="/std:c++17" +# Disables wingdi.h, which avoids defining a macro called ERROR. 
+build:windows --cxxopt="/DNOGDI" +build:windows --features=static_link_msvcrt +# We fix the temp directory, as otherwise it is different across different Windows BK agents, which causes +# the remote cache to never be hit due to differing build graph hashes. +build:windows --action_env TMP=C:/Windows/Temp +build:windows --action_env TEMP=C:/Windows/Temp + +# Config for when tests are running in a "slow" environment such as Valgrind or TSan +build:slow-tests --copt="-DIMPROBABLE_SLOW_TEST" + +# Valgrind config. +build:valgrind-memcheck --config=slow-tests +test:valgrind-memcheck --run_under=//tools/runners/sanitizers/valgrind-memcheck + +# Sanitizer configs; for an overview of the sanitizers, see https://github.com/google/sanitizers/wiki +# For more specific information on sanitizers: +# - https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html +# - https://clang.llvm.org/docs/AddressSanitizer.html +# - https://clang.llvm.org/docs/ThreadSanitizer.html +build:base-sanitizer --copt="-fno-omit-frame-pointer" # for friendlier stack traces +build:base-sanitizer --strip=never + +build:asan --config=base-sanitizer +build:asan --copt="-O1" +build:asan --copt="-fno-optimize-sibling-calls" +build:asan --copt="-fsanitize=address" +build:asan --linkopt="-fsanitize=address" +test:asan --test_env="ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-9/bin/llvm-symbolizer" +test:asan --run_under=//tools/runners/sanitizers/asan + +build:tsan --config=base-sanitizer +build:tsan --config=slow-tests +build:tsan --copt="-O1" +build:tsan --copt="-fno-optimize-sibling-calls" +build:tsan --copt="-fsanitize=thread" +build:tsan --linkopt="-fsanitize=thread" +#test:tsan --test_env="TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-9/bin/llvm-symbolizer" +test:tsan --run_under=//tools/runners/sanitizers/tsan + +build:ubsan --config=base-sanitizer +build:ubsan --copt="-O1" +build:ubsan --copt="-fsanitize=undefined" +build:ubsan --copt="-fno-sanitize-recover=all" +# Since Bazel uses clang instead of clang++, enabling 
-fsanitize=vptr would +# require extra linkopts that cause segmentation faults on pure C code. +build:ubsan --copt="-fno-sanitize=function" +build:ubsan --linkopt="-fsanitize=undefined" +build:ubsan --linkopt="-lubsan" +test:ubsan --run_under=//tools/runners/sanitizers/ubsan + +# MSAN is disabled for now, as there are false positives and we can't suppress them easily. +#build:msan --config=base-sanitizer +#build:msan --copt="-fsanitize=memory" +#build:msan --linkopt="-fsanitize=memory" +#test:msan --run_under=//tools/runners/sanitizers/msan + +build:lint --define linting_only=true diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..14d4b38b --- /dev/null +++ b/.clang-format @@ -0,0 +1,158 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -2 +AlignAfterOpenBracket: AlwaysBreak +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: false +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: true 
+BreakConstructorInitializers: BeforeComma +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 100 +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 0 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '".*"' + Priority: 1 + - Regex: '^' + Priority: 3 + - Regex: '^<.*>' + Priority: 5 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: false +IndentPPDirectives: None +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +PenaltyBreakAssignment: 1 +PenaltyBreakBeforeFirstCallParameter: 10 +PenaltyBreakComment: 400 +PenaltyBreakFirstLessLess: 200 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 1000000 +PointerAlignment: Left +RawStringFormats: + - Language: TextProto + Delimiters: + - 'pb' + BasedOnStyle: google +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 4 +UseTab: Never +--- +Language: Proto +BasedOnStyle: Google +ColumnLimit: 100 +--- +Language: Java +BasedOnStyle: Google +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: false +AlignTrailingComments: 
true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: true +BreakAfterJavaFieldAnnotations: true +BreakStringLiterals: true +ColumnLimit: 120 +ContinuationIndentWidth: 4 +ExperimentalAutoDetectBinPacking: false +... diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..d5868fdf --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +.* +!.bazelrc +!.clang-format +!.gitignore +bazel-* +!bazel-*.sh +compile_commands.json +perf.data* +build + diff --git a/BUILD b/BUILD new file mode 100644 index 00000000..91b986e5 --- /dev/null +++ b/BUILD @@ -0,0 +1,54 @@ +package(default_visibility = ["//visibility:public"]) + +# Platform configuration definitions for select() + +config_setting( + name = "linux", + constraint_values = ["@bazel_tools//platforms:linux"], +) + +config_setting( + name = "macos", + constraint_values = ["@bazel_tools//platforms:osx"], +) + +config_setting( + name = "windows", + constraint_values = ["@bazel_tools//platforms:windows"], +) + +config_setting( + name = "windows_debug", + constraint_values = ["@bazel_tools//platforms:windows"], + values = { + "compilation_mode": "dbg", + }, +) + +config_setting( + name = "windows_release", + constraint_values = 
["@bazel_tools//platforms:windows"], + values = { + "compilation_mode": "opt", + }, +) + +# Buildifier + +sh_binary( + name = "buildifier", + srcs = select( + { + ":linux": ["@buildifier_linux//file"], + ":macos": ["@buildifier_macos//file"], + ":windows": ["@buildifier_windows//file"], + }, + ), +) + +# Aspect-based clang-format + +filegroup( + name = "dot_clang_format", + srcs = [".clang-format"], +) diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..3abdd2bb --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,25 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## 0.1.0 - 2020-07-02 +### Added +- Initial version. + +### Changed +- Everything. + +### Removed +- Nothing. + +### Fixed +- Nothing. + + +[Unreleased]: https://github.com/improbable-eng/phtree-cpp/compare/v1.0.0...HEAD +[1.0.0]: https://github.com/improbable-eng/phtree-cpp/compare/v0.1.0...v1.0.0 +[0.2.0]: https://github.com/improbable-eng/phtree-cpp/compare/v0.1.0...v0.2.0 diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..e755db68 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.14) + +# set the project name +project(PH_Tree_Main VERSION 0.1.0 + DESCRIPTION "PH-Tree C++" + LANGUAGES CXX) + +# specify the C++ standard +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED True) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O3") + +add_subdirectory(phtree) +add_subdirectory(examples) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..f5d7a1fc --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,46 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and 
maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. 
 Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at xxx@improbable.io. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..98139941 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,10 @@ +# Contributing + +We would really like to have people help and contribute back to the project. + +We hope you do not mind signing our CLA first. + + +## Style Guide + +This project mostly follows the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..e46c5961 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 Improbable Worlds Limited + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md index 9f758192..1acd77b3 100644 --- a/README.md +++ b/README.md @@ -1 +1,249 @@ -# phtree-cpp +[![Build status](TODO)](TODO) + +# PH-Tree C++ + +The PH-Tree is an ordered index on an n-dimensional space (quad-/oct-/2^n-tree) where each +dimension is (by default) indexed by a 64 bit integer. The index order follows z-order / Morton +order. The default implementation is effectively a 'map', i.e. each key is associated with at most one value. +Keys are points or boxes in n-dimensional space. + + + +One strength of PH-Trees is fast insert/removal operations and scalability with large datasets. +It also provides fast window queries and _k_-nearest neighbor queries, and it scales well with higher dimensions. 
+The default implementation is limited to 63 dimensions. + +The API is mostly analogous to STL's `std::map`, see function descriptions for details. + +See also : +- T. Zaeschke, C. Zimmerli, M.C. Norrie: + "The PH-Tree -- A Space-Efficient Storage Structure and Multi-Dimensional Index", (SIGMOD 2014) +- T. Zaeschke: "The PH-Tree Revisited", (2015) +- T. Zaeschke, M.C. Norrie: "Efficient Z-Ordered Traversal of Hypercube Indexes" (BTW 2017). + +More information about PH-Trees (including a Java implementation) is available [here](http://www.phtree.org). + + +## Usage + +#### Key Types + +The PH-Tree supports three types of keys: +- `PhTreeD` uses `PhPointD` keys, which are vectors/points of 64 bit `double`. +- `PhTreeBoxD` uses `PhBoxD` keys, which consist of two `PhPointD` that define an axis-aligned rectangle/box. +- `PhTree` uses `PhPoint` keys, which are vectors/points of `std::int64_t` + +#### Basic Operations +```C++ +class MyData { ... }; +MyData my_data; + +// Create a 3D point tree with floating point coordinates and a value type of `MyData`. +auto tree = PhTreeD<3, MyData>(); + +// Create coordinate +PhPointD<3> p{1.1, 1.0, 10.}; + +// Some operations +tree.emplace(p, my_data); +tree.insert(p, my_data); +tree[p] = my_data; +tree.count(p); +tree.find(p); +tree.erase(p); +tree.size(); +tree.empty(); +tree.clear(); +``` + +#### Queries + +* Iterator over all elements: `auto q = tree.begin();` +* Iterator for box shaped window queries: `auto q = tree.begin_query(min, max);` +* Iterator for _k_ nearest neighbor queries: `auto q = tree.begin_knn_query(k, center_point);` + +```C++ +// Iterate over all entries +for (auto it : tree) { + ... +} + +// Iterate over all entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3) +for (auto it = tree.begin_query({1, 1, 1}, {3, 3, 3}); it != tree.end(); ++it) { + ... +} + +// Find 5 nearest neighbors of (1,1,1) +for (auto it = tree.begin_knn_query(5, {1, 1, 1}); it != tree.end(); ++it) { + ... 
+} +``` + +All queries allow specifying an additional filter. The filter is called for every key/value pair that would +normally be returned (subject to query constraints) and for every node in the tree that the query decides to +traverse (also subject to query constraints). Returning `true` in the filter does not change query behaviour, +returning `false` means that the current value or child node is not returned or traversed. +An example of a geometric filter can be found in `phtree/common/ph_filter.h` in `PhFilterAABB`. +```C++ +template <dimension_t DIM, typename T> +struct FilterByValueId { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<DIM>& key, const T& value) const { + // Arbitrary example: Only allow values with even values of id_ + return value.id_ % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<DIM>& prefix, int bits_to_ignore) const { + // Allow all nodes + return true; + } +}; + +// Iterate over all entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3). +// Return only entries that satisfy the filter condition. +for (auto it = tree.begin_query({1, 1, 1}, {3, 3, 3}, FilterByValueId<3, T>()); it != tree.end(); ++it) { + ... +} +``` + +Nearest neighbor queries can also use custom distance metrics, such as L1 distance. +Note that this returns a special iterator that provides a function to get the distance of the +current entry: +```C++ +#include "phtree/phtree_d.h" + +// Find 5 nearest neighbors of (1,1,1) using L1 distance +for (auto it = tree.begin_knn_query(5, {1, 1, 1}, PhDistanceLongL1<3>()); it != tree.end(); ++it) { + std::cout << "distance = " << it.distance() << std::endl; + ... +} + +``` + +#### Pre- & Post-Processors +The PH-Tree can internally only process integer keys. In order to use floating point coordinates, the floating point +coordinates must be converted to integer coordinates. The `PhTreeD` and `PhTreeBoxD` use by default the +`PreprocessIEEE` & `PostProcessIEEE` functions. 
 The `IEEE` processor is a loss-less converter (in terms of numeric +precision) that simply takes the 64bits of a double value and treats them as if they were a 64bit integer +(it is slightly more complicated than that, see discussion in the papers referenced above). +In other words, it treats the IEEE 754 representation of the double value as integer, hence the name `IEEE` converter. + +The `IEEE` conversion is fast and reversible without loss of precision. However, it has been shown that other +converters can result in indexes that are up to 20% faster. +One useful alternative is a `Multiply` converter that converts floating point to integer by multiplication +and casting: +```C++ +double my_float = ...; +// Convert to int +std::int64_t my_int = (std::int64_t)(my_float * 1000000.); + +// Convert back +double resulting_float = ((double)my_int) / 1000000.; +``` +It is obvious that this approach leads to a loss of numerical precision. Moreover, the loss of precision depends +on the actual range of the double values and the constant. +The chosen constant should probably be as large as possible, but small enough such that converted +values do not exceed the 64bit limit of `std::int64_t`. + +```C++ +static const double MY_MULTIPLIER = 1000000.; +static const double MY_DIVIDER = 1./MY_MULTIPLIER; + +template <dimension_t DIM> +PhPoint<DIM> PreprocessMultiply(const PhPointD<DIM>& point) { + PhPoint<DIM> out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = point[i] * MY_MULTIPLIER; + } + return out; +} + +template <dimension_t DIM> +PhPointD<DIM> PostprocessMultiply(const PhPoint<DIM>& in) { + PhPointD<DIM> out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = ((double)in[i]) * MY_DIVIDER; + } + return out; +} + +template <dimension_t DIM, typename T> +using MyTree = PhTreeD<DIM, T, PhPoint<DIM>, PreprocessMultiply<DIM>, PostprocessMultiply<DIM>>; +``` + +It is also worth trying out constants that are 1 or 2 orders of magnitude smaller or larger than this maximum value. +Experience shows that this may affect query performance by up to 10%. The reason for this is currently unknown. 
+ + + + +#### Restrictions + +* **C++**: Supports value types of `T` and `T*`, but not `T&` +* **C++**: Return types of `find()`, `emplace()`, ... differ slightly from `std::map`: they have functions `first()` and `second()` instead of fields of the same name. +* **General**: PH-Trees are **maps**, i.e. each coordinate can hold only *one* entry. In order to hold multiple entries per coordinate, one needs to insert lists or hashmaps as values. +* **General**: PH-Trees order entries internally in z-order (Morton order). However, the order is based on the (unsigned) bit representation of keys, so negative coordinates are returned *after* positive coordinates. +* **General**: The current implementation supports between 2 and 63 dimensions. +* **Differences to std::map**: There are several differences to `std::map`. Most notably for the iterators: + * `begin()`/`end()` are not comparable with `<` or `>`. Only `it == tree.end()` and `it != tree.end()` are supported. + * Value of `end()`: The tree has no linear memory layout, so there is no useful definition of a pointer pointing _after_ the last entry or any entry. This should be irrelevant for normal usage. + + +## Compiling the PH-Tree + +This section will guide you through setting up the build system and IDE you need in order to build and run custom versions of the PH-Tree on your machine. + +### Build system & dependencies + +PH-Tree can be built with *cmake 3.14* or [Bazel](https://bazel.build) as the build system. All of the code is written in C++ targeting the C++17 standard. +The code has been verified to compile with Clang 9 on Linux and Visual Studio 2019 on Windows. 
+ +#### Ubuntu Linux + +Installing clang & bazel: +``` +echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | sudo tee /etc/apt/sources.list.d/bazel.list +curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add - +curl https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - +sudo apt-add-repository 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main' +sudo apt-get update +sudo apt-get install clang-9 bazel +``` + +To install [*cmake*](https://launchpad.net/~hnakamur/+archive/ubuntu/cmake): +``` +sudo add-apt-repository ppa:hnakamur/libarchive +sudo add-apt-repository ppa:hnakamur/libzstd +sudo add-apt-repository ppa:hnakamur/cmake +sudo apt update +sudo apt install cmake +``` + +#### Windows + +To build on Windows, you'll need to have a version of Visual Studio 2019 installed (likely Professional), in addition to the latest version of +[Bazel](https://docs.bazel.build/versions/master/windows.html). + + +### Bazel +Once you have set up your dependencies, you should be able to build the PH-Tree repository by running: +``` +bazel build ... +``` + +Similarly, you can run all unit tests with: +``` +bazel test ... +``` + +### cmake +``` +mkdir build +cd build +cmake .. +cmake --build . +./example/Example +``` + +## Troubleshooting + +**Problem**: The PH-Tree appears to be losing updates/insertions. +**Solution**: Remember that the PH-Tree is a *map*, keys will not be inserted if an identical key already exists. 
diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 00000000..789ab5b5 --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,80 @@ +# Bazel bootstrapping + +load("//tools/build_rules:http.bzl", "http_archive", "http_file") + +http_archive( + name = "bazel_skylib", + sha256 = "1dde365491125a3db70731e25658dfdd3bc5dbdfd11b840b3e987ecf043c7ca0", + url = "https://github.com/bazelbuild/bazel-skylib/releases/download/0.9.0/bazel_skylib-0.9.0.tar.gz", +) + +load("@bazel_skylib//lib:versions.bzl", "versions") + +versions.check( + minimum_bazel_version = "2.0.0", + maximum_bazel_version = "2.0.0", +) + +# NOTE: We make third_party/ its own bazel workspace because it allows to run `bazel build ...` without +# having all targets defined in third-party BUILD files in that directory buildable. +local_repository( + name = "third_party", + path = "third_party", +) + +# External PH-Tree dependencies + +http_archive( + name = "spdlog", + build_file = "@third_party//spdlog:BUILD", + sha256 = "160845266e94db1d4922ef755637f6901266731c4cb3b30b45bf41efa0e6ab70", + strip_prefix = "spdlog-1.3.1", + url = "https://github.com/gabime/spdlog/archive/v1.3.1.tar.gz", +) + +http_archive( + name = "gbenchmark", + sha256 = "3c6a165b6ecc948967a1ead710d4a181d7b0fbcaa183ef7ea84604994966221a", + strip_prefix = "benchmark-1.5.0", + url = "https://github.com/google/benchmark/archive/v1.5.0.tar.gz", +) + +http_archive( + name = "gtest", + build_file = "@third_party//gtest:BUILD", + sha256 = "9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb", + strip_prefix = "googletest-release-1.10.0", + url = "https://github.com/google/googletest/archive/release-1.10.0.tar.gz", +) + +# Development environment tooling + +BUILDIFIER_VERSION = "0.29.0" + +http_file( + name = "buildifier_linux", + executable = True, + sha256 = "4c985c883eafdde9c0e8cf3c8595b8bfdf32e77571c369bf8ddae83b042028d6", + urls = ["https://github.com/bazelbuild/buildtools/releases/download/{version}/buildifier".format(version = 
BUILDIFIER_VERSION)], +) + +http_file( + name = "buildifier_macos", + executable = True, + sha256 = "9b108decaa9a624fbac65285e529994088c5d15fecc1a30866afc03a48619245", + urls = ["https://github.com/bazelbuild/buildtools/releases/download/{version}/buildifier.mac".format(version = BUILDIFIER_VERSION)], +) + +http_file( + name = "buildifier_windows", + executable = True, + sha256 = "dc5d6ed5e3e0dbe9955f7606939c627af5a2be7f9bdd8814e77a22109164394f", + urls = ["https://github.com/bazelbuild/buildtools/releases/download/{version}/buildifier.exe".format(version = BUILDIFIER_VERSION)], +) + +http_archive( + name = "bazel_compilation_database", + sha256 = "bb1b812396e2ee36a50a13b03ae6833173ce643e8a4bd50731067d0b4e5c6e86", + strip_prefix = "bazel-compilation-database-0.3.5", + url = "https://github.com/grailbio/bazel-compilation-database/archive/0.3.5.tar.gz", +) diff --git a/ci/includes/bazel.sh b/ci/includes/bazel.sh new file mode 100755 index 00000000..a2aa3c85 --- /dev/null +++ b/ci/includes/bazel.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +source ci/includes/os.sh + +# Main function that should be used by scripts sourcing this file. +function runBazel() { + BAZEL_SUBCOMMAND="$1" + shift + "$(pwd)/tools/bazel" "$BAZEL_SUBCOMMAND" ${BAZEL_CI_CONFIG:-} "$@" +} + +function getBazelVersion() { + echo "2.0.0" +} diff --git a/ci/includes/os.sh b/ci/includes/os.sh new file mode 100755 index 00000000..a4e1e1cb --- /dev/null +++ b/ci/includes/os.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +function isLinux() { + [[ "$(uname -s)" == "Linux" ]]; +} + +function isMacOS() { + [[ "$(uname -s)" == "Darwin" ]]; +} + +function isWindows() { + ! 
( isLinux || isMacOS ); +} diff --git a/ci/linting/buildifier.sh b/ci/linting/buildifier.sh new file mode 100755 index 00000000..1be7b1c3 --- /dev/null +++ b/ci/linting/buildifier.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -x -e -u -o pipefail + +cd "$(dirname "$0")/../../" + +source ci/includes/bazel.sh +source ci/includes/os.sh + +MAYBEARG='-mode=check' +if [ $# -eq 1 ]; then + if [ "$1" = "-fix" ]; then + echo -e "\033[0;34mAttempting to fix linting errors automatically as '-fix' is specified.\033[0m" + MAYBEARG='' + fi +fi + +# Ensure Bazel is installed. +runBazel version + +if runBazel run buildifier -- ${MAYBEARG} -v $(find "$(pwd)/" \( -name BUILD -o -name WORKSPACE \) -type f); then + echo -e "\033[0;32mAll BUILD and WORKSPACE files passed buildifier linting check.\033[0m" +else + echo -e "\033[0;31mThe above listed BUILD and WORKSPACE file(s) didn't pass the buildifier linting check!\033[0m" + echo -e "\033[0;34mYou can run 'ci/linting/buildifier.sh -fix' to fix them automatically.\033[0m" + exit 1 +fi + diff --git a/ci/linting/clang-format.sh b/ci/linting/clang-format.sh new file mode 100755 index 00000000..3ef66c29 --- /dev/null +++ b/ci/linting/clang-format.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash + +set -e -u -o pipefail + +source ci/includes/os.sh +source ci/includes/bazel.sh + +TARGETS="//..." +EXCLUDED_TARGETS="" + +# NOTE: We need to set the 'MSYS_ARG_CONV_EXCL' environment variable in various places, because +# otherwise on Windows, MSYS2 does weird things with Bazel target paths (see +# http://www.mingw.org/wiki/Posix_path_conversion). +export MSYS2_ARG_CONV_EXCL="//..." + +# Function to join arrays with a specified string. 
+# Taken from: https://stackoverflow.com/questions/1527049/how-can-i-join-elements-of-an-array-in-bash#comment37571340_17841619 +function joinBy { perl -e '$s = shift @ARGV; print join($s, @ARGV);' "$@"; } + +function clangFormatLocation() { + local CLANG_FORMAT_VERSION=9.0.0 + # Use find to get the path for either clang-format (macOS / Linux) or clang-format.exe (Windows) + local CLANG_FORMAT_EXE=clang-format + local CLANG_FORMAT_SHIM=clang-format + if isWindows; then + CLANG_FORMAT_EXE=clang-format.exe + fi + + CLANG_FORMAT_VERSION_CALL="$("${CLANG_FORMAT_SHIM}" -version)" # Ensures that the binary is downloaded. + + local CLANG_FORMAT=CLANG_FORMAT_EXE + if [[ ${CLANG_FORMAT} == "" ]]; then + echo "ERROR: could not locate clang-format" + echo "clang-format -version: ${CLANG_FORMAT_VERSION_CALL}" + echo "which clang-format: $(which clang-format)" + exit 1 + fi + echo "${CLANG_FORMAT}" +} + +# Generates a string of the form `... -...` +function generateBuildTargetString() { + TARGET_STRING="${TARGETS}" + + # Append target exclusions only if there are excluded targets. + if ! [[ -z "${EXCLUDED_TARGETS}" ]]; then + TARGET_STRING="${TARGET_STRING} -$(joinBy " -" ${EXCLUDED_TARGETS})" + fi + + echo "${TARGET_STRING}" +} + +# Generates a string of the form ` [union ]* except ( [union ]*)` +# i.e. worker_sdk/... union applications except (worker_sdk/common:some_target union worker_sdk/common:some_other_target) +function generateAqueryTargetString() { + TARGET_STRING="$(joinBy " union " ${TARGETS})" + + # Append target exclusions only if there are excluded targets. + if ! [[ -z "${EXCLUDED_TARGETS}" ]]; then + TARGET_STRING="${TARGET_STRING} except ($(joinBy " union " ${EXCLUDED_TARGETS}))" + fi + + echo "${TARGET_STRING}" +} + +function bazelLintTest() { + # Use bazel to create patch files for all eligible source files. + # Fail if any of the patch files are non-empty (i.e. lint was detected). 
+ CLANG_FORMAT="$(clangFormatLocation)" runBazel build --config lint --output_groups=clang_format_test -- $(generateBuildTargetString) +} + +function bazelLintFix() { + # Use bazel to create patch files for all eligible source files. + CLANG_FORMAT="$(clangFormatLocation)" runBazel build --config lint --output_groups=clang_format_patches_only -- $(generateBuildTargetString) + + # Find bazel-bin prefix. + BAZEL_BIN=$(runBazel info bazel-bin) + # I.e. on Linux, this is `bazel-out/k8-gcc-opt/bin`. + PREFIX=${BAZEL_BIN#$(runBazel info execution_root)/} + + # Use aquery to get the list of output files of the `CreatePatch` action, + # Then strip the patch path down to that of its source file, and apply + # the patch file generated by Bazel to the original source file. + CLANG_FORMAT="$(clangFormatLocation)" runBazel aquery --config lint --include_aspects --output_groups clang_format_patches_only "mnemonic(\"CreatePatch\", $(generateAqueryTargetString))" --output textproto \ + `# Get relative paths to source files` \ + `# perl used instead of grep --perl-regexp since grep macOS doesnt support it` \ + | perl -ne "while(/(?<=exec_path: \"${PREFIX//\//\\/}\/).*\.patch_.+(?=\")/g){print \"\$&\n\";}" \ + `# Create the patch commands which, when executed patch the source files.` \ + `# --binary flag used to correctly handle line endings on Windows.` \ + | xargs -L1 -I 'PATH_TO_SOURCE_FILE' echo 'FILE_PATH="PATH_TO_SOURCE_FILE"; echo patch --binary "${FILE_PATH%.patch_*}" "'${BAZEL_BIN}'/${FILE_PATH}"' \ + `# De-duplicate the patch commands, such that there is at most one patch command for each source file.` \ + `# There are N patch files per source files, where N is the number of bazel targets directly including the target.` \ + `# The format of the commands being de-duplicated is: patch --binary -source file- -patch file-` \ + | sh `# Calls the echo command generated by xargs, resulting in the patch command to be run by the next sh invocation below`\ + | sort --unique 
--field-separator=" " --key=3,3 `# Remove duplicate patch commands to the same source file` \ + | sh `# Execute patch commands` +} + +MAYBEARG='-mode=check' +if [ $# -eq 1 ]; then + if [ "$1" = "-fix" ]; then + MAYBEARG='' + fi +fi + +if [[ -z "$MAYBEARG" ]]; then + echo -e "\033[0;34mAttempting to fix linting errors automatically as '-fix' is specified.\033[0m" + bazelLintFix +elif bazelLintTest; then + echo -e "\033[0;32mAll source files passed clang-format linting check.\033[0m" +else + echo -e "\033[0;31mThe above listed source file(s) didn't pass the clang-format linting check!\033[0m" + echo -e "\033[0;34mYou can run 'ci/linting/clang-format.sh -fix' to fix them automatically.\033[0m" + exit 1 +fi diff --git a/examples/BUILD b/examples/BUILD new file mode 100644 index 00000000..56f61fe1 --- /dev/null +++ b/examples/BUILD @@ -0,0 +1,12 @@ +package(default_visibility = ["//visibility:private"]) + +cc_binary( + name = "example", + srcs = ["example.cc"], + visibility = [ + "//visibility:public", + ], + deps = [ + "//phtree", + ], +) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 00000000..370887f6 --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 3.14) +project(Example) + +set(SOURCE_FILES example.cc) +add_executable(Example ${SOURCE_FILES}) +target_link_libraries(Example phtree) diff --git a/examples/example.cc b/examples/example.cc new file mode 100644 index 00000000..9dad4b07 --- /dev/null +++ b/examples/example.cc @@ -0,0 +1,58 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../phtree/phtree_d.h" +#include + +using namespace improbable::phtree; + +// Small demo: inserts four 3D points, lists them, runs a window query and shows map semantics. +int main() { + std::cout << "PH-Tree example with 3D `double` coordinates." << std::endl; + PhPointD<3> p1({1, 1, 1}); + PhPointD<3> p2({2, 2, 2}); + PhPointD<3> p3({3, 3, 3}); + PhPointD<3> p4({4, 4, 4}); + + PhTreeD<3, int> tree; + tree.emplace(p1, 1); + tree.emplace(p2, 2); + tree.emplace(p3, 3); + tree.emplace(p4, 4); + + std::cout << "All values:" << std::endl; + for (auto it : tree) { + std::cout << " id=" << it << std::endl; + } + std::cout << std::endl; + + // The corners printed here must be the same ones that are passed to begin_query() below. + std::cout << "All points in range:" << p2 << "/" << p4 << std::endl; + for (auto it = tree.begin_query(p2, p4); it != tree.end(); ++it) { + std::cout << " " << it.second() << " -> " << it.first() << std::endl; + } + std::cout << std::endl; + + std::cout << "PH-Tree is a MAP which means that, like std::map, every position " << std::endl; + std::cout << " (=key) can have only ONE value." << std::endl; + std::cout << "Storing multiple values for a single coordinate requires storing " << std::endl; + std::cout << "lists or sets, for example using PhTree<3, std::vector>." << std::endl; + + PhPointD<3> p4b({4, 4, 4}); + tree.emplace(p4b, 5); + // Still showing '4' after emplace() + std::cout << "ID at " << p4b << ": " << tree.find(p4b).second() << std::endl; + + std::cout << "Done." 
<< std::endl; +} \ No newline at end of file diff --git a/phtree/BUILD b/phtree/BUILD new file mode 100644 index 00000000..f94404b6 --- /dev/null +++ b/phtree/BUILD @@ -0,0 +1,97 @@ +package(default_visibility = ["//visibility:private"]) + +cc_library( + name = "phtree", + srcs = [ + ], + hdrs = [ + "phtree.h", + "phtree_box_d.h", + "phtree_d.h", + ], + linkstatic = True, + visibility = [ + "//visibility:public", + ], + deps = [ + "//phtree/v16", + ], +) + +cc_test( + name = "phtree_test", + timeout = "long", + srcs = [ + "phtree_test.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_test_const_values", + timeout = "long", + srcs = [ + "phtree_test_const_values.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_test_ptr_values", + timeout = "long", + srcs = [ + "phtree_test_ptr_values.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_d_test", + timeout = "long", + srcs = [ + "phtree_d_test.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_d_test_preprocessor", + timeout = "long", + srcs = [ + "phtree_d_test_preprocessor.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_box_d_test", + timeout = "long", + srcs = [ + "phtree_box_d_test.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) diff --git a/phtree/CMakeLists.txt b/phtree/CMakeLists.txt new file mode 100644 index 00000000..53761cd5 --- /dev/null +++ b/phtree/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.14) +project(phtree) + +add_library(phtree STATIC "") +add_subdirectory(common) +add_subdirectory(v16) + +set_target_properties(phtree PROPERTIES LINKER_LANGUAGE CXX) + diff --git 
a/phtree/benchmark/BUILD b/phtree/benchmark/BUILD new file mode 100644 index 00000000..37d15f7d --- /dev/null +++ b/phtree/benchmark/BUILD @@ -0,0 +1,229 @@ +package(default_visibility = ["//visibility:private"]) + +cc_library( + name = "benchmark", + testonly = True, + srcs = [ + ], + hdrs = [ + "benchmark_util.h", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + "@gbenchmark//:benchmark", + "@spdlog", + ], + alwayslink = 1, +) + +cc_binary( + name = "erase_benchmark", + testonly = True, + srcs = [ + "erase_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "erase_d_benchmark", + testonly = True, + srcs = [ + "erase_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "extent_benchmark", + testonly = True, + srcs = [ + "extent_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "extent_benchmark_weird", + testonly = True, + srcs = [ + "extent_benchmark_weird.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "find_benchmark", + testonly = True, + srcs = [ + "find_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "insert_benchmark", + testonly = True, + srcs = [ + "insert_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "insert_d_benchmark", + testonly = True, + srcs = [ + "insert_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + 
"@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "insert_box_d_benchmark", + testonly = True, + srcs = [ + "insert_box_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "knn_d_benchmark", + testonly = True, + srcs = [ + "knn_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "query_benchmark", + testonly = True, + srcs = [ + "query_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "query_box_d_benchmark", + testonly = True, + srcs = [ + "query_box_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "query_d_benchmark", + testonly = True, + srcs = [ + "query_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "update_d_benchmark", + testonly = True, + srcs = [ + "update_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "update_box_d_benchmark", + testonly = True, + srcs = [ + "update_box_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) diff --git a/phtree/benchmark/benchmark_util.h b/phtree/benchmark/benchmark_util.h new file mode 100644 index 00000000..7776ec75 --- /dev/null +++ b/phtree/benchmark/benchmark_util.h @@ -0,0 +1,157 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_BENCHMARK_UTIL_H +#define PHTREE_BENCHMARK_UTIL_H + +#include "phtree/common/ph_common.h" +#include +#include +#include +#include + +namespace improbable::phtree::phbenchmark { + +using namespace improbable::phtree; + +namespace { +template +auto CreateDataCUBE = [](auto& points, + size_t num_entities, + std::uint32_t seed, + double world_length, + auto set_coordinate) { + std::default_random_engine random_engine{seed}; + std::uniform_real_distribution<> distribution(0, world_length); + for (size_t i = 0; i < num_entities; ++i) { + auto& p = points[i]; + for (dimension_t d = 0; d < DIM; ++d) { + set_coordinate(p, d, distribution(random_engine)); + } + } +}; + +template +auto CreateDataCLUSTER = [](auto& points, + size_t num_entities, + std::uint32_t seed, + double world_length, + auto set_coordinate) { + std::default_random_engine random_engine{seed}; + std::uniform_real_distribution<> distribution(0, world_length); + // SIGMA = 0.0001 + std::normal_distribution<> gauss_distribution(0, 0.0001); + const int NUM_PT_PER_CLUSTER = 100; + // 100 points per cluster, minimum is 1 cluster. 
+ size_t num_cluster = std::max(1, (int)(num_entities / NUM_PT_PER_CLUSTER)); + + // loop over clusters + PhPointD cp; // center point of cluster + size_t id = 0; + for (int c = 0; c < num_cluster; ++c) { + for (dimension_t d = 0; d < DIM; ++d) { + cp[d] = distribution(random_engine); + } + // Stop at num_entities so that a request smaller than one full cluster cannot write + // past the end of `points`. + for (size_t i = 0; i < NUM_PT_PER_CLUSTER && id < num_entities; ++i) { + auto& p = points[id++]; + // int ii = (c * N_C + i) * DIM; + for (dimension_t d = 0; d < DIM; ++d) { + // double x = (R.nextGaussian() - 0.5) * GAUSS_SIGMA; // confine to small rectangle + double x = gauss_distribution(random_engine); + x *= world_length; // stretch if domain>1.0 + x += cp[d]; // offset of cluster + set_coordinate(p, d, x); + } + } + } +}; + +// Fills points[num_unique, num_total) with copies of randomly chosen entries from +// points[0, num_unique). `points` must already hold at least num_total elements and +// num_unique must be >= 1 (std::uniform_int_distribution draws from a closed interval). +auto CreateDuplicates = [](auto& points, size_t num_unique, size_t num_total, std::uint32_t seed) { + std::default_random_engine random_engine{seed}; + std::uniform_int_distribution<size_t> distribution(0, num_unique - 1); + for (size_t i = num_unique; i < num_total; ++i) { + // copy some random other point or box + points[i] = points[distribution(random_engine)]; + } +}; +} // namespace + +enum TestGenerator { CUBE, CLUSTER }; + +template +auto CreatePointData = [](auto& points, + TestGenerator test_generator, + size_t num_entities, + int seed, + double world_length, + double fraction_of_duplicates = 0.) { + auto set_coordinate_lambda = [](auto& p, dimension_t dim, auto value) { p[dim] = value; }; + // Create at least 1 unique point + // Note that the following point generator is likely, but not guaranteed, to create unique + // points. + int num_unique_entries = 1 + (num_entities - 1) * (1. 
- fraction_of_duplicates); + points.reserve(num_entities); + switch (test_generator) { + case CUBE: + CreateDataCUBE(points, num_unique_entries, seed, world_length, set_coordinate_lambda); + break; + case CLUSTER: + CreateDataCLUSTER( + points, num_unique_entries, seed, world_length, set_coordinate_lambda); + break; + default: + assert(false); + } + + // Create duplicates in the remaining slots [num_unique_entries, num_entities) + CreateDuplicates(points, num_unique_entries, num_entities, seed); +}; + +template +auto CreateBoxData = [](auto& points, + TestGenerator test_generator, + size_t num_entities, + int seed, + double world_length, + double box_length, + double fraction_of_duplicates = 0.) { + auto set_coordinate_lambda = [box_length](auto& p, dimension_t dim, auto value) { + p.min()[dim] = value; + p.max()[dim] = value + box_length; + }; + // Create at least 1 unique point + // Note that the following point generator is likely, but not guaranteed, to create unique + // points. + int num_unique_entries = 1 + (num_entities - 1) * (1. - fraction_of_duplicates); + points.reserve(num_entities); + switch (test_generator) { + case CUBE: + CreateDataCUBE(points, num_unique_entries, seed, world_length, set_coordinate_lambda); + break; + case CLUSTER: + CreateDataCLUSTER( + points, num_unique_entries, seed, world_length, set_coordinate_lambda); + break; + default: + assert(false); + } + + // Create duplicates in the remaining slots [num_unique_entries, num_entities) + CreateDuplicates(points, num_unique_entries, num_entities, seed); +}; +} // namespace improbable::phtree::phbenchmark + +#endif // PHTREE_BENCHMARK_UTIL_H diff --git a/phtree/benchmark/erase_benchmark.cc b/phtree/benchmark/erase_benchmark.cc new file mode 100644 index 00000000..38713a57 --- /dev/null +++ b/phtree/benchmark/erase_benchmark.cc @@ -0,0 +1,153 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +/* + * Benchmark for removing entries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, PhTree& tree); + void Remove(benchmark::State& state, PhTree& tree); + + const TestGenerator data_type_; + const int num_entities_; + + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities) +: data_type_{data_type} +, num_entities_(num_entities) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new PhTree(); + Insert(state, *tree); + state.ResumeTiming(); + + Remove(state, *tree); + + state.PauseTiming(); + 
// avoid measuring deallocation + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_remove_count"] = benchmark::Counter(0); + state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, PhTree& tree) { + for (int i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], i); + } +} + +template +void IndexBenchmark::Remove(benchmark::State& state, PhTree& tree) { + int n = 0; + for (int i = 0; i < num_entities_; ++i) { + n += tree.erase(points_[i]); + } + + state.counters["total_remove_count"] += n; + state.counters["remove_rate"] += n; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, REM_CU_1K, TestGenerator::CUBE, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CU_10K, TestGenerator::CUBE, 10000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CU_100K, TestGenerator::CUBE, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CU_1M, TestGenerator::CUBE, 1000000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CU_10M, TestGenerator::CUBE, 10000000) + ->Unit(benchmark::kMillisecond); + +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, REM_CL_1K, TestGenerator::CLUSTER, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CL_10K, TestGenerator::CLUSTER, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CL_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CL_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CL_10M, TestGenerator::CLUSTER, 10000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/erase_d_benchmark.cc b/phtree/benchmark/erase_d_benchmark.cc new file mode 100644 index 00000000..8f55c761 --- /dev/null +++ b/phtree/benchmark/erase_d_benchmark.cc @@ -0,0 +1,154 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree_d.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +/* + * Benchmark for removing entries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, PhTreeD& tree); + void Remove(benchmark::State& state, PhTreeD& tree); + + const TestGenerator data_type_; + const int num_entities_; + + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities) +: data_type_{data_type} +, num_entities_(num_entities) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new PhTreeD(); + Insert(state, *tree); + state.ResumeTiming(); + + Remove(state, *tree); + + 
state.PauseTiming(); + // avoid measuring deallocation + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_remove_count"] = benchmark::Counter(0); + state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, PhTreeD& tree) { + for (int i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], i); + } +} + +template +void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { + int n = 0; + for (int i = 0; i < num_entities_; ++i) { + n += tree.erase(points_[i]); + } + + state.counters["total_remove_count"] += n; + state.counters["remove_rate"] += n; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, REM_CU_1K, TestGenerator::CUBE, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CU_10K, TestGenerator::CUBE, 10000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CU_100K, TestGenerator::CUBE, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CU_1M, TestGenerator::CUBE, 1000000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CU_10M, TestGenerator::CUBE, 10000000) + ->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_generator, num_entities +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, REM_CL_1K, TestGenerator::CLUSTER, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CL_10K, TestGenerator::CLUSTER, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CL_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CL_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, REM_CL_10M, TestGenerator::CLUSTER, 10000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/extent_benchmark.cc b/phtree/benchmark/extent_benchmark.cc new file mode 100644 index 00000000..5a8bd9e4 --- /dev/null +++ b/phtree/benchmark/extent_benchmark.cc @@ -0,0 +1,140 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +/* + * Benchmark for iterating over all entries in the tree. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state); + + const TestGenerator data_type_; + const int num_entities_; + + PhTree tree_; + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities) +: data_type_{data_type} +, num_entities_(num_entities) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + QueryWorld(state); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", 
num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state) { + int n = 0; + auto q = tree_.begin(); + while (q != tree_.end()) { + // just read the entry + ++q; + ++n; + } + state.counters["total_result_count"] += n; + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1K, TestGenerator::CUBE, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_10K, TestGenerator::CUBE, 10000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_100K, TestGenerator::CUBE, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1M, TestGenerator::CUBE, 1000000)->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_generator, num_entities +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, EXT_CL_1K, TestGenerator::CLUSTER, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CL_10K, TestGenerator::CLUSTER, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CL_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + 
+BENCHMARK_CAPTURE(PhTree3D, EXT_CL_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/extent_benchmark_weird.cc b/phtree/benchmark/extent_benchmark_weird.cc new file mode 100644 index 00000000..f36984c5 --- /dev/null +++ b/phtree/benchmark/extent_benchmark_weird.cc @@ -0,0 +1,282 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +/* + * Benchmark for iterating over all entries while using a filter. + * + * TODO + * This benchmarks shows some weird behaviour, see below. + * It should probably be removed at some point. 
+ */ + +template +class IndexBenchmark { + public: + IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities, int type = 0); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state); + + const TestGenerator data_type_; + const int num_entities_; + + PhTree tree_; + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; + int type_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities, int type) +: data_type_{data_type} +, num_entities_(num_entities) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities) +, type_{type} { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + QueryWorld(state); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + spdlog::info("World setup complete."); +} + +template < + dimension_t DIM, + typename KEY = PhPoint, + PhPreprocessor PRE = PrePostNoOp> +class PhFilterBoxIntersection { + public: + PhFilterBoxIntersection(const 
PhPoint& minInclude, const PhPoint& maxInclude) + : minIncludeBits{minInclude}, maxIncludeBits{maxInclude} {}; + + void set(const PhPointD& minExclude, const PhPointD& maxExclude) { // NOTE(review): assigns to members declared const below; looks uncompilable if ever instantiated -- confirm or remove + minIncludeBits = PRE(minExclude); + maxIncludeBits = PRE(maxExclude); + } + + [[nodiscard]] bool IsEntryValid(const PhPoint& key, const int& value) const { // true only if key lies within [min, max] in every dimension + for (int i = 0; i < DIM; ++i) { + if (key[i] < minIncludeBits[i] || key[i] > maxIncludeBits[i]) { + return false; + } + } + return true; + } + + [[nodiscard]] bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + // skip this for root node (bitsToIgnore == 64); NOTE(review): the test below already fires at MAX_BIT_WIDTH - 1 -- confirm the intended bound + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + bit_mask_t maskMin = MAX_MASK << bits_to_ignore; + bit_mask_t maskMax = ~maskMin; + + for (size_t i = 0; i < prefix.size(); ++i) { + scalar_t minBits = prefix[i] & maskMin; // prefix with the ignored low bits cleared (assuming MAX_MASK is all ones -- confirm) + scalar_t maxBits = prefix[i] | maskMax; // prefix with the ignored low bits set + if (maxBits < minIncludeBits[i] || minBits > maxIncludeBits[i]) { // [minBits, maxBits] misses the box in dimension i + return false; + } + } + return true; + } + + private: + const PhPoint minIncludeBits; + const PhPoint maxIncludeBits; +}; + +template < + dimension_t DIM, + typename KEY = PhPoint, + PhPreprocessor PRE = PrePostNoOp> +class PhFilterTrue { // always-true filter; the stored bounds are not read by either check + public: + PhFilterTrue(const PhPoint& minInclude, const PhPoint& maxInclude) + : minIncludeBits{minInclude}, maxIncludeBits{maxInclude} {}; + + void set(const PhPointD& minExclude, const PhPointD& maxExclude) { // NOTE(review): assigns to const members, see 
the private section below -- confirm or remove + minIncludeBits = PRE(minExclude); + maxIncludeBits = PRE(maxExclude); + } + + [[nodiscard]] bool IsEntryValid(const PhPoint& key, const int& value) const { + return true; + } + + [[nodiscard]] bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + return true; + } + + private: + const PhPoint minIncludeBits; + const PhPoint maxIncludeBits; +}; + +template < + dimension_t DIM, + typename KEY = PhPoint, + PhPreprocessor PRE = PrePostNoOp> +class PhFilterTrue2 { + public: + PhFilterTrue2() : minIncludeBits{}, maxIncludeBits{} {}; + + [[nodiscard]] bool 
IsEntryValid(const PhPoint& key, const int& value) const { + return true; + } + + [[nodiscard]] bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + return true; + } + + private: + const PhPoint minIncludeBits; + const PhPoint maxIncludeBits; +}; + +template +struct PhFilterTrue3 { // stateless filter: accepts every entry and every node + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { + return true; + } + + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + return true; + } +}; + +template +void IndexBenchmark::QueryWorld(benchmark::State& state) { + int n = 0; + // TODO This is all really weird. + // Re-enabling one of the following filters has the following effects: + // 1) Some of the filters in the first branch will affect performance of the + // second branch (?!?!?!) + // 2) Performance is often different from the second branch even if the filters are + // logically the same. + // Differences are usually between 5% and 15%, but confidence is pretty high + // if the tests at the end of the file are enabled (notice the somewhat irregular + // pattern of the tests, which shows up clearly in the results): + // Order: 0 1 0 0 1 1 0 1 + // + // Some observations: + // - Whichever test is in the 'if' part is hardly slowed down, but the 'else' + // part is clearly slowed down. 
+ // - Compiling with -falign-functions=32 or -falign-functions=64 did not help + if (type_ == 0) { + // PhPoint min{-GLOBAL_MAX, -GLOBAL_MAX, -GLOBAL_MAX}; + // PhPoint max{GLOBAL_MAX, GLOBAL_MAX, GLOBAL_MAX}; + // PhFilterAABB filter(min, max); + // PhFilterBoxIntersection filter(min, max); + // PhFilterNoOp filter; + // PhFilterTrue filter(min, max); + // PhFilterTrue2 filter; + PhFilterTrue3 filter; + auto q = tree_.begin(filter); + + // auto q = tree_.begin(); + while (q != tree_.end()) { + // just read the entry + ++q; + ++n; + } + } else { + auto q = tree_.begin(); + while (q != tree_.end()) { + // just read the entry + ++q; + ++n; + } + } + + state.counters["total_result_count"] += n; + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, filter selector +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1K, TestGenerator::CUBE, 1000, 0)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1K, TestGenerator::CUBE, 1000, 1)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1K, TestGenerator::CUBE, 1000, 0)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1K, TestGenerator::CUBE, 1000, 0)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1K, TestGenerator::CUBE, 1000, 1)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1K, TestGenerator::CUBE, 1000, 1)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1K, TestGenerator::CUBE, 1000, 0)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EXT_CU_1K, TestGenerator::CUBE, 1000, 1)->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git 
a/phtree/benchmark/find_benchmark.cc b/phtree/benchmark/find_benchmark.cc new file mode 100644 index 00000000..c8341692 --- /dev/null +++ b/phtree/benchmark/find_benchmark.cc @@ -0,0 +1,212 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +enum QueryType { + FIND, + COUNT, +}; + +/* + * Benchmark for looking up entries by their key. 
+ */ +template +class IndexBenchmark { + public: + IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities, QueryType query_type); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + int QueryWorldCount(benchmark::State& state); + int QueryWorldFind(benchmark::State& state); + + const TestGenerator data_type_; + const int num_entities_; + const QueryType query_type_; + + PhTree tree_; + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities, QueryType query_type) +: data_type_{data_type} +, num_entities_(num_entities) +, query_type_(query_type) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + int num_inner = 0; + int num_found = 0; + switch (query_type_) { + case COUNT: { + for (auto _ : state) { + num_found += QueryWorldCount(state); + ++num_inner; + } + break; + } + case FIND: { + for (auto _ : state) { + num_found += QueryWorldFind(state); + ++num_inner; + } + break; + } + } + // Moved outside of the loop because EXPENSIVE + state.counters["total_result_count"] += num_found; + state.counters["query_rate"] += num_inner; + state.counters["result_rate"] += num_found; + state.counters["avg_result_count"] += num_found; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (int i = 0; i < num_entities_; ++i) { + 
tree_.emplace(points_[i], i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + spdlog::info("World setup complete."); +} + +template +int IndexBenchmark::QueryWorldCount(benchmark::State& state) { + // Probes one key per call via count(): even positions use a stored point, odd positions a diagonal + // point that may be absent. The lookup runs outside assert() so that NDEBUG builds still time it. + static int pos = 0; + pos = (pos + 1) % num_entities_; + if (pos % 2 == 0) { + const bool found = tree_.count(points_.at(pos)) > 0; + assert(found); + return found; + } + int x = pos % GLOBAL_MAX; + return tree_.count(PhPoint({x, x, x})) > 0; +} + +template +int IndexBenchmark::QueryWorldFind(benchmark::State& state) { + // Same probe pattern as QueryWorldCount(), but goes through find() and compares against end(). + // The lookup runs outside assert() so that NDEBUG builds still time it. + static int pos = 0; + pos = (pos + 1) % num_entities_; + if (pos % 2 == 0) { + const bool found = tree_.find(points_.at(pos)) != tree_.end(); + assert(found); + return found; + } + int x = pos % GLOBAL_MAX; + return tree_.find(PhPoint({x, x, x})) != tree_.end(); +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, function_to_call +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_1K, TestGenerator::CUBE, 1000, COUNT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_10K, TestGenerator::CUBE, 10000, COUNT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_100K, TestGenerator::CUBE, 100000, COUNT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_1M, TestGenerator::CUBE, 1000000, COUNT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, FIND_CU_1K, TestGenerator::CUBE, 1000, FIND) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, FIND_CU_10K, TestGenerator::CUBE, 10000, FIND) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, FIND_CU_100K, TestGenerator::CUBE, 100000, FIND) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, FIND_CU_1M, TestGenerator::CUBE, 1000000, FIND) + ->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_generator, num_entities, function_to_call +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_1K, TestGenerator::CLUSTER, 1000, COUNT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_10K, TestGenerator::CLUSTER, 10000, COUNT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_100K, TestGenerator::CLUSTER, 100000, COUNT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_1M, TestGenerator::CLUSTER, 1000000, COUNT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, FIND_CL_1K, TestGenerator::CLUSTER, 1000, FIND) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, FIND_CL_10K, TestGenerator::CLUSTER, 10000, FIND) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, FIND_CL_100K, TestGenerator::CLUSTER, 100000, FIND) + 
->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, FIND_CL_1M, TestGenerator::CLUSTER, 1000000, FIND) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/insert_benchmark.cc b/phtree/benchmark/insert_benchmark.cc new file mode 100644 index 00000000..6be2c5cd --- /dev/null +++ b/phtree/benchmark/insert_benchmark.cc @@ -0,0 +1,200 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +enum InsertionType { + INSERT, + EMPLACE, + SQUARE_BR, +}; + +/* + * Benchmark for adding entries to the index. 
+ */ +template +class IndexBenchmark { + public: + IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + InsertionType insertionType); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + + void Insert(benchmark::State& state, PhTree& tree); + + const TestGenerator data_type_; + const int num_entities_; + const InsertionType insertion_type_; // selects insert(), emplace() or operator[] in Insert() + std::vector> points_; // filled by CreatePointData() in SetupWorld() +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities, InsertionType insertionType) +: data_type_{data_type} +, num_entities_(num_entities) +, insertion_type_(insertionType) +, points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); // log warnings and above only + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); // keep tree construction out of the timed region + auto* tree = new PhTree(); + state.ResumeTiming(); + + Insert(state, *tree); + + // pause the timer to avoid measuring deallocation + state.PauseTiming(); + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_put_count"] = benchmark::Counter(0); // incremented by Insert() + state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, PhTree& tree) { + switch (insertion_type_) { // each 
case adds all points_ through a different PhTree call + case INSERT: { + for (int i = 0; i < num_entities_; ++i) { + tree.insert(points_[i], i); + } + break; + } + case EMPLACE: { + for (int i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], i); + } + break; + 
} + case SQUARE_BR: { + for (int i = 0; i < num_entities_; ++i) { + tree[points_[i]] = i; + } + break; + } + } + + state.counters["total_put_count"] += num_entities_; + state.counters["put_rate"] += num_entities_; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities, function_to_call +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, INS_CU_1K, TestGenerator::CUBE, 1000, INSERT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_10K, TestGenerator::CUBE, 10000, INSERT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_100K, TestGenerator::CUBE, 100000, INSERT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_1M, TestGenerator::CUBE, 1000000, INSERT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_10M, TestGenerator::CUBE, 10000000, INSERT) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CU_1K, TestGenerator::CUBE, 1000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CU_10K, TestGenerator::CUBE, 10000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CU_100K, TestGenerator::CUBE, 100000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CU_1M, TestGenerator::CUBE, 1000000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CU_10M, TestGenerator::CUBE, 10000000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, SQB_CU_1K, TestGenerator::CUBE, 1000, SQUARE_BR) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, SQB_CU_10K, TestGenerator::CUBE, 10000, SQUARE_BR) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, SQB_CU_100K, TestGenerator::CUBE, 100000, SQUARE_BR) + 
->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, SQB_CU_1M, TestGenerator::CUBE, 1000000, SQUARE_BR) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, SQB_CU_10M, TestGenerator::CUBE, 10000000, SQUARE_BR) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CL_1K, TestGenerator::CLUSTER, 1000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CL_10K, TestGenerator::CLUSTER, 10000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CL_100K, TestGenerator::CLUSTER, 100000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CL_1M, TestGenerator::CLUSTER, 1000000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, EMP_CL_10M, TestGenerator::CLUSTER, 10000000, EMPLACE) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/insert_box_d_benchmark.cc b/phtree/benchmark/insert_box_d_benchmark.cc new file mode 100644 index 00000000..ffdd45c1 --- /dev/null +++ b/phtree/benchmark/insert_box_d_benchmark.cc @@ -0,0 +1,137 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree_box_d.h" +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; +const double BOX_LEN = 10; + +/* + * Benchmark for adding entries to the index. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + + void Insert(benchmark::State& state, PhTreeBoxD& tree); + + const TestGenerator data_type_; + const int num_entities_; + std::vector> boxes_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities) +: data_type_{data_type}, num_entities_(num_entities), boxes_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new PhTreeBoxD(); + state.ResumeTiming(); + + Insert(state, *tree); + + // we do this top avoid measuring deallocation + state.PauseTiming(); + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); + + state.counters["total_put_count"] = benchmark::Counter(0); + state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, PhTreeBoxD& tree) { + for 
(int i = 0; i < num_entities_; ++i) { + PhBoxD& p = boxes_[i]; + tree.emplace(p, i); + } + + state.counters["total_put_count"] += num_entities_; + state.counters["put_rate"] += num_entities_; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, INS_CU_1K, TestGenerator::CUBE, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_10K, TestGenerator::CUBE, 10000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_100K, TestGenerator::CUBE, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_1M, TestGenerator::CUBE, 1000000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_10M, TestGenerator::CUBE, 10000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_1K, TestGenerator::CLUSTER, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_10K, TestGenerator::CLUSTER, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_10M, TestGenerator::CLUSTER, 10000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/insert_d_benchmark.cc b/phtree/benchmark/insert_d_benchmark.cc new file mode 100644 index 00000000..2c7daca8 --- /dev/null +++ b/phtree/benchmark/insert_d_benchmark.cc @@ -0,0 +1,172 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree_d.h" +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +/* + * Benchmark for adding entries to the index. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + + void Insert(benchmark::State& state, PhTreeD& tree); + + const TestGenerator data_type_; + const int num_entities_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities) +: data_type_{data_type}, num_entities_(num_entities), points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new PhTreeD(); + state.ResumeTiming(); + + Insert(state, *tree); + + // we do this top avoid measuring deallocation + state.PauseTiming(); + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} 
dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_put_count"] = benchmark::Counter(0); + state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, PhTreeD& tree) { + for (int i = 0; i < num_entities_; ++i) { + PhPointD& p = points_[i]; + tree.emplace(p, i); + } + + state.counters["total_put_count"] += num_entities_; + state.counters["put_rate"] += num_entities_; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree6D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<6> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<10> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<20> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, INS_CU_1K, TestGenerator::CUBE, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_10K, TestGenerator::CUBE, 10000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_100K, TestGenerator::CUBE, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_1M, TestGenerator::CUBE, 1000000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CU_10M, TestGenerator::CUBE, 10000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_1K, TestGenerator::CLUSTER, 1000)->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_10K, TestGenerator::CLUSTER, 10000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, INS_CL_10M, TestGenerator::CLUSTER, 10000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree6D, INS_CL_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree6D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, INS_CL_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, INS_CL_100K, TestGenerator::CLUSTER, 100000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, INS_CL_1M, TestGenerator::CLUSTER, 1000000) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/knn_d_benchmark.cc 
b/phtree/benchmark/knn_d_benchmark.cc new file mode 100644 index 00000000..131de0a9 --- /dev/null +++ b/phtree/benchmark/knn_d_benchmark.cc @@ -0,0 +1,160 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree_d.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +/* + * Benchmark for k-nearest-neighbour queries. 
+ */ +template +class IndexBenchmark { + public: + IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities, int knn_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, PhPointD& center); + void CreateQuery(PhPointD& center); + + const TestGenerator data_type_; + const int num_entities_; + const double knn_result_size_; + + PhTreeD tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, TestGenerator data_type, int num_entities, int knn_result_size) +: data_type_{data_type} +, num_entities_(num_entities) +, knn_result_size_(knn_result_size) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + PhPointD center; + CreateQuery(center); + state.ResumeTiming(); + + QueryWorld(state, center); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["total_query_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] 
= benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, PhPointD& center) { + int n = 0; + for (auto q = tree_.begin_knn_query(knn_result_size_, center); q != tree_.end(); ++q) { + ++n; + } + + state.counters["total_query_count"] += 1; + state.counters["total_result_count"] += n; + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(PhPointD& center) { + for (dimension_t d = 0; d < DIM; ++d) { + center[d] = cube_distribution_(random_engine_) * GLOBAL_MAX; + } +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, KNN_CU_1_of_10K, TestGenerator::CUBE, 10000, 1) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, KNN_CU_1_of_1M, TestGenerator::CUBE, 1000000, 1) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, KNN_CU_10_of_10K, TestGenerator::CUBE, 10000, 10) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, KNN_CU_10_of_1M, TestGenerator::CUBE, 1000000, 10) + ->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, KNN_CL_1_of_10K, TestGenerator::CLUSTER, 10000, 1) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, KNN_CL_1_of_1M, TestGenerator::CLUSTER, 1000000, 1) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, KNN_CL_10_of_10K, TestGenerator::CLUSTER, 10000, 10) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, KNN_CL_10_of_1M, TestGenerator::CLUSTER, 1000000, 10) + 
->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/query_benchmark.cc b/phtree/benchmark/query_benchmark.cc new file mode 100644 index 00000000..b8734082 --- /dev/null +++ b/phtree/benchmark/query_benchmark.cc @@ -0,0 +1,178 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +/* + * Benchmark for window queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + double avg_query_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + + void QueryWorld(benchmark::State& state, PhPoint& min, PhPoint& max); + + void CreateQuery(PhPoint& min, PhPoint& max); + + const TestGenerator data_type_; + const int num_entities_; + const double avg_query_result_size_; + + constexpr int query_endge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); + }; + + PhTree tree_; + std::default_random_engine random_engine_; + std::uniform_int_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + double avg_query_result_size) +: data_type_{data_type} +, num_entities_(num_entities) +, avg_query_result_size_(avg_query_result_size) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + PhPoint min; + PhPoint max; + CreateQuery(min, max); + state.ResumeTiming(); + + QueryWorld(state, min, max); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld( + benchmark::State& state, PhPoint& min, PhPoint& max) { + int n = 0; + for (auto q = tree_.begin_query(min, max); q != tree_.end(); ++q) { + ++n; + } + + state.counters["total_result_count"] += n; + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + 
state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(PhPoint& min, PhPoint& max) { + int length = query_endge_length(); + // scale to ensure query lies within boundary + double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + for (dimension_t d = 0; d < DIM; ++d) { + scalar_t s = cube_distribution_(random_engine_); + s = s * scale; + min[d] = s; + max[d] = s + length; + } +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100.0) + ->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/query_box_d_benchmark.cc b/phtree/benchmark/query_box_d_benchmark.cc new file mode 100644 index 00000000..125dccd0 --- /dev/null +++ b/phtree/benchmark/query_box_d_benchmark.cc @@ -0,0 
+1,179 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree_box_d.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; +const double BOX_LEN = GLOBAL_MAX / 100.; + +/* + * Benchmark for window queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + double avg_query_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + + void QueryWorld(benchmark::State& state, PhPointD& min, PhPointD& max); + + void CreateQuery(PhPointD& min, PhPointD& max); + + const TestGenerator data_type_; + const int num_entities_; + const double avg_query_result_size_; + + constexpr int query_endge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); + }; + + PhTreeBoxD tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> boxes_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + double avg_query_result_size) +: data_type_{data_type} +, num_entities_(num_entities) +, avg_query_result_size_(avg_query_result_size) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, boxes_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + PhPointD min; + PhPointD max; + CreateQuery(min, max); + state.ResumeTiming(); + + QueryWorld(state, min, max); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(boxes_[i], i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld( + benchmark::State& state, PhPointD& min, PhPointD& max) { + int n = 0; + for (auto q = tree_.begin_query(min, max); q != tree_.end(); ++q) { + ++n; + } + + state.counters["total_result_count"] += n; + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + 
state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(PhPointD& min, PhPointD& max) { + int length = query_endge_length(); + // scale to ensure query lies within boundary + double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + for (dimension_t d = 0; d < DIM; ++d) { + scalar_t s = cube_distribution_(random_engine_); + s = s * scale; + min[d] = s; + max[d] = s + length; + } +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100.0) + ->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/query_d_benchmark.cc b/phtree/benchmark/query_d_benchmark.cc new file mode 100644 index 00000000..1cd8202e --- /dev/null +++ b/phtree/benchmark/query_d_benchmark.cc @@ -0,0 +1,178 @@ +/* + 
* Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree_d.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +/* + * Benchmark for window queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + double avg_query_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + + void QueryWorld(benchmark::State& state, PhPointD& min, PhPointD& max); + + void CreateQuery(PhPointD& min, PhPointD& max); + + const TestGenerator data_type_; + const int num_entities_; + const double avg_query_result_size_; + + constexpr int query_endge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); + }; + + PhTreeD tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + double avg_query_result_size) +: data_type_{data_type} +, num_entities_(num_entities) +, avg_query_result_size_(avg_query_result_size) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + PhPointD min; + PhPointD max; + CreateQuery(min, max); + state.ResumeTiming(); + + QueryWorld(state, min, max); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld( + benchmark::State& state, PhPointD& min, PhPointD& max) { + int n = 0; + for (auto q = tree_.begin_query(min, max); q != tree_.end(); ++q) { + ++n; + } + + state.counters["total_result_count"] += n; + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + 
state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(PhPointD& min, PhPointD& max) { + int length = query_endge_length(); + // scale to ensure query lies within boundary + double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + for (dimension_t d = 0; d < DIM; ++d) { + scalar_t s = cube_distribution_(random_engine_); + s = s * scale; + min[d] = s; + max[d] = s + length; + } +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CUBE +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1K, TestGenerator::CUBE, 1000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_10K, TestGenerator::CUBE, 10000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_100K, TestGenerator::CUBE, 100000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100.0) + ->Unit(benchmark::kMillisecond); + +// index type, scenario name, data_type, num_entities, query_result_size +// PhTree 3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1K, TestGenerator::CLUSTER, 1000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_100K, TestGenerator::CLUSTER, 100000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, WQ_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100.0) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_box_d_benchmark.cc b/phtree/benchmark/update_box_d_benchmark.cc new file mode 100644 index 00000000..e787475a --- /dev/null +++ b/phtree/benchmark/update_box_d_benchmark.cc @@ -0,0 
+1,201 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree_box_d.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; +const double BOX_LEN = 10; + +template +struct UpdateOp { + scalar_t id_; + PhBoxD old_; + PhBoxD new_; +}; + +/* + * Benchmark for updating the position of entries. 
+ */ +template +class IndexBenchmark { + public: + IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + int updates_per_round, + double move_distance); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void BuildUpdate(std::vector>& updates); + void UpdateWorld(benchmark::State& state, std::vector>& updates); + + const TestGenerator data_type_; + const int num_entities_; + const int updates_per_round_; + const double move_distance_; + + PhTreeBoxD tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> boxes_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + int updates_per_round, + double move_distance) +: data_type_{data_type} +, num_entities_(num_entities) +, updates_per_round_(updates_per_round) +, move_distance_(move_distance) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, boxes_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + std::vector> updates; + updates.reserve(updates_per_round_); + for (auto _ : state) { + state.PauseTiming(); + BuildUpdate(updates); + state.ResumeTiming(); + + UpdateWorld(state, updates); + + state.PauseTiming(); + for (auto& update : updates) { + boxes_[update.id_] = update.new_; + } + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::BuildUpdate(std::vector>& updates) { + // Use Delta to avoid moving points in insertion order (not that it matters for the PH-Tree, but + // we may test other trees as well. 
+ int box_id_increment = num_entities_ / updates_per_round_; // int division + int box_id = 0; + updates.clear(); + for (size_t i = 0; i < updates_per_round_; ++i) { + assert(box_id >= 0); + assert(box_id < boxes_.size()); + auto& old_box = boxes_[box_id]; + auto update = UpdateOp{box_id, old_box, old_box}; + for (dimension_t d = 0; d < DIM; ++d) { + update.new_.min()[d] += move_distance_; + update.new_.max()[d] += move_distance_; + } + updates.emplace_back(update); + box_id += box_id_increment; + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(boxes_[i], i); + } + + state.counters["total_upd_count"] = benchmark::Counter(0); + state.counters["update_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::UpdateWorld( + benchmark::State& state, std::vector>& updates) { + size_t initial_tree_size = tree_.size(); + for (auto& update : updates) { + size_t result_erase = tree_.erase(update.old_); + auto result_emplace = tree_.emplace(update.new_, update.id_); + assert(result_erase == 1); + assert(result_emplace.second); + } + + // For normal indexes we expect num_entities==size(), but the PhTree> index has + // size() as low as (num_entities-duplicates). + if (tree_.size() > num_entities_ || tree_.size() < initial_tree_size - updates_per_round_) { + spdlog::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); + } + + state.counters["total_upd_count"] += updates_per_round_; + state.counters["update_rate"] += updates_per_round_; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<4> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, updates_per_round, move_distance +// PhTree3D CUBE +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +// PhTree3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_d_benchmark.cc b/phtree/benchmark/update_d_benchmark.cc new file mode 100644 index 00000000..19865e9a --- /dev/null +++ b/phtree/benchmark/update_d_benchmark.cc @@ -0,0 +1,200 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree_d.h" +#include +#include +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +template +struct UpdateOp { + scalar_t id_; + PhPointD old_; + PhPointD new_; +}; + +/* + * Benchmark for updating the position of entries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + int updates_per_round, + double move_distance); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void BuildUpdate(std::vector>& updates); + void UpdateWorld(benchmark::State& state, std::vector>& updates); + + const TestGenerator data_type_; + const int num_entities_; + const int updates_per_round_; + const double move_distance_; + + PhTreeD tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, + TestGenerator data_type, + int num_entities, + int updates_per_round, + double move_distance) +: data_type_{data_type} +, num_entities_(num_entities) +, updates_per_round_(updates_per_round) +, move_distance_(move_distance) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::warn); + + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + std::vector> updates; + updates.reserve(updates_per_round_); + for (auto _ : state) { + state.PauseTiming(); + 
BuildUpdate(updates); + state.ResumeTiming(); + + UpdateWorld(state, updates); + + state.PauseTiming(); + for (auto& update : updates) { + points_[update.id_] = update.new_; + } + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::BuildUpdate(std::vector>& updates) { + // Use Delta to avoid moving points in insertion order (not that it matters for the PH-Tree, but + // we may test other trees as well. + int point_id_increment = num_entities_ / updates_per_round_; // int division + int point_id = 0; + updates.clear(); + for (size_t i = 0; i < updates_per_round_; ++i) { + assert(point_id >= 0); + assert(point_id < points_.size()); + auto& old_point = points_[point_id]; + auto update = UpdateOp{point_id, old_point, {}}; + for (dimension_t d = 0; d < DIM; ++d) { + update.new_[d] = old_point[d] + move_distance_; + } + update.new_[3] = 0; + updates.emplace_back(update); + point_id += point_id_increment; + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + spdlog::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], i); + } + + state.counters["total_upd_count"] = benchmark::Counter(0); + state.counters["update_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + spdlog::info("World setup complete."); +} + +template +void IndexBenchmark::UpdateWorld( + benchmark::State& state, std::vector>& updates) { + size_t initial_tree_size = tree_.size(); + for (auto& update : updates) { + size_t result_erase = tree_.erase(update.old_); + auto result_emplace = tree_.emplace(update.new_, update.id_); + assert(result_erase == 1); + assert(result_emplace.second); + } + + // For normal indexes we expect num_entities==size(), but the PhTree> index has + // size() as low as (num_entities-duplicates). 
+ if (tree_.size() > num_entities_ || tree_.size() < initial_tree_size - updates_per_round_) { + spdlog::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); + } + + state.counters["total_upd_count"] += updates_per_round_; + state.counters["update_rate"] += updates_per_round_; +} + +} // namespace + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, updates_per_round, move_distance +// PhTree3D CUBE +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +// PhTree3D CLUSTER +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000, 100, 10.) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100, 10.) 
+ ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/common/BUILD b/phtree/common/BUILD new file mode 100644 index 00000000..e4927f35 --- /dev/null +++ b/phtree/common/BUILD @@ -0,0 +1,126 @@ +package(default_visibility = ["//visibility:private"]) + +cc_library( + name = "common", + hdrs = [ + "ph_base_types.h", + "ph_bits.h", + "ph_common.h", + "ph_distance.h", + "ph_filter.h", + "ph_flat_array_map.h", + "ph_flat_sparse_map.h", + "ph_preprocessor.h", + "ph_tree_debug_helper.h", + "ph_tree_stats.h", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + ], +) + +cc_test( + name = "base_types_test", + timeout = "long", + srcs = [ + "ph_base_types_test.cc", + ], + linkstatic = True, + deps = [ + ":common", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "bits_test", + timeout = "long", + srcs = [ + "ph_bits_test.cc", + ], + linkstatic = True, + deps = [ + ":common", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "common_test", + timeout = "long", + srcs = [ + "ph_common_test.cc", + ], + linkstatic = True, + deps = [ + ":common", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "distance_test", + timeout = "long", + srcs = [ + "ph_distance_test.cc", + ], + linkstatic = True, + deps = [ + ":common", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "filter_test", + timeout = "long", + srcs = [ + "ph_filter_test.cc", + ], + linkstatic = True, + deps = [ + ":common", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "flat_array_map_test", + timeout = "long", + srcs = [ + "ph_flat_array_map_test.cc", + ], + linkstatic = True, + deps = [ + ":common", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "flat_sparse_map_test", + timeout = "long", + srcs = [ + "ph_flat_sparse_map_test.cc", + ], + linkstatic = True, + deps = [ + ":common", + "//phtree/testing/gtest_main", + ], +) + +cc_test( + name = "preprocessor_test", + timeout = "long", + srcs = [ + 
"ph_preprocessor_test.cc", + ], + linkstatic = True, + deps = [ + ":common", + "//phtree/testing/gtest_main", + ], +) diff --git a/phtree/common/CMakeLists.txt b/phtree/common/CMakeLists.txt new file mode 100644 index 00000000..0cd11641 --- /dev/null +++ b/phtree/common/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.14) + +target_sources(phtree + PRIVATE + ph_common.h + ph_base_types.h + ph_bits.h + ph_distance.h + ph_filter.h + ph_flat_array_map.h + ph_flat_sparse_map.h + ph_preprocessor.h + ph_tree_debug_helper.h + ph_tree_stats.h + ) diff --git a/phtree/common/README.md b/phtree/common/README.md new file mode 100644 index 00000000..258cc1be --- /dev/null +++ b/phtree/common/README.md @@ -0,0 +1,2 @@ +### Utilities used by all PH-Tree implementations + diff --git a/phtree/common/ph_base_types.h b/phtree/common/ph_base_types.h new file mode 100644 index 00000000..030711cb --- /dev/null +++ b/phtree/common/ph_base_types.h @@ -0,0 +1,171 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_BASE_TYPES_H +#define PHTREE_COMMON_BASE_TYPES_H + +#include +#include +#include +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via ph_common.h. + * + * This file contains specifications for various types used in the PH-Tree, including + * PhPoint, PhPointD and PhPointBox. 
// Number of bits for 'scalar_t'. Note that 'digits' does _not_ include the sign bit, so e.g.
// int64_t has 63 `digits`; however, we need all bits, i.e. 64.
void max(const std::array& new_max) { + max_ = new_max; + } + + auto operator==(const PhBoxD& other) const -> bool { + return min_ == other.min_ && max_ == other.max_; + } + + private: + PhPointD min_; + PhPointD max_; +}; + +struct HashPhBoxD { + template + std::size_t operator()(const PhBoxD& x) const { + std::size_t hash_val = 0; + for (dimension_t i = 0; i < DIM; i++) { + hash_val = std::hash{}(x.min_[i]) ^ (hash_val * 31); + hash_val = std::hash{}(x.max_[i]) ^ (hash_val * 31); + } + return hash_val; + } +}; + +template +std::ostream& operator<<(std::ostream& os, const PhPoint& data) { + assert(DIM >= 1); + os << "["; + for (dimension_t i = 0; i < DIM - 1; i++) { + os << data[i] << ","; + } + os << data[DIM - 1] << "]"; + return os; +} + +template +std::ostream& operator<<(std::ostream& os, const PhPointD& data) { + assert(DIM >= 1); + os << "["; + for (dimension_t i = 0; i < DIM - 1; i++) { + os << data[i] << ","; + } + os << data[DIM - 1] << "]"; + return os; +} + +template +std::ostream& operator<<(std::ostream& os, const PhBoxD& data) { + assert(DIM >= 1); + os << "["; + for (dimension_t i = 0; i < DIM - 1; i++) { + os << data[i] << ","; + } + os << data[DIM - 1] << "]:["; + for (dimension_t i = 0; i < DIM - 1; i++) { + os << data[DIM + i] << ","; + } + os << data[2 * DIM - 1] << "]"; + return os; +} + +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_BASE_TYPES_H diff --git a/phtree/common/ph_base_types_test.cc b/phtree/common/ph_base_types_test.cc new file mode 100644 index 00000000..048ceccb --- /dev/null +++ b/phtree/common/ph_base_types_test.cc @@ -0,0 +1,38 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ph_base_types.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeBaseTypesTest, PhBoxD) { + PhBoxD<3> box({1, 2, 3}, {4, 5, 6}); + + for (int i = 0; i < 3; i++) { + ASSERT_EQ(box.min()[i], i + 1); + ASSERT_EQ(box.max()[i], i + 4); + } + + // try assigning coordinates + box.min() = {7, 8, 9}; + box.max() = {10, 11, 12}; + for (int i = 0; i < 3; i++) { + ASSERT_EQ(box.min()[i], i + 7); + ASSERT_EQ(box.max()[i], i + 10); + } +} diff --git a/phtree/common/ph_bits.h b/phtree/common/ph_bits.h new file mode 100644 index 00000000..ba264e2c --- /dev/null +++ b/phtree/common/ph_bits.h @@ -0,0 +1,166 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PHTREE_PH_COMMON_BITS_H +#define PHTREE_PH_COMMON_BITS_H + +#include "ph_base_types.h" +#include + +#if defined(__clang__) +#include +#elif defined(__GNUC__) +#include +#elif defined(_MSC_VER) +// https://docs.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list?view=vs-2019 +#include +#endif + +/* + * PLEASE do not include this file directly, it is included via ph_common.h. + * + * This file defines how certain bit level operations are implemented, such as: + * - count leading zeroes + * - count trailing zeros + */ +namespace improbable::phtree { + +namespace { +inline bit_width_t NumberOfLeadingZeros(std::uint64_t bit_string) { + if (bit_string == 0) { + return 64; + } + bit_width_t n = 1; + std::uint32_t x = (bit_string >> 32); + if (x == 0) { + n += 32; + x = (int)bit_string; + } + if (x >> 16 == 0) { + n += 16; + x <<= 16; + } + if (x >> 24 == 0) { + n += 8; + x <<= 8; + } + if (x >> 28 == 0) { + n += 4; + x <<= 4; + } + if (x >> 30 == 0) { + n += 2; + x <<= 2; + } + n -= x >> 31; + return n; +} + +inline bit_width_t NumberOfLeadingZeros(std::int32_t bit_string) { + if (bit_string == 0) { + return 32; + } + bit_width_t n = 1; + if (bit_string >> 16 == 0) { + n += 16; + bit_string <<= 16; + } + if (bit_string >> 24 == 0) { + n += 8; + bit_string <<= 8; + } + if (bit_string >> 28 == 0) { + n += 4; + bit_string <<= 4; + } + if (bit_string >> 30 == 0) { + n += 2; + bit_string <<= 2; + } + n -= bit_string >> 31; + return n; +} + +inline bit_width_t NumberOfTrailingZeros(std::uint64_t bit_string) { + if (bit_string == 0) { + return 64; + } + uint32_t x = 0; + uint32_t y = 0; + uint16_t n = 63; + y = (std::uint32_t)bit_string; + if (y != 0) { + n = n - 32; + x = y; + } else { + x = (std::uint32_t)(bit_string >> 32); + } + y = x << 16; + if (y != 0) { + n = n - 16; + x = y; + } + y = x << 8; + if (y != 0) { + n = n - 8; + x = y; + } + y = x << 4; + if (y != 0) { + n = n - 4; + x = y; + } + y = x << 2; + if (y != 0) { + n = n - 2; + x = y; + } + 
return n - ((x << 1) >> 31); +} +} // namespace + +#if defined(__clang__) +#define CountLeadingZeros(bits) NumberOfLeadingZeros(bits) +//#define CountLeadingZeros(bits) __lzcnt64(bits) +//#define CountTrailingZeros(bits) NumberOfTrailingZeros(bits) +#define CountTrailingZeros(bits) __tzcnt_u64(bits) + +#elif defined(__GNUC__) +#define CountLeadingZeros(bits) NumberOfLeadingZeros(bits) + // TODO this works only on 64 bit arch, otherwise __builtin_clzll (double 'l') +//#define CountLeadingZeros(bits) __builtin_clzl(bits) +#define CountTrailingZeros(bits) NumberOfTrailingZeros(bits) + // TODO this works only on 64 bit arch, otherwise __builtin_ctzll (double 'l') +//#define CountTrailingZeros(bits) __builtin_ctzl(bits) + +#elif defined(_MSC_VER) +// https://docs.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list?view=vs-2019 +// static inline size_t CountLeadingZeros(std::uint64_t bits) { +// // TODO there is alo __lzcnt_u64 (AMD/INTEL) +//// TODO this is MS: -> #include +// #define CountTrailingZeros(bits) __lzcnt64(bits); +#define CountLeadingZeros(bits) NumberOfLeadingZeros(bits) +#define CountTrailingZeros(bits) NumberOfTrailingZeros(bits) +//#define CountTrailingZeros(bits) _tzcnt_u64(bits); + +#else +#define CountLeadingZeros(bits) NumberOfLeadingZeros(bits) +#define CountTrailingZeros(bits) NumberOfTrailingZeros(bits) +#endif + +} // namespace improbable::phtree + +#endif // PHTREE_PH_COMMON_BITS_H diff --git a/phtree/common/ph_bits_test.cc b/phtree/common/ph_bits_test.cc new file mode 100644 index 00000000..a451aba6 --- /dev/null +++ b/phtree/common/ph_bits_test.cc @@ -0,0 +1,40 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ph_bits.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeBitsTest, CountLeadingZeros) { + std::uint64_t x = 1; + x <<= 63; + for (int i = 0; i < 64; i++) { + int ctz = CountLeadingZeros(x); + ASSERT_EQ(i, ctz); + x >>= 1; + } +} + +TEST(PhTreeBitsTest, CountTrailingZeros) { + std::uint64_t x = 1; + for (int i = 0; i < 64; i++) { + int ctz = CountTrailingZeros(x); + ASSERT_EQ(i, ctz); + x <<= 1; + } +} diff --git a/phtree/common/ph_common.h b/phtree/common/ph_common.h new file mode 100644 index 00000000..1c3e6b9d --- /dev/null +++ b/phtree/common/ph_common.h @@ -0,0 +1,141 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PHTREE_PH_COMMON_H +#define PHTREE_PH_COMMON_H + +#include "ph_base_types.h" +#include "ph_bits.h" +#include "ph_distance.h" +#include "ph_filter.h" +#include "ph_flat_array_map.h" +#include "ph_flat_sparse_map.h" +#include "ph_preprocessor.h" +#include "ph_tree_stats.h" +#include +#include +#include +#include +#include + +namespace improbable::phtree { + +// This is the single-point inclusion file for common types/function/... for the PH-Tree. +// 'single-point inclusion' meaning that including it provides all relevant types/functions/... . + +// ************************************************************************ +// Bits +// ************************************************************************ + +/* + * Encode the bits at the given position of all attributes into a hyper-cube address. + * Currently, the first attribute determines the left-most (high-value) bit of the address + * (left to right ordered) + * + * @param valSet vector + * @param postfix_len the postfix length + * @returns Encoded HC position, which is the index in the array if the entries would be stored in + * an array. + */ +template +static hc_pos_t CalcPosInArray(const PhPoint& valSet, bit_width_t postfix_len) { + // n=DIM, i={0..n-1} + // i = 0 : |0|1|0|1|0|1|0|1| + // i = 1 : | 0 | 1 | 0 | 1 | + // i = 2 : | 0 | 1 | + // len = 2^n + // Following formula was for inverse ordering of current ordering... 
+ // pos = sum (i=1..n, len/2^i) = sum (..., 2^(n-i)) + bit_mask_t valMask = bit_mask_t(1) << postfix_len; + hc_pos_t pos = 0; + for (dimension_t i = 0; i < DIM; i++) { + pos <<= 1; + // set pos-bit if bit is set in value + pos |= (valMask & valSet[i]) >> postfix_len; + } + return pos; +} + +template +static bool IsInRange( + const PhPoint& candidate, const PhPoint& range_min, const PhPoint& range_max) { + for (dimension_t i = 0; i < DIM; i++) { + scalar_t k = candidate[i]; + if (k < range_min[i] || k > range_max[i]) { + return false; + } + } + return true; +} + +/* + * @param v1 key 1 + * @param v2 key 2 + * @return the number of diverging bits. For each dimension we determine the most significant bit + * where the two keys differ. We then count this bit plus all trailing bits (even if individual bits + * may be the same). Then we return the highest number of diverging bits found in any dimension of + * the two keys. In case of key1==key2 we return 0. In other words, for 64 bit keys, we return 64 + * minus the number of leading bits that are common in both keys across all dimensions. + */ +template +static bit_width_t NumberOfDivergingBits(const PhPoint& v1, const PhPoint& v2) { + // write all differences to diff, we just check diff afterwards + bit_mask_t diff = 0; + for (dimension_t i = 0; i < DIM; i++) { + diff |= (v1[i] ^ v2[i]); + } + return MAX_BIT_WIDTH - CountLeadingZeros(diff); +} + +template +static bool KeyEquals(const PhPoint& key_a, const PhPoint& key_b, bit_mask_t mask) { + for (dimension_t i = 0; i < DIM; i++) { + if (((key_a[i] ^ key_b[i]) & mask) != 0) { + return false; + } + } + return true; +} + +// ************************************************************************ +// String helpers +// ************************************************************************ + +static inline std::string ToBinary(scalar_t l, bit_width_t width = MAX_BIT_WIDTH) { + std::ostringstream sb; + // long mask = DEPTH < 64 ? 
(1<<(DEPTH-1)) : 0x8000000000000000L; + for (bit_width_t i = 0; i < width; i++) { + bit_mask_t mask = (bit_mask_t(1) << (width - i - 1)); + sb << ((l & mask) != 0 ? "1" : "0"); + if ((i + 1) % 8 == 0 && (i + 1) < width) { + sb << '.'; + } + } + return sb.str(); +} + +template +static inline std::string ToBinary(const PhPoint& la, bit_width_t width = MAX_BIT_WIDTH) { + std::ostringstream sb; + for (dimension_t i = 0; i < DIM; ++i) { + sb << ToBinary(la[i], width) << ", "; + } + return sb.str(); +} + +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_H diff --git a/phtree/common/ph_common_test.cc b/phtree/common/ph_common_test.cc new file mode 100644 index 00000000..81ffb00e --- /dev/null +++ b/phtree/common/ph_common_test.cc @@ -0,0 +1,55 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ph_common.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeCommonTest, NumberOfDivergingBits) { + double d1 = -55; + double d2 = 7; + + scalar_t l1 = Preprocessors::ToSortableLong(d1); + scalar_t l2 = Preprocessors::ToSortableLong(d2); + scalar_t l_min = std::numeric_limits::lowest(); + scalar_t l_max = std::numeric_limits::max(); + + bit_mask_t x = NumberOfDivergingBits(PhPoint<2>({l1, l1}), PhPoint<2>({l2, l2})); + ASSERT_EQ(64, x); + x = NumberOfDivergingBits(PhPoint<2>({-1, -1}), PhPoint<2>({l_min, l_min})); + ASSERT_EQ(63, x); + x = NumberOfDivergingBits(PhPoint<2>({1, 1}), PhPoint<2>({l_max, l_max})); + ASSERT_EQ(63, x); + + x = NumberOfDivergingBits(PhPoint<2>({l1, l2}), PhPoint<2>({l1, l2})); + ASSERT_EQ(0, x); + + // PhPointD{679.186, 519.897, 519.897} + PhPoint<3> p1{0x4085397c9ffc65e8, 0x40803f2cf7158e9a, 0x40803f2cf7158e9a}; + // PhPointD{35.5375, 8.69049, 8.69049} + PhPoint<3> p2{0x4041c4ce0e8a359e, 0x40216187a0776fd5, 0x40216187a0776fd5}; + x = NumberOfDivergingBits(p1, p2); + ASSERT_EQ(56, x); + + // PhPointD{132.406, 219.74, 219.74} + PhPoint<3> p20{0x40608cffffe5b480, 0x406b77aff096adc1, 0x406b77aff096adc1}; + // PhPointD{679.186, 519.897, 519.897} + PhPoint<3> p21{0x4085397c9ffc65e8, 0x40803f2cf7158e9a, 0x40803f2cf7158e9a}; + x = NumberOfDivergingBits(p20, p21); + ASSERT_EQ(56, x); +} diff --git a/phtree/common/ph_distance.h b/phtree/common/ph_distance.h new file mode 100644 index 00000000..367ac0ca --- /dev/null +++ b/phtree/common/ph_distance.h @@ -0,0 +1,88 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_PH_COMMON_DISTANCES_H +#define PHTREE_PH_COMMON_DISTANCES_H + +#include "ph_base_types.h" +#include "ph_bits.h" +#include "ph_flat_array_map.h" +#include "ph_flat_sparse_map.h" +#include "ph_preprocessor.h" +#include "ph_tree_stats.h" +#include +#include +#include +#include +#include + +namespace improbable::phtree { + +/* + * The PH-Tree supports different distance functions. These can be used + * by the kNN (k nearest neighbor) query facility. + * + * The implementations in this file are: + * - PhDistanceDoubleEuclidean: Euclidean distance for PhPointD + * - PhDistanceDoubleL1: L1 distance (Manhattan distance / taxi distance) for PhPointD + * - PhDistanceLongEuclidean: Euclidean distance for PhPoint + */ + +template +struct PhDistanceDoubleEuclidean { + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +template +struct PhDistanceDoubleL1 { + double operator()(const PhPointD& v1, const PhPointD& v2) const { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(v1[i] - v2[i]); + } + return sum; + }; +}; + +template +struct PhDistanceLongEuclidean { + double operator()(const PhPoint& v1, const PhPoint& v2) const { + // Subtraction of large long integers can easily overflow because the distance can be + // larger than the value range.
Such large values are common when using the IEEE + // double-to-long converter, however, if we use a converter we should use a distance + // function that processes converted values. + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + assert( + (v1[i] >= 0) == (v2[i] >= 0) || + double(v1[i]) - double(v2[i]) < double(std::numeric_limits::max())); + double d2 = double(v1[i] - v2[i]); + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +} // namespace improbable::phtree + +#endif // PHTREE_PH_COMMON_DISTANCES_H diff --git a/phtree/common/ph_distance_test.cc b/phtree/common/ph_distance_test.cc new file mode 100644 index 00000000..a0313495 --- /dev/null +++ b/phtree/common/ph_distance_test.cc @@ -0,0 +1,36 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ph_common.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeDistanceTest, DoubleEuclidean) { + auto distance = PhDistanceDoubleEuclidean<2>(); + ASSERT_DOUBLE_EQ(5, distance({-1, -1}, {2, 3})); +} + +TEST(PhTreeDistanceTest, DoubleL1) { + auto distance = PhDistanceDoubleL1<2>(); + ASSERT_DOUBLE_EQ(7, distance({-1, -1}, {2, 3})); +} + +TEST(PhTreeDistanceTest, LongEuclidean) { + auto distance = PhDistanceLongEuclidean<2>(); + ASSERT_DOUBLE_EQ(5, distance({-1, -1}, {2, 3})); +} diff --git a/phtree/common/ph_filter.h b/phtree/common/ph_filter.h new file mode 100644 index 00000000..b74805b2 --- /dev/null +++ b/phtree/common/ph_filter.h @@ -0,0 +1,135 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_PH_COMMON_FILTERS_H +#define PHTREE_PH_COMMON_FILTERS_H + +#include "ph_base_types.h" +#include "ph_bits.h" +#include "ph_flat_array_map.h" +#include "ph_flat_sparse_map.h" +#include "ph_preprocessor.h" +#include "ph_tree_stats.h" +#include +#include +#include +#include +#include + +namespace improbable::phtree { + +/* + * Any iterator that has a filter defined will traverse nodes or return values if and only if the + * filter function returns 'true'. 
The filter functions are called for every node and every entry + * (note: internally, nodes are also stored in entries, but these entries will be passed to the + * filter for nodes) that the iterator encounters. By implication, it will never call the filter + * function for nodes or entries if their respective parent node has already been rejected. + * + * There are separate filter functions for nodes and for key/value entries. + * + * Every filter needs to provide two functions: + * - bool IsEntryValid(const PhPoint& key, const T& value); + * This function is called for every key/value pair that the query encounters. The function + * should return 'true' iff the key/value should be added to the query result. + * The parameters are the key and value of the key/value pair. + * - bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore); + * This function is called for every node that the query encounters. The function should + * return 'true' if the node should be traversed and searched for potential results. + * The parameters are the prefix of the node and the number of least significant bits of the + * prefix that can (and should) be ignored. The bits of the prefix that should be ignored can + * have any value. + */ + +/* + * The no-op filter is the default filter for the PH-Tree. It always returns 'true'. + */ +template +struct PhFilterNoOp { + /* + * @param key The key/coordinate of the entry. + * @param value The value of the entry. + * @returns This default implementation always returns `true`. + */ + constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { + return true; + } + + /* + * @param prefix The prefix of node. Any coordinate in the nodes shares this prefix. + * @param bits_to_ignore The number of bits of the prefix that should be ignored because they + * are NOT the same for all coordinates in the node.
For example, assuming 64bit values, if the + * node represents coordinates that all share the first 10 bits of the prefix, then the value of + * bits_to_ignore is 64-10=54. + * @returns This default implementation always returns `true`. + */ + constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + return true; + } +}; + +/* + * The AABB filter can be used to query a point tree for an axis aligned bounding box (AABB). + * The result is equivalent to that of the 'begin_query(...)' function. + */ +template +class PhFilterAABB { + public: + PhFilterAABB(const PhPoint& minInclude, const PhPoint& maxInclude) + : minIncludeBits{minInclude}, maxIncludeBits{maxInclude} {}; + + /* + * Allows resizing/shifting the AABB while iterating over the tree; both bounds are inclusive. + */ + void set(const PhPoint& minInclude, const PhPoint& maxInclude) { + minIncludeBits = minInclude; + maxIncludeBits = maxInclude; + } + + [[nodiscard]] bool IsEntryValid(const PhPoint& key, const T& value) const { + for (dimension_t i = 0; i < DIM; ++i) { + if (key[i] < minIncludeBits[i] || key[i] > maxIncludeBits[i]) { + return false; + } + } + return true; + } + + [[nodiscard]] bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + // Always traverse the root node; this exit also keeps the shift count below MAX_BIT_WIDTH - 1 + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + bit_mask_t maskMin = MAX_MASK << bits_to_ignore; + bit_mask_t maskMax = ~maskMin; + + for (size_t i = 0; i < prefix.size(); ++i) { + scalar_t minBits = prefix[i] & maskMin; + scalar_t maxBits = prefix[i] | maskMax; + if (maxBits < minIncludeBits[i] || minBits > maxIncludeBits[i]) { + return false; + } + } + return true; + } + + private: + PhPoint minIncludeBits; // not const: set() reassigns the bounds + PhPoint maxIncludeBits; // not const: set() reassigns the bounds +}; + +} // namespace improbable::phtree + +#endif // PHTREE_PH_COMMON_FILTERS_H diff --git a/phtree/common/ph_filter_test.cc b/phtree/common/ph_filter_test.cc new file mode 100644 index 00000000..c77a4c62
--- /dev/null +++ b/phtree/common/ph_filter_test.cc @@ -0,0 +1,42 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ph_common.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeFilterTest, BoxFilterTest) { + auto filter = PhFilterAABB<2, int*>({3, 3}, {7, 7}); + // root is always valid + ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); + // valid because node encompasses the AABB + ASSERT_TRUE(filter.IsNodeValid({1, 1}, 10)); + // valid + ASSERT_TRUE(filter.IsNodeValid({7, 7}, 1)); + // invalid + ASSERT_FALSE(filter.IsNodeValid({88, 5}, 1)); + + ASSERT_TRUE(filter.IsEntryValid({3, 7}, nullptr)); + ASSERT_FALSE(filter.IsEntryValid({2, 8}, nullptr)); +} + +TEST(PhTreeFilterTest, FilterNoOpSmokeTest) { + auto filter = PhFilterNoOp<3, int>(); + ASSERT_TRUE(filter.IsNodeValid({3, 7, 2}, 10)); + ASSERT_TRUE(filter.IsEntryValid({3, 7, 2}, 10)); +} \ No newline at end of file diff --git a/phtree/common/ph_flat_array_map.h b/phtree/common/ph_flat_array_map.h new file mode 100644 index 00000000..27633b1f --- /dev/null +++ b/phtree/common/ph_flat_array_map.h @@ -0,0 +1,212 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_FLAT_ARRAY_MAP_H +#define PHTREE_COMMON_FLAT_ARRAY_MAP_H + +#include "ph_bits.h" +#include +#include + +/* + * PLEASE do not include this file directly, it is included via ph_common.h. + * + * This file contains the array_map implementation, which is used in low-dimensional nodes in the + * PH-Tree. + */ +namespace improbable::phtree { + +namespace { +template +class PhFlatMapIterator; + +template +using PhFlatMapPair = std::pair; + +using bit_string_t = std::uint64_t; +constexpr bit_string_t U64_ONE = bit_string_t(1); +} // namespace + +/* + * The array_map is a flat map implementation that uses an array of SIZE=2^DIM. The key is + * effectively the position in the array. + * + * It has O(1) insertion/removal time complexity, but O(2^DIM) space complexity, so it is best used + * when DIM is low and/or the map is known to have a high fill ratio. + */ +template +class array_map { + friend PhFlatMapIterator; + static_assert(SIZE <= 64); // or else we need to adapt 'occupancy' + static_assert(SIZE > 0); + + public: + explicit array_map() : occupancy{0}, size_{0} {}; + + [[nodiscard]] auto find(size_t index) const { + return occupied(index) ?
PhFlatMapIterator{index, *this} : end(); + } + + [[nodiscard]] auto lower_bound(size_t index) const { + size_t index2 = lower_bound_index(index); + if (index2 < SIZE) { + return PhFlatMapIterator{index2, *this}; + } + return end(); + } + + [[nodiscard]] auto begin() const { + size_t index = CountTrailingZeros(occupancy); + // Assert index points to a valid position or outside the map if the map is empty + assert((size_ == 0 && index >= SIZE) || occupied(index)); + return PhFlatMapIterator{index < SIZE ? index : SIZE, *this}; + } + + [[nodiscard]] auto cbegin() const { + size_t index = CountTrailingZeros(occupancy); + // Assert index points to a valid position or outside the map if the map is empty + assert((size_ == 0 && index >= SIZE) || occupied(index)); + return PhFlatMapIterator{index < SIZE ? index : SIZE, *this}; + } + + [[nodiscard]] auto end() const { + return PhFlatMapIterator{SIZE, *this}; + } + + auto emplace(size_t index, T&& value) { + return emplace_base(index, std::forward(value)); + } + + template + auto emplace(_Args&&...
__args) { + return emplace_base(std::forward<_Args>(__args)...); + } + + bool erase(size_t index) { + if (occupied(index)) { + occupied(index, false); + --size_; + data_[index].second = T(); // release the value; the slot itself is destroyed with the map + return true; + } + return false; + } + + bool erase(PhFlatMapIterator& iterator) { + size_t index = iterator.first; + if (occupied(index)) { + occupied(index, false); + --size_; + data_[index].second = T(); // release the value; the slot itself is destroyed with the map + return true; + } + return false; + } + + [[nodiscard]] size_t size() const { + return size_; + } + + private: + std::pair*, bool> emplace_base(size_t index, T&& value) { + if (!occupied(index)) { + data_[index].first = index; + data_[index].second = std::forward(value); + ++size_; + occupied(index, true); + return {&data_[index], true}; + } + return {&data_[index], false}; + } + + [[nodiscard]] size_t lower_bound_index(size_t index) const { + assert(index < SIZE); + size_t num_zeros = CountTrailingZeros(occupancy >> index); + // num_zeros may be equal to SIZE if no bits remain + return std::min(SIZE, index + num_zeros); + } + + void occupied(size_t index, bool flag) { + assert(index < SIZE); + assert(occupied(index) != flag); + // flip the bit + occupancy ^= (U64_ONE << index); + assert(occupied(index) == flag); + } + + [[nodiscard]] bool occupied(size_t index) const { + return (occupancy >> index) & U64_ONE; + } + + bit_string_t occupancy; + std::uint32_t size_; + PhFlatMapPair data_[SIZE]; +}; + +namespace { +template +class PhFlatMapIterator { + friend array_map; + + public: + PhFlatMapIterator() : first{0}, map_{nullptr} {}; + + explicit PhFlatMapIterator(size_t index, const array_map& map) + : first{index}, map_{&map} { + // index is unsigned, so only the upper bound can be violated (SIZE itself means end()) + assert(index <= SIZE); + } + + auto& operator*() const { + assert(first < SIZE && map_->occupied(first)); + return const_cast&>(map_->data_[first]); + } + + auto* operator-> () const { + assert(first < SIZE && map_->occupied(first)); + return const_cast*>(&map_->data_[first]); + } + + auto& operator++() { + first = (first + 1)
>= SIZE ? SIZE : map_->lower_bound_index(first + 1); + return *this; + } + + auto operator++(int) { + PhFlatMapIterator iterator(first, *map_); + ++(*this); + return iterator; + } + + friend bool operator==( + const PhFlatMapIterator& left, const PhFlatMapIterator& right) { + return left.first == right.first; + } + + friend bool operator!=( + const PhFlatMapIterator& left, const PhFlatMapIterator& right) { + return !(left == right); + } + + private: + size_t first; + const array_map* map_; +}; + +} // namespace +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_FLAT_ARRAY_MAP_H diff --git a/phtree/common/ph_flat_array_map_test.cc b/phtree/common/ph_flat_array_map_test.cc new file mode 100644 index 00000000..3b230fac --- /dev/null +++ b/phtree/common/ph_flat_array_map_test.cc @@ -0,0 +1,97 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "ph_flat_array_map.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeFlatArrayMapTest, SmokeTest) { + const int max_size = 8; + + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + array_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + // TODO avoid move() + test_map.emplace(val, std::move(val)); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeFlatArrayMapTest, IteratorPostIncrementTest) { + const int num_entries = 3; + + array_map test_map; + for (int j = 0; j < num_entries; j++) { + size_t val = j * 2; + bool hasVal = test_map.find(val) != test_map.end(); + if (!hasVal) { + // TODO avoid move() + test_map.emplace(val, std::move(val)); + } + } + + // test post increment + auto it_post = test_map.begin(); + int n_post = 0; + while (it_post != test_map.end()) { + size_t v = (*it_post).first; + auto it_find = test_map.find(v); + ASSERT_EQ(it_post, it_find); + // post increment + auto it2 = it_post++; + ASSERT_NE(it2, it_post); + ++n_post; + } + ASSERT_EQ(num_entries, n_post); + + // test pre increment + auto it_pre = test_map.begin(); + int n_pre = 0; + while (it_pre != test_map.end()) { + size_t v = (*it_pre).first; + auto it_find = test_map.find(v); + ASSERT_EQ(it_pre, it_find); + auto it2 = ++it_pre; + 
ASSERT_EQ(it2, it_pre); + ++n_pre; + } + ASSERT_EQ(num_entries, n_pre); +} diff --git a/phtree/common/ph_flat_sparse_map.h b/phtree/common/ph_flat_sparse_map.h new file mode 100644 index 00000000..25a90484 --- /dev/null +++ b/phtree/common/ph_flat_sparse_map.h @@ -0,0 +1,140 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_FLAT_SPARSE_MAP_H +#define PHTREE_COMMON_FLAT_SPARSE_MAP_H + +#include "ph_bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via ph_common.h. + * + * This file contains the sparse_map implementation, which is used in medium-dimensional nodes in + * the PH-Tree. + */ +namespace improbable::phtree { + +namespace { +template +using PhFlatMapPair = std::pair; + +using index_t = std::int32_t; +} // namespace + +/* + * The sparse_map is a flat map implementation that uses an array of *at* *most* SIZE=2^DIM. + * The array contains a list sorted by key. + * + * It has O(log n) lookup and O(n) insertion/removal time complexity, space complexity is O(n). 
+ */ +template +class sparse_map { + public: + explicit sparse_map() : data_{} {}; + + [[nodiscard]] auto find(size_t key) { + auto it = lower_bound(key); + if (it != data_.end() && it->first == key) { + return it; + } + return data_.end(); + } + + [[nodiscard]] auto find(size_t key) const { + auto it = lower_bound(key); + if (it != data_.end() && it->first == key) { + return it; + } + return data_.end(); + } + + [[nodiscard]] auto lower_bound(size_t key) { + return std::lower_bound( + data_.begin(), data_.end(), key, [](PhFlatMapPair& left, const size_t key) { + return left.first < key; + }); + } + + [[nodiscard]] auto lower_bound(size_t key) const { + return std::lower_bound( + data_.cbegin(), data_.cend(), key, [](const PhFlatMapPair& left, const size_t key) { + return left.first < key; + }); + } + + [[nodiscard]] auto begin() { + return data_.begin(); + } + + [[nodiscard]] auto begin() const { + return cbegin(); + } + + [[nodiscard]] auto cbegin() const { + return data_.cbegin(); + } + + [[nodiscard]] auto end() { + return data_.end(); + } + + [[nodiscard]] auto end() const { + return data_.end(); + } + + auto emplace(size_t index, T&& value) { + return emplace_base(index, std::forward(value)); + } + + template + auto emplace(_Args&&... 
__args) { + return emplace_base(std::forward<_Args>(__args)...); + } + + void erase(size_t key) { + auto it = lower_bound(key); + if (it != end() && it->first == key) { + data_.erase(it); + } + } + + void erase(const typename std::vector>::iterator& iterator) { + data_.erase(iterator); + } + + [[nodiscard]] size_t size() const { + return data_.size(); + } + + private: + auto emplace_base(size_t key, T&& value) { + auto it = lower_bound(key); + if (it != end() && it->first == key) { + return std::make_pair(it, false); + } else { + return std::make_pair(data_.insert(it, {key, std::forward(value)}), true); + } + } + + std::vector> data_; +}; + +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_FLAT_SPARSE_MAP_H diff --git a/phtree/common/ph_flat_sparse_map_test.cc b/phtree/common/ph_flat_sparse_map_test.cc new file mode 100644 index 00000000..58c48b49 --- /dev/null +++ b/phtree/common/ph_flat_sparse_map_test.cc @@ -0,0 +1,56 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ph_flat_sparse_map.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeFlatSparseMapTest, SmokeTest) { + const int max_size = 8; + + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + sparse_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + // TODO avoid move() + test_map.emplace(val, std::move(val)); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} diff --git a/phtree/common/ph_preprocessor.h b/phtree/common/ph_preprocessor.h new file mode 100644 index 00000000..3c558a55 --- /dev/null +++ b/phtree/common/ph_preprocessor.h @@ -0,0 +1,120 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef PHTREE_COMMON_PH_PREPROCESSOR_H +#define PHTREE_COMMON_PH_PREPROCESSOR_H + +#include "ph_base_types.h" +#include +#include +#include <cstring> + +/* + * PLEASE do not include this file directly, it is included via ph_common.h. + * + * This file contains conversion/transformation functions for converting user coordinates and + * shapes, such as PhPointD and PhBoxD, into PH-Tree native coordinates (PhPoint). + */ +namespace improbable::phtree { + +template +using PhPreprocessor = PhPoint (*)(const KEY& point); + +template +using PhPostprocessor = KEY (*)(const PhPoint& point); + +template +using PhPreprocessorD = PhPreprocessor>; + +template +using PhPostprocessorD = PhPostprocessor>; + +template +using PhPreprocessorBoxD = PhPoint<2 * DIM> (*)(const PhBoxD& point); + +template +using PhPostprocessorBoxD = PhBoxD (*)(const PhPoint<2 * DIM>& point); + +class Preprocessors { + public: + static std::int64_t ToSortableLong(double value) { + // To create a sortable long, we reinterpret the IEEE-754 bit pattern of the double (sign, + // exponent, mantissa) as a long. The result is properly ordered for all positive doubles. + // Negative values have inverse ordering, so we invert all bits except the sign bit, which + // keeps them negative. The bits are copied with memcpy() because reading a double through + // a reference to std::int64_t violates the strict-aliasing rule (undefined behaviour). + std::int64_t r; + std::memcpy(&r, &value, sizeof(r)); + return r >= 0 ? r : r ^ 0x7FFFFFFFFFFFFFFFL; + } + + static double ToDouble(scalar_t value) { + // Inverse of ToSortableLong(): undo the bit inversion, then copy the bits into a double. + std::int64_t v = value >= 0 ?
value : value ^ 0x7FFFFFFFFFFFFFFFL; + double d; + std::memcpy(&d, &v, sizeof(d)); + return d; + } +}; + +// These are the IEEE and no-op conversion functions for KEY/PRE/POST + +template +PhPoint PrePostNoOp(const PhPoint& in) { + return in; +} + +template +PhPoint PreprocessIEEE(const PhPointD& point) { + PhPoint out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = Preprocessors::ToSortableLong(point[i]); + } + return out; +} + +template +PhPointD PostprocessIEEE(const PhPoint& in) { + PhPointD out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = Preprocessors::ToDouble(in[i]); + } + return out; +} + +template +PhPoint<2 * DIM> PreprocessBoxIEEE(const PhBoxD& box) { + PhPoint<2 * DIM> out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = Preprocessors::ToSortableLong(box.min()[i]); + out[i + DIM] = Preprocessors::ToSortableLong(box.max()[i]); + } + return out; +} + +template +PhBoxD PostprocessBoxIEEE(const PhPoint<2 * DIM>& in) { + PhBoxD out; + for (dimension_t i = 0; i < DIM; ++i) { + out.min()[i] = Preprocessors::ToDouble(in[i]); + out.max()[i] = Preprocessors::ToDouble(in[i + DIM]); + } + return out; +} + +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_PH_PREPROCESSOR_H diff --git a/phtree/common/ph_preprocessor_test.cc b/phtree/common/ph_preprocessor_test.cc new file mode 100644 index 00000000..937e5fcf --- /dev/null +++ b/phtree/common/ph_preprocessor_test.cc @@ -0,0 +1,34 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ph_preprocessor.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreePreprocessorTest, IEEE_SmokeTest) { + double d1 = -55; + double d2 = 7; + + scalar_t l1 = Preprocessors::ToSortableLong(d1); + scalar_t l2 = Preprocessors::ToSortableLong(d2); + + ASSERT_GT(l2, l1); + + ASSERT_EQ(d1, Preprocessors::ToDouble(l1)); + ASSERT_EQ(d2, Preprocessors::ToDouble(l2)); +} diff --git a/phtree/common/ph_tree_debug_helper.h b/phtree/common/ph_tree_debug_helper.h new file mode 100644 index 00000000..73cd6f6a --- /dev/null +++ b/phtree/common/ph_tree_debug_helper.h @@ -0,0 +1,70 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_PH_TREE_DEBUG_HELPER_H +#define PHTREE_COMMON_PH_TREE_DEBUG_HELPER_H + +#include "ph_tree_stats.h" + +namespace improbable::phtree { + +class PhTreeDebugHelper { + public: + enum class PrintDetail { name, entries, tree }; + + class DebugHelper { + virtual void CheckConsistency() const = 0; + + [[nodiscard]] virtual PhTreeStats GetStats() const = 0; + + [[nodiscard]] virtual std::string ToString(const PrintDetail& detail) const = 0; + }; + + /* + * Checks the consistency of the tree. This function requires assertions to be enabled. 
+ */ + template + static void CheckConsistency(const TREE& tree) { + tree.GetInternalTree().GetDebugHelper().CheckConsistency(); + } + + /* + * Collects some statistics about the tree, such as number of nodes, average depth, ... + * + * @return some statistics about the tree. + */ + template + static PhTreeStats GetStats(const TREE& tree) { + return tree.GetInternalTree().GetDebugHelper().GetStats(); + } + + /* + * Depending on the detail parameter this returns: + * - "name" : a string that identifies the tree implementation type. + * - "entries" : a string that lists all elements in the tree. + * - "tree" : a string that lists all elements in the tree, pretty formatted to indicate tree + * structure. + * + * @return a string as described above. + */ + template + static std::string ToString(const TREE& tree, const PrintDetail& detail) { + return tree.GetInternalTree().GetDebugHelper().ToString(detail); + } +}; + +} // namespace improbable::phtree +#endif // PHTREE_COMMON_PH_TREE_DEBUG_HELPER_H diff --git a/phtree/common/ph_tree_stats.h b/phtree/common/ph_tree_stats.h new file mode 100644 index 00000000..2319aae0 --- /dev/null +++ b/phtree/common/ph_tree_stats.h @@ -0,0 +1,106 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PHTREE_COMMON_PH_TREE_STATS_H +#define PHTREE_COMMON_PH_TREE_STATS_H + +#include "ph_base_types.h" +#include +#include + +/* + * PLEASE do not include this file directly, it is included via ph_common.h. + * + * This file defines the type returned by PhTreeDebugHelper::GetStats() for a PH-Tree. + * It provides various statistics on the PH-Tree instance it was collected from. + */ +namespace improbable::phtree { + +class PhTreeStats { + public: + std::string ToString() { + std::ostringstream s; + s << " nNodes = " << std::to_string(n_nodes_) << std::endl; + s << " avgNodeDepth = " << ((double)q_total_depth_ / (double)n_nodes_) << std::endl; + s << " AHC=" << n_AHC_ << " NI=" << n_nt_ << " nNtNodes_=" << n_nt_nodes_ << std::endl; + double apl = GetAvgPostlen(); + s << " avgPostLen = " << apl << " (" << (MAX_BIT_WIDTH - apl) << ")" << std::endl; + return s.str(); + } + + std::string ToStringHist() { + std::ostringstream s; + s << " infix_len = "; + to_string(s, infix_hist_) << std::endl; + s << " nodeSizeLog = "; + to_string(s, node_size_log_hist_) << std::endl; + s << " node_depth_hist_ = "; + to_string(s, node_depth_hist_) << std::endl; + s << " depthHist = "; + to_string(s, q_n_post_fix_n_) << std::endl; + return s.str(); + } + + /* + * Averages (MAX_BIT_WIDTH - i), weighted by q_n_post_fix_n_[i], over all indices i. + * + * @return average postfix_len, including the HC/LHC bit. 
+ */ + double GetAvgPostlen() { + size_t total = 0; + size_t num_entry = 0; + for (bit_width_t i = 0; i < MAX_BIT_WIDTH; i++) { + total += (MAX_BIT_WIDTH - i) * q_n_post_fix_n_[i]; + num_entry += q_n_post_fix_n_[i]; + } + return (double)total / (double)num_entry; + } + + size_t GetNodeCount() { + return n_nodes_; + } + + size_t GetCalculatedMemSize() { + return size_; + } + + private: + static std::ostringstream& to_string(std::ostringstream& s, std::vector& data) { + s << "["; + for (size_t x : data) { + s << x << ","; + } + s << "]"; + return s; + } + + public: + size_t n_nodes_ = 0; + size_t n_AHC_ = 0; // AHC nodes (formerly Nodes with AHC-postfix representation) + size_t n_nt_nodes_ = 0; // NtNodes (formerly Nodes with sub-HC representation) + size_t n_nt_ = 0; // nodes with NT representation + size_t n_total_children_ = 0; + size_t size_ = 0; // calculated size in bytes + size_t q_total_depth_ = 0; + std::vector q_n_post_fix_n_ = + std::vector(MAX_BIT_WIDTH, (size_t)0); // filled with x[current_depth] = nPost; + std::vector infix_hist_ = std::vector(MAX_BIT_WIDTH, (size_t)0); // prefix len + std::vector node_depth_hist_ = std::vector(MAX_BIT_WIDTH, (size_t)0); // prefix len + std::vector node_size_log_hist_ = std::vector(32, (size_t)0); // log (num_entries) +}; + +} // namespace improbable::phtree +#endif // PHTREE_COMMON_PH_TREE_STATS_H diff --git a/phtree/phtree.h b/phtree/phtree.h new file mode 100644 index 00000000..8fb08475 --- /dev/null +++ b/phtree/phtree.h @@ -0,0 +1,196 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_PHTREE_H +#define PHTREE_PHTREE_H + +#include "common/ph_common.h" +#include "v16/phtree_v16.h" + +namespace improbable::phtree { + +/* + * PH-Tree main class. + * This class is a wrapper which can wrap different implementations of the PH-Tree. + * This class supports only `PhPoint` coordinates with `int64_t` scalars. + * + * For more information please refer to the README of this project. + */ +template < + dimension_t DIM, + typename T, + typename KEY = PhPoint, + PhPostprocessor POST = PrePostNoOp> +class PhTree { + friend PhTreeDebugHelper; + + public: + /* + * Attempts to build and insert a key and a value into the tree. + * + * @param key The key for the new entry. + * + * @param __args Arguments used to generate a new value. + * + * @return A pair, whose first element points to the possibly inserted pair, + * and whose second element is a bool that is true if the pair was actually inserted. + * + * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is + * effectively a map, so if an entry with the same key was already in the tree, returns that + * entry instead of inserting a new one. + */ + template + std::pair emplace(const PhPoint key, _Args&&... __args) { + return tree_.emplace(key, std::forward<_Args>(__args)...); + } + + /* + * See std::map::insert(). + * + * @return a pair consisting of the inserted element (or to the element that prevented the + * insertion) and a bool denoting whether the insertion took place. 
+ */ + std::pair insert(const PhPoint& key, const T& value) { + return tree_.insert(key, value); + } + + /* + * @return the value stored at position 'key'. If no such value exists, one is added to the tree + * and returned. + */ + T& operator[](const PhPoint& key) { + return tree_[key]; + } + + /* + * Analogous to std::map::count(). + * + * @return '1', if a value is associated with the provided key, otherwise '0'. + */ + size_t count(const PhPoint& key) const { + return tree_.count(key); + } + + /* + * Analogous to std::map::find(). + * + * Get an entry associated with a k dimensional key. + * @param key the key to look up + * @return an iterator that points either to the associated value or to {@code end()} if the key + * was not found + */ + auto find(const PhPoint& key) const { + return tree_.find(key); + } + + /* + * See std::map::erase(). Removes any value associated with the provided key. + * + * @return '1' if a value was found, otherwise '0'. + */ + size_t erase(const PhPoint& key) { + return tree_.erase(key); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * functions must implement the same signature as the default 'PhFilterNoOp'. + * + * @return an iterator over all (filtered) entries in the tree. + */ + template > + auto begin(FILTER filter = FILTER()) const { + return tree_.begin(filter); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param min Minimum values + * @param max Maximum values + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'PhFilterNoOp'. + * @return Result iterator. 
+ */ + template > + auto begin_query( + const PhPoint& min, const PhPoint& max, FILTER filter = FILTER()) const { + return tree_.begin_query(min, max, filter); + } + + /* + * Locate nearest neighbors for a given point in space. + * @param min_results number of entries to be returned. More entries may or may not be returned + * when several entries have the same distance. + * @param center center point + * @param distance_function optional distance function, defaults to euclidean distance + * @param filter optional filter predicate that excludes nodes/entries before their distance is + * calculated. + * @return Result iterator. + */ + template < + typename DISTANCE = PhDistanceLongEuclidean, + typename FILTER = PhFilterNoOp> + auto begin_knn_query( + size_t min_results, + const PhPoint& center, + DISTANCE distance_function = DISTANCE(), + FILTER filter = FILTER()) const { + return tree_.begin_knn_query(min_results, center, distance_function, filter); + } + + /* + * @return An iterator representing the tree's 'end'. + */ + const auto& end() const { + return tree_.end(); + } + + /* + * Remove all entries from the tree. + */ + void clear() { + tree_.clear(); + } + + /* + * @return the number of entries (key/value pairs) in the tree. + */ + [[nodiscard]] size_t size() const { + return tree_.size(); + } + + /* + * @return 'true' if the tree is empty, otherwise 'false'. 
+ */ + [[nodiscard]] bool empty() const { + return tree_.empty(); + } + + private: + // This is used by PhTreeDebugHelper + const auto& GetInternalTree() const { + return tree_; + } + + v16::PhTreeV16 tree_; +}; + +} // namespace improbable::phtree + +#endif // PHTREE_PHTREE_H diff --git a/phtree/phtree_box_d.h b/phtree/phtree_box_d.h new file mode 100644 index 00000000..ad837891 --- /dev/null +++ b/phtree/phtree_box_d.h @@ -0,0 +1,220 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_PHTREE_BOX_D_H +#define PHTREE_PHTREE_BOX_D_H + +#include "common/ph_common.h" +#include "v16/phtree_v16.h" + +namespace improbable::phtree { + +/* + * Floating-point `double` Box version of the PH-Tree. + * This wrapper accepts axis aligned boxes as key. The boxes are defined by their minimum and + * maximum coordinates in each dimension. + * + * Encoding boxes as points + * ======================== + * The native PH-Tree can only handle points, not boxes. This PhTreeBoxD class solves this by + * encoding the boxes into points by concatenating the minimum and maximum coordinates (with DIM + * dimensions) of each box to a single point with 2*DIM dimensions. For example, a 2D box + * (1,3)/(9,8) becomes (1,3,9,8). + * + * Querying boxes + * ============== + * Executing window queries on these encoded boxes requires some transformation of the query + * constraints. 
+ * + * The transformation has two steps: one step is to transform the requested query min_req/max_req + * points into useful internal 4D min_int/max_int points, the other step is to transform floating + * point coordinates into integer coordinates. The second step is equivalent to the transformation + * in normal floating-point point trees, so it is not discussed further here. Also note that the two + * steps can be swapped. + * + * The default window query works as 'intersection' query, i.e. it returns all boxes that intersect + * or lie completely inside the query window. The solution is to fill the lower half of the internal + * min_int point with -infinity and the upper half with the requested min_req coordinate. For the + * internal max_int point we fill the lower half with the requested max_req value and the upper half + * with +infinity. + * + * For example, since the internal tree is 4D, a 2D window query with min_req=(2,4)/max_req=(12,10) + * is transformed to min_int=(-infinity,-infinity,2,4) / max_int=(12,10,+infinity,+infinity). The + * internal query of the PH-Tree simply returns any 4D point (= encoded box) that is strictly larger + * than min_int and strictly smaller than max_int. The result is that it returns all boxes that + * somehow intersect with, or lie inside of, the requested query window. + * + * For more information please refer to the README of this project. + */ +template < + dimension_t DIM, + typename T, + typename KEY = PhBoxD, + PhPreprocessorBoxD PRE = PreprocessBoxIEEE, + PhPostprocessorBoxD POST = PostprocessBoxIEEE, + PhPreprocessorD<2 * DIM> PRE_QUERY = PreprocessIEEE<2 * DIM>> +class PhTreeBoxD { + friend PhTreeDebugHelper; + static const dimension_t TREE_DIM = 2 * DIM; + + public: + PhTreeBoxD() : tree_() {} + + /* + * Attempts to build and insert a key and a value into the tree. + * + * @param key The key for the new entry. + * + * @param __args Arguments used to generate a new value. 
+ * + * @return A pair, whose first element points to the possibly inserted pair, + * and whose second element is a bool that is true if the pair was actually inserted. + * + * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is + * effectively a map, so if an entry with the same key was already in the tree, returns that + * entry instead of inserting a new one. + */ + template + std::pair emplace(const PhBoxD& key, _Args&&... __args) { + return tree_.emplace(PRE(key), std::forward<_Args>(__args)...); + } + + /* + * See std::map::insert(). + * + * @return a pair consisting of the inserted element (or to the element that prevented the + * insertion) and a bool denoting whether the insertion took place. + */ + std::pair insert(const PhBoxD& key, const T& value) { + return tree_.insert(PRE(key), value); + } + + /* + * @return the value stored at position 'key'. If no such value exists, one is added to the tree + * and returned. + */ + T& operator[](const PhBoxD& key) { + return tree_[PRE(key)]; + } + + /* + * Analogous to std::map::count(). + * + * @return '1', if a value is associated with the provided key, otherwise '0'. + */ + size_t count(const PhBoxD& key) const { + return tree_.count(PRE(key)); + } + + /* + * Analogous to std::map::find(). + * + * Get an entry associated with a k dimensional key. + * @param key the key to look up + * @return an iterator that points either to the associated value or to {@code end()} if the key + * was not found + */ + auto find(const PhBoxD& key) const { + return tree_.find(PRE(key)); + } + + /* + * See std::map::erase(). Removes any value associated with the provided key. + * + * @return '1' if a value was found, otherwise '0'. + */ + size_t erase(const PhBoxD& key) { + return tree_.erase(PRE(key)); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. 
Filter + * functions must implement the same signature as the default 'PhFilterNoOp'. + * + * @return an iterator over all (filtered) entries in the tree. + */ + template > + auto begin(FILTER filter = FILTER()) const { + return tree_.begin(filter); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param min Minimum corner of the query window + * @param max Maximum corner of the query window + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'PhFilterNoOp'. + * @return Result iterator. + */ + template > + auto begin_query( + const PhPointD& min, const PhPointD& max, FILTER filter = FILTER()) const { + PhPointD min_2_DIM; + PhPointD max_2_DIM; + for (dimension_t i = 0; i < DIM; i++) { + min_2_DIM[i] = D_NEG_INFINITY; + max_2_DIM[i] = max[i]; + } + for (dimension_t i = DIM; i < 2 * DIM; i++) { + min_2_DIM[i] = min[i - DIM]; + max_2_DIM[i] = D_INFINITY; + } + return tree_.begin_query(PRE_QUERY(min_2_DIM), PRE_QUERY(max_2_DIM), filter); + } + + /* + * @return An iterator representing the tree's 'end'. + */ + const auto& end() const { + return tree_.end(); + } + + /* + * Remove all entries from the tree. + */ + void clear() { + tree_.clear(); + } + + /* + * @return the number of entries (key/value pairs) in the tree. + */ + [[nodiscard]] size_t size() const { + return tree_.size(); + } + + /* + * @return 'true' if the tree is empty, otherwise 'false'. 
+ */ + [[nodiscard]] bool empty() const { + return tree_.empty(); + } + + private: + // This is used by PhTreeDebugHelper + const auto& GetInternalTree() const { + return tree_; + } + + v16::PhTreeV16 tree_; +}; + +} // namespace improbable::phtree + +#endif // PHTREE_PHTREE_BOX_D_H diff --git a/phtree/phtree_box_d_test.cc b/phtree/phtree_box_d_test.cc new file mode 100644 index 00000000..8d5c4bec --- /dev/null +++ b/phtree/phtree_box_d_test.cc @@ -0,0 +1,650 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree_box_d.h" +#include +#include +#include + +using namespace improbable::phtree; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(Id& rhs) { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; +}; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const PhPointD& p1, const PhPointD& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +void generateCube(std::vector>& points, size_t N, double boxLen = 10) { + DoubleRng rng(-1000, 1000); + auto refTree = std::unordered_map, size_t, HashPhBoxD>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto min = PhPointD({rng.next(), rng.next(), rng.next()}); + auto max = PhPointD({min[0] + boxLen, min[1] + boxLen, min[2] + boxLen}); + auto box = PhBoxD(min, max); + if (refTree.count(box) != 0) { + i--; + continue; + } + + refTree.emplace(box, i); + points.push_back(box); + } + assert(refTree.size() == N); + assert(points.size() == N); +} + +template +using TestPoint = PhBoxD; + +template +using TestTree = PhTreeBoxD< + DIM, + T, + TestPoint, + PreprocessBoxIEEE, + PostprocessBoxIEEE, + PreprocessIEEE<2 * DIM>>; + +TEST(PhTreeBoxDTest, SmokeTestBasicOps) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + + for (size_t i 
= 0; i < N; i++) { + PhBoxD& p = points.at(i); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + // With intersection queries we may get multiple results. + int found = 0; + for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + } + + ASSERT_LE(10, PhTreeDebugHelper::ToString(tree, PhTreeDebugHelper::PrintDetail::name).length()); + ASSERT_LE( + N * 10, + PhTreeDebugHelper::ToString(tree, PhTreeDebugHelper::PrintDetail::entries).length()); + ASSERT_LE( + N * 10, PhTreeDebugHelper::ToString(tree, PhTreeDebugHelper::PrintDetail::tree).length()); + ASSERT_EQ(N, PhTreeDebugHelper::GetStats(tree).size_); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(1, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeDTest, TestDebug) { + 
const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = PhTreeDebugHelper; + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + tree.clear(); + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeBoxDTest, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + // With intersection queries we may get multiple results. 
+ int found = 0; + for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + } + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeBoxDTest, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again, this should _not_ replace the existing value + Id id2(-i); + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.emplace(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // Check that the returned value is a reference + tree.emplace(p, id2).first._i++; + ASSERT_EQ(i + 1, tree.emplace(p, id).first._i); + tree.emplace(p, id2).first = id; + ASSERT_EQ(i, tree.emplace(p, id).first._i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + // With intersection queries we may get multiple results. 
+ int found = 0; + for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + } + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeBoxDTest, TestSquareBrackets) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + Id id(i); + ASSERT_EQ(0, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + if (i % 2 == 0) { + tree[p]._i = i; + } else { + tree[p] = id; + } + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try `add` again + ASSERT_EQ(i, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + // With intersection queries we may get multiple results. 
+ int found = 0; + for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + } + + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i, tree[p]._i); + } +} + +template +void populate( + PhTreeBoxD& tree, std::vector>& points, size_t N, double boxLen = 10) { + generateCube(points, N, boxLen); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +template +void populate( + TestTree& tree, std::vector>& points, size_t N, double boxLen = 10) { + generateCube(points, N, boxLen); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], (int)i).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeBoxDTest, TestClear) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + + // try again + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); +} + +TEST(PhTreeBoxDTest, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(tree.find(p)->_i, i); + i++; + } + + PhBoxD p({1, 1, 1}, {2, 2, 10000000}); + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); +} + +TEST(PhTreeBoxDTest, TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + double delta = 20; + std::vector> points; + populate(tree, points, N); + + for (auto& p : 
points) { + auto pOld = p; + PhBoxD pNew( + {pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}, + {pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); + int n = tree.erase(pOld); + ASSERT_EQ(1, n); + tree.emplace(pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + ASSERT_EQ(0, tree.count(pOld)); + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeBoxDTest, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); +} + +TEST(PhTreeBoxDTest, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e1 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e1++; + } + ASSERT_EQ(N, num_e1); + + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e2++; + } + ASSERT_EQ(N, num_e2); +} + +template +void referenceQuery( + std::vector>& points, + PhPointD& min, + PhPointD& max, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + auto pMin = p.min(); + auto pMax = p.max(); + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= pMax[d] >= min[d] && pMin[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +template +int testQuery(PhPointD& min, PhPointD& max, int N) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto& x = *it; + assert(x._i >= 0); + assert(referenceResult.count(x._i) == 1); + n++; + } + assert(referenceResult.size() == n); + return n; +} + +TEST(PhTreeBoxDTest, TestWindowQuery0) { + const 
dimension_t dim = 3; + PhPointD p{-10000, -10000, -10000}; + ASSERT_EQ(0, testQuery(p, p, 10000)); +} + +TEST(PhTreeBoxDTest, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + PhBoxD& p = points.at(i); + // With intersection queries we may get multiple results. + int found = 0; + for (auto q = tree.begin_query(p.min(), p.max()); q != tree.end(); ++q) { + found += (i == (*q)._i); + } + ASSERT_EQ(1, found); + n++; + } + ASSERT_TRUE(N == n); +} + +TEST(PhTreeBoxDTest, TestWindowQueryMany) { + const dimension_t dim = 3; + PhPointD min{-100, -100, -100}; + PhPointD max{100, 100, 100}; + int n = testQuery(min, max, 10000); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeBoxDTest, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + PhPointD min{-10000, -10000, -10000}; + PhPointD max{10000, 10000, 10000}; + ASSERT_EQ(N, testQuery(min, max, N)); +} + +TEST(PhTreeBoxDTest, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + PhPointD min{i * 10., i * 9., i * 11.}; + PhPointD max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + ASSERT_LE(500, nn);  // sanity bounds on the total hit count over all windows + ASSERT_GE(5000, nn); +} + +TEST(PhTreeBoxDTest, TestWindowQueryManyMovingPoint) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + 
std::vector> points; + populate(tree, points, N, 100); + + size_t nTotal = 0; + for (scalar_t i = -120; i < 120; i++) { + PhPointD min_max{i * 10., i * 9., i * 11.}; + std::set referenceResult; + referenceQuery(points, min_max, min_max, referenceResult); + + int n = 0; + for (auto it = tree.begin_query(min_max, min_max); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + } + ASSERT_EQ(referenceResult.size(), n); + nTotal += n; + + // basic check to ensure healthy queries + ASSERT_GE(N / 10, n); + } + ASSERT_LE(10, nTotal); +} + +TEST(PhTreeBoxDTest, SmokeTestPointAPI) { + PhBoxD<3> p({1, 2, 3}, {4, 5, 6}); + (void)p; +} + +TEST(PhTreeBoxDTest, SmokeTestTreeAPI) { + std::map mapPtr; + PhTreeBoxD<3, Id*> treePtr; + Id* idPtr = new Id(1); + treePtr.emplace(PhBoxD<3>({1, 2, 3}, {4, 5, 6}), idPtr); + treePtr.clear(); + delete idPtr; + + std::map mapConst; + PhTreeBoxD<3, const Id> treeConst; + treeConst.emplace(PhBoxD<3>({1, 2, 3}, {4, 5, 6}), Id(1)); +} diff --git a/phtree/phtree_d.h b/phtree/phtree_d.h new file mode 100644 index 00000000..36bc4d99 --- /dev/null +++ b/phtree/phtree_d.h @@ -0,0 +1,204 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_PHTREE_D_H +#define PHTREE_PHTREE_D_H + +#include "common/ph_common.h" +#include "v16/phtree_v16.h" + +namespace improbable::phtree { + +/* + * Floating-point `double` version of the PH-Tree. 
+ * This version of the tree accepts multi-dimensional keys with floating-point (`double`) + * coordinates. + * + * The default implementation uses a direct lossless (in terms of numeric precision) mapping from + * 64bit double to 64bit long integer. The mapping is defined in the Preprocessor/PostProcessor + * functions. Other, lossy mappings have been shown to provide somewhat better performance (due to + * better tree structure), but this default mapping has been chosen because it is lossless. + * + * For more information please refer to the README of this project. + */ +template < + dimension_t DIM, + typename T, + typename KEY = PhPointD, + PhPreprocessor PRE = PreprocessIEEE, + PhPostprocessor POST = PostprocessIEEE> +class PhTreeD { + friend PhTreeDebugHelper; + + public: + PhTreeD() : tree_{} {} + + /* + * Attempts to build and insert a key and a value into the tree. + * + * @param key The key for the new entry. + * + * @param __args Arguments used to generate a new value. + * + * @return A pair, whose first element points to the possibly inserted pair, + * and whose second element is a bool that is true if the pair was actually inserted. + * + * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is + * effectively a map, so if an entry with the same key was already in the tree, returns that + * entry instead of inserting a new one. + */ + template + std::pair emplace(const PhPointD& key, _Args&&... __args) { + return tree_.emplace(PRE(key), std::forward<_Args>(__args)...); + } + + /* + * See std::map::insert(). + * + * @return a pair consisting of the inserted element (or the element that prevented the + * insertion) and a bool denoting whether the insertion took place. + */ + std::pair insert(const PhPointD& key, const T& value) { + return tree_.insert(PRE(key), value); + } + + /* + * @return the value stored at position 'key'. If no such value exists, one is added to the tree + * and returned. 
+ */ + T& operator[](const PhPointD& key) { + return tree_[PRE(key)]; + } + + /* + * Analogous to std::map::count(). + * + * @return '1', if a value is associated with the provided key, otherwise '0'. + */ + size_t count(const PhPointD& key) const { + return tree_.count(PRE(key)); + } + + /* + * Analogous to std::map::find(). + * + * Get an entry associated with a k-dimensional key. + * @param key the key to look up + * @return an iterator that points either to the associated value or to {@code end()} if the key + * was not found + */ + auto find(const PhPointD& key) const { + return tree_.find(PRE(key)); + } + + /* + * See std::map::erase(). Removes any value associated with the provided key. + * + * @return '1' if a value was found, otherwise '0'. + */ + size_t erase(const PhPointD& key) { + return tree_.erase(PRE(key)); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * functions must implement the same signature as the default 'PhFilterNoOp'. + * + * @return an iterator over all (filtered) entries in the tree. + */ + template > + auto begin(FILTER filter = FILTER()) const { + return tree_.begin(filter); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param min Minimum values + * @param max Maximum values + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'PhFilterNoOp'. + * @return Result iterator. + */ + template > + auto begin_query( + const PhPointD& min, const PhPointD& max, FILTER filter = FILTER()) const { + return tree_.begin_query(PRE(min), PRE(max), filter); + } + + /* + * Locate nearest neighbors for a given point in space. 
+ * @param min_results number of entries to be returned. More entries may or may not be returned + * when several entries have the same distance. + * @param center center point + * @param distance_function optional distance function, defaults to euclidean distance + * @param filter optional filter predicate that excludes nodes/entries before their distance is + * calculated. + * @return Result iterator. + */ + template < + typename DISTANCE = PhDistanceDoubleEuclidean, + typename FILTER = PhFilterNoOp> + auto begin_knn_query( + size_t min_results, + const PhPointD& center, + DISTANCE distance_function = DISTANCE(), + FILTER filter = FILTER()) const { + return tree_.begin_knn_query(min_results, PRE(center), distance_function, filter); + } + + /* + * @return An iterator representing the tree's 'end'. + */ + const auto& end() const { + return tree_.end(); + } + + /* + * Remove all entries from the tree. + */ + void clear() { + tree_.clear(); + } + + /* + * @return the number of entries (key/value pairs) in the tree. + */ + [[nodiscard]] size_t size() const { + return tree_.size(); + } + + /* + * @return 'true' if the tree is empty, otherwise 'false'. + */ + [[nodiscard]] bool empty() const { + return tree_.empty(); + } + + private: + // This is used by PhTreeDebugHelper + const auto& GetInternalTree() const { + return tree_; + } + + v16::PhTreeV16 tree_; +}; + +} // namespace improbable::phtree + +#endif // PHTREE_PHTREE_D_H diff --git a/phtree/phtree_d_test.cc b/phtree/phtree_d_test.cc new file mode 100644 index 00000000..207a369c --- /dev/null +++ b/phtree/phtree_d_test.cc @@ -0,0 +1,894 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_d.h" +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeD, PreprocessIEEE, PostprocessIEEE>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(Id& rhs) { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; +}; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) 
!= 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + assert(refTree.size() == N); + assert(points.size() == N); +} + +template +void SmokeTestBasicOps() { + TestTree tree; + size_t N = 10000; + + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(1, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeDTest, 
SmokeTestBasicOps) { + SmokeTestBasicOps<3>(); + SmokeTestBasicOps<6>(); + SmokeTestBasicOps<10>(); + SmokeTestBasicOps<20>(); +} + +TEST(PhTreeDTest, TestDebug) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = PhTreeDebugHelper; + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + tree.clear(); + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeDTest, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + 
TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeDTest, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again, this should _not_ replace the existing value + Id id2(-i); + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.emplace(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // Check that the returned value is a reference + tree.emplace(p, id2).first._i++; + ASSERT_EQ(i + 1, tree.emplace(p, id).first._i); + tree.emplace(p, id2).first = id; + ASSERT_EQ(i, tree.emplace(p, id).first._i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeDTest, TestSquareBrackets) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(0, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + if (i % 2 == 0) { + tree[p]._i = i; + } else { + tree[p] = id; + } + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try `add` again + ASSERT_EQ(i, tree[p]._i); 
+ ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i, tree[p]._i); + } +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], (int)i).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeDTest, TestClear) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + + // try again + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); +} + +TEST(PhTreeDTest, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(tree.find(p)->_i, i); + i++; + } + + TestPoint p{1, 1, 10000000}; + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); + + auto iter1 = tree.find(points[0]); + auto iter2 = tree.find(points[0]); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + 
+TEST(PhTreeDTest, TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + double delta = 20; + std::vector> points; + populate(tree, points, N); + + for (auto& p : points) { + auto pOld = p; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + int n = tree.erase(pOld); + ASSERT_EQ(1, n); + tree.emplace(pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + ASSERT_EQ(0, tree.count(pOld)); + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeDTest, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); + + auto iter1 = tree.begin(); + auto iter2 = tree.begin(); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +template +struct PhFilterEvenId { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { + return value._i % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + return true; + } +}; + +TEST(PhTreeDTest, TestExtentFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(PhFilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e * 2); +} + +TEST(PhTreeDTest, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t num_e1 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e1++; + } + ASSERT_EQ(N, num_e1); + + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e2++; + } + ASSERT_EQ(N, num_e2); +} + +template +void referenceQuery( + 
std::vector>& points, + TestPoint& min, + TestPoint& max, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= p[d] >= min[d] && p[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +template +int testQuery(TestPoint& min, TestPoint& max, size_t N) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto& x = *it; + assert(x._i >= 0); + assert(referenceResult.count(x._i) == 1); + n++; + } + assert(referenceResult.size() == n); + return n; +} + +TEST(PhTreeDTest, TestWindowQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + ASSERT_EQ(0, testQuery(p, p, 10000)); +} + +TEST(PhTreeDTest, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + // just read the entry + auto& x = *q; + ASSERT_EQ(i, x._i); + q++; + ASSERT_EQ(q, tree.end()); + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeDTest, TestWindowQueryMany) { + const dimension_t dim = 3; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + int n = testQuery(min, max, 10000); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeDTest, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + TestPoint min{-10000, -10000, -10000}; + TestPoint max{10000, 10000, 10000}; + ASSERT_EQ(N, testQuery(min, max, N)); +} + +TEST(PhTreeDTest, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + double query_length = 200; + 
size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{i * 10., i * 9., i * 11.}; + TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + // the hit count accumulated over all windows must stay within a plausible range + ASSERT_LE(500, nn); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeDTest, TestWindowQueryIterators) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q1 = tree.begin_query(p, p); + auto q2 = tree.begin_query(p, p); + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeDTest, TestWindowQueryFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + auto qE = tree.begin_query(min, max, PhFilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_LE(2, num_e); + ASSERT_GE(50, num_e); +} + +TEST(PhTreeDTest, TestKnnQuery) { + // deliberately allowing outside of main points range + DoubleRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually + std::vector 
sorted_data; + for (size_t i = 0; i < points.size(); i++) { + double dist = distance(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance); + + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + ASSERT_EQ(sorted_data[n]._id, e._i); + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id, q.second()._i); + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq, n); + } +} + +template +struct PhDistanceLongL1 { + double operator()(const TestPoint& v1, const TestPoint& v2) const { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(v1[i] - v2[i]); + } + return sum; + }; +}; + +TEST(PhTreeDTest, TestKnnQueryFilterAndDistanceL1) { + // deliberately allowing outside of main points range + DoubleRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 100; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually by L1; skip every 2nd point + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i += 2) { + double dist = distanceL1(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance); + + size_t n = 0; + double prevDist = -1; + auto q = + tree.begin_knn_query(Nq, center, PhDistanceLongL1(), PhFilterEvenId()); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + // Note that this may fail for larger datasets if several points have the same distance. 
+ ASSERT_EQ(sorted_data[n]._id, e._i); + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id, q.second()._i); + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq, n); + } +} + +TEST(PhTreeDTest, TestKnnQueryIterator) { + // deliberately allowing outside of main points range + DoubleRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + TestPoint center{rng.next(), rng.next(), rng.next()}; + size_t n = 0; + auto q1 = tree.begin_knn_query(Nq, center); + auto q2 = tree.begin_knn_query(Nq, center); + while (q1 != tree.end()) { + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(Nq, n); +} + +TEST(PhTreeDTest, SmokeTestPoint0) { + // Test edge case: empty tree + TestPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree; + ASSERT_EQ(tree.size(), 0); + ASSERT_EQ(tree.find(p), tree.end()); + + auto q_window = tree.begin_query(p, p); + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p); + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeDTest, SmokeTestPointInfinity) { + // Test infinity. 
+ double positive_infinity = std::numeric_limits::infinity(); + double negative_infinity = -positive_infinity; + PhPointD<3> p_pos{positive_infinity, positive_infinity, positive_infinity}; + PhPointD<3> p_neg{negative_infinity, negative_infinity, negative_infinity}; + PhPointD<3> p{1, 2, 3}; + PhTreeD<3, Id> tree; + tree.emplace(p, Id{1}); + tree.emplace(p_pos, Id{10}); + tree.emplace(p_neg, Id{-10}); + ASSERT_EQ(tree.size(), 3); + ASSERT_EQ(tree[p_neg]._i, -10); + ASSERT_EQ(tree[p]._i, 1); + ASSERT_EQ(tree[p_pos]._i, 10); + + ASSERT_EQ(positive_infinity, positive_infinity); + ASSERT_EQ(negative_infinity, negative_infinity); + ASSERT_GT(positive_infinity, negative_infinity); + + // Note that the tree returns result in z-order, however, since the z-order is based on + // the (unsigned) bit representation, negative values come _after_ positive values. + auto q_window = tree.begin_query(p_neg, p_pos); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(10, q_window->_i); + ++q_window; + ASSERT_EQ(-10, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(10, q_extent->_i); + ++q_extent; + ASSERT_EQ(-10, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_NE(q_knn, tree.end()); + ++q_knn; + ASSERT_NE(q_knn, tree.end()); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p_neg)); + ASSERT_EQ(1, tree.erase(p)); + ASSERT_EQ(1, tree.erase(p_pos)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree.erase(p_neg)); + ASSERT_EQ(0, tree.erase(p_pos)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeDTest, SmokeTestTreeAPI) { + std::map mapPtr; + PhTreeD<3, Id*> treePtr; + Id* idPtr = new Id(1); + treePtr.emplace(PhPointD<3>{1, 2, 3}, idPtr); + treePtr.clear(); + delete idPtr; + + std::map mapConst; + PhTreeD<3, const Id> 
treeConst; + treeConst.emplace(PhPointD<3>{1, 2, 3}, Id(1)); +} diff --git a/phtree/phtree_d_test_preprocessor.cc b/phtree/phtree_d_test_preprocessor.cc new file mode 100644 index 00000000..11dc391c --- /dev/null +++ b/phtree/phtree_d_test_preprocessor.cc @@ -0,0 +1,176 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_d.h" +#include +#include + +using namespace improbable::phtree; + +static const double MY_MULTIPLIER = 1000000.; +static const double MY_DIVIDER = 1. 
/ MY_MULTIPLIER; + +template +PhPoint PreprocessMultiply(const PhPointD& point) { + PhPoint out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = point[i] * MY_MULTIPLIER; + } + return out; +} + +template +PhPointD PostprocessMultiply(const PhPoint& in) { + PhPointD out; + for (dimension_t i = 0; i < DIM; ++i) { + out[i] = ((double)in[i]) * MY_DIVIDER; + } + return out; +} + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeD, PreprocessMultiply, PostprocessMultiply>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(Id& rhs) { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + assert(refTree.size() == N); + assert(points.size() == N); +} + +template +void SmokeTestBasicOps() { + TestTree tree; + size_t N = 10000; + + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i 
+ 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(1, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeDTestPreprocessor, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(); + SmokeTestBasicOps<6>(); + SmokeTestBasicOps<10>(); + SmokeTestBasicOps<20>(); +} diff --git a/phtree/phtree_test.cc b/phtree/phtree_test.cc new file mode 100644 index 00000000..8143d012 --- /dev/null +++ b/phtree/phtree_test.cc @@ -0,0 +1,855 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPoint; + +template +using TestTree = PhTree, PrePostNoOp>; + +class IntRng { + public: + IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} + + int next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_int_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(Id& rhs) { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; +}; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + IntRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + 
refTree.emplace(point, i); + points.push_back(point); + } + assert(refTree.size() == N); + assert(points.size() == N); +} + +template +void SmokeTestBasicOps() { + TestTree tree; + size_t N = 10000; + + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(1, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeTest, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(); + 
SmokeTestBasicOps<6>(); + SmokeTestBasicOps<10>(); + SmokeTestBasicOps<20>(); +} + +TEST(PhTreeTest, TestDebug) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = PhTreeDebugHelper; + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + tree.clear(); + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeTest, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = 
tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeTest, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again, this should _not_ replace the existing value + Id id2(-i); + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.emplace(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // Check that the returned value is a reference + tree.emplace(p, id2).first._i++; + ASSERT_EQ(i + 1, tree.emplace(p, id).first._i); + tree.emplace(p, id2).first = id; + ASSERT_EQ(i, tree.emplace(p, id).first._i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeTest, TestSquareBrackets) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(0, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + if (i % 2 == 0) { + tree[p]._i = i; + } else { + tree[p] = id; + } + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try `add` again + ASSERT_EQ(i, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + 
ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i, tree[p]._i); + } +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], (int)i).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeTest, TestClear) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + + // try again + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); +} + +TEST(PhTreeTest, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(tree.find(p)->_i, i); + i++; + } + + TestPoint p{1, 1, 10000000}; + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); + + auto iter1 = tree.find(points[0]); + auto iter2 = tree.find(points[0]); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +TEST(PhTreeTest, 
TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + int delta = 20; + std::vector> points; + populate(tree, points, N); + + for (auto& p : points) { + auto pOld = p; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + int n = tree.erase(pOld); + ASSERT_EQ(1, n); + tree.emplace(pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + ASSERT_EQ(0, tree.count(pOld)); + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeTest, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); + + auto iter1 = tree.begin(); + auto iter2 = tree.begin(); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +template +struct PhFilterEvenId { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { + return value._i % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + return true; + } +}; + +TEST(PhTreeTest, TestExtentFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(PhFilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e * 2); +} + +TEST(PhTreeTest, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t num_e1 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e1++; + } + ASSERT_EQ(N, num_e1); + + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e2++; + } + ASSERT_EQ(N, num_e2); +} + +template +void referenceQuery( + std::vector>& points, + 
TestPoint& min, + TestPoint& max, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= p[d] >= min[d] && p[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +template +int testQuery(TestPoint& min, TestPoint& max, size_t N) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto& x = *it; + assert(x._i >= 0); + assert(referenceResult.count(x._i) == 1); + n++; + } + assert(referenceResult.size() == n); + return n; +} + +TEST(PhTreeTest, TestWindowQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + ASSERT_EQ(0, testQuery(p, p, 10000)); +} + +TEST(PhTreeTest, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + // just read the entry + auto& x = *q; + ASSERT_EQ(i, x._i); + q++; + ASSERT_EQ(q, tree.end()); + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeTest, TestWindowQueryMany) { + const dimension_t dim = 3; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + int n = testQuery(min, max, 10000); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeTest, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + TestPoint min{-10000, -10000, -10000}; + TestPoint max{10000, 10000, 10000}; + ASSERT_EQ(N, testQuery(min, max, N)); +} + +TEST(PhTreeTest, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int query_length = 200; + size_t nn = 0; + for (int i = -120; i 
< 120; i++) { + TestPoint min{i * 10, i * 9, i * 11}; + TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + ASSERT_LE(3, 500); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeTest, TestWindowQueryIterators) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q1 = tree.begin_query(p, p); + auto q2 = tree.begin_query(p, p); + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeTest, TestWindowQueryFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + auto qE = tree.begin_query(min, max, PhFilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_LE(2, num_e); + ASSERT_GE(50, num_e); +} + +TEST(PhTreeTest, TestKnnQuery) { + // deliberately allowing outside of main points range + IntRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually + std::vector sorted_data; + for (size_t i = 0; i < 
points.size(); i++) { + double dist = distance(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance); + + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + ASSERT_EQ(sorted_data[n]._id, e._i); + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id, q.second()._i); + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq, n); + } +} + +template +struct PhDistanceLongL1 { + double operator()(const TestPoint& v1, const TestPoint& v2) const { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(v1[i] - v2[i]); + } + return sum; + }; +}; + +TEST(PhTreeTest, TestKnnQueryFilterAndDistanceL1) { + // deliberately allowing outside of main points range + IntRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 100; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually by L1; skip every 2nd point + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i += 2) { + double dist = distanceL1(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance); + + size_t n = 0; + double prevDist = -1; + auto q = + tree.begin_knn_query(Nq, center, PhDistanceLongL1(), PhFilterEvenId()); + while (q != tree.end()) { + // just read the entry + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + // We don't check anything else because with L1 there will often be several different + // entries with the same distance but with different ordering than sorted_data. 
+ ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq, n); + } +} + +TEST(PhTreeTest, TestKnnQueryIterator) { + // deliberately allowing outside of main points range + IntRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + TestPoint center{rng.next(), rng.next(), rng.next()}; + size_t n = 0; + auto q1 = tree.begin_knn_query(Nq, center); + auto q2 = tree.begin_knn_query(Nq, center); + while (q1 != tree.end()) { + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(Nq, n); +} + +TEST(PhTreeTest, SmokeTestPoint0) { + // Test edge case: empty tree + TestPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree; + ASSERT_EQ(tree.size(), 0); + ASSERT_EQ(tree.find(p), tree.end()); + + auto q_window = tree.begin_query(p, p); + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p); + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeTest, SmokeTestPoint1) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree; + tree.emplace(p, Id{1}); + tree.emplace(p, Id{2}); + Id id3{3}; + tree.insert(p, id3); + Id id4{4}; + tree.insert(p, id4); + ASSERT_EQ(tree.size(), 1); + ASSERT_EQ(tree.find(p).second()._i, 1); + ASSERT_EQ(tree[p]._i, 1); + + auto q_window = tree.begin_query(p, p); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p)); + ASSERT_EQ(0, 
tree.size()); + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} diff --git a/phtree/phtree_test_const_values.cc b/phtree/phtree_test_const_values.cc new file mode 100644 index 00000000..7eeec951 --- /dev/null +++ b/phtree/phtree_test_const_values.cc @@ -0,0 +1,696 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPoint; + +template +using TestTree = PhTree, PrePostNoOp>; + +class IntRng { + public: + IntRng(std::int32_t minIncl, std::int32_t maxExcl) : eng(7), rnd{minIncl, maxExcl} {} + + std::int32_t next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_int_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(Id& rhs) { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; +}; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + 
return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + IntRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + assert(refTree.size() == N); + assert(points.size() == N); +} + +template +void SmokeTestBasicOps() { + TestTree tree; + size_t N = 10000; + + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + 
ASSERT_EQ(1, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeTestConst, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(); + SmokeTestBasicOps<6>(); + SmokeTestBasicOps<10>(); + SmokeTestBasicOps<20>(); +} + +TEST(PhTreeTestConst, TestDebug) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = PhTreeDebugHelper; + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + tree.clear(); + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeTestConst, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + 
std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeTestConst, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again, this should _not_ replace the existing value + Id id2(-i); + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.emplace(p, id).first._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id._i, tree.find(p)->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, tree.find(p)->_i); + } +} + +TEST(PhTreeTestConst, TestSquareBrackets) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + 
TestPoint& p = points.at(i); + Id id(i); + ASSERT_EQ(0, tree[p]._i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i + 1, tree.size()); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(0, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], (int)i).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeTestConst, TestClear) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + + // try again + populate(tree, points, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); +} + +TEST(PhTreeTestConst, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(tree.find(p)->_i, i); + i++; + } + + TestPoint p{1, 1, 10000000}; + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); + + auto iter1 = tree.find(points[0]); + auto iter2 = tree.find(points[0]); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +TEST(PhTreeTestConst, TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; 
+ size_t N = 10000; + int delta = 20; + std::vector> points; + populate(tree, points, N); + + for (auto& p : points) { + auto pOld = p; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + int n = tree.erase(pOld); + ASSERT_EQ(1, n); + tree.emplace(pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + ASSERT_EQ(0, tree.count(pOld)); + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeTestConst, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); + + auto iter1 = tree.begin(); + auto iter2 = tree.begin(); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); +} + +template +struct PhFilterEvenId { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { + return value._i % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + return true; + } +}; + +TEST(PhTreeTestConst, TestExtentFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + auto qE = tree.begin(PhFilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e * 2); +} + +TEST(PhTreeTestConst, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + size_t num_e1 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e1++; + } + ASSERT_EQ(N, num_e1); + + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x._i > -1); + num_e2++; + } + ASSERT_EQ(N, num_e2); +} + +template +void referenceQuery( + std::vector>& points, + TestPoint& min, + TestPoint& max, + std::set& result) { + for 
(size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= p[d] >= min[d] && p[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +template +int testQuery(TestPoint& min, TestPoint& max, size_t N) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto& x = *it; + assert(x._i >= 0); + assert(referenceResult.count(x._i) == 1); + n++; + } + assert(referenceResult.size() == n); + return n; +} + +TEST(PhTreeTestConst, TestWindowQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + ASSERT_EQ(0, testQuery(p, p, 10000)); +} + +TEST(PhTreeTestConst, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + PhPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + // just read the entry + auto& x = *q; + ASSERT_EQ(i, x._i); + q++; + ASSERT_EQ(q, tree.end()); + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeTestConst, TestWindowQueryMany) { + const dimension_t dim = 3; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + int n = testQuery(min, max, 10000); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeTestConst, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + TestPoint min{-10000, -10000, -10000}; + TestPoint max{10000, 10000, 10000}; + ASSERT_EQ(N, testQuery(min, max, N)); +} + +TEST(PhTreeTestConst, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{i * 10, 
i * 9, i * 11}; + TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto& x = *it; + ASSERT_EQ(referenceResult.count(x._i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + ASSERT_LE(3, 500); + ASSERT_GE(5000, nn); +} + +TEST(PhTreeTestConst, TestWindowQueryIterators) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q1 = tree.begin_query(p, p); + auto q2 = tree.begin_query(p, p); + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(N, n); +} + +TEST(PhTreeTestConst, TestWindowQueryFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + populate(tree, points, N); + + int num_e = 0; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + auto qE = tree.begin_query(min, max, PhFilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE(qE->_i > -1); + ASSERT_TRUE(qE->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_LE(2, num_e); + ASSERT_GE(50, num_e); +} + +TEST(PhTreeTestConst, TestKnnQuery) { + // deliberately allowing outside of main points range + IntRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + populate(tree, points, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i++) { + double 
dist = distance(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance); + + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + ASSERT_EQ(sorted_data[n]._id, e._i); + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id, q.second()._i); + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq, n); + } +} diff --git a/phtree/phtree_test_ptr_values.cc b/phtree/phtree_test_ptr_values.cc new file mode 100644 index 00000000..1c374da5 --- /dev/null +++ b/phtree/phtree_test_ptr_values.cc @@ -0,0 +1,779 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPoint; + +template +using TestTree = PhTree, PrePostNoOp>; + +class IntRng { + public: + IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} + + int next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_int_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(Id& rhs) { + return _i == rhs._i; + } + + Id& operator=(Id const& rhs) = default; + + int _i; +}; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + IntRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + assert(refTree.size() == N); + assert(points.size() == N); +} + +template +void SmokeTestBasicOps() { + TestTree tree; + size_t N = 10000; + + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + 
ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id* id = new Id(i); + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id->_i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id->_i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)->_i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id* id = *tree.find(p); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(1, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + delete id; + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeTestPtr, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(); + SmokeTestBasicOps<6>(); + SmokeTestBasicOps<10>(); + SmokeTestBasicOps<20>(); +} + +TEST(PhTreeTestPtr, TestDebug) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + using Debug = PhTreeDebugHelper; + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, 
Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id* id = new Id(i); + ASSERT_TRUE(tree.insert(p, id).second); + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_LE(N * 10, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(N, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, (*tree.find(p))->_i); + auto id = tree.find(p).second(); + tree.erase(p); + delete id; + } + + ASSERT_LE(10, Debug::ToString(tree, Debug::PrintDetail::name).length()); + ASSERT_GE(10, Debug::ToString(tree, Debug::PrintDetail::entries).length()); + ASSERT_GE(100, Debug::ToString(tree, Debug::PrintDetail::tree).length()); + ASSERT_EQ(0, Debug::GetStats(tree).size_); + Debug::CheckConsistency(tree); +} + +TEST(PhTreeTestPtr, TestInsert) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id* id = new Id(i); + ASSERT_EQ(true, tree.insert(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id->_i, (*tree.find(p))->_i); + + // try add again + ASSERT_EQ(false, tree.insert(p, id).second); + ASSERT_EQ(i, tree.insert(p, id).first->_i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id->_i, (*tree.find(p))->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)->_i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) 
{ + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, (*tree.find(p))->_i); + delete *tree.find(p); + } +} + +TEST(PhTreeTestPtr, TestEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id* id = new Id(i); + ASSERT_EQ(true, tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id->_i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try add again, this should _not_ replace the existing value + Id* id2 = new Id(-i); + ASSERT_EQ(false, tree.emplace(p, id2).second); + ASSERT_EQ(i, tree.emplace(p, id).first->_i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(id->_i, (*tree.find(p))->_i); + + // Check that the returned value is a reference + tree.emplace(p, id2).first->_i++; + ASSERT_EQ(i + 1, tree.emplace(p, id).first->_i); + tree.emplace(p, id2).first = id2; + ASSERT_EQ(-i, tree.emplace(p, id).first->_i); + // Replace it with previous value + tree.emplace(p, id2).first = id; + ASSERT_EQ(i + 1, tree.emplace(p, id).first->_i); + id->_i = i; + ASSERT_EQ(i, tree.emplace(p, id).first->_i); + delete id2; + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)->_i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, (*tree.find(p))->_i); + delete *tree.find(p); + } +} + +TEST(PhTreeTestPtr, TestSquareBrackets) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + + std::vector> points; + generateCube(points, N); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id* id = new Id(i); + Id* id2 = new Id(-i); + ASSERT_EQ(nullptr, tree[p]); + tree[p] = id2; + ASSERT_EQ(-i, tree[p]->_i); + ASSERT_EQ(tree.count(p), 1); + if (i 
% 2 == 0) { + tree[p]->_i = i; + ASSERT_EQ(i, id2->_i); + delete id; + } else { + tree[p] = id; + delete id2; + } + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try `add` again + ASSERT_EQ(i, tree[p]->_i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, (*tree.find(p))->_i); + } + ASSERT_EQ(N, tree.size()); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)->_i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 1); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i, tree[p]->_i); + delete *tree.find(p); + } +} + +template +void populate( + TestTree& tree, + std::vector>& points, + std::vector& values, + size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + values.emplace_back(new Id(i)); + ASSERT_TRUE(tree.insert(points[i], values[i]).second); + } + ASSERT_EQ(N, tree.size()); +} + +void depopulate(std::vector& values) { + for (auto x : values) { + delete x; + } + values.clear(); +} + +TEST(PhTreeTestPtr, TestClear) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 100; + std::vector> points; + std::vector values; + + ASSERT_TRUE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + + populate(tree, points, values, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + depopulate(values); + + // try again + populate(tree, points, values, N); + + ASSERT_FALSE(tree.empty()); + tree.clear(); + ASSERT_TRUE(tree.empty()); + points.clear(); + depopulate(values); +} + +TEST(PhTreeTestPtr, TestFind) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + size_t i = 0; + for (auto& p : points) { + // test commutativity + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_NE(tree.end(), 
tree.find(p)); + ASSERT_EQ((*tree.find(p))->_i, i); + i++; + } + + TestPoint p{1, 1, 10000000}; + auto result = tree.find(p); + ASSERT_EQ(result, tree.end()); + ASSERT_EQ(tree.end(), result); + + auto iter1 = tree.find(points[0]); + auto iter2 = tree.find(points[0]); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); + + depopulate(values); +} + +TEST(PhTreeTestPtr, TestUpdateWithEmplace) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + int delta = 20; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + for (auto& p : points) { + auto pOld = p; + PhPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + int n = tree.erase(pOld); + ASSERT_EQ(1, n); + tree.emplace(pNew, new Id(42)); + ASSERT_EQ(1, tree.count(pNew)); + ASSERT_EQ(0, tree.count(pOld)); + p = pNew; + } + + for (auto& p : tree) { + delete p; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + depopulate(values); +} + +TEST(PhTreeTestPtr, TestExtent) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + int num_e = 0; + auto qE = tree.begin(); + while (qE != tree.end()) { + ASSERT_TRUE((*qE)->_i > -1); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e); + + auto iter1 = tree.begin(); + auto iter2 = tree.begin(); + ASSERT_EQ(iter1, iter2); + ASSERT_NE(tree.end(), iter1); + + depopulate(values); +} + +template +struct PhFilterEvenId { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const T& value) const { + return value->_i % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + return true; + } +}; + +TEST(PhTreeTestPtr, TestExtentFilter) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + int num_e = 0; + auto qE = tree.begin(PhFilterEvenId()); + while (qE != tree.end()) { 
+ ASSERT_TRUE((*qE)->_i > -1); + ASSERT_TRUE((*qE)->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_EQ(N, num_e * 2); + + depopulate(values); +} + +TEST(PhTreeTestPtr, TestRangeBasedForLoop) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + size_t num_e1 = 0; + for (auto x : tree) { + ASSERT_TRUE(x->_i > -1); + num_e1++; + } + ASSERT_EQ(N, num_e1); + + size_t num_e2 = 0; + for (auto& x : tree) { + ASSERT_TRUE(x->_i > -1); + num_e2++; + } + ASSERT_EQ(N, num_e2); + depopulate(values); +} + +template +void referenceQuery( + std::vector>& points, + TestPoint& min, + TestPoint& max, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool match = true; + for (dimension_t d = 0; d < DIM; d++) { + match &= p[d] >= min[d] && p[d] <= max[d]; + } + if (match) { + result.insert(i); + } + } +} + +template +int testQuery(TestPoint& min, TestPoint& max, size_t N) { + TestTree tree; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto x = *it; + assert(x->_i >= 0); + assert(referenceResult.count(x->_i) == 1); + n++; + } + assert(referenceResult.size() == n); + depopulate(values); + return n; +} + +TEST(PhTreeTestPtr, TestWindowQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + ASSERT_EQ(0, testQuery(p, p, 10000)); +} + +TEST(PhTreeTestPtr, TestWindowQuery1) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query(p, p); + ASSERT_NE(q, tree.end()); + // just read the entry + auto x = *q; + ASSERT_EQ(i, x->_i); 
+ q++; + ASSERT_EQ(q, tree.end()); + n++; + } + ASSERT_EQ(N, n); + depopulate(values); +} + +TEST(PhTreeTestPtr, TestWindowQueryMany) { + const dimension_t dim = 3; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + int n = testQuery(min, max, 10000); + ASSERT_LE(3, n); + ASSERT_GE(100, n); +} + +TEST(PhTreeTestPtr, TestWindowQueryAll) { + const dimension_t dim = 3; + const size_t N = 10000; + TestPoint min{-10000, -10000, -10000}; + TestPoint max{10000, 10000, 10000}; + ASSERT_EQ(N, testQuery(min, max, N)); +} + +TEST(PhTreeTestPtr, TestWindowQueryManyMoving) { + size_t N = 10000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + int query_length = 200; + size_t nn = 0; + for (int i = -120; i < 120; i++) { + TestPoint min{i * 10, i * 9, i * 11}; + TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + std::set referenceResult; + referenceQuery(points, min, max, referenceResult); + + size_t n = 0; + for (auto it = tree.begin_query(min, max); it != tree.end(); it++) { + auto x = *it; + ASSERT_EQ(referenceResult.count(x->_i), 1); + n++; + nn++; + } + ASSERT_EQ(referenceResult.size(), n); + + // basic check to ensure healthy queries + if (i > -50 && i < 50) { + ASSERT_LE(1, n); + } + ASSERT_GE(100, n); + } + ASSERT_LE(3, 500); + ASSERT_GE(5000, nn); + depopulate(values); +} + +TEST(PhTreeTestPtr, TestWindowQueryIterators) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + int n = 0; + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q1 = tree.begin_query(p, p); + auto q2 = tree.begin_query(p, p); + ASSERT_NE(q1, tree.end()); + ASSERT_NE(q2, tree.end()); + ASSERT_EQ(q1, q2); + q1++; + ASSERT_NE(q1, q2); + q2++; + n++; + } + ASSERT_EQ(N, n); + depopulate(values); +} + +TEST(PhTreeTestPtr, TestWindowQueryFilter) { + const 
dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + int num_e = 0; + TestPoint min{-100, -100, -100}; + TestPoint max{100, 100, 100}; + auto qE = tree.begin_query(min, max, PhFilterEvenId()); + while (qE != tree.end()) { + ASSERT_TRUE((*qE)->_i > -1); + ASSERT_TRUE((*qE)->_i % 2 == 0); + qE++; + num_e++; + } + ASSERT_LE(2, num_e); + ASSERT_GE(50, num_e); + depopulate(values); +} + +TEST(PhTreeTestPtr, TestKnnQuery) { + // deliberately allowing outside of main points range + IntRng rng(-1500, 1500); + const dimension_t dim = 3; + const size_t N = 1000; + const size_t Nq = 10; + + TestTree tree; + std::vector> points; + std::vector values; + populate(tree, points, values, N); + + for (size_t round = 0; round < 100; round++) { + TestPoint center{rng.next(), rng.next(), rng.next()}; + + // sort points manually + std::vector sorted_data; + for (size_t i = 0; i < points.size(); i++) { + double dist = distance(center, points[i]); + sorted_data.emplace_back(dist, i); + } + std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance); + + size_t n = 0; + double prevDist = -1; + auto q = tree.begin_knn_query(Nq, center); + while (q != tree.end()) { + // just read the entry + auto& e = *q; + ASSERT_EQ(sorted_data[n]._distance, q.distance()); + ASSERT_EQ(sorted_data[n]._id, e->_i); + ASSERT_EQ(points[sorted_data[n]._id], q.first()); + ASSERT_EQ(sorted_data[n]._id, q.second()->_i); + ASSERT_GE(q.distance(), prevDist); + prevDist = q.distance(); + q++; + n++; + } + ASSERT_EQ(Nq, n); + } + depopulate(values); +} diff --git a/phtree/testing/BUILD b/phtree/testing/BUILD new file mode 100644 index 00000000..2aed744c --- /dev/null +++ b/phtree/testing/BUILD @@ -0,0 +1,14 @@ +package(default_visibility = ["//visibility:private"]) + +cc_library( + name = "testing", + testonly = True, + srcs = [ + ], + hdrs = [ + ], + visibility = [ + ], + deps = [ + ], +) diff --git 
a/phtree/testing/gtest_main/BUILD b/phtree/testing/gtest_main/BUILD new file mode 100644 index 00000000..83ffea6e --- /dev/null +++ b/phtree/testing/gtest_main/BUILD @@ -0,0 +1,15 @@ +package(default_visibility = ["//visibility:private"]) + +cc_library( + name = "gtest_main", + testonly = True, + srcs = ["gtest_main.cc"], + visibility = [ + "//visibility:public", + ], + deps = [ + "@gtest", + "@spdlog", + ], + alwayslink = 1, +) diff --git a/phtree/testing/gtest_main/gtest_main.cc b/phtree/testing/gtest_main/gtest_main.cc new file mode 100644 index 00000000..1dfcd1f4 --- /dev/null +++ b/phtree/testing/gtest_main/gtest_main.cc @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +int main(int argc, char** argv) { + auto console_sink = std::make_shared(); + spdlog::set_default_logger( + std::make_shared("", spdlog::sinks_init_list({console_sink}))); + spdlog::set_level(spdlog::level::trace); + + testing::InitGoogleMock(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/phtree/v16/BUILD b/phtree/v16/BUILD new file mode 100644 index 00000000..1717296f --- /dev/null +++ b/phtree/v16/BUILD @@ -0,0 +1,24 @@ +package(default_visibility = ["//visibility:private"]) + +cc_library( + name = "v16", + srcs = [ + ], + hdrs = [ + "debug_helper.h", + "node.h", + "ph_entry.h", + "ph_iterator_base.h", + "ph_iterator_full.h", + "ph_iterator_hc.h", + "ph_iterator_knn_hs.h", + "ph_iterator_simple.h", + "phtree_v16.h", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + "//phtree/common", + ], +) diff --git a/phtree/v16/CMakeLists.txt b/phtree/v16/CMakeLists.txt new file mode 100644 index 00000000..b4bb6e43 --- /dev/null +++ b/phtree/v16/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.14) + +target_sources(phtree + PRIVATE + debug_helper.h + node.h + ph_entry.h + ph_iterator_base.h + ph_iterator_full.h + ph_iterator_hc.h + ph_iterator_knn_hs.h + ph_iterator_simple.h + phtree_v16.h + ) diff --git a/phtree/v16/debug_helper.h b/phtree/v16/debug_helper.h new file mode 100644 index 00000000..a5ac8fc0 --- /dev/null +++ b/phtree/v16/debug_helper.h @@ -0,0 +1,146 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_DEBUG_HELPER_H +#define PHTREE_V16_DEBUG_HELPER_H + +#include "../common/ph_common.h" +#include "../common/ph_tree_debug_helper.h" +#include "node.h" +#include "phtree_v16.h" +#include + +namespace improbable::phtree::v16 { + +template POST> +class PhTreeV16; + +template +class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { + public: + DebugHelperV16(const Node& root, size_t size) : root_{root}, size_{size} {} + + /* + * Depending on the detail parameter this returns: + * - "name" : a string that identifies the tree implementation type. + * - "entries" : a string that lists all elements in the tree. + * - "tree" : a string that lists all elements in the tree, pretty formatted to indicate tree + * structure. + * + * @return a string that identifies the tree implementation type. + */ + [[nodiscard]] std::string ToString( + const PhTreeDebugHelper::PrintDetail& detail) const override { + using Enum = PhTreeDebugHelper::PrintDetail; + std::ostringstream os; + switch (detail) { + case Enum::name: + os << "PH-TreeV16-C++"; + break; + case Enum::entries: + ToStringPlain(os, root_); + break; + case Enum::tree: + ToStringTree(os, 0, root_, PhPoint(), true); + break; + } + return os.str(); + } + + /* + * Collects some statistics about the tree, such as number of nodes, average depth, ... + * + * @return some statistics about the tree. + */ + [[nodiscard]] PhTreeStats GetStats() const override { + PhTreeStats stats; + root_.GetStats(stats); + return stats; + } + + /* + * Checks the consistency of the tree. This function requires assertions to be enabled. + */ + void CheckConsistency() const override { + assert(size_ == root_.CheckConsistency()); + } + + private: + void ToStringPlain(std::ostringstream& os, const Node& node) const { + for (auto& it : node.Entries()) { + const PhEntry& o = it.second; + // inner node? 
+ if (o.IsNode()) { + ToStringPlain(os, o.GetNode()); + } else { + os << o.GetKey(); + os << " v=" << (o.IsValue() ? "T" : "null") << std::endl; + } + } + } + + void ToStringTree( + std::ostringstream& sb, + bit_width_t current_depth, + const Node& node, + const PhPoint& prefix, + bool printValue) const { + std::string ind = "*"; + for (bit_width_t i = 0; i < current_depth; ++i) { + ind += "-"; + } + sb << ind << "il=" << node.GetInfixLen() << " pl=" << node.GetPostfixLen() + << " ec=" << node.GetEntryCount() << " inf=["; + + // for a leaf node, the existence of a sub just indicates that the value exists. + if (node.GetInfixLen() > 0) { + bit_mask_t mask = MAX_MASK << node.GetInfixLen(); + mask = ~mask; + mask <<= node.GetPostfixLen() + 1; + for (dimension_t i = 0; i < DIM; ++i) { + sb << ToBinary(prefix[i] & mask) << ","; + } + } + current_depth += node.GetInfixLen(); + sb << "] " + << "Node___il=" << node.GetInfixLen() << ";pl=" << node.GetPostfixLen() + << ";size=" << node.Entries().size() << std::endl; + + // To clean previous postfixes. + for (auto& it : node.Entries()) { + const auto& o = it.second; + hc_pos_t hcPos = it.first; + if (o.IsNode()) { + sb << ind << "# " << hcPos << " Node: " << std::endl; + ToStringTree(sb, current_depth + 1, o.GetNode(), o.GetKey(), printValue); + } else { + // post-fix + sb << ind << ToBinary(o.GetKey()); + sb << " hcPos=" << hcPos; + if (printValue) { + sb << " v=" << (o.IsValue() ? "T" : "null"); + } + sb << std::endl; + } + } + } + + const Node& root_; + const size_t size_; +}; +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_DEBUG_HELPER_H diff --git a/phtree/v16/node.h b/phtree/v16/node.h new file mode 100644 index 00000000..a9693c55 --- /dev/null +++ b/phtree/v16/node.h @@ -0,0 +1,424 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_NODE_H +#define PHTREE_V16_NODE_H + +#include "../common/ph_common.h" +#include "../common/ph_tree_stats.h" +#include "ph_entry.h" +#include "phtree_v16.h" +#include + +namespace improbable::phtree::v16 { + +/* + * We provide different implementations of the node's internal entry set: + * - `array_map` is the fastest, but has O(2^DIM) space complexity. This can be very wasteful + * because many nodes may have only 2 entries. + * Also, iteration depends on some bit operations and is also O(DIM) per step if the CPU/compiler + * does not support CTZ (count trailing bits). + * - `sparse_map` is slower, but requires only O(n) memory (n = number of entries/children). + * However, insertion/deletion is O(n), i.e. O(2^DIM) time complexity in the worst case. + * - 'std::map` is the least efficient for small node sizes but scales best with larger nodes and + * dimensionality. Remember that n_max = 2^DIM. + */ +template +using EntryMap = typename std::conditional< + DIM <= 3, + array_map, (1 << DIM)>, + typename std:: + conditional>, std::map>>:: + type>::type; + +template +using EntryIterator = decltype(EntryMap().begin()); +template +using EntryIteratorC = decltype(EntryMap().cbegin()); + +namespace { + +/* + * Takes a construct of parent_node -> child_node, ie the child_node is owned by parent_node. + * This function also assumes that the child_node contains only one entry. 
+ * + * This function takes the remaining entry from the child node and inserts it into the parent_node + * where it replaces (and implicitly deletes) the child_node. + * @param prefix_of_child_in_parent This specifies the position of child_node inside the + * parent_node. We only need the relevant bits at the level of the parent_node. This means we can + * use any key of any node or entry that is (or used to be) inside the child_node, because they all + * share the same prefix. This includes the key of the child_node itself. + * @param child_node The node to be removed from the parent node. + * @param parent Current owner of the child node. + */ +template +void MergeIntoParent( + const PhPoint& prefix_of_child_in_parent, Node& child_node, Node& parent) { + assert(child_node.GetEntryCount() == 1); + // At this point we have found an entry that needs to be removed. We also know that we need to + // remove the child node because it contains at most one other entry and it is not the root + // node. + auto& entry = child_node.Entries().begin()->second; + + auto hc_pos_in_parent = CalcPosInArray(prefix_of_child_in_parent, parent.GetPostfixLen()); + auto& parent_entry = parent.Entries().find(hc_pos_in_parent)->second; + + if (entry.IsNode()) { + // connect sub to parent + auto& sub2 = entry.GetNode(); + bit_width_t new_infix_len = child_node.GetInfixLen() + 1 + sub2.GetInfixLen(); + sub2.SetInfixLen(new_infix_len); + } + + // Now move the single entry into the parent, the position in the parent is the same as the + // child_node. + // We need the double 'move' here because moving anything into the parent_entry causes the + // destructors to be called first on child_node and everything referenced from it. If we were + // not moving the single child entry away first, it would be destructed by the destructor of + // child_node. + // TODO This is really bad, we are calling the copy constructor twice here.... (try 'const'...)
+ auto temporary_entry = std::move(entry); + parent_entry = std::move(temporary_entry); +} +} // namespace + +/* + * A node of the PH-Tree. It contains up to 2^DIM entries, each entry being either a leaf with data + * of type T or a child node (both are of the variant type PhEntry). + * + * The keys (coordinates) of all entries of a node have the same prefix, where prefix refers to the + * first 'n' bits of their keys. 'n' is equivalent to "n = w - GetPostfixLen() - 1", where 'w' is the + * number of bits of the keys per dimension (usually w = 64 for `int64_t` or 'double'). + * + * The entries are stored in an EntryMap indexed and ordered by their "hypercube address". + * The hypercube address is the ID of the quadrant in the node. Nodes are effectively binary + * hypercubes (= binary Hamming space) on {0,1}^DIM. The hypercube address thus uses one bit per + * dimension to address all quadrants of the node's binary hypercube. Each bit designates for one + * dimension which quadrant it refers to, such as 0=left/1=right; 0=down/1=up; 0=front/1=back; ... . + * The ordering of the quadrants thus represents a z-order curve (please note that this is completely + * unrelated to `z-ordering` used in graphics). + * + * A node always has at least two entries, except for the root node which can have fewer entries. + * Emplace(), Find() and Erase() are not recursive (see Emplace()); only GetStats() and CheckConsistency() descend into child nodes. + */ +template +class Node { + public: + Node(bit_width_t infix_len, bit_width_t postfix_len) + : postfix_len_(postfix_len), infix_len_(infix_len), entries_{} { + assert(infix_len_ < MAX_BIT_WIDTH); + assert(infix_len >= 0); + } + + // Nodes should never be copied!
+ Node(const Node&) = delete; + Node(Node&&) = delete; + Node& operator=(const Node&) = delete; + Node& operator=(Node&&) = delete; + + [[nodiscard]] node_size_t GetEntryCount() const { + return entries_.size(); + } + + [[nodiscard]] bit_width_t GetInfixLen() const { + return infix_len_; + } + + [[nodiscard]] bit_width_t GetPostfixLen() const { + return postfix_len_; + } + + /* + * Attempts to emplace an entry in this node. + * The behavior is analogous to std::map::emplace(), i.e. if there is already a value with the + * given hypercube address 'hc_pos', that value is returned. This function is also + * non-recursive, it will return a child node instead of traversing it. + * + * The scenarios in detail: + * + * If there is no entry at the position of 'hc_pos', a new entry is inserted. The new entry is + * constructed from a constructor of T that accepts the arguments __args. Also, 'is_inserted' is + * set to 'true'. + * + * If there is an entry with a value T at 'hc_pos' and its key is identical to 'key', that entry is returned. The value is + * _not_ overwritten. If the keys differ, a new child node containing both entries is created, the new entry is returned and 'is_inserted' is set to 'true'. + * + * If there is a child node at the position of 'hc_pos', the child node's prefix is analysed. + * If the prefix indicates that the new value would end up inside the child node or any of its + * children, then the child node is returned for further traversal. + * If the child node's prefix is different, then a new node is created. The new node contains + * the child node and a new key/value entry constructed with __args. The new node is inserted in + * the current node at the position of the former child node. The new value is returned and + * 'is_inserted' is set to 'true'. + * + * @param is_inserted The function will set this to true if a new value was inserted + * @param key The key for which a new value should be inserted. + * @param __args Constructor arguments for creating a value T that can be inserted for the key. + */ + template + PhEntry* Emplace(bool& is_inserted, const PhPoint& key, _Args&&...
__args) { + hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + + // We do find() _and_ emplace() here. Why? + // We tried using only emplace(), but that requires either PhEntry to be constructed + // beforehand, which is expensive, or we use the following, which is apparently even more + // expensive: + // std::piecewise_construct, + // std::forward_as_tuple(pos), + // std::forward_as_tuple(key, std::forward<_Args>(__args)...)); + // + auto entry = entries_.find(hc_pos); + if (entry == entries_.end()) { + is_inserted = true; + T t{std::forward<_Args>(__args)...}; + return &entries_.emplace(hc_pos, PhEntry{key, t}).first->second; + } + return HandleCollision(entry->second, is_inserted, key, std::forward<_Args>(__args)...); + } + + /* + * Returns the entry (holding a value T or a child Node) if it exists and matches the key. Child nodes are + * _not_ traversed. + * @param key The key of the entry + * @return A pointer to the matching entry (value or child node), + * or nullptr if there is no matching entry. + */ + const PhEntry* Find(const PhPoint& key) const { + hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + const auto& entry = entries_.find(hc_pos); + if (entry != entries_.end() && DoesEntryMatch(entry->second, key)) { + return &entry->second; + } + return nullptr; + } + + /* + * Attempts to erase a key/value pair. + * This function is not recursive, if the 'key' leads to a child node, the child node + * is returned and nothing is removed. + * + * @param key The key of the key/value pair to be erased + * @param parent The parent node of the current node (=nullptr if this is the root node). + * @param found This is an output parameter and will be set to 'true' if a value was removed. + * @return A child node if the provided key leads to a child node, otherwise nullptr.
+ */ + Node* Erase(const PhPoint& key, Node* parent, bool& found) { + hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + auto it = entries_.find(hc_pos); + if (it != entries_.end() && DoesEntryMatch(it->second, key)) { + if (it->second.IsNode()) { + return &it->second.GetNode(); + } + entries_.erase(it); + + found = true; + if (parent && GetEntryCount() == 1) { + MergeIntoParent(key, *this, *parent); + // WARNING: (this) is deleted here, do not refer to it beyond this point. + } + } + return nullptr; + } + + EntryMap& Entries() { + return entries_; + } + + const EntryMap& Entries() const { + return entries_; + } + + void GetStats(PhTreeStats& stats, bit_width_t current_depth = 0) const { + node_size_t num_children = entries_.size(); + + ++stats.n_nodes_; + ++stats.infix_hist_[GetInfixLen()]; + ++stats.node_depth_hist_[current_depth]; + ++stats.node_size_log_hist_[32 - CountLeadingZeros(num_children)]; + stats.n_total_children_ += num_children; + + current_depth += GetInfixLen(); + stats.q_total_depth_ += current_depth; + + for (auto& entry : entries_) { + auto& child = entry.second; + if (child.IsNode()) { + auto& sub = child.GetNode(); + sub.GetStats(stats, current_depth + 1); + } else { + ++stats.q_n_post_fix_n_[current_depth]; + ++stats.size_; + } + } + } + + size_t CheckConsistency(bit_width_t current_depth = 0) const { + // Except for a root node if the tree has <2 entries. 
+ assert(entries_.size() >= 2 || current_depth == 0); + + current_depth += GetInfixLen(); + size_t num_entries_local = 0; + size_t num_entries_children = 0; + for (auto& entry : entries_) { + auto& child = entry.second; + if (child.IsNode()) { + auto& sub = child.GetNode(); + // Check node consistency + assert(sub.GetInfixLen() + 1 + sub.GetPostfixLen() == GetPostfixLen()); + num_entries_children += sub.CheckConsistency(current_depth + 1); + } else { + ++num_entries_local; + } + } + return num_entries_local + num_entries_children; + } + + void SetInfixLen(bit_width_t newInfLen) { + assert(newInfLen < MAX_BIT_WIDTH); + assert(newInfLen >= 0); + infix_len_ = newInfLen; + } + + private: + template + PhEntry& WriteValue(hc_pos_t hc_pos, const PhPoint& new_key, _Args&&... __args) { + return entries_.emplace(hc_pos, PhEntry{new_key, std::forward<_Args>(__args)...}) + .first->second; + } + + void WriteEntry(hc_pos_t hc_pos, PhEntry&& entry) { + if (entry.IsNode()) { + auto& node = entry.GetNode(); + bit_width_t new_subnode_infix_len = postfix_len_ - node.postfix_len_ - 1; + node.SetInfixLen(new_subnode_infix_len); + } + entries_.emplace(hc_pos, std::move(entry)); + } + + /* + * Handles the case where we want to insert a new entry into a node but the node already + * has an entry in that position. + * @param existing_entry The current entry in the node + * @param is_inserted Output: This will be set to 'true' by this function if a new entry was + * inserted by this function. + * @param new_key The key of the entry to be inserted + * @param __args The constructor arguments for a new value T of the new entry to be inserted + * @return A PhEntry that may contain a child node, a newly created entry or an existing entry. + * A child node indicates that no entry was inserted, but the caller should try inserting into + * the child node. A newly created entry (indicated by is_inserted=true) indicates successful + * insertion.
An existing entry (indicated by is_inserted=false) indicates that there is already + * an entry with the exact same key as new_key, so insertion has failed. + */ + template + PhEntry* HandleCollision( + PhEntry& existing_entry, + bool& is_inserted, + const PhPoint& new_key, + _Args&&... __args) { + assert(!is_inserted); + // We have two entries in the same location (local pos). + // Now we need to compare the keys. + // If they are identical, we simply return the entry for further traversal. + if (existing_entry.IsNode()) { + auto& sub_node = existing_entry.GetNode(); + if (sub_node.GetInfixLen() > 0) { + bit_width_t max_conflicting_bits = + NumberOfDivergingBits(new_key, existing_entry.GetKey()); + if (max_conflicting_bits > sub_node.GetPostfixLen() + 1) { + is_inserted = true; + return InsertSplit( + existing_entry, + new_key, + max_conflicting_bits, + std::forward<_Args>(__args)...); + } + } + // No infix conflict, just traverse subnode + } else { + bit_width_t max_conflicting_bits = + NumberOfDivergingBits(new_key, existing_entry.GetKey()); + if (max_conflicting_bits > 0) { + is_inserted = true; + return InsertSplit( + existing_entry, new_key, max_conflicting_bits, std::forward<_Args>(__args)...); + } + // perfect match -> return existing + } + return &existing_entry; + } + + template + PhEntry* InsertSplit( + PhEntry& current_entry, + const PhPoint& new_key, + bit_width_t max_conflicting_bits, + _Args&&... 
__args) { + const PhPoint current_key = current_entry.GetKey(); + + // determine length of infix + bit_width_t new_local_infix_len = GetPostfixLen() - max_conflicting_bits; + bit_width_t new_postfix_len = max_conflicting_bits - 1; + auto new_sub_node = std::make_unique>(new_local_infix_len, new_postfix_len); + hc_pos_t pos_sub_1 = CalcPosInArray(new_key, new_postfix_len); + hc_pos_t pos_sub_2 = CalcPosInArray(current_key, new_postfix_len); + + // Move key/value into subnode + new_sub_node->WriteEntry(pos_sub_2, std::move(current_entry)); + auto& new_entry = + new_sub_node->WriteValue(pos_sub_1, new_key, std::forward<_Args>(__args)...); + + // Insert new node into local node + // We use new_key because current_key has been moved(). + // TODO avoid reassigning the key here, this is unnecessary. + current_entry = {new_key, std::move(new_sub_node)}; + return &new_entry; + } + + /* + * Checks whether an entry's key matches another key. For Non-node entries this simply means + * comparing the two keys. For entries that contain nodes, we only compare the prefix. + * @param entry An entry + * @param key A key + * @return 'true' iff the relevant part of the key matches (prefix for nodes, whole key for + * other entries). + */ + bool DoesEntryMatch(const PhEntry& entry, const PhPoint& key) const { + if (entry.IsNode()) { + const Node& sub = entry.GetNode(); + if (sub.GetInfixLen() > 0) { + const bit_mask_t mask = MAX_MASK << (sub.GetPostfixLen() + 1); + return KeyEquals(entry.GetKey(), key, mask); + } + return true; + } + return entry.GetKey() == key; + } + + // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the + // current node). If a variable prefix_len would refer to the number of bits in this node's + // prefix, and if we assume 64 bit values, the following would always hold: + // prefix_len + 1 + postfix_len = 64. + // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, + // ie. 
the same bit that is used to create the lookup keys in entries_. + bit_width_t postfix_len_; + // The number of bits between this node and the parent node. For 64bit keys possible values + // range from 0 to 62. + bit_width_t infix_len_; + EntryMap entries_; +}; + +} // namespace improbable::phtree::v16 +#endif // PHTREE_V16_NODE_H diff --git a/phtree/v16/ph_entry.h b/phtree/v16/ph_entry.h new file mode 100644 index 00000000..fa32e7ea --- /dev/null +++ b/phtree/v16/ph_entry.h @@ -0,0 +1,103 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_PH_ENTRY_H +#define PHTREE_V16_PH_ENTRY_H + +#include "../common/ph_common.h" +#include "node.h" +#include +#include +#include +#include +#include + +namespace improbable::phtree::v16 { + +template +class Node; + +/* + * Nodes in the PH-Tree contain up to 2^DIM PhEntries, one in each geometric quadrant. + * PhEntries can contain two types of data: + * - A key/value pair (value of type T) + * - A prefix/child-node pair, where prefix is the prefix of the child node and the + * child node is contained in a unique_ptr. + */ +template +class PhEntry { + using Value = std::remove_const_t; + + public: + PhEntry() : kd_key_(), value_{std::in_place_type, T{}} {} + + /* + * Construct entry with existing node. 
+ */ + PhEntry(const PhPoint& k, std::unique_ptr>&& node) + : kd_key_{k} + , value_{std::in_place_type>>, + std::forward>>(node)} {} + + /* + * Construct entry with a new node. + */ + PhEntry(bit_width_t infix_len, bit_width_t postfix_len) + : kd_key_() + , value_{std::in_place_type>>, + std::make_unique>(infix_len, postfix_len)} {} + + /* + * Construct entry with existing value T. + */ + PhEntry(const PhPoint& k, const T& v) : kd_key_{k}, value_{std::in_place_type, v} {} + + /* + * Construct entry with new T value. + */ + template + explicit PhEntry(const PhPoint& k, _Args&&... __args) + : kd_key_{k}, value_{std::in_place_type, std::forward<_Args>(__args)...} {} + + [[nodiscard]] const PhPoint& GetKey() const { + return kd_key_; + } + + [[nodiscard]] bool IsValue() const { + return std::holds_alternative(value_); + } + + [[nodiscard]] bool IsNode() const { + return std::holds_alternative>>(value_); + } + + [[nodiscard]] T& GetValue() const { + assert(IsValue()); + return const_cast(std::get(value_)); + } + + [[nodiscard]] Node& GetNode() const { + assert(IsNode()); + return *std::get>>(value_); + } + + private: + PhPoint kd_key_; + std::variant>> value_; +}; +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_PH_ENTRY_H diff --git a/phtree/v16/ph_iterator_base.h b/phtree/v16/ph_iterator_base.h new file mode 100644 index 00000000..4b8529a6 --- /dev/null +++ b/phtree/v16/ph_iterator_base.h @@ -0,0 +1,111 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_PH_ITERATOR_BASE_H +#define PHTREE_V16_PH_ITERATOR_BASE_H + +#include "../common/ph_common.h" +#include "ph_entry.h" + +namespace improbable::phtree::v16 { + +/* + * Base class for all PH-Tree iterators. It holds the current result entry, the 'finished' flag and the filter; derived iterators update it through SetCurrentResult() and SetFinished(). + */ +template < + dimension_t DIM, + typename T, + typename KEY, + PhPostprocessor POST, + typename FILTER = PhFilterNoOp> +class PhIteratorBase { + public: + PhIteratorBase() : current_result_{nullptr}, is_finished_{false}, filter_{FILTER()} {} + + explicit PhIteratorBase(FILTER filter) + : current_result_{nullptr}, is_finished_{false}, filter_(std::move(filter)) {} + + T& operator*() const { + assert(current_result_); + return current_result_->GetValue(); + } + + T* operator->() const { + assert(current_result_); + return &current_result_->GetValue(); + } + + template + friend bool operator==( + const PhIteratorBase& left, + const PhIteratorBase& right) { + // Note: The following compares pointers to PhEntry objects so it should be + // a) fast (i.e. not comparing contents of entries) + // b) return `false` when comparing apparently identical entries from different PH-Trees (as + // intended) + return (left.is_finished_ && right.Finished()) || + (!left.is_finished_ && !right.Finished() && + left.current_result_ == right.GetCurrentResult()); + } + + template + friend bool operator!=( + const PhIteratorBase& left, + const PhIteratorBase& right) { + return !(left == right); + } + + KEY first() const { + return POST(current_result_->GetKey()); + } + + T& second() const { + return current_result_->GetValue(); + } + + [[nodiscard]] bool Finished() const { + return is_finished_; + } + + const PhEntry* GetCurrentResult() const { + return current_result_; + } + + protected: + void SetFinished() { + is_finished_ = true; + current_result_ = nullptr; + } + + [[nodiscard]] bool ApplyFilter(const PhEntry& entry) const { + return entry.IsNode() + ?
filter_.IsNodeValid(entry.GetKey(), entry.GetNode().GetPostfixLen() + 1) + : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); + } + + void SetCurrentResult(const PhEntry* current_result) { + current_result_ = current_result; + } + + private: + const PhEntry* current_result_; + bool is_finished_; + FILTER filter_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_PH_ITERATOR_BASE_H diff --git a/phtree/v16/ph_iterator_full.h b/phtree/v16/ph_iterator_full.h new file mode 100644 index 00000000..ec8a6ee3 --- /dev/null +++ b/phtree/v16/ph_iterator_full.h @@ -0,0 +1,113 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef PHTREE_V16_PH_ITERATOR_FULL_H +#define PHTREE_V16_PH_ITERATOR_FULL_H + +#include "../common/ph_common.h" +#include "ph_iterator_base.h" + +namespace improbable::phtree::v16 { + +template +class Node; + +template +class PhEntry; + +template < + dimension_t DIM, + typename T, + typename KEY, + PhPostprocessor POST, + typename FILTER> +class PhIteratorFull : public PhIteratorBase { + public: + PhIteratorFull(const PhEntry& root, FILTER filter) + : PhIteratorBase(filter), stack_{}, stack_size_{0} { + PrepareAndPush(root.GetNode()); + FindNextElement(); + } + + PhIteratorFull& operator++() { + FindNextElement(); + return *this; + } + + PhIteratorFull operator++(int) { + PhIteratorFull iterator(*this); + ++(*this); + return iterator; + } + + private: + void FindNextElement() { + while (!IsEmpty()) { + EntryIteratorC* p = &Peek(); + while (*p != PeekEnd()) { + auto& candidate = (*p)->second; + ++(*p); + if (this->ApplyFilter(candidate)) { + if (candidate.IsNode()) { + p = &PrepareAndPush(candidate.GetNode()); + } else { + this->SetCurrentResult(&candidate); + return; + } + } + } + // the node on top of the stack is exhausted: return to parent node + Pop(); + } + // the stack is empty, i.e. there are no more entries: finished + this->SetFinished(); + } + + EntryIteratorC& PrepareAndPush(const Node& node) { + assert(stack_size_ < stack_.size() - 1); + // The iterators are stored by value (no '&') because cbegin()/end() return temporaries + stack_[stack_size_].first = node.Entries().cbegin(); + stack_[stack_size_].second = node.Entries().end(); + ++stack_size_; + return stack_[stack_size_ - 1].first; + } + + EntryIteratorC& Peek() { + assert(stack_size_ > 0); + return stack_[stack_size_ - 1].first; + } + + EntryIteratorC& PeekEnd() { + assert(stack_size_ > 0); + return stack_[stack_size_ - 1].second; + } + + EntryIteratorC& Pop() { + assert(stack_size_ > 0); + return stack_[--stack_size_].first; + } + + bool IsEmpty() { + return stack_size_ == 0; + } + + std::array, EntryIteratorC>, MAX_BIT_WIDTH> stack_; + size_t stack_size_; +}; + +} // namespace improbable::phtree::v16 + +#endif //
PHTREE_V16_PH_ITERATOR_FULL_H diff --git a/phtree/v16/ph_iterator_hc.h b/phtree/v16/ph_iterator_hc.h new file mode 100644 index 00000000..df8ab351 --- /dev/null +++ b/phtree/v16/ph_iterator_hc.h @@ -0,0 +1,272 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_PH_ITERATOR_HC_H +#define PHTREE_V16_PH_ITERATOR_HC_H + +#include "../common/ph_common.h" +#include "ph_iterator_simple.h" + +namespace improbable::phtree::v16 { + +template +class Node; + +template +class PhEntry; + +namespace { +template +class NodeIterator; +} // namespace + +/* + * The HC (hyper cube) iterator uses `hypercube navigation`, ie. filtering of quadrants by their + * binary hypercube address. In effect it compares the node's volume (box) with the query volume + * (box) to calculate two bit masks, mask_lower_ and mask_upper_. These can be used as the number of + * the lowest and highest quadrant that overlaps with the query box. They can also be used to tell + * for any quadrant whether it overlaps with the query, simply by comparing the quadrant's ID with + * the two masks, see IsPosValid(). + * + * For details see "Efficient Z-Ordered Traversal of Hypercube Indexes" by T. Zäschke, M.C. Norrie, + * 2017. 
+ */ +template < + dimension_t DIM, + typename T, + typename KEY, + PhPostprocessor POST, + typename FILTER> +class PhIteratorHC : public PhIteratorBase { + public: + PhIteratorHC( + const PhEntry& root, + const PhPoint& range_min, + const PhPoint& range_max, + FILTER filter) + : PhIteratorBase(filter) + , stack_size_{0} + , range_min_{range_min} + , range_max_{range_max} { + PrepareAndPush(root); + FindNextElement(); + } + + PhIteratorHC& operator++() { + FindNextElement(); + return *this; + } + + PhIteratorHC operator++(int) { + PhIteratorHC iterator(*this); + ++(*this); + return iterator; + } + + private: + void FindNextElement() { + assert(!this->Finished()); + while (!IsEmpty()) { + auto* p = &Peek(); + const PhEntry* current_result = nullptr; + while ((current_result = p->Increment(range_min_, range_max_))) { + if (this->ApplyFilter(*current_result)) { + if (current_result->IsNode()) { + p = &PrepareAndPush(*current_result); + } else { + this->SetCurrentResult(current_result); + return; + } + } + } + // no matching (more) elements found + Pop(); + } + // finished + this->SetFinished(); + } + + auto& PrepareAndPush(const PhEntry& entry) { + assert(stack_size_ < stack_.size() - 1); + NodeIterator& ni = stack_[stack_size_++]; + ni.init(range_min_, range_max_, entry.GetNode(), entry.GetKey()); + return ni; + } + + auto& Peek() { + assert(stack_size_ > 0); + return stack_[stack_size_ - 1]; + } + + auto& Pop() { + assert(stack_size_ > 0); + return stack_[--stack_size_]; + } + + bool IsEmpty() { + return stack_size_ == 0; + } + + std::array, MAX_BIT_WIDTH> stack_; + size_t stack_size_; + const PhPoint range_min_; + const PhPoint range_max_; +}; + +namespace { +template +class NodeIterator { + public: + NodeIterator() : iter_{}, node_{nullptr}, mask_lower_{0}, mask_upper_(0) {} + + void init( + const PhPoint& range_min, + const PhPoint& range_max, + const Node& node, + const PhPoint& prefix) { + node_ = &node; + CalcLimits(node.GetPostfixLen(), range_min, range_max, 
prefix); + iter_ = node.Entries().lower_bound(mask_lower_); + } + + /* + * Advances the cursor. + * @return TRUE iff a matching element was found. + */ + const PhEntry* Increment(const PhPoint& range_min, const PhPoint& range_max) { + while (iter_ != node_->Entries().end() && iter_->first <= mask_upper_) { + if (IsPosValid(iter_->first)) { + const auto* be = &iter_->second; + if (CheckEntry(*be, range_min, range_max)) { + ++iter_; + return be; + } + } + ++iter_; + } + return nullptr; + } + + bool CheckEntry( + const PhEntry& candidate, + const PhPoint& range_min, + const PhPoint& range_max) const { + if (candidate.IsValue()) { + return IsInRange(candidate.GetKey(), range_min, range_max); + } + + auto& node = candidate.GetNode(); + // Check if node-prefix allows sub-node to contain any useful values. + // An infix with len=0 implies that at least part of the child node overlaps with the query. + if (node.GetInfixLen() == 0) { + return true; + } + + // Mask for comparing the prefix with the query boundaries. + assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); + scalar_t comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + auto& key = candidate.GetKey(); + for (dimension_t dim = 0; dim < DIM; ++dim) { + scalar_t in = key[dim] & comparison_mask; + if (in > range_max[dim] || in < (range_min[dim] & comparison_mask)) { + return false; + } + } + return true; + } + + private: + [[nodiscard]] bool IsPosValid(hc_pos_t key) const { + return ((key | mask_lower_) & mask_upper_) == key; + } + + void CalcLimits( + bit_width_t postfix_len, + const PhPoint& range_min, + const PhPoint& range_max, + const PhPoint& prefix) { + // create limits for the local node. there is a lower and an upper limit. Each limit + // consists of a series of DIM bit, one for each dimension. + // For the lower limit, a '1' indicates that the 'lower' half of this dimension does + // not need to be queried. 
+ // For the upper limit, a '0' indicates that the 'higher' half does not need to be + // queried. + // + // || lower_limit=0 || lower_limit=1 || upper_limit = 0 || upper_limit = 1 + // =============||====================================================================== + // query lower || YES NO + // ============ || ===================================================================== + // query higher || NO YES + // + assert(postfix_len < MAX_BIT_WIDTH); + bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len; + bit_mask_t maskVT = MAX_MASK << postfix_len; + hc_pos_t lower_limit = 0; + hc_pos_t upper_limit = 0; + constexpr hc_pos_t ONE = 1; + // to prevent problems with signed long when using 64 bit + if (postfix_len < 63) { + for (dimension_t i = 0; i < DIM; ++i) { + lower_limit <<= 1; + upper_limit <<= 1; + scalar_t nodeBisection = (prefix[i] | maskHcBit) & maskVT; + if (range_min[i] >= nodeBisection) { + //==> set to 1 if lower value should not be queried + lower_limit |= ONE; + } + if (range_max[i] >= nodeBisection) { + // Leave 0 if higher value should not be queried. + upper_limit |= ONE; + } + } + } else { + // special treatment for signed longs + // The problem (difference) here is that a '1' at the leading bit does indicate a + // LOWER value, opposed to indicating a HIGHER value as in the remaining 63 bits. + // The hypercube assumes that a leading '0' indicates a lower value. + // Solution: We leave HC as it is. + + for (dimension_t i = 0; i < DIM; ++i) { + lower_limit <<= 1; + upper_limit <<= 1; + if (range_min[i] < 0) { + // If minimum is negative, we need to search negative values (starting with '1') + //==> set upper_limit to 1. Otherwise it stays 0, which prevents searching values starting with '1'. + upper_limit |= ONE; + } + if (range_max[i] < 0) { + // Leave 0 if higher value should not be queried + // If maximum is negative, we do not need to search positive values + //(starting with '0').
+ //--> lower_limit = '1' + lower_limit |= ONE; + } + } + } + mask_lower_ = lower_limit; + mask_upper_ = upper_limit; + } + + private: + EntryIteratorC iter_; + const Node* node_; + hc_pos_t mask_lower_; + hc_pos_t mask_upper_; +}; +} // namespace +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_PH_ITERATOR_HC_H diff --git a/phtree/v16/ph_iterator_knn_hs.h b/phtree/v16/ph_iterator_knn_hs.h new file mode 100644 index 00000000..79f9a3e7 --- /dev/null +++ b/phtree/v16/ph_iterator_knn_hs.h @@ -0,0 +1,166 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_PH_QUERY_KNN_HS_H +#define PHTREE_V16_PH_QUERY_KNN_HS_H + +#include "../common/ph_common.h" +#include "ph_iterator_base.h" +#include +#include + +namespace improbable::phtree::v16 { + +/* + * kNN query implementation that uses preprocessors and distance functions. + * + * Implementation after Hjaltason and Samet (with some deviations: no MinDist or MaxDist used). + * G. R. Hjaltason and H. Samet., "Distance browsing in spatial databases.", ACM TODS + * 24(2):265--318. 
1999 + */ + +namespace { +template +using PhEntryDist = std::pair*>; + +template +struct ComparePhEntryDistByDistance { + bool operator()(const PhEntryDist& left, const PhEntryDist& right) const { + return left.first > right.first; // '>' makes std::priority_queue::top() the smallest distance + }; +}; +} // namespace + +template < + dimension_t DIM, + typename T, + typename KEY, + PhPostprocessor POST, + typename DISTANCE, + typename FILTER> +class PhIteratorKnnHS : public PhIteratorBase { + public: + explicit PhIteratorKnnHS( + const PhEntry& root, + size_t min_results, + const PhPoint& center, + DISTANCE dist, + FILTER filter) + : PhIteratorBase(filter) + , center_{center} + , center_post_{POST(center)} + , current_distance_{DBL_MAX} + , num_found_results_(0) + , num_requested_results_(min_results) + , distance_(std::move(dist)) { + if (min_results <= 0 || root.GetNode().GetEntryCount() == 0) { // nothing requested or empty root node + this->SetFinished(); + return; + } + + // Initialize queue, use d=0 because every imaginable point lies inside the root Node + queue_.emplace(0, &root); + FindNextElement(); + } + + [[nodiscard]] double distance() const { + return current_distance_; + } + + PhIteratorKnnHS& operator++() { + FindNextElement(); + return *this; + } + + PhIteratorKnnHS operator++(int) { + PhIteratorKnnHS iterator(*this); + ++(*this); + return iterator; + } + + private: + void FindNextElement() { // advance to the next-closest entry, or finish the iterator + while (num_found_results_ < num_requested_results_ && !queue_.empty()) { + auto& candidate = queue_.top(); + auto o = candidate.second; + if (!o->IsNode()) { + // data entry: report it as the current result + ++num_found_results_; + this->SetCurrentResult(o); + current_distance_ = candidate.first; + // We need to pop() AFTER we processed the value, otherwise the reference + // 'candidate' is invalidated.
+ queue_.pop(); + return; + } else { + // inner node: enqueue all children that pass the filter + auto& node = o->GetNode(); + queue_.pop(); + for (auto& entry : node.Entries()) { + auto& e2 = entry.second; + if (this->ApplyFilter(e2)) { + if (e2.IsNode()) { + auto& sub = e2.GetNode(); + double d = DistanceToNode(e2.GetKey(), sub.GetPostfixLen() + 1); + queue_.emplace(d, &e2); + } else { + double d = distance_(center_post_, POST(e2.GetKey())); + queue_.emplace(d, &e2); + } + } + } + } + } + this->SetFinished(); // queue is empty or the requested number of results was already returned + current_distance_ = DBL_MAX; + } + + double DistanceToNode(const PhPoint& prefix, int bits_to_ignore) { + assert(bits_to_ignore < MAX_BIT_WIDTH); + scalar_t mask_min = MAX_MASK << bits_to_ignore; + scalar_t mask_max = ~mask_min; + PhPoint buf; + // The following calculates the point inside of the node that is closest to center_. + // If center is inside the node this returns center_, otherwise it finds a point on the + // node's surface. + for (dimension_t i = 0; i < DIM; ++i) { + // if center_[i] is outside the node's range, use the coordinate of the closest edge, + // otherwise use center_[i] itself (assume possible distance=0) + scalar_t min = prefix[i] & mask_min; + scalar_t max = prefix[i] | mask_max; + buf[i] = min > center_[i] ? min : (max < center_[i] ?
max : center_[i]); + } + return distance_(center_post_, POST(buf)); + } + + private: + const PhPoint center_; + // center after post processing == the external representation + const KEY center_post_; + double current_distance_; + std::priority_queue< + PhEntryDist, + std::vector>, + ComparePhEntryDistByDistance> + queue_; + int num_found_results_; + int num_requested_results_; // NOTE(review): set from a size_t in the constructor, so large values are narrowed + DISTANCE distance_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_PH_QUERY_KNN_HS_H diff --git a/phtree/v16/ph_iterator_simple.h b/phtree/v16/ph_iterator_simple.h new file mode 100644 index 00000000..790b0a24 --- /dev/null +++ b/phtree/v16/ph_iterator_simple.h @@ -0,0 +1,64 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef PHTREE_V16_PH_ITERATOR_SIMPLE_H +#define PHTREE_V16_PH_ITERATOR_SIMPLE_H + +#include "../common/ph_common.h" +#include "ph_iterator_base.h" + +namespace improbable::phtree::v16 { + +template POST> +class PhTreeV16; + +template +class PhEntry; + +template POST> +class PhIteratorSimple : public PhIteratorBase { // iterator that refers to at most one entry + public: + PhIteratorSimple() : PhIteratorBase() { + this->SetFinished(); // a default-constructed iterator is already finished + } + + explicit PhIteratorSimple(const PhEntry* e) : PhIteratorBase() { + this->SetCurrentResult(e); // refers to exactly the given entry + } + + PhIteratorSimple& operator++() { + FindNextElement(); + return *this; + } + + PhIteratorSimple operator++(int) { + PhIteratorSimple iterator(*this); + ++(*this); + return iterator; + } + + protected: + void FindNextElement() { + this->SetFinished(); // there is never a second element, so any increment finishes the iterator + } +}; + +template POST> +using PhIteratorEnd = PhIteratorSimple; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_PH_ITERATOR_SIMPLE_H diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h new file mode 100644 index 00000000..d8e78517 --- /dev/null +++ b/phtree/v16/phtree_v16.h @@ -0,0 +1,261 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef PHTREE_V16_PHTREEV16_H +#define PHTREE_V16_PHTREEV16_H + +#include "debug_helper.h" +#include "node.h" +#include "ph_iterator_full.h" +#include "ph_iterator_hc.h" +#include "ph_iterator_knn_hs.h" +#include "ph_iterator_simple.h" + +namespace improbable::phtree::v16 { + +/* + * The PH-Tree is an ordered index on an n-dimensional space (quad-/oct-/2^n-tree) where each + * dimension is (by default) indexed by a 64 bit integer. The index order follows z-order / Morton + * order. The index is effectively a 'map', i.e. each key is associated with at most one value. + * + * Keys are points in n-dimensional space. + * + * This API behaves similarly to std::map, see function descriptions for details. + * + * Loosely based on PH-Tree Java, V16, see http://www.phtree.org + * + * See also : + * - T. Zaeschke, C. Zimmerli, M.C. Norrie: + * "The PH-Tree -- A Space-Efficient Storage Structure and Multi-Dimensional Index", (SIGMOD 2014) + * - T. Zaeschke: "The PH-Tree Revisited", (2015) + * - T. Zaeschke, M.C. Norrie: "Efficient Z-Ordered Traversal of Hypercube Indexes" (BTW 2017). + * + * @tparam T Value type. + * @tparam DIM Dimensionality. This is the number of dimensions of the space to index. + */ +template < + dimension_t DIM, + typename T, + typename KEY = PhPoint, + PhPostprocessor POST = PrePostNoOp> +class PhTreeV16 { + friend PhTreeDebugHelper; + + public: + static_assert(!std::is_reference::value, "Reference type value are not supported."); + + PhTreeV16() : num_entries_{0}, root_{0, MAX_BIT_WIDTH - 1} {} + + /* + * Attempts to build and insert a key and a value into the tree. + * + * @param key The key for the new entry. + * + * @param __args Arguments used to generate a new value. + * + * @return A pair, whose first element refers to the value of the new or pre-existing entry, + * and whose second element is a bool that is true if the pair was actually inserted. + * + * This function attempts to build and insert a (key, value) pair into the tree.
The PH-Tree is + * effectively a map, so if an entry with the same key was already in the tree, the existing + * entry is returned instead of inserting a new one. + */ + template + std::pair emplace(const PhPoint& key, _Args&&... __args) { + auto* current_entry = &root_; + bool is_inserted = false; + while (current_entry->IsNode()) { + current_entry = + current_entry->GetNode().Emplace(is_inserted, key, std::forward<_Args>(__args)...); + } + num_entries_ += is_inserted; + return {current_entry->GetValue(), is_inserted}; + } + + /* + * See std::map::insert(). + * + * @return a pair consisting of the inserted element (or the element that prevented the + * insertion) and a bool denoting whether the insertion took place. + */ + std::pair insert(const PhPoint& key, const T& value) { + return emplace(key, value); + } + + /* + * @return the value stored at position 'key'. If no such value exists, one is added to the tree + * and returned. + */ + T& operator[](const PhPoint& key) { + return emplace(key).first; + } + + /* + * Analogous to std::map::count(). + * + * @return '1', if a value is associated with the provided key, otherwise '0'. + */ + size_t count(const PhPoint& key) const { + if (empty()) { + return 0; + } + auto* current_entry = &root_; + while (current_entry && current_entry->IsNode()) { + current_entry = current_entry->GetNode().Find(key); + } + return current_entry ? 1 : 0; + } + + /* + * Analogous to std::map::find(). + * + * Get an entry associated with a k dimensional key. + * @param key the key to look up + * @return an iterator that points either to the associated value or to {@code end()} if the key + * was not found. + */ + PhIteratorSimple find(const PhPoint& key) const { + if (empty()) { + return {}; + } + + auto* current_entry = &root_; + while (current_entry && current_entry->IsNode()) { + current_entry = current_entry->GetNode().Find(key); + } + + if (current_entry) { + return PhIteratorSimple(current_entry); + } + return {}; + } + + /* + * See std::map::erase().
Removes any value associated with the provided key. + * + * @return '1' if a value was found, otherwise '0'. + */ + size_t erase(const PhPoint& key) { + auto* current_node = &root_.GetNode(); + Node* parent_node = nullptr; + bool found = false; + while (current_node) { // descend until Erase() returns no child node to continue with + auto* child_node = current_node->Erase(key, parent_node, found); + parent_node = current_node; + current_node = child_node; + } + num_entries_ -= found; + return found; + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * functions must implement the same signature as the default 'PhFilterNoOp'. + * + * @return an iterator over all (filtered) entries in the tree. + */ + template > + auto begin(FILTER filter = FILTER()) const { + return PhIteratorFull(root_, filter); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum and maximum coordinates, respectively, in every dimension. + * @param min Minimum values + * @param max Maximum values + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'PhFilterNoOp'. + * @return Result iterator. + */ + template > + auto begin_query( + const PhPoint& min, const PhPoint& max, FILTER filter = FILTER()) const { + return PhIteratorHC(root_, min, max, filter); + } + + /* + * Locate nearest neighbors for a given point in space. + * @param min_results number of entries to be returned. More entries may or may not be returned + * when several entries have the same distance. + * @param center center point + * @param distance_function optional distance function, defaults to euclidean distance + * @param filter optional filter predicate that excludes nodes/entries before their distance is + * calculated.
+ * @return Result iterator. + */ + template < + typename DISTANCE = PhDistanceLongEuclidean, + typename FILTER = PhFilterNoOp> + auto begin_knn_query( + size_t min_results, + const PhPoint& center, + DISTANCE distance_function = DISTANCE(), + FILTER filter = FILTER()) const { + return PhIteratorKnnHS( + root_, min_results, center, distance_function, filter); + } + + /* + * @return An iterator representing the tree's 'end'. + */ + const auto& end() const { + return the_end_; + } + + /* + * Remove all entries from the tree by replacing the root with a fresh root entry. + */ + void clear() { + num_entries_ = 0; + root_ = PhEntry(0, MAX_BIT_WIDTH - 1); + } + + /* + * @return the number of entries (key/value pairs) in the tree. + */ + [[nodiscard]] size_t size() const { + return num_entries_; + } + + /* + * @return 'true' if the tree is empty, otherwise 'false'. + */ + [[nodiscard]] bool empty() const { + return num_entries_ == 0; + } + + private: + /* + * This function is only for debugging. + */ + auto GetDebugHelper() const { + return DebugHelperV16(root_.GetNode(), num_entries_); + } + + private: + size_t num_entries_; + // Contract: root_ contains a Node with 0 or more entries (the root node is the only Node + // that is allowed to have fewer than two entries). + PhEntry root_; + PhIteratorEnd the_end_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_PHTREEV16_H diff --git a/third_party/BUILD b/third_party/BUILD new file mode 100644 index 00000000..6595690d --- /dev/null +++ b/third_party/BUILD @@ -0,0 +1,3 @@ +# NOTE: This file is intentionally left blank.
+# We make third_party/ its own bazel workspace because it allows to run `bazel build ...` without +# having all targets defined in third-party BUILD files in this directory buildable. diff --git a/third_party/gtest/BUILD b/third_party/gtest/BUILD new file mode 100644 index 00000000..9d3d02c1 --- /dev/null +++ b/third_party/gtest/BUILD @@ -0,0 +1,77 @@ +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Author: misterg@google.com (Gennadiy Civil) +# +# Bazel Build for Google C++ Testing Framework(Google Test) + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +# This is a fork of gtest's BUILD file that additionally disables warnings (-w on non-Windows builds), because we build with maximum warnings. + +# Google Test including Google Mock +cc_library( + name = "gtest", + srcs = glob( + include = [ + "googletest/src/*.cc", + "googletest/src/*.h", + "googletest/include/gtest/**/*.h", + "googlemock/src/*.cc", + "googlemock/include/gmock/**/*.h", + ], + exclude = [ + "googletest/src/gtest-all.cc", + "googletest/src/gtest_main.cc", + "googlemock/src/gmock-all.cc", + "googlemock/src/gmock_main.cc", + ], + ), + hdrs = glob([ + "googletest/include/gtest/*.h", + "googlemock/include/gmock/*.h", + ]), + copts = select({ + "@//:windows": [], + "//conditions:default": ["-w"], + }), + includes = [ + "googlemock", + "googlemock/include", + "googletest", + "googletest/include", + ], + linkopts = select({ + "@//:windows": [], + "//conditions:default": ["-pthread"], + }), +) diff --git a/third_party/spdlog/BUILD b/third_party/spdlog/BUILD new file mode 100644 index 00000000..68763012 --- /dev/null +++ b/third_party/spdlog/BUILD @@ -0,0 +1,11 @@ +licenses(["notice"]) + +cc_library( + name = "spdlog", + hdrs = glob([ + "include/**/*.cc", + "include/**/*.h", + ]), + includes = ["include"], + visibility = ["//visibility:public"], +) diff --git a/tools/BUILD b/tools/BUILD new file mode 100644 index 00000000..e69de29b diff --git a/tools/bazel b/tools/bazel new file mode 100755 index 00000000..43883f99 --- /dev/null +++ b/tools/bazel @@ -0,0 +1,122 @@ +#!/usr/bin/env bash + +TOOLS_DIR="$(dirname "$0")" + +source "${TOOLS_DIR}"/../ci/includes/os.sh +source "${TOOLS_DIR}"/../ci/includes/bazel.sh + +# All information required for the script to select or, if necessary, install bazel is contained +# in this code block.
+# If a higher version of bazel is required, update `REQUIRED_BAZEL_VERSION` and the +# `REQUIRED_BAZEL_SHA256` values for each platform. +REQUIRED_BAZEL_VERSION="$(getBazelVersion)" +BAZEL_INSTALLATION_DIR="${HOME}/.bazel_installations/${REQUIRED_BAZEL_VERSION}" +if isLinux; then + REQUIRED_BAZEL_SHA256="4df79462c6c3ecdeeee7af99fc269b52ab1aa4828ef3bc359c1837d3fafeeee7" + REQUIRED_BAZEL_SHA256CMD="sha256sum" + DOWNLOAD_CMD="wget -q --no-clobber -O bazel" + BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-linux-x86_64" + + if [[ "$(lsb_release -cs)" != "bionic" ]]; then + echo -e "\033[0;33mWarning: You don't seem to be running Ubuntu 18.04, which is the supported Linux distribution. Continuing anyway, but your mileage might vary.\033[0m" + fi + + if which clang-9 1>/dev/null; then + # We follow the symlink of clang-9 here to avoid a bug with the LLVM package when combined with -no-canonical-prefixes. + export CC="$(readlink -f "$(which clang-9)")" + else + echo -e "\033[0;33mWarning: You don't seem to have clang-9 correctly installed. Please check README.md to ensure your compiler is set up correctly. Continuing with whatever compiler bazel detects, your mileage might vary.\033[0m" + fi +elif isMacOS; then + REQUIRED_BAZEL_SHA256="3eca4c96cfda97a9d5f8d3d0dec4155a5cc5ff339b10d3f35213c398bf13881e" + REQUIRED_BAZEL_SHA256CMD="shasum -a 256" + DOWNLOAD_CMD="wget -q --no-clobber -O bazel" + BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-darwin-x86_64" +else + REQUIRED_BAZEL_SHA256="cc7b3ff6f4bfd6bc2121a80656afec66ee57713e8b88e9d2fb58b4eddf271268" + REQUIRED_BAZEL_SHA256CMD="sha256sum" + DOWNLOAD_CMD="curl -L -s -o bazel.exe" + # Windows does not have an installer but retrieves the executable directly. + BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-windows-x86_64.exe" + + export BAZEL_VC="C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC" + if [[ ! 
-d "$BAZEL_VC" ]]; then + export BAZEL_VC="C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC" + fi + if [[ ! -d "$BAZEL_VC" ]]; then + echo -e "\033[0;33mWarning: You don't seem to have Visual Studio 2019 installed correctly. Continuing with whatever compiler bazel detects, your mileage might vary.\033[0m" + fi +fi + +BAZEL_TARGET_PATH="${BAZEL_INSTALLATION_DIR}/bin/bazel" + +# Check if correct version is already installed. +if [[ -f "${BAZEL_TARGET_PATH}" ]]; then + if [[ ! -x "${BAZEL_TARGET_PATH}" ]]; then + echo "ERROR: Bazel executable at '${BAZEL_TARGET_PATH}' does not have execute permission" + stat "${BAZEL_TARGET_PATH}" + exit 1 + fi + BAZEL_SUBCOMMAND="$1" + shift + exec -a "$0" "${BAZEL_TARGET_PATH}" "$BAZEL_SUBCOMMAND" "$@" +fi + +cat << EOM +================================================= +Bazel version ${REQUIRED_BAZEL_VERSION} is not +installed under ~/.bazel_installations + +Installing bazel ${REQUIRED_BAZEL_VERSION} now... +================================================= +EOM + +# Create root directory if needed. +if [[ ! -d "${BAZEL_INSTALLATION_DIR}" ]]; then + echo "Installation directory created." + mkdir -p "${BAZEL_INSTALLATION_DIR}" +fi + +function _are_checksums_equal() { + CHECKSUM_A=$1 + CHECKSUM_B=$2 + # Split apart the checksum from the file name by splitting the returned string at the contained + # space and comparing only the first field; the checksum tool output has the format: + # xxxxxx...xxxxx (checksum), a space, then the file name + CHECKSUM_A_PARTS=(${CHECKSUM_A}) + CHECKSUM_B_PARTS=(${CHECKSUM_B}) + + if [[ "${CHECKSUM_A_PARTS[0]}" == "${CHECKSUM_B_PARTS[0]}" ]]; then + return 0 # True + else + return 1 # False + fi +} + +# Install correct bazel version. +# If we don't have a local Bazel install at this point we need to retrieve the right version from GitHub. +mkdir -p "${BAZEL_INSTALLATION_DIR}/bin/tmp" +pushd "${BAZEL_INSTALLATION_DIR}/bin/tmp" +rm bazel 2>/dev/null || true # Remove bazel binary if already present in tmp dir - indicates previous failed download.
+echo "Starting download of bazel ${REQUIRED_BAZEL_VERSION}..." +${DOWNLOAD_CMD} "https://github.com/bazelbuild/bazel/releases/download/${REQUIRED_BAZEL_VERSION}/${BAZEL_EXE}" +echo "Download finished." +echo "Testing download file integrity..." +CALCULATED_CHECKSUM="$(${REQUIRED_BAZEL_SHA256CMD} bazel)" +if ! _are_checksums_equal "${CALCULATED_CHECKSUM}" "${REQUIRED_BAZEL_SHA256}"; then + cat <@my_ssl//jar as a dependency to depend on this jar. + + You may also reference files on the current system (localhost) by using "file:///path/to/file" + if you are on Unix-based systems. If you're on Windows, use "file:///c:/path/to/file". In both + examples, note the three slashes (`/`) -- the first two slashes belong to `file://` and the third + one belongs to the absolute path to the file. +""", +) diff --git a/tools/runners/BUILD b/tools/runners/BUILD new file mode 100644 index 00000000..e69de29b diff --git a/tools/runners/sanitizers/asan/BUILD b/tools/runners/sanitizers/asan/BUILD new file mode 100644 index 00000000..ea80a87f --- /dev/null +++ b/tools/runners/sanitizers/asan/BUILD @@ -0,0 +1,10 @@ +package(default_visibility = ["//visibility:private"]) + +sh_binary( + name = "asan", + srcs = ["asan.sh"], + data = [ + "asan-suppressions.txt", + "lsan-suppressions.txt", + ], +) diff --git a/tools/runners/sanitizers/asan/asan-suppressions.txt b/tools/runners/sanitizers/asan/asan-suppressions.txt new file mode 100644 index 00000000..e69de29b diff --git a/tools/runners/sanitizers/asan/asan.sh b/tools/runners/sanitizers/asan/asan.sh new file mode 100755 index 00000000..7324f364 --- /dev/null +++ b/tools/runners/sanitizers/asan/asan.sh @@ -0,0 +1,3 @@ +ASAN_OPTIONS=suppressions="tools/runners/sanitizers/asan/asan-suppressions.txt ${ASAN_OPTIONS}" \ +LSAN_OPTIONS=suppressions="tools/runners/sanitizers/asan/lsan-suppressions.txt ${LSAN_OPTIONS}" \ +"${@}" diff --git a/tools/runners/sanitizers/asan/lsan-suppressions.txt b/tools/runners/sanitizers/asan/lsan-suppressions.txt new 
file mode 100644 index 00000000..f884bcdc --- /dev/null +++ b/tools/runners/sanitizers/asan/lsan-suppressions.txt @@ -0,0 +1 @@ +leak:ossl_init_load_crypto_nodelete \ No newline at end of file diff --git a/tools/runners/sanitizers/tsan/BUILD b/tools/runners/sanitizers/tsan/BUILD new file mode 100644 index 00000000..64a6c051 --- /dev/null +++ b/tools/runners/sanitizers/tsan/BUILD @@ -0,0 +1,9 @@ +package(default_visibility = ["//visibility:private"]) + +sh_binary( + name = "tsan", + srcs = ["tsan.sh"], + data = [ + "tsan-suppressions.txt", + ], +) diff --git a/tools/runners/sanitizers/tsan/tsan-suppressions.txt b/tools/runners/sanitizers/tsan/tsan-suppressions.txt new file mode 100644 index 00000000..e69de29b diff --git a/tools/runners/sanitizers/tsan/tsan.sh b/tools/runners/sanitizers/tsan/tsan.sh new file mode 100755 index 00000000..09b78725 --- /dev/null +++ b/tools/runners/sanitizers/tsan/tsan.sh @@ -0,0 +1 @@ +TSAN_OPTIONS=suppressions="tools/runners/sanitizers/tsan/tsan-suppressions.txt ${TSAN_OPTIONS}" "${@}" diff --git a/tools/runners/sanitizers/ubsan/BUILD b/tools/runners/sanitizers/ubsan/BUILD new file mode 100644 index 00000000..31c72679 --- /dev/null +++ b/tools/runners/sanitizers/ubsan/BUILD @@ -0,0 +1,9 @@ +package(default_visibility = ["//visibility:private"]) + +sh_binary( + name = "ubsan", + srcs = ["ubsan.sh"], + data = [ + "ubsan-suppressions.txt", + ], +) diff --git a/tools/runners/sanitizers/ubsan/ubsan-suppressions.txt b/tools/runners/sanitizers/ubsan/ubsan-suppressions.txt new file mode 100644 index 00000000..e69de29b diff --git a/tools/runners/sanitizers/ubsan/ubsan.sh b/tools/runners/sanitizers/ubsan/ubsan.sh new file mode 100755 index 00000000..907672d2 --- /dev/null +++ b/tools/runners/sanitizers/ubsan/ubsan.sh @@ -0,0 +1 @@ +UBSAN_OPTIONS=suppressions="tools/runners/sanitizers/ubsan/ubsan-suppressions.txt print_stacktrace=1 ${UBSAN_OPTIONS}" "${@}" diff --git a/tools/runners/sanitizers/valgrind-memcheck/BUILD 
b/tools/runners/sanitizers/valgrind-memcheck/BUILD new file mode 100644 index 00000000..7bf4fb1a --- /dev/null +++ b/tools/runners/sanitizers/valgrind-memcheck/BUILD @@ -0,0 +1,9 @@ +package(default_visibility = ["//visibility:private"]) + +sh_binary( + name = "valgrind-memcheck", + srcs = ["valgrind-memcheck.sh"], + data = [ + "valgrind-suppressions.txt", + ], +) diff --git a/tools/runners/sanitizers/valgrind-memcheck/valgrind-memcheck.sh b/tools/runners/sanitizers/valgrind-memcheck/valgrind-memcheck.sh new file mode 100755 index 00000000..c76b1662 --- /dev/null +++ b/tools/runners/sanitizers/valgrind-memcheck/valgrind-memcheck.sh @@ -0,0 +1 @@ +valgrind --leak-check=full --error-exitcode=1 --suppressions=tools/runners/sanitizers/valgrind-memcheck/valgrind-suppressions.txt "${@}" diff --git a/tools/runners/sanitizers/valgrind-memcheck/valgrind-suppressions.txt b/tools/runners/sanitizers/valgrind-memcheck/valgrind-suppressions.txt new file mode 100644 index 00000000..59fbc13c --- /dev/null +++ b/tools/runners/sanitizers/valgrind-memcheck/valgrind-suppressions.txt @@ -0,0 +1,17 @@ +{ + openssl_leak + Memcheck:Leak + match-leak-kinds: definite + fun:realloc + fun:CRYPTO_realloc + fun:ERR_add_error_vdata + fun:ERR_add_error_data + fun:dlfcn_load + fun:DSO_load + fun:DSO_dsobyaddr + fun:ossl_init_load_crypto_nodelete + fun:ossl_init_load_crypto_nodelete_ossl_ + fun:__pthread_once_slow + fun:CRYPTO_THREAD_run_once + fun:OPENSSL_init_crypto +}