From a570a0e87f95027e8544c72295236df5d84c3dcc Mon Sep 17 00:00:00 2001 From: Martin Leitner-Ankerl Date: Sat, 19 Aug 2023 21:10:58 +0200 Subject: [PATCH] Adds support for c++ modules Compile ankerl.unordered_dense.cpp to get the module. See test/modules/test.sh --- .gitignore | 5 ++ CMakeLists.txt | 2 +- README.md | 88 ++++++++++++++--------- include/ankerl/unordered_dense.h | 117 +++++++++++++++++-------------- meson.build | 2 +- src/ankerl.unordered_dense.cpp | 39 +++++++++++ test/modules/module_test.cpp | 20 ++++++ test/modules/test.sh | 12 ++++ test/unit/namespace.cpp | 2 +- 9 files changed, 199 insertions(+), 88 deletions(-) create mode 100644 src/ankerl.unordered_dense.cpp create mode 100644 test/modules/module_test.cpp create mode 100755 test/modules/test.sh diff --git a/.gitignore b/.gitignore index 5440f1b2..60127523 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,8 @@ compile_commands.json # ignore all in subprojects except the .wrap files /subprojects/* !/subprojects/*.wrap + +# c++ modules +*.pcm +a.out +*.o \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f6bebce4..d5ae6088 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.12) project("unordered_dense" - VERSION 4.0.4 + VERSION 4.1.0 DESCRIPTION "A fast & densely stored hashmap and hashset based on robin-hood backward shift deletion" HOMEPAGE_URL "https://github.com/martinus/unordered_dense") diff --git a/README.md b/README.md index b08ed602..f271bd47 100644 --- a/README.md +++ b/README.md @@ -17,22 +17,23 @@ Additionally, there are `ankerl::unordered_dense::segmented_map` and `ankerl::un - [1. Overview](#1-overview) - [2. Installation](#2-installation) - [2.1. Installing using cmake](#21-installing-using-cmake) -- [3. Extensions](#3-extensions) - - [3.1. Hash](#31-hash) - - [3.1.1. Simple Hash](#311-simple-hash) - - [3.1.2. High Quality Hash](#312-high-quality-hash) - - [3.1.3. 
Specialize `ankerl::unordered_dense::hash`](#313-specialize-ankerlunordered_densehash) - - [3.1.4. Heterogeneous Overloads using `is_transparent`](#314-heterogeneous-overloads-using-is_transparent) - - [3.1.5. Automatic Fallback to `std::hash`](#315-automatic-fallback-to-stdhash) - - [3.1.6. Hash the Whole Memory](#316-hash-the-whole-memory) - - [3.2. Container API](#32-container-api) - - [3.2.1. `auto extract() && -> value_container_type`](#321-auto-extract----value_container_type) - - [3.2.2. `[[nodiscard]] auto values() const noexcept -> value_container_type const&`](#322-nodiscard-auto-values-const-noexcept---value_container_type-const) - - [3.2.3. `auto replace(value_container_type&& container)`](#323-auto-replacevalue_container_type-container) - - [3.3. Custom Container Types](#33-custom-container-types) - - [3.4. Custom Bucket Tyeps](#34-custom-bucket-tyeps) - - [3.4.1. `ankerl::unordered_dense::bucket_type::standard`](#341-ankerlunordered_densebucket_typestandard) - - [3.4.2. `ankerl::unordered_dense::bucket_type::big`](#342-ankerlunordered_densebucket_typebig) +- [3. Usage](#3-usage) + - [3.1. Modules](#31-modules) + - [3.2. Hash](#32-hash) + - [3.2.1. Simple Hash](#321-simple-hash) + - [3.2.2. High Quality Hash](#322-high-quality-hash) + - [3.2.3. Specialize `ankerl::unordered_dense::hash`](#323-specialize-ankerlunordered_densehash) + - [3.2.4. Heterogeneous Overloads using `is_transparent`](#324-heterogeneous-overloads-using-is_transparent) + - [3.2.5. Automatic Fallback to `std::hash`](#325-automatic-fallback-to-stdhash) + - [3.2.6. Hash the Whole Memory](#326-hash-the-whole-memory) + - [3.3. Container API](#33-container-api) + - [3.3.1. `auto extract() && -> value_container_type`](#331-auto-extract----value_container_type) + - [3.3.2. `[[nodiscard]] auto values() const noexcept -> value_container_type const&`](#332-nodiscard-auto-values-const-noexcept---value_container_type-const) + - [3.3.3. 
`auto replace(value_container_type&& container)`](#333-auto-replacevalue_container_type-container) + - [3.4. Custom Container Types](#34-custom-container-types) + - [3.5. Custom Bucket Tyeps](#35-custom-bucket-tyeps) + - [3.5.1. `ankerl::unordered_dense::bucket_type::standard`](#351-ankerlunordered_densebucket_typestandard) + - [3.5.2. `ankerl::unordered_dense::bucket_type::big`](#352-ankerlunordered_densebucket_typebig) - [4. `segmented_map` and `segmented_set`](#4-segmented_map-and-segmented_set) - [5. Design](#5-design) - [5.1. Inserts](#51-inserts) @@ -87,9 +88,32 @@ find_package(unordered_dense CONFIG REQUIRED) target_link_libraries(your_project_name unordered_dense::unordered_dense) ``` -## 3. Extensions +## 3. Usage -### 3.1. Hash +### 3.1. Modules + +`ankerl::unordered_dense` supports C++20 modules. Simply compile `src/ankerl.unordered_dense.cpp` and use the resulting module, e.g. like so: + +```sh +clang++ -std=c++20 -I include --precompile -x c++-module src/ankerl.unordered_dense.cpp +clang++ -std=c++20 -c ankerl.unordered_dense.pcm +``` + +To use the module, e.g. in `module_test.cpp`, write + +```cpp +import ankerl.unordered_dense; +``` + +and compile with e.g. + +```sh +clang++ -std=c++20 -fprebuilt-module-path=. ankerl.unordered_dense.o module_test.cpp -o main +``` + +A simple demo script can be found in `test/modules`. + +### 3.2. Hash `ankerl::unordered_dense::hash` is a fast and high quality hash, based on [wyhash](https://github.com/wangyi-fudan/wyhash). The `ankerl::unordered_dense` map/set differentiates between hashes of high quality (good [avalanching effect](https://en.wikipedia.org/wiki/Avalanche_effect)) and bad quality. Hashes with good quality contain a special marker: @@ -101,7 +125,7 @@ This is the cases for the specializations `bool`, `char`, `signed char`, `unsign Hashes that do not contain such a marker are assumed to be of bad quality and receive an additional mixing step inside the map/set implementation. -#### 3.1.1. 
Simple Hash +#### 3.2.1. Simple Hash Consider a simple custom key type: @@ -132,7 +156,7 @@ auto ids = ankerl::unordered_dense::set(); Since `custom_hash_simple` doesn't have a `using is_avalanching = void;` marker it is considered to be of bad quality and additional mixing of `x.value` is automatically provided inside the set. -#### 3.1.2. High Quality Hash +#### 3.2.2. High Quality Hash Back to the `id` example, we can easily implement a higher quality hash: @@ -149,7 +173,7 @@ struct custom_hash_avalanching { We know `wyhash::hash` is of high quality, so we can add `using is_avalanching = void;` which makes the map/set directly use the returned value. -#### 3.1.3. Specialize `ankerl::unordered_dense::hash` +#### 3.2.3. Specialize `ankerl::unordered_dense::hash` Instead of creating a new class you can also specialize `ankerl::unordered_dense::hash`: @@ -164,7 +188,7 @@ struct ankerl::unordered_dense::hash { }; ``` -#### 3.1.4. Heterogeneous Overloads using `is_transparent` +#### 3.2.4. Heterogeneous Overloads using `is_transparent` This map/set supports heterogeneous overloads as described in [P2363 Extending associative containers with the remaining heterogeneous overloads](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2363r3.html) which is [targeted for C++26](https://wg21.link/p2077r2). This has overloads for `find`, `count`, `contains`, `equal_range` (see [P0919R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0919r3.html)), `erase` (see [P2077R2](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2077r2.html)), and `try_emplace`, `insert_or_assign`, `operator[]`, `at`, and `insert` & `emplace` for sets (see [P2363R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2363r3.html)). @@ -192,12 +216,12 @@ auto map = ankerl::unordered_dense::map value_container_type` +#### 3.3.1. `auto extract() && -> value_container_type` Extracts the internally used container. `*this` is emptied. -#### 3.2.2. 
`[[nodiscard]] auto values() const noexcept -> value_container_type const&` +#### 3.3.2. `[[nodiscard]] auto values() const noexcept -> value_container_type const&` Exposes the underlying values container. -#### 3.2.3. `auto replace(value_container_type&& container)` +#### 3.3.3. `auto replace(value_container_type&& container)` Discards the internally held container and replaces it with the one passed. Non-unique elements are removed, and the container will be partly reordered when non-unique elements are found. -### 3.3. Custom Container Types +### 3.4. Custom Container Types `unordered_dense` accepts a custom allocator, but you can also specify a custom container for that template argument. That way it is possible to replace the internally used `std::vector` with e.g. `std::deque` or any other container like `boost::interprocess::vector`. This supports fancy pointers (e.g. [offset_ptr](https://www.boost.org/doc/libs/1_80_0/doc/html/interprocess/offset_ptr.html)), so the container can be used with e.g. shared memory provided by `boost::interprocess`. -### 3.4. Custom Bucket Tyeps +### 3.5. Custom Bucket Tyeps The map/set supports two different bucket types. The default should be good for pretty much everyone. -#### 3.4.1. `ankerl::unordered_dense::bucket_type::standard` +#### 3.5.1. `ankerl::unordered_dense::bucket_type::standard` * Up to 2^32 = 4.29 billion elements. * 8 bytes overhead per bucket. -#### 3.4.2. `ankerl::unordered_dense::bucket_type::big` +#### 3.5.2. `ankerl::unordered_dense::bucket_type::big` * up to 2^63 = 9223372036854775808 elements. * 12 bytes overhead per bucket. 
diff --git a/include/ankerl/unordered_dense.h b/include/ankerl/unordered_dense.h index 1ff50f78..e2d38514 100644 --- a/include/ankerl/unordered_dense.h +++ b/include/ankerl/unordered_dense.h @@ -1,7 +1,7 @@ ///////////////////////// ankerl::unordered_dense::{map, set} ///////////////////////// // A fast & densely stored hashmap and hashset based on robin-hood backward shift deletion. -// Version 4.0.4 +// Version 4.1.0 // https://github.com/martinus/unordered_dense // // Licensed under the MIT License <http://opensource.org/licenses/MIT>. @@ -31,8 +31,8 @@ // see https://semver.org/spec/v2.0.0.html #define ANKERL_UNORDERED_DENSE_VERSION_MAJOR 4 // NOLINT(cppcoreguidelines-macro-usage) incompatible API changes -#define ANKERL_UNORDERED_DENSE_VERSION_MINOR 0 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible functionality -#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 4 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible bug fixes +#define ANKERL_UNORDERED_DENSE_VERSION_MINOR 1 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible functionality +#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 0 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible bug fixes // API versioning with inline namespace, see https://www.foonathan.net/2018/11/inline-namespaces/ @@ -70,6 +70,11 @@ # define ANKERL_UNORDERED_DENSE_NOINLINE __attribute__((noinline)) #endif +// ANKERL_UNORDERED_DENSE_EXPORT is defined as `export` in src/ankerl.unordered_dense.cpp when building the C++20 module; otherwise it expands to nothing +#if !defined(ANKERL_UNORDERED_DENSE_EXPORT) +# define ANKERL_UNORDERED_DENSE_EXPORT +#endif + #if ANKERL_UNORDERED_DENSE_CPP_VERSION < 201703L # error ankerl::unordered_dense requires C++17 or higher #else @@ -157,7 +162,7 @@ namespace detail { // hardcodes seed and the secret, reformattes the code, and clang-tidy fixes. 
namespace detail::wyhash { -static inline void mum(uint64_t* a, uint64_t* b) { +inline void mum(uint64_t* a, uint64_t* b) { # if defined(__SIZEOF_INT128__) __uint128_t r = *a; r *= *b; @@ -187,30 +192,30 @@ static inline void mum(uint64_t* a, uint64_t* b) { } // multiply and xor mix function, aka MUM -[[nodiscard]] static inline auto mix(uint64_t a, uint64_t b) -> uint64_t { +[[nodiscard]] inline auto mix(uint64_t a, uint64_t b) -> uint64_t { mum(&a, &b); return a ^ b; } // read functions. WARNING: we don't care about endianness, so results are different on big endian! -[[nodiscard]] static inline auto r8(const uint8_t* p) -> uint64_t { +[[nodiscard]] inline auto r8(const uint8_t* p) -> uint64_t { uint64_t v{}; std::memcpy(&v, p, 8U); return v; } -[[nodiscard]] static inline auto r4(const uint8_t* p) -> uint64_t { +[[nodiscard]] inline auto r4(const uint8_t* p) -> uint64_t { uint32_t v{}; std::memcpy(&v, p, 4); return v; } // reads 1, 2, or 3 bytes -[[nodiscard]] static inline auto r3(const uint8_t* p, size_t k) -> uint64_t { +[[nodiscard]] inline auto r3(const uint8_t* p, size_t k) -> uint64_t { return (static_cast(p[0]) << 16U) | (static_cast(p[k >> 1U]) << 8U) | p[k - 1]; } -[[maybe_unused]] [[nodiscard]] static inline auto hash(void const* key, size_t len) -> uint64_t { +[[maybe_unused]] [[nodiscard]] inline auto hash(void const* key, size_t len) -> uint64_t { static constexpr auto secret = std::array{UINT64_C(0xa0761d6478bd642f), UINT64_C(0xe7037ed1a0b428db), UINT64_C(0x8ebc6af09c88c6e3), @@ -257,13 +262,13 @@ static inline void mum(uint64_t* a, uint64_t* b) { return mix(secret[1] ^ len, mix(a ^ secret[1], b ^ seed)); } -[[nodiscard]] static inline auto hash(uint64_t x) -> uint64_t { +[[nodiscard]] inline auto hash(uint64_t x) -> uint64_t { return detail::wyhash::mix(x, UINT64_C(0x9E3779B97F4A7C15)); } } // namespace detail::wyhash -template +ANKERL_UNORDERED_DENSE_EXPORT template struct hash { auto operator()(T const& obj) const 
noexcept(noexcept(std::declval>().operator()(std::declval()))) -> uint64_t { @@ -1819,60 +1824,66 @@ class table : public std::conditional_t, base_table_type_map, bas } // namespace detail -template , - class KeyEqual = std::equal_to, - class AllocatorOrContainer = std::allocator>, - class Bucket = bucket_type::standard> +ANKERL_UNORDERED_DENSE_EXPORT template , + class KeyEqual = std::equal_to, + class AllocatorOrContainer = std::allocator>, + class Bucket = bucket_type::standard> using map = detail::table; -template , - class KeyEqual = std::equal_to, - class AllocatorOrContainer = std::allocator>, - class Bucket = bucket_type::standard> +ANKERL_UNORDERED_DENSE_EXPORT template , + class KeyEqual = std::equal_to, + class AllocatorOrContainer = std::allocator>, + class Bucket = bucket_type::standard> using segmented_map = detail::table; -template , - class KeyEqual = std::equal_to, - class AllocatorOrContainer = std::allocator, - class Bucket = bucket_type::standard> +ANKERL_UNORDERED_DENSE_EXPORT template , + class KeyEqual = std::equal_to, + class AllocatorOrContainer = std::allocator, + class Bucket = bucket_type::standard> using set = detail::table; -template , - class KeyEqual = std::equal_to, - class AllocatorOrContainer = std::allocator, - class Bucket = bucket_type::standard> +ANKERL_UNORDERED_DENSE_EXPORT template , + class KeyEqual = std::equal_to, + class AllocatorOrContainer = std::allocator, + class Bucket = bucket_type::standard> using segmented_set = detail::table; # if defined(ANKERL_UNORDERED_DENSE_PMR) namespace pmr { -template , - class KeyEqual = std::equal_to, - class Bucket = bucket_type::standard> +ANKERL_UNORDERED_DENSE_EXPORT template , + class KeyEqual = std::equal_to, + class Bucket = bucket_type::standard> using map = detail::table>, Bucket, false>; -template , - class KeyEqual = std::equal_to, - class Bucket = bucket_type::standard> +ANKERL_UNORDERED_DENSE_EXPORT template , + class KeyEqual = std::equal_to, + class Bucket = 
bucket_type::standard> using segmented_map = detail::table>, Bucket, true>; -template , class KeyEqual = std::equal_to, class Bucket = bucket_type::standard> +ANKERL_UNORDERED_DENSE_EXPORT template , + class KeyEqual = std::equal_to, + class Bucket = bucket_type::standard> using set = detail::table, Bucket, false>; -template , class KeyEqual = std::equal_to, class Bucket = bucket_type::standard> +ANKERL_UNORDERED_DENSE_EXPORT template , + class KeyEqual = std::equal_to, + class Bucket = bucket_type::standard> using segmented_set = detail::table, Bucket, true>; @@ -1892,14 +1903,14 @@ using segmented_set = namespace std { // NOLINT(cert-dcl58-cpp) -template +ANKERL_UNORDERED_DENSE_EXPORT template // NOLINTNEXTLINE(cert-dcl58-cpp) auto erase_if(ankerl::unordered_dense::detail::table& map, Pred pred) -> size_t { diff --git a/meson.build b/meson.build index 599c6b27..6dfe4f3f 100644 --- a/meson.build +++ b/meson.build @@ -18,7 +18,7 @@ # project('unordered_dense', 'cpp', - version: '4.0.4', + version: '4.1.0', license: 'MIT', default_options : [ 'cpp_std=c++17', diff --git a/src/ankerl.unordered_dense.cpp b/src/ankerl.unordered_dense.cpp new file mode 100644 index 00000000..001f7c50 --- /dev/null +++ b/src/ankerl.unordered_dense.cpp @@ -0,0 +1,39 @@ +module; + +// see https://github.com/fmtlib/fmt/blob/master/src/fmt.cc + +// Put all implementation-provided headers into the global module fragment +// to prevent attachment to this module. + +#include <array> // for array +#include <cstdint> // for uint64_t, uint32_t, uint8_t, UINT64_C +#include <cstring> // for size_t, memcpy, memset +#include <functional> // for equal_to, hash +#include <initializer_list> // for initializer_list +#include <iterator> // for pair, distance +#include <limits> // for numeric_limits +#include <memory> // for allocator, allocator_traits, shared_ptr +#include <stdexcept> // for out_of_range +#include <string> // for basic_string +#include <string_view> // for basic_string_view, hash +#include <tuple> // for forward_as_tuple +#include <type_traits> // for enable_if_t, declval, conditional_t, ena... 
+#include <utility> // for forward, exchange, pair, as_const, piece... +#include <vector> // for vector +#if defined(__has_include) +# if __has_include(<memory_resource>) +# include <memory_resource> // for polymorphic_allocator +# elif __has_include(<experimental/memory_resource>) +# include <experimental/memory_resource> // for polymorphic_allocator +# endif +#endif +#if defined(_MSC_VER) && defined(_M_X64) +# include <intrin.h> +# pragma intrinsic(_umul128) +#endif + +export module ankerl.unordered_dense; + +#define ANKERL_UNORDERED_DENSE_EXPORT export + +#include "ankerl/unordered_dense.h" diff --git a/test/modules/module_test.cpp b/test/modules/module_test.cpp new file mode 100644 index 00000000..d44695d3 --- /dev/null +++ b/test/modules/module_test.cpp @@ -0,0 +1,20 @@ +import ankerl.unordered_dense; + +#include <cassert> +#include <string> + +int main() { + ankerl::unordered_dense::map<std::string, int> m; + m["24535"] = 4; + assert(m.size() == 1); + + auto h_int = ankerl::unordered_dense::hash<int>(); + assert(h_int(123) != 123); + + auto h_str = ankerl::unordered_dense::hash<std::string>(); + assert(h_str("123") != 123); + + auto h_ptr = ankerl::unordered_dense::hash<int*>(); + int i = 0; + assert(h_ptr(&i) != 0); +} diff --git a/test/modules/test.sh b/test/modules/test.sh new file mode 100755 index 00000000..ab2facd6 --- /dev/null +++ b/test/modules/test.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -xe + +rm -f *.o *.pcm a.out + +clang++ -std=c++20 -I ../../include --precompile -x c++-module ../../src/ankerl.unordered_dense.cpp +clang++ -std=c++20 -c ankerl.unordered_dense.pcm +clang++ -std=c++20 -fprebuilt-module-path=. ankerl.unordered_dense.o module_test.cpp -o a.out + +./a.out + +rm -f *.o *.pcm a.out diff --git a/test/unit/namespace.cpp b/test/unit/namespace.cpp index 20a09247..e2bf22e5 100644 --- a/test/unit/namespace.cpp +++ b/test/unit/namespace.cpp @@ -2,7 +2,7 @@ #include <type_traits> -namespace versioned_namespace = ankerl::unordered_dense::v4_0_4; +namespace versioned_namespace = ankerl::unordered_dense::v4_1_0; static_assert(std::is_same_v<versioned_namespace::map<int, int>, ankerl::unordered_dense::map<int, int>>); static_assert(std::is_same_v<versioned_namespace::hash<int>, ankerl::unordered_dense::hash<int>>);