From bd36cceb7d0501bda42a498b61a70a5ce6d7ea83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Widera?= Date: Tue, 21 Jul 2020 12:01:54 +0200 Subject: [PATCH] remove boost::aligned_alloc - remove dependency to boost aligned allocation - remove boost in filenames which was providing aligned memory via boost implementations - add `AlignedAlloc.hpp` --- include/alpaka/acc/AccCpuFibers.hpp | 6 +- include/alpaka/acc/AccCpuOmp2Threads.hpp | 6 +- include/alpaka/acc/AccCpuOmp4.hpp | 6 +- include/alpaka/acc/AccCpuThreads.hpp | 6 +- include/alpaka/alpaka.hpp | 5 +- ....hpp => BlockSharedMemDynAlignedAlloc.hpp} | 25 ++++--- .../shared/st/BlockSharedMemStMasterSync.hpp | 7 +- .../shared/st/BlockSharedMemStNoSync.hpp | 7 +- include/alpaka/core/AlignedAlloc.hpp | 72 +++++++++++++++++++ ...puBoostAligned.hpp => AllocCpuAligned.hpp} | 17 +++-- include/alpaka/mem/buf/BufCpu.hpp | 6 +- 11 files changed, 116 insertions(+), 47 deletions(-) rename include/alpaka/block/shared/dyn/{BlockSharedMemDynBoostAlignedAlloc.hpp => BlockSharedMemDynAlignedAlloc.hpp} (75%) create mode 100644 include/alpaka/core/AlignedAlloc.hpp rename include/alpaka/mem/alloc/{AllocCpuBoostAligned.hpp => AllocCpuAligned.hpp} (86%) diff --git a/include/alpaka/acc/AccCpuFibers.hpp b/include/alpaka/acc/AccCpuFibers.hpp index e9d4a55f8be5..a87e5ea55fb0 100644 --- a/include/alpaka/acc/AccCpuFibers.hpp +++ b/include/alpaka/acc/AccCpuFibers.hpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -78,7 +78,7 @@ namespace alpaka atomic::AtomicNoOp // thread atomics >, public math::MathStdLib, - public block::shared::dyn::BlockSharedMemDynBoostAlignedAlloc, + public block::shared::dyn::BlockSharedMemDynAlignedAlloc, public block::shared::st::BlockSharedMemStMasterSync, public block::sync::BlockSyncBarrierFiber, public intrinsic::IntrinsicCpu, @@ -112,7 +112,7 @@ namespace alpaka atomic::AtomicNoOp // atomics between threads >(), math::MathStdLib(), - 
block::shared::dyn::BlockSharedMemDynBoostAlignedAlloc(static_cast(blockSharedMemDynSizeBytes)), + block::shared::dyn::BlockSharedMemDynAlignedAlloc(static_cast(blockSharedMemDynSizeBytes)), block::shared::st::BlockSharedMemStMasterSync( [this](){block::sync::syncBlockThreads(*this);}, [this](){return (m_masterFiberId == boost::this_fiber::get_id());}), diff --git a/include/alpaka/acc/AccCpuOmp2Threads.hpp b/include/alpaka/acc/AccCpuOmp2Threads.hpp index 945e5562dc32..fb7a7205ae1f 100644 --- a/include/alpaka/acc/AccCpuOmp2Threads.hpp +++ b/include/alpaka/acc/AccCpuOmp2Threads.hpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -80,7 +80,7 @@ namespace alpaka atomic::AtomicOmpBuiltIn // thread atomics >, public math::MathStdLib, - public block::shared::dyn::BlockSharedMemDynBoostAlignedAlloc, + public block::shared::dyn::BlockSharedMemDynAlignedAlloc, public block::shared::st::BlockSharedMemStMasterSync, public block::sync::BlockSyncBarrierOmp, public intrinsic::IntrinsicCpu, @@ -114,7 +114,7 @@ namespace alpaka atomic::AtomicOmpBuiltIn // atomics between threads >(), math::MathStdLib(), - block::shared::dyn::BlockSharedMemDynBoostAlignedAlloc(static_cast(blockSharedMemDynSizeBytes)), + block::shared::dyn::BlockSharedMemDynAlignedAlloc(static_cast(blockSharedMemDynSizeBytes)), block::shared::st::BlockSharedMemStMasterSync( [this](){block::sync::syncBlockThreads(*this);}, [](){return (::omp_get_thread_num() == 0);}), diff --git a/include/alpaka/acc/AccCpuOmp4.hpp b/include/alpaka/acc/AccCpuOmp4.hpp index 97c824180509..521a8560e3f0 100644 --- a/include/alpaka/acc/AccCpuOmp4.hpp +++ b/include/alpaka/acc/AccCpuOmp4.hpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -80,7 +80,7 @@ namespace alpaka atomic::AtomicOmpBuiltIn // thread atomics >, public math::MathStdLib, - public block::shared::dyn::BlockSharedMemDynBoostAlignedAlloc, + public 
block::shared::dyn::BlockSharedMemDynAlignedAlloc, public block::shared::st::BlockSharedMemStMasterSync, public block::sync::BlockSyncBarrierOmp, public intrinsic::IntrinsicCpu, @@ -114,7 +114,7 @@ namespace alpaka atomic::AtomicOmpBuiltIn // atomics between threads >(), math::MathStdLib(), - block::shared::dyn::BlockSharedMemDynBoostAlignedAlloc(static_cast(blockSharedMemDynSizeBytes)), + block::shared::dyn::BlockSharedMemDynAlignedAlloc(static_cast(blockSharedMemDynSizeBytes)), block::shared::st::BlockSharedMemStMasterSync( [this](){block::sync::syncBlockThreads(*this);}, [](){return (::omp_get_thread_num() == 0);}), diff --git a/include/alpaka/acc/AccCpuThreads.hpp b/include/alpaka/acc/AccCpuThreads.hpp index a16a112465c6..52397239a653 100644 --- a/include/alpaka/acc/AccCpuThreads.hpp +++ b/include/alpaka/acc/AccCpuThreads.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -75,7 +75,7 @@ namespace alpaka atomic::AtomicStdLibLock<16> // thread atomics >, public math::MathStdLib, - public block::shared::dyn::BlockSharedMemDynBoostAlignedAlloc, + public block::shared::dyn::BlockSharedMemDynAlignedAlloc, public block::shared::st::BlockSharedMemStMasterSync, public block::sync::BlockSyncBarrierThread, public intrinsic::IntrinsicCpu, @@ -109,7 +109,7 @@ namespace alpaka atomic::AtomicStdLibLock<16> // atomics between threads >(), math::MathStdLib(), - block::shared::dyn::BlockSharedMemDynBoostAlignedAlloc(static_cast(blockSharedMemDynSizeBytes)), + block::shared::dyn::BlockSharedMemDynAlignedAlloc(static_cast(blockSharedMemDynSizeBytes)), block::shared::st::BlockSharedMemStMasterSync( [this](){block::sync::syncBlockThreads(*this);}, [this](){return (m_idMasterThread == std::this_thread::get_id());}), diff --git a/include/alpaka/alpaka.hpp b/include/alpaka/alpaka.hpp index 250aa08423a9..dbe09678a409 100644 --- a/include/alpaka/alpaka.hpp +++ b/include/alpaka/alpaka.hpp @@ -45,7 +45,7 @@ 
//----------------------------------------------------------------------------- // dynamic #include - #include + #include #include #include //----------------------------------------------------------------------------- @@ -67,6 +67,7 @@ // core #include #include +#include #include #include #include @@ -130,7 +131,7 @@ #include //----------------------------------------------------------------------------- // mem -#include +#include #include #include diff --git a/include/alpaka/block/shared/dyn/BlockSharedMemDynBoostAlignedAlloc.hpp b/include/alpaka/block/shared/dyn/BlockSharedMemDynAlignedAlloc.hpp similarity index 75% rename from include/alpaka/block/shared/dyn/BlockSharedMemDynBoostAlignedAlloc.hpp rename to include/alpaka/block/shared/dyn/BlockSharedMemDynAlignedAlloc.hpp index 2714956bd10c..6b848003411f 100644 --- a/include/alpaka/block/shared/dyn/BlockSharedMemDynBoostAlignedAlloc.hpp +++ b/include/alpaka/block/shared/dyn/BlockSharedMemDynAlignedAlloc.hpp @@ -12,10 +12,9 @@ #include #include +#include #include -#include - #include #include @@ -29,35 +28,35 @@ namespace alpaka { //############################################################################# //! The block shared dynamic memory allocator without synchronization. 
- class BlockSharedMemDynBoostAlignedAlloc : public concepts::Implements + class BlockSharedMemDynAlignedAlloc : public concepts::Implements { public: //----------------------------------------------------------------------------- - BlockSharedMemDynBoostAlignedAlloc( + BlockSharedMemDynAlignedAlloc( std::size_t const & blockSharedMemDynSizeBytes) { if(blockSharedMemDynSizeBytes > 0u) { m_blockSharedMemDyn.reset( reinterpret_cast( - boost::alignment::aligned_alloc(core::vectorization::defaultAlignment, blockSharedMemDynSizeBytes))); + core::alignedAlloc(core::vectorization::defaultAlignment, blockSharedMemDynSizeBytes))); } } //----------------------------------------------------------------------------- - BlockSharedMemDynBoostAlignedAlloc(BlockSharedMemDynBoostAlignedAlloc const &) = delete; + BlockSharedMemDynAlignedAlloc(BlockSharedMemDynAlignedAlloc const &) = delete; //----------------------------------------------------------------------------- - BlockSharedMemDynBoostAlignedAlloc(BlockSharedMemDynBoostAlignedAlloc &&) = delete; + BlockSharedMemDynAlignedAlloc(BlockSharedMemDynAlignedAlloc &&) = delete; //----------------------------------------------------------------------------- - auto operator=(BlockSharedMemDynBoostAlignedAlloc const &) -> BlockSharedMemDynBoostAlignedAlloc & = delete; + auto operator=(BlockSharedMemDynAlignedAlloc const &) -> BlockSharedMemDynAlignedAlloc & = delete; //----------------------------------------------------------------------------- - auto operator=(BlockSharedMemDynBoostAlignedAlloc &&) -> BlockSharedMemDynBoostAlignedAlloc & = delete; + auto operator=(BlockSharedMemDynAlignedAlloc &&) -> BlockSharedMemDynAlignedAlloc & = delete; //----------------------------------------------------------------------------- - /*virtual*/ ~BlockSharedMemDynBoostAlignedAlloc() = default; + /*virtual*/ ~BlockSharedMemDynAlignedAlloc() = default; public: std::unique_ptr< uint8_t, - boost::alignment::aligned_delete> mutable + 
core::AlignedDelete> mutable m_blockSharedMemDyn; //!< Block shared dynamic memory. }; @@ -72,11 +71,11 @@ namespace alpaka typename T> struct GetMem< T, - BlockSharedMemDynBoostAlignedAlloc> + BlockSharedMemDynAlignedAlloc> { //----------------------------------------------------------------------------- ALPAKA_FN_HOST static auto getMem( - block::shared::dyn::BlockSharedMemDynBoostAlignedAlloc const & blockSharedMemDyn) + block::shared::dyn::BlockSharedMemDynAlignedAlloc const & blockSharedMemDyn) -> T * { static_assert( diff --git a/include/alpaka/block/shared/st/BlockSharedMemStMasterSync.hpp b/include/alpaka/block/shared/st/BlockSharedMemStMasterSync.hpp index ba843ddf634e..85615144b676 100644 --- a/include/alpaka/block/shared/st/BlockSharedMemStMasterSync.hpp +++ b/include/alpaka/block/shared/st/BlockSharedMemStMasterSync.hpp @@ -12,10 +12,9 @@ #include #include +#include #include -#include - #include #include #include @@ -57,7 +56,7 @@ namespace alpaka std::vector< std::unique_ptr< uint8_t, - boost::alignment::aligned_delete>> mutable + core::AlignedDelete>> mutable m_sharedAllocs; std::function m_syncFn; @@ -93,7 +92,7 @@ namespace alpaka { blockSharedMemSt.m_sharedAllocs.emplace_back( reinterpret_cast( - boost::alignment::aligned_alloc(alignmentInBytes, sizeof(T)))); + core::alignedAlloc(alignmentInBytes, sizeof(T)))); } blockSharedMemSt.m_syncFn(); diff --git a/include/alpaka/block/shared/st/BlockSharedMemStNoSync.hpp b/include/alpaka/block/shared/st/BlockSharedMemStNoSync.hpp index 298019ad5da7..2b3955360d9e 100644 --- a/include/alpaka/block/shared/st/BlockSharedMemStNoSync.hpp +++ b/include/alpaka/block/shared/st/BlockSharedMemStNoSync.hpp @@ -12,10 +12,9 @@ #include #include +#include #include -#include - #include #include @@ -51,7 +50,7 @@ namespace alpaka std::vector< std::unique_ptr< uint8_t, - boost::alignment::aligned_delete>> mutable + core::AlignedDelete>> mutable m_sharedAllocs; }; @@ -79,7 +78,7 @@ namespace alpaka 
blockSharedMemSt.m_sharedAllocs.emplace_back( reinterpret_cast( - boost::alignment::aligned_alloc(alignmentInBytes, sizeof(T)))); + core::alignedAlloc(alignmentInBytes, sizeof(T)))); return std::ref( *reinterpret_cast( diff --git a/include/alpaka/core/AlignedAlloc.hpp b/include/alpaka/core/AlignedAlloc.hpp new file mode 100644 index 000000000000..77af609ccbd6 --- /dev/null +++ b/include/alpaka/core/AlignedAlloc.hpp @@ -0,0 +1,72 @@ +/* Copyright 2020 René Widera + * + * This file is part of alpaka. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma once + +#include +#include + +#if BOOST_COMP_MSVC + #include +#else + #include +#endif + +namespace alpaka +{ + namespace core + { + //----------------------------------------------------------------------------- + //! Allocates at least size bytes of memory whose address is aligned to alignment bytes; release it with alignedFree(). + //! NOTE(review): the macOS branch ignores the posix_memalign() return code - confirm ptr is still nullptr on failure. + ALPAKA_FN_INLINE ALPAKA_FN_HOST + void* alignedAlloc(size_t alignment, size_t size) + { +#if BOOST_OS_WINDOWS + return _aligned_malloc(size, alignment); +#elif BOOST_OS_MACOS + void * ptr = nullptr; + posix_memalign(&ptr, alignment, size); + return ptr; +#else + // the amount of bytes to allocate must be a multiple of the alignment + size_t sizeToAllocate = ((size + alignment - 1u) / alignment) * alignment; + return ::aligned_alloc(alignment, sizeToAllocate); +#endif + } + + ALPAKA_FN_INLINE ALPAKA_FN_HOST + void alignedFree(void* ptr) + { +#if BOOST_OS_WINDOWS + _aligned_free(ptr); +#else + // linux and macos + free(ptr); +#endif + } + + //############################################################################# + //! destroy aligned object and free aligned memory + struct AlignedDelete + { + constexpr AlignedDelete() = default; + + //----------------------------------------------------------------------------- + //!
Calls ~T() on ptr to destroy the object and then calls aligned_free to free the allocated memory. + template + void operator()(T* ptr) const + { + if (ptr) + ptr->~T(); + alignedFree(reinterpret_cast(ptr)); + } + }; + } +} diff --git a/include/alpaka/mem/alloc/AllocCpuBoostAligned.hpp b/include/alpaka/mem/alloc/AllocCpuAligned.hpp similarity index 86% rename from include/alpaka/mem/alloc/AllocCpuBoostAligned.hpp rename to include/alpaka/mem/alloc/AllocCpuAligned.hpp index 4afc3ed62f40..be4470d86449 100644 --- a/include/alpaka/mem/alloc/AllocCpuBoostAligned.hpp +++ b/include/alpaka/mem/alloc/AllocCpuAligned.hpp @@ -11,11 +11,10 @@ #include +#include #include #include -#include - #include namespace alpaka @@ -32,7 +31,7 @@ namespace alpaka //! \tparam TAlignment An integral constant containing the alignment. template< typename TAlignment> - class AllocCpuBoostAligned : public concepts::Implements> + class AllocCpuAligned : public concepts::Implements> { }; @@ -45,11 +44,11 @@ namespace alpaka typename TAlignment> struct Alloc< T, - AllocCpuBoostAligned> + AllocCpuAligned> { //----------------------------------------------------------------------------- ALPAKA_FN_HOST static auto alloc( - AllocCpuBoostAligned const & alloc, + AllocCpuAligned const & alloc, std::size_t const & sizeElems) -> T * { @@ -70,7 +69,7 @@ namespace alpaka alpaka::ignore_unused(alloc); return reinterpret_cast( - boost::alignment::aligned_alloc(std::max(TAlignment::value, minAlignement), sizeElems * sizeof(T))); + core::alignedAlloc(std::max(TAlignment::value, minAlignement), sizeElems * sizeof(T))); } }; @@ -81,16 +80,16 @@ namespace alpaka typename TAlignment> struct Free< T, - AllocCpuBoostAligned> + AllocCpuAligned> { //----------------------------------------------------------------------------- ALPAKA_FN_HOST static auto free( - AllocCpuBoostAligned const & alloc, + AllocCpuAligned const & alloc, T const * const ptr) -> void { alpaka::ignore_unused(alloc); - boost::alignment::aligned_free(
+ core::alignedFree( const_cast( reinterpret_cast(ptr))); } diff --git a/include/alpaka/mem/buf/BufCpu.hpp b/include/alpaka/mem/buf/BufCpu.hpp index 9d082c88b903..43c85d877a10 100644 --- a/include/alpaka/mem/buf/BufCpu.hpp +++ b/include/alpaka/mem/buf/BufCpu.hpp @@ -25,7 +25,7 @@ #include #endif -#include +#include #include @@ -49,7 +49,7 @@ namespace alpaka typename TDim, typename TIdx> class BufCpuImpl final : - public mem::alloc::AllocCpuBoostAligned> + public mem::alloc::AllocCpuAligned> { static_assert( !std::is_const::value, @@ -64,7 +64,7 @@ namespace alpaka ALPAKA_FN_HOST BufCpuImpl( dev::DevCpu const & dev, TExtent const & extent) : - mem::alloc::AllocCpuBoostAligned>(), + mem::alloc::AllocCpuAligned>(), m_dev(dev), m_extentElements(extent::getExtentVecEnd(extent)), m_pMem(mem::alloc::alloc(*this, static_cast(computeElementCount(extent)))),