From b455ba5e0983eeb75f979d3140cb209b41d06fd1 Mon Sep 17 00:00:00 2001 From: Simeon Ehrig Date: Tue, 18 Jan 2022 16:48:02 +0100 Subject: [PATCH] Extend interface of reduce and transform to allow begin and end pointer --- include/vikunja/reduce/reduce.hpp | 106 ++++++++++++++++++++++ include/vikunja/transform/transform.hpp | 97 +++++++++++++++++++++ test/integ/reduce/src/Reduce.cpp | 69 ++++++++++++++- test/integ/transform/src/Transform.cpp | 111 +++++++++++++++++++++++- 4 files changed, 378 insertions(+), 5 deletions(-) diff --git a/include/vikunja/reduce/reduce.hpp b/include/vikunja/reduce/reduce.hpp index 4597086..906ed43 100644 --- a/include/vikunja/reduce/reduce.hpp +++ b/include/vikunja/reduce/reduce.hpp @@ -211,6 +211,62 @@ namespace vikunja return result[0]; } + /** + * This is a function which transforms the input values and uses a reduce to accumulate the transformed values. + * For example, given the array [1, 2, 3, 4], the transform function (x) -> x + 1 and the reduce function + * (x,y) -> x + y, the result is 2 + 3 + 4 + 5 = 14. This overload takes the input as a begin/end pointer pair and forwards to the size-based overload. + * @tparam TAcc The alpaka accelerator type to use. + * @tparam WorkDivPolicy The working division policy. Defaults to a templated value depending on the + * accelerator. For the API of this, see workdiv/BlockBasedWorkDiv.hpp + * @tparam MemAccessPolicy The memory access policy. Defaults to a templated value depending on the + * accelerator. For the API of this, see mem/iterator/PolicyBasedBlockIterator + * @tparam TTransformFunc Type of the transform operator. + * @tparam TReduceFunc Type of the reduce operator. + * @tparam TInputIterator Type of the input iterator. Should be a pointer-like type. + * @tparam TDevAcc The type of the alpaka accelerator. + * @tparam TDevHost The type of the alpaka host. + * @tparam TQueue The type of the alpaka queue. + * @tparam TTransformOperator The vikunja::operators type of the transform function. + * @tparam TReduceOperator The vikunja::operators type of the reduce function. 
+ * @tparam TRed The type of the return value. + * @param devAcc The alpaka accelerator. + * @param devHost The alpaka host. + * @param queue The alpaka queue. + * @param bufferBegin The begin pointer of the input buffer. + * @param bufferEnd The end pointer of the input buffer. Must not be smaller than bufferBegin (checked by an assert); bufferEnd - bufferBegin is passed on as the number of elements. + * @param transformFunc The transform operator. + * @param reduceFunc The reduce operator. + * @return Value of the combined transform/reduce operation. + */ + template< + typename TAcc, + typename WorkDivPolicy = vikunja::workdiv::BlockBasedPolicy, + typename MemAccessPolicy = vikunja::mem::iterator::MemAccessPolicy, + typename TTransformFunc, + typename TReduceFunc, + typename TInputIterator, + typename TDevAcc, + typename TDevHost, + typename TQueue, + typename TTransformOperator = vikunja::operators:: + UnaryOp::value_type>, + typename TReduceOperator = vikunja::operators:: + BinaryOp, + typename TRed = typename TReduceOperator::TRed> + auto deviceTransformReduce( + TDevAcc& devAcc, + TDevHost& devHost, + TQueue& queue, + TInputIterator const& bufferBegin, + TInputIterator const& bufferEnd, + TTransformFunc const& transformFunc, + TReduceFunc const& reduceFunc) -> TRed + { + assert(bufferEnd >= bufferBegin); + auto size = static_cast::type>(bufferEnd - bufferBegin); + return deviceTransformReduce(devAcc, devHost, queue, size, bufferBegin, transformFunc, reduceFunc); + } + /** * This is a reduce function, which works exactly like deviceTransformReduce with an identity function for * the transform operator. @@ -270,5 +326,55 @@ namespace vikunja TQueue, TIdx>(devAcc, devHost, queue, n, buffer, detail::Identity(), func); } + + /** + * This is a reduce function, which works exactly like deviceTransformReduce with an identity function for + * the transform operator. This overload takes the input as a begin/end pointer pair and forwards to the size-based deviceReduce. + * @see deviceTransformReduce. 
+ * @tparam TAcc The alpaka accelerator type to use. + * @tparam WorkDivPolicy The working division policy. + * @tparam MemAccessPolicy The memory access policy. + * @tparam TFunc Type of the reduce operator. + * @tparam TInputIterator Type of the input iterator. Should be a pointer-like type. + * @tparam TDevAcc The type of the alpaka accelerator. + * @tparam TDevHost The type of the alpaka host. + * @tparam TQueue The type of the alpaka queue. + * @tparam TOperator The vikunja::operators type of the reduce function. + * @tparam TRed The type of the return value. + * @param devAcc The alpaka accelerator. + * @param devHost The alpaka host. + * @param queue The alpaka queue. + * @param bufferBegin The begin pointer of the input buffer. + * @param bufferEnd The end pointer of the input buffer. Must not be smaller than bufferBegin (checked by an assert); bufferEnd - bufferBegin is passed on as the number of elements. + * @param func The reduce operator. + * @return Value of the reduce operation. + */ + template< + typename TAcc, + typename WorkDivPolicy = vikunja::workdiv::BlockBasedPolicy, + typename MemAccessPolicy = vikunja::mem::iterator::MemAccessPolicy, + typename TFunc, + typename TInputIterator, + typename TDevAcc, + typename TDevHost, + typename TQueue, + typename TOperator = vikunja::operators::BinaryOp< + TAcc, + TFunc, + typename std::iterator_traits::value_type, + typename std::iterator_traits::value_type>, + typename TRed = typename TOperator::TRed> + auto deviceReduce( + TDevAcc& devAcc, + TDevHost& devHost, + TQueue& queue, + TInputIterator const& bufferBegin, + TInputIterator const& bufferEnd, + TFunc const& func) -> TRed + { + assert(bufferEnd >= bufferBegin); + auto size = static_cast::type>(bufferEnd - bufferBegin); + return deviceReduce(devAcc, devHost, queue, size, bufferBegin, func); + } } // namespace reduce } // namespace vikunja diff --git a/include/vikunja/transform/transform.hpp b/include/vikunja/transform/transform.hpp index ef2e49e..fb4eb66 100644 --- a/include/vikunja/transform/transform.hpp +++ b/include/vikunja/transform/transform.hpp @@ -102,6 +102,54 @@ namespace vikunja alpaka::exec(queue, multiBlockWorkDiv, kernel, source, destination, n, func); } + /** + * This is a function that transforms every element of an input iterator to another element in an output + * iterator, i.e. if one has the array [1,2,3,4] and the transform function (x) -> x + 1, the output + * will contain [2,3,4,5]. 
+ * Input and output iterator can be the same. The output must be at least as big as the input, otherwise bad things are bound to happen. + * @tparam TAcc The alpaka accelerator type. + * @tparam WorkDivPolicy The working division policy. Defaults to a templated value depending on the + * accelerator. For the API of this, see workdiv/BlockBasedWorkDiv.hpp + * @tparam MemAccessPolicy The memory access policy. Defaults to a templated value depending on the + * accelerator. For the API of this, see mem/iterator/PolicyBasedBlockIterator + * @tparam TFunc Type of the transform operator. + * @tparam TInputIterator Type of the input iterator. Should be a pointer-like type. + * @tparam TOutputIterator Type of the output iterator. Should be a pointer-like type. + * @tparam TDevAcc The type of the alpaka accelerator. + * @tparam TQueue The type of the alpaka queue. + * @tparam TOperator The vikunja::operators type of the transform function. + * @param devAcc The alpaka accelerator. + * @param queue The alpaka queue. + * @param sourceBegin The begin pointer of the input buffer. + * @param sourceEnd The end pointer of the input buffer. + * @param destination The output iterator. Should be pointer-like. + * @param func The transform operator. 
+ */ + template< + typename TAcc, + typename WorkDivPolicy = vikunja::workdiv::BlockBasedPolicy, + typename MemAccessPolicy = vikunja::mem::iterator::MemAccessPolicy, + typename TFunc, + typename TInputIterator, + typename TOutputIterator, + typename TDevAcc, + typename TQueue, + typename TOperator + = vikunja::operators::UnaryOp::value_type>> + auto deviceTransform( + TDevAcc& devAcc, + TQueue& queue, + TInputIterator const& sourceBegin, + TInputIterator const& sourceEnd, + TOutputIterator const& destination, + TFunc const& func) -> void + { + assert(sourceEnd >= sourceBegin); /* sourceEnd must not precede sourceBegin; sourceEnd - sourceBegin is forwarded below as the element count of the size-based overload */ + auto size = static_cast::type>(sourceEnd - sourceBegin); + deviceTransform(devAcc, queue, size, sourceBegin, destination, func); + } + /** * A transform similar to the above, except that two input iterators are used in parallel. * @tparam TAcc @@ -182,5 +230,54 @@ namespace vikunja detail::BlockThreadTransformKernel kernel; alpaka::exec(queue, multiBlockWorkDiv, kernel, source, sourceSecond, destination, n, func); } + + /** + * A transform similar to the above, except that two input iterators are used in parallel. The first input is given as a begin/end pointer pair; the call forwards to the size-based overload. + * @tparam TAcc The alpaka accelerator type. + * @tparam WorkDivPolicy The working division policy. + * @tparam MemAccessPolicy The memory access policy. + * @tparam TFunc Type of the transform operator. + * @tparam TInputIterator Type of the first input iterator. Should be a pointer-like type. + * @tparam TInputIteratorSecond Type of the second input iterator. Should be a pointer-like type. + * @tparam TOutputIterator Type of the output iterator. Should be a pointer-like type. + * @tparam TDevAcc The type of the alpaka accelerator. + * @tparam TQueue The type of the alpaka queue. + * @tparam TOperator The vikunja::operators type of the transform function. + * @param devAcc The alpaka accelerator. + * @param queue The alpaka queue. + * @param sourceBegin The begin pointer of the first input buffer. + * @param sourceEnd The end pointer of the first input buffer. Must not be smaller than sourceBegin (checked by an assert). 
+ * @param sourceSecond The begin pointer of the second input buffer. Presumably it has to provide at least sourceEnd - sourceBegin elements (to be confirmed against the size-based overload). + * @param destination The output iterator. Should be pointer-like. + * @param func The transform operator. + */ + template< + typename TAcc, + typename WorkDivPolicy = vikunja::workdiv::BlockBasedPolicy, + typename MemAccessPolicy = vikunja::mem::iterator::MemAccessPolicy, + typename TFunc, + typename TInputIterator, + typename TInputIteratorSecond, + typename TOutputIterator, + typename TDevAcc, + typename TQueue, + typename TOperator = vikunja::operators::BinaryOp< + TAcc, + TFunc, + typename std::iterator_traits::value_type, + typename std::iterator_traits::value_type>> + auto deviceTransform( + TDevAcc& devAcc, + TQueue& queue, + TInputIterator const& sourceBegin, + TInputIterator const& sourceEnd, + TInputIteratorSecond const& sourceSecond, + TOutputIterator const& destination, + TFunc const& func) -> void + { + assert(sourceEnd >= sourceBegin); + auto size = static_cast::type>(sourceEnd - sourceBegin); + deviceTransform(devAcc, queue, size, sourceBegin, sourceSecond, destination, func); + } } // namespace transform } // namespace vikunja diff --git a/test/integ/reduce/src/Reduce.cpp b/test/integ/reduce/src/Reduce.cpp index 9d49ae7..56dda87 100644 --- a/test/integ/reduce/src/Reduce.cpp +++ b/test/integ/reduce/src/Reduce.cpp @@ -59,6 +59,38 @@ namespace vikunja }; }; + template< + typename TDim, + template + class TAcc, + typename TData, + typename TDataResult = TData, + typename TIdx = std::uint64_t> + class TestSetupReducePtr : public TestSetupBase /* test setup that exercises the begin/end pointer overload of vikunja::reduce::deviceReduce */ + { + public: + using TestSetupBase::TestSetupBase; + + using Base = typename vikunja::test::reduce::TestSetupBase; + + template + void run(TReduceFunctor reduceFunctor) /* copies the host input to the device (alpaka::memcpy takes the destination first), reduces the device range [begin, end) and stores the value in Base::m_result */ + { + alpaka::memcpy(Base::Base::queueAcc, Base::m_device_mem, Base::m_host_mem, Base::m_extent); + + TData* begin = alpaka::getPtrNative(Base::m_device_mem); + TData* end = begin + Base::m_size; /* one past the last element, assuming m_size is the element count of the buffer -- TODO confirm in TestSetupBase */ + + Base::m_result = vikunja::reduce::deviceReduce( + Base::devAcc, + Base::devHost, + Base::Base::queueAcc, + begin, + end, + reduceFunctor); + }; + }; + template< typename TDim, template class TAcc, @@ -89,6 
+121,39 @@ namespace vikunja }; }; + template< + typename TDim, + template + class TAcc, + typename TData, + typename TDataResult = TData, + typename TIdx = std::uint64_t> + class TestSetupReduceTransformPtr : public TestSetupBase /* test setup that exercises the begin/end pointer overload of vikunja::reduce::deviceTransformReduce */ + { + public: + using TestSetupBase::TestSetupBase; + + using Base = typename vikunja::test::reduce::TestSetupBase; + + template + void run(TReduceFunctor reduceFunctor, TTransformFunctor transformFunctor) /* note: run() takes the reduce functor first, while deviceTransformReduce below is called with the transform functor first */ + { + alpaka::memcpy(Base::Base::queueAcc, Base::m_device_mem, Base::m_host_mem, Base::m_extent); + + TData* begin = alpaka::getPtrNative(Base::m_device_mem); + TData* end = begin + Base::m_size; /* one past the last element, assuming m_size is the element count of the buffer -- TODO confirm in TestSetupBase */ + + Base::m_result = vikunja::reduce::deviceTransformReduce( + Base::devAcc, + Base::devHost, + Base::Base::queueAcc, + begin, + end, + transformFunctor, + reduceFunctor); + }; + }; + } // namespace reduce } // namespace test } // namespace vikunja @@ -220,7 +285,7 @@ TEMPLATE_TEST_CASE( INFO((vikunja::test::print_acc_info(size))); - vikunja::test::reduce::TestSetupReduce setup(size); + vikunja::test::reduce::TestSetupReducePtr setup(size); // setup initial values Data* const host_mem_ptr = setup.get_host_mem_ptr(); @@ -353,7 +418,7 @@ TEMPLATE_TEST_CASE( INFO((vikunja::test::print_acc_info(size))); - vikunja::test::reduce::TestSetupReduceTransform setup(size); + vikunja::test::reduce::TestSetupReduceTransformPtr setup(size); // setup initial values Data* const host_mem_ptr = setup.get_host_mem_ptr(); diff --git a/test/integ/transform/src/Transform.cpp b/test/integ/transform/src/Transform.cpp index c04b516..8270e88 100644 --- a/test/integ/transform/src/Transform.cpp +++ b/test/integ/transform/src/Transform.cpp @@ -68,6 +68,48 @@ namespace vikunja }; }; + template< + typename TDim, + template + class TAcc, + typename TData, + typename TDataResult = TData, + typename TIdx = std::uint64_t> + class TestSetupTransformPtr : public TestSetupBase /* test setup that exercises the begin/end pointer overload of the single-input vikunja::transform::deviceTransform */ + { + public: + using TestSetupBase::TestSetupBase; + + using Base = typename 
vikunja::test::transform::TestSetupBase; + + template + void run(TReduceFunctor reduceFunctor) /* uploads input 1, runs deviceTransform on the device range [begin, end) and copies the device output back to the host; despite its name, reduceFunctor is handed over as the transform operator */ + { + alpaka::memcpy( + Base::Base::queueAcc, + Base::m_device_input1_mem, + Base::m_host_input1_mem, + Base::m_extent); + + TData* begin = alpaka::getPtrNative(Base::m_device_input1_mem); + TData* end = begin + Base::m_size; /* one past the last element, assuming m_size is the element count of the buffer -- TODO confirm in TestSetupBase */ + + vikunja::transform::deviceTransform( + Base::devAcc, + Base::Base::queueAcc, + begin, + end, + alpaka::getPtrNative(Base::m_device_output_mem), + reduceFunctor); + + alpaka::memcpy( + Base::Base::queueAcc, + Base::m_host_output_mem, + Base::m_device_output_mem, + Base::m_extent); + }; + }; + template< typename TDim, template class TAcc, @@ -127,6 +169,68 @@ namespace vikunja }; }; + template< + typename TDim, + template + class TAcc, + typename TData1, + typename TData2 = TData1, + typename TDataResult = TData1, + typename TIdx = std::uint64_t> + class TestSetupTransformDoubleInputPtr : public TestSetupBase /* test setup that exercises the begin/end pointer overload of the two-input vikunja::transform::deviceTransform; it additionally holds the host and device buffers of the second input */ + { + private: + using Base = typename vikunja::test::transform::TestSetupBase; + using BufHost = alpaka::Buf; + using BufDev = alpaka::Buf; + + BufHost m_host_input2_mem; + BufDev m_device_input2_mem; + + public: + TestSetupTransformDoubleInputPtr(uint64_t const memSize) + : TestSetupBase(memSize) + , m_host_input2_mem(alpaka::allocBuf(Base::Base::devHost, Base::m_extent)) + , m_device_input2_mem(alpaka::allocBuf(Base::Base::devAcc, Base::m_extent)) + { + } + + + TData2* get_host_input2_mem_ptr() /* native pointer to the host buffer of the second input */ + { + return alpaka::getPtrNative(m_host_input2_mem); + } + + template + void run(TReduceFunctor reduceFunctor) /* uploads both inputs, runs the two-input deviceTransform on the device range [begin, end) and copies the device output back to the host; despite its name, reduceFunctor is handed over as the transform operator */ + { + alpaka::memcpy( + Base::Base::queueAcc, + Base::m_device_input1_mem, + Base::m_host_input1_mem, + Base::m_extent); + alpaka::memcpy(Base::Base::queueAcc, m_device_input2_mem, m_host_input2_mem, Base::m_extent); + + TData1* begin = alpaka::getPtrNative(Base::m_device_input1_mem); + TData1* end = begin + Base::m_size; /* one past the last element of input 1, assuming m_size is the element count of the buffer -- TODO confirm in TestSetupBase */ + + vikunja::transform::deviceTransform( + Base::devAcc, + Base::Base::queueAcc, + begin, + end, + 
alpaka::getPtrNative(m_device_input2_mem), /* begin pointer of the second input (sourceSecond) */ + alpaka::getPtrNative(Base::m_device_output_mem), + reduceFunctor); + + alpaka::memcpy( + Base::Base::queueAcc, + Base::m_host_output_mem, + Base::m_device_output_mem, + Base::m_extent); + }; + }; + } // namespace transform } // namespace test } // namespace vikunja @@ -217,7 +321,7 @@ TEMPLATE_TEST_CASE( INFO((vikunja::test::print_acc_info(size))); - vikunja::test::transform::TestSetupTransform setup(size); + vikunja::test::transform::TestSetupTransformPtr setup(size); // setup initial values Data* const host_mem_ptr = setup.get_host_input1_mem_ptr(); @@ -496,8 +600,9 @@ TEMPLATE_TEST_CASE( INFO((vikunja::test::print_acc_info(size))); - vikunja::test::transform::TestSetupTransformDoubleInput - setup(size); + vikunja::test::transform:: + TestSetupTransformDoubleInputPtr + setup(size); // setup initial values Data1* const host_mem_ptr1 = setup.get_host_input1_mem_ptr();