diff --git a/CMakeLists.txt b/CMakeLists.txt
index cc34500..5aedee0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,8 @@ option(VIKUNJA_ENABLE_EXTRA_WARNINGS "Enable extra warnings" OFF)
option(BUILD_TESTING "Build the testing tree." OFF)
cmake_dependent_option(VIKUNJA_SYSTEM_CATCH2 "Use your local installation of Catch2" ON BUILD_TESTING OFF)
cmake_dependent_option(VIKUNJA_ENABLE_CXX_TEST "Builds test that checks if the C++ standard is set correctly" OFF BUILD_TESTING OFF)
+cmake_dependent_option(VIKUNJA_ENABLE_BENCHMARKS "Enable benchmarks" OFF BUILD_TESTING OFF)
+cmake_dependent_option(VIKUNJA_ENABLE_CUDA_THRUST_BENCHMARKS "Enable benchmarks using CUDA Thrust" OFF "VIKUNJA_ENABLE_BENCHMARKS;ALPAKA_ACC_GPU_CUDA_ENABLE" OFF)
# activate support for host/device lambdas in cuda
# needs to be set before alpaka is included
diff --git a/docs/source/advanced/cmake.rst b/docs/source/advanced/cmake.rst
index 8a1a809..78baa93 100644
--- a/docs/source/advanced/cmake.rst
+++ b/docs/source/advanced/cmake.rst
@@ -20,6 +20,7 @@ Common
Testing
+++++++
+.. _cmake-test:
**BUILD_TESTING** (OFF)
.. code-block::
@@ -29,16 +30,29 @@ Testing
**VIKUNJA_SYSTEM_CATCH2** (OFF)
.. code-block::
- Only works if BUILD_TESTING is ON.
+ Requires BUILD_TESTING to be ON.
Use your local installation of Catch2.
Otherwise, it will be automatically downloaded and installed in the local build folder.
**VIKUNJA_ENABLE_CXX_TEST** (OFF)
.. code-block::
- Only works if BUILD_TESTING is ON.
+ Requires BUILD_TESTING to be ON.
Special test that checks if ALPAKA_CXX_STANDARD works correctly.
- The implementation is very compiler specific, so it is possible that the test is not supported by your used C++ compiler.
+ The implementation is very compiler specific, so it is possible that the test is not
+ supported by your C++ compiler.
+
+**VIKUNJA_ENABLE_BENCHMARKS** (OFF)
+ .. code-block::
+
+ Requires BUILD_TESTING to be ON.
+ Enables the benchmarks. The benchmarks are built automatically and can be executed via CTest.
+
+**VIKUNJA_ENABLE_CUDA_THRUST_BENCHMARKS** (OFF)
+ .. code-block::
+
+ Requires VIKUNJA_ENABLE_BENCHMARKS and ALPAKA_ACC_GPU_CUDA_ENABLE to be ON.
+ Enables Thrust benchmarks for comparison.
alpaka
++++++
@@ -64,7 +78,7 @@ The following CMake variables are provided by alpaka. This section contains only
- ALPAKA_ACC_GPU_CUDA_ENABLE
- ALPAKA_ACC_GPU_HIP_ENABLE
- Important: Not all alpaka accelerator backends are tested together with vikunja,
+ Important: Not all alpaka accelerator backends are tested together with vikunja,
see CI tests.
**ALPAKA_CUDA_NVCC_EXPT_EXTENDED_LAMBDA** (ON)
@@ -75,4 +89,4 @@ The following CMake variables are provided by alpaka. This section contains only
**ALPAKA_CUDA_EXPT_EXTENDED_LAMBDA** (ON)
.. code-block::
- Enable lambda support in Alpaka 0.7.x and later for the CUDA accelerator.
\ No newline at end of file
+ Enable lambda support in Alpaka 0.7.x and later for the CUDA accelerator.
diff --git a/docs/source/basic/algorithm.rst b/docs/source/basic/algorithm.rst
index 2054657..e81c55c 100644
--- a/docs/source/basic/algorithm.rst
+++ b/docs/source/basic/algorithm.rst
@@ -1,5 +1,5 @@
Algorithms
-=========
+==========
This page provides an overview of all algorithms implemented in vikunja.
@@ -8,7 +8,7 @@ All algorithms have the property that the order in which the input elements are
Transform
---------
-Takes a range of elements as input, applies an unary operator to each element, and writes the result to an output range in the same order.
+Takes a range of elements as input, applies an unary operator to each element, and writes the result to an output range in the same order.
.. only:: html
@@ -35,4 +35,4 @@ Takes a range of elements as input and reduces it to a single element via an ope
.. only:: latex
.. image:: images/reduction.pdf
- :alt: scheme: reduce algorithm
\ No newline at end of file
+ :alt: scheme: reduce algorithm
diff --git a/docs/source/basic/installation.rst b/docs/source/basic/installation.rst
index 4defddf..841c55a 100644
--- a/docs/source/basic/installation.rst
+++ b/docs/source/basic/installation.rst
@@ -19,8 +19,8 @@ Vikunja builds and installs itself using `CMake `_. Before y
.. code-block:: bash
git clone https://github.com/alpaka-group/vikunja.git
- mkdir vikunja/build
- cd vikunja/build
+ mkdir vikunja/build
+ cd vikunja/build
cmake ..
cmake --build .
cmake --install .
@@ -38,6 +38,8 @@ Enable and run the tests:
cmake --build .
ctest
+Read this :doc:`section <../development/test>` for more information about the tests.
+
Enable and run an example:
.. code-block:: bash
diff --git a/docs/source/basic/introduction.rst b/docs/source/basic/introduction.rst
index 0af13a2..47a98af 100644
--- a/docs/source/basic/introduction.rst
+++ b/docs/source/basic/introduction.rst
@@ -7,7 +7,7 @@ The basic concept of vikunja is to run an ``algorithm`` with an ``operator`` ove
* **Transform**: Takes a range of elements as input, applies an operator to each element, and writes the result to an output range.
* **Reduce**: Takes a range of elements as input and returns a single element. The reduce operator takes two elements of the input range, applies an operation to them, and returns a single element. The operator is applied up to the point where only one element remains.
- * For more examples see: :ref:`Algorithm `
+ * For more examples see: :doc:`Algorithm <algorithm>`
* An ``operator`` describes an algorithm which is applied to one (unary operator) or two (binary operator) elements and returns a result. The following examples assume that **i** is the first and **j** the second input element:
* **sum**: `return i+j;`
diff --git a/docs/source/development/test.rst b/docs/source/development/test.rst
new file mode 100644
index 0000000..1e16606
--- /dev/null
+++ b/docs/source/development/test.rst
@@ -0,0 +1,59 @@
+Testing and Benchmarking
+========================
+
+Vikunja offers different types of tests. The source code is tested via unit and integration tests with `Catch2 <https://github.com/catchorg/Catch2>`_. The CMake code is tested with integration tests and custom scripts.
+
+Source Code Tests
+-----------------
+
+Before you start writing source code tests, you should read the `Catch2 documentation <https://github.com/catchorg/Catch2/tree/v2.x/docs>`_. Tests written with Catch2 are standalone executables. They have their own source code files and ``CMakeLists.txt`` files located in the ``test/unit`` and ``test/integ`` folders. If you set the CMake argument ``-DBUILD_TESTING=ON``, the tests will be built automatically. All test executables are registered via the CMake function ``add_test``. Therefore, you can automatically run all tests from the build folder with the ``ctest`` command:
+
+.. code-block:: bash
+
+ mkdir build && cd build
+ cmake .. -DBUILD_TESTING=ON
+ cmake --build .
+ ctest
+
+For more CMake arguments for the tests, see the :ref:`CMake section <cmake-test>`.
+
+If you only want to run a single test, you can run the test executable directly. All test executables are located in ``<build folder>/test``. It is also possible to run the executable with the ``--help`` flag to show additional options. For example, the ``-s`` flag displays additional information created with the Catch2 function ``INFO()``.
+
+.. code-block:: bash
+
+ mkdir build && cd build
+ cmake .. -DBUILD_TESTING=ON
+ cmake --build .
+ # display extra test options
+ test/integ/reduce/test_reduce --help
+ # run test with extra output
+ test/integ/reduce/test_reduce -s
+
+.. tip::
+
+ Each test is a CMake target that you can build separately. A test target always starts with ``test_``. To get all available test CMake targets, run ``cmake --build . -t help | grep 'test_'`` in the build folder. You can build a specific test with ``cmake --build . -t test_IndividualTestCase``.
+
+CMake Tests
+-----------
+
+The CMake integration tests check whether vikunja can be used correctly in another project via the CMake functions ``find_package()`` or ``add_subdirectory``. The CI contains test jobs which create dummy projects that use the vikunja library. The job names start with ``integration``. All associated files for the tests are in ``script/integration_test``.
+
+CXX Test
+++++++++
+
+There is a special Catch2 test that tests vikunja's build system to see if the C++ standard is set correctly. The name of the test is ``test_cxx``. It compares the C++ standard set by the compiler with an expected standard passed as an argument. By default, ``ctest`` automatically passes the expected C++ standard depending on the CMake variable ``ALPAKA_CXX_STANDARD``. If you run the test manually, you must pass it yourself:
+
+.. code-block:: bash
+
+ # expects that the code was compiled with C++17
+ test/unit/cxx/test_cxx --cxx 17
+
+
+Benchmarks
+----------
+
+Vikunja uses `Catch2 benchmark <https://github.com/catchorg/Catch2/blob/v2.x/docs/benchmarks.md>`_ to automatically run benchmarks. By default, benchmarks are not enabled. To enable them, the CMake arguments ``-DBUILD_TESTING=ON -DVIKUNJA_ENABLE_BENCHMARKS=ON`` must be set. The benchmarks are created automatically and can be run with ``ctest``. As with the tests, you can run a particular benchmark directly from the executable file, e.g. ``test/benchmarks/transform/bench_vikunja_transform``. All benchmark executables are located in ``<build folder>/test/benchmarks``.
+
+.. tip::
+
+ If you run a benchmark executable with the ``--help`` flag, you get benchmark-specific options.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 4f8635d..6dc3666 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -41,6 +41,7 @@ Generally, **follow the manual pages in-order** to get started. Individual chapt
:maxdepth: 1
:caption: Development
+ development/test.rst
development/docs.rst
development/styleguide.rst
development/ci.rst
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 319e6ef..ac73469 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -33,8 +33,14 @@ target_link_libraries(vikunjaTestSetup
PUBLIC
Catch2::Catch2
)
+if(VIKUNJA_ENABLE_BENCHMARKS)
+ target_compile_definitions(vikunjaTestSetup PRIVATE CATCH_CONFIG_ENABLE_BENCHMARKING)
+endif()
add_library(vikunja::testSetup ALIAS vikunjaTestSetup)
list(APPEND _VIKUNJA_TEST_OPTIONS "--use-colour yes")
add_subdirectory("unit/")
add_subdirectory("integ/")
+if(VIKUNJA_ENABLE_BENCHMARKS)
+ add_subdirectory("benchmarks/")
+endif()
diff --git a/test/benchmarks/CMakeLists.txt b/test/benchmarks/CMakeLists.txt
new file mode 100644
index 0000000..5928b76
--- /dev/null
+++ b/test/benchmarks/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright 2022 Simeon Ehrig
+#
+# This file is part of vikunja.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+cmake_minimum_required(VERSION 3.18)
+
+add_library(vikunjaBenchSetup INTERFACE)
+target_compile_definitions(vikunjaBenchSetup INTERFACE CATCH_CONFIG_ENABLE_BENCHMARKING)
+target_include_directories(vikunjaBenchSetup INTERFACE include)
+add_library(vikunja::benchSetup ALIAS vikunjaBenchSetup)
+
+add_subdirectory("helper/")
+add_subdirectory("transform/")
+add_subdirectory("reduce/")
diff --git a/test/benchmarks/helper/CMakeLists.txt b/test/benchmarks/helper/CMakeLists.txt
new file mode 100644
index 0000000..ac2fa6d
--- /dev/null
+++ b/test/benchmarks/helper/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright 2022 Simeon Ehrig
+#
+# This file is part of vikunja.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+cmake_minimum_required(VERSION 3.18)
+
+set(_TARGET_NAME "test_bench_helper")
+
+alpaka_add_executable(
+ ${_TARGET_NAME}
+ test_bench_helper.cpp
+ )
+
+target_link_libraries(${_TARGET_NAME}
+ PRIVATE
+ vikunja::testSetup
+ vikunja::benchSetup
+ vikunja::internalvikunja
+)
+
+add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_VIKUNJA_TEST_OPTIONS})
diff --git a/test/benchmarks/helper/test_bench_helper.cpp b/test/benchmarks/helper/test_bench_helper.cpp
new file mode 100644
index 0000000..c7b4ace
--- /dev/null
+++ b/test/benchmarks/helper/test_bench_helper.cpp
@@ -0,0 +1,123 @@
+/* Copyright 2022 Simeon Ehrig
+ *
+ * This file is part of vikunja.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+
+TEMPLATE_TEST_CASE("allocate_mem_iota compare std::iota", "[iota]", int, float, double)
+{
+ using Data = TestType;
+ using Setup = vikunja::test::TestAlpakaSetup<
+ alpaka::DimInt<1u>, // dim
+ int, // Idx
+ alpaka::AccCpuSerial, // host type
+ alpaka::ExampleDefaultAcc, // device type
+ alpaka::Blocking // queue type
+ >;
+ using Vec = alpaka::Vec;
+
+ Setup::Idx size = GENERATE(1, 10, 3045, 2'000'000);
+ Data begin = GENERATE(0, 1, 45, -42);
+
+ INFO((vikunja::test::print_acc_info(size)));
+ INFO("begin: " + std::to_string(begin));
+
+ Setup setup;
+ Vec extent = Vec::all(static_cast(size));
+
+ auto devMem = vikunja::bench::allocate_mem_iota(setup, extent, begin);
+ auto hostMem(alpaka::allocBuf(setup.devHost, extent));
+ Data* const hostMemPtr(alpaka::getPtrNative(hostMem));
+
+ alpaka::memcpy(setup.queueAcc, hostMem, devMem, extent);
+
+ std::vector expected_result(size);
+ std::iota(std::begin(expected_result), std::end(expected_result), begin);
+
+ for(Setup::Idx i = 0; i < size; ++i)
+ {
+ REQUIRE(static_cast(expected_result[i]) == hostMemPtr[i]);
+ }
+}
+
+TEMPLATE_TEST_CASE("allocate_mem_iota different increment", "[iota]", int, float, double)
+{
+ using Data = TestType;
+ using Setup = vikunja::test::TestAlpakaSetup<
+ alpaka::DimInt<1u>, // dim
+ int, // Idx
+ alpaka::AccCpuSerial, // host type
+ alpaka::ExampleDefaultAcc, // device type
+ alpaka::Blocking // queue type
+ >;
+ using Vec = alpaka::Vec;
+
+ Setup::Idx size = GENERATE(1, 10, 3045);
+ Data begin = GENERATE(0, 1, 45, -42);
+ Data increment = GENERATE(1, -1, 45, -42);
+
+ INFO((vikunja::test::print_acc_info(size)));
+ INFO("begin: " + std::to_string(begin));
+ INFO("increment: " + std::to_string(increment));
+
+ Setup setup;
+ Vec extent = Vec::all(static_cast(size));
+
+ auto devMem = vikunja::bench::allocate_mem_iota(setup, extent, begin, increment);
+ auto hostMem(alpaka::allocBuf(setup.devHost, extent));
+ Data* const hostMemPtr(alpaka::getPtrNative(hostMem));
+
+ alpaka::memcpy(setup.queueAcc, hostMem, devMem, extent);
+
+ for(Setup::Idx i = 0; i < size; ++i)
+ {
+ Data expected_result = begin + static_cast(i) * increment;
+ REQUIRE_MESSAGE(expected_result == hostMemPtr[i], "failed with index: " + std::to_string(i));
+ }
+}
+
+TEMPLATE_TEST_CASE("allocate_mem_constant", "[iota]", int, float, double)
+{
+ using Data = TestType;
+ using Setup = vikunja::test::TestAlpakaSetup<
+ alpaka::DimInt<1u>, // dim
+ int, // Idx
+ alpaka::AccCpuSerial, // host type
+ alpaka::ExampleDefaultAcc, // device type
+ alpaka::Blocking // queue type
+ >;
+ using Vec = alpaka::Vec;
+
+ Setup::Idx size = GENERATE(1, 10, 3045, 2'000'000);
+ Data constant = GENERATE(0, 1, 45, -42);
+
+ INFO((vikunja::test::print_acc_info(size)));
+ INFO("constant: " + std::to_string(constant));
+
+ Setup setup;
+ Vec extent = Vec::all(static_cast(size));
+
+ auto devMem = vikunja::bench::allocate_mem_constant(setup, extent, constant);
+ auto hostMem(alpaka::allocBuf(setup.devHost, extent));
+ Data* const hostMemPtr(alpaka::getPtrNative(hostMem));
+
+ alpaka::memcpy(setup.queueAcc, hostMem, devMem, extent);
+
+ for(Setup::Idx i = 0; i < size; ++i)
+ {
+ REQUIRE(static_cast(constant) == hostMemPtr[i]);
+ }
+}
diff --git a/test/benchmarks/include/vikunja/bench/memory.hpp b/test/benchmarks/include/vikunja/bench/memory.hpp
new file mode 100644
index 0000000..5b2a0c8
--- /dev/null
+++ b/test/benchmarks/include/vikunja/bench/memory.hpp
@@ -0,0 +1,213 @@
+/* Copyright 2022 Simeon Ehrig
+ *
+ * This file is part of vikunja.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include
+
+#include
+#include
+
+namespace vikunja::bench
+{
+ template
+ class IotaFunctor
+ {
+ private:
+ TData const m_begin; // value written to index 0
+ TData const m_increment; // difference between two neighbouring elements
+
+ public:
+ //! Iota functor for generic data types.
+ //!
+ //! \tparam TData Type of each element.
+ //! \param init Value of the first element.
+ //! \param increment Distance between two elements.
+ IotaFunctor(TData const init, TData const increment) : m_begin(init), m_increment(increment)
+ {
+ }
+
+ //! Writes the result of `init + index * increment` to each element of the output vector.
+ //!
+ //! \tparam TAcc The accelerator environment to be executed on.
+ //! \tparam TIdx The index type used for `numElements` and the element indices.
+ //! \param acc The accelerator to be executed on.
+ //! \param output The destination vector.
+ //! \param numElements The number of elements.
+ ALPAKA_NO_HOST_ACC_WARNING
+ template
+ ALPAKA_FN_ACC auto operator()(TAcc const& acc, TData* const output, TIdx const& numElements) const -> void
+ {
+ static_assert(alpaka::Dim::value == 1, "The IotaFunctor expects 1-dimensional indices!");
+
+ TIdx const gridThreadIdx(alpaka::getIdx(acc)[0u]);
+ TIdx const threadElemExtent(alpaka::getWorkDiv(acc)[0u]);
+ TIdx const threadFirstElemIdx(gridThreadIdx * threadElemExtent);
+
+ if(threadFirstElemIdx < numElements)
+ {
+ // Determine the exclusive end index of this thread's element range.
+ // It is clipped to numElements so that the last thread does not write out of bounds.
+ TIdx const threadLastElemIdx(threadFirstElemIdx + threadElemExtent);
+ TIdx const threadLastElemIdxClipped(alpaka::math::min(acc, numElements, threadLastElemIdx));
+
+ for(TIdx i(threadFirstElemIdx); i < threadLastElemIdxClipped; ++i)
+ {
+ output[i] = m_begin + static_cast(i) * m_increment;
+ }
+ }
+ }
+ };
+
+
+ //! Allocates memory and initializes each value with `init + index * increment`,
+ //! where index is the position in the output vector. The allocation is done with `setup.devAcc`.
+ //!
+ //! \tparam TData Data type of the memory buffer.
+ //! \tparam TSetup Fully specialized type of `vikunja::test::TestAlpakaSetup`.
+ //! \tparam TExtent Type of the extent.
+ //! \tparam TBuf Type of the alpaka memory buffer.
+ //! \param setup Instance of `vikunja::test::TestAlpakaSetup`. `setup.devAcc` and `setup.queueAcc` are used
+ //! for allocation and initialization of the memory.
+ //! \param extent Size of the memory buffer. Needs to be 1 dimensional.
+ //! \param init Value of the first element. Depending on TData, it can be negative.
+ //! \param increment Distance between two elements of the vector. If the value is negative, the value of an
+ //! element is smaller than its previous element.
+ template<
+ typename TData,
+ typename TSetup,
+ typename TExtent,
+ typename TBuf = alpaka::Buf, typename TSetup::Idx>>
+ TBuf allocate_mem_iota(
+ TSetup& setup,
+ TExtent const& extent,
+ TData const init = TData{0},
+ TData const increment = TData{1})
+ {
+ // TODO: test also 2 and 3 dimensional memory
+ static_assert(TExtent::Dim::value == 1);
+
+ // TODO: optimize utilization for CPU backends
+ typename TSetup::Idx const elementsPerThread = 1;
+ typename TSetup::Idx linSize = extent.prod(); // total number of elements, passed to the kernel as numElements
+
+ TBuf devMem(alpaka::allocBuf(setup.devAcc, extent));
+
+ alpaka::WorkDivMembers const workDiv(
+ alpaka::getValidWorkDiv(
+ setup.devAcc,
+ extent,
+ elementsPerThread,
+ false,
+ alpaka::GridBlockExtentSubDivRestrictions::Unrestricted));
+
+ IotaFunctor iotaFunctor(init, increment);
+
+ alpaka::exec(
+ setup.queueAcc, // the initialization kernel is enqueued on the accelerator queue of the setup
+ workDiv,
+ iotaFunctor,
+ alpaka::getPtrNative(devMem),
+ linSize);
+
+ return devMem;
+ }
+
+ template
+ class ConstantInitFunctor
+ {
+ private:
+ TData const m_constant; // value written to every element
+
+ public:
+ //! Functor to write a constant value into each element of a vector.
+ //!
+ //! \tparam TData Type of each element.
+ //! \param constant Value to which all elements are set.
+ ConstantInitFunctor(TData const constant) : m_constant(constant)
+ {
+ }
+
+ //! Writes the constant to each element of the output vector.
+ //!
+ //! \tparam TAcc The accelerator environment to be executed on.
+ //! \tparam TIdx The index type used for `numElements` and the element indices.
+ //! \param acc The accelerator to be executed on.
+ //! \param output The destination vector.
+ //! \param numElements The number of elements.
+ ALPAKA_NO_HOST_ACC_WARNING
+ template
+ ALPAKA_FN_ACC auto operator()(TAcc const& acc, TData* const output, TIdx const& numElements) const -> void
+ {
+ static_assert(alpaka::Dim::value == 1, "The ConstantInitFunctor expects 1-dimensional indices!");
+
+ TIdx const gridThreadIdx(alpaka::getIdx(acc)[0u]);
+ TIdx const threadElemExtent(alpaka::getWorkDiv(acc)[0u]);
+ TIdx const threadFirstElemIdx(gridThreadIdx * threadElemExtent);
+
+ if(threadFirstElemIdx < numElements)
+ {
+ // Determine the exclusive end index of this thread's element range.
+ // It is clipped to numElements so that the last thread does not write out of bounds.
+ TIdx const threadLastElemIdx(threadFirstElemIdx + threadElemExtent);
+ TIdx const threadLastElemIdxClipped(alpaka::math::min(acc, numElements, threadLastElemIdx));
+
+ for(TIdx i(threadFirstElemIdx); i < threadLastElemIdxClipped; ++i)
+ {
+ output[i] = m_constant;
+ }
+ }
+ }
+ };
+
+ //! Allocates memory and initializes each value with a constant value.
+ //! The allocation is done with `setup.devAcc`.
+ //!
+ //! \tparam TData Data type of the memory buffer.
+ //! \tparam TSetup Fully specialized type of `vikunja::test::TestAlpakaSetup`.
+ //! \tparam TExtent Type of the extent.
+ //! \tparam TBuf Type of the alpaka memory buffer.
+ //! \param setup Instance of `vikunja::test::TestAlpakaSetup`. `setup.devAcc` and `setup.queueAcc` are used
+ //! for allocation and initialization of the memory.
+ //! \param extent Size of the memory buffer. Needs to be 1 dimensional.
+ //! \param constant Value of the constant.
+ template<
+ typename TData,
+ typename TSetup,
+ typename TExtent,
+ typename TBuf = alpaka::Buf, typename TSetup::Idx>>
+ TBuf allocate_mem_constant(TSetup& setup, TExtent const& extent, TData const constant)
+ {
+ // TODO: test also 2 and 3 dimensional memory
+ static_assert(TExtent::Dim::value == 1);
+
+ // TODO: optimize utilization for CPU backends
+ typename TSetup::Idx const elementsPerThread = 1;
+ typename TSetup::Idx linSize = extent.prod(); // total number of elements, passed to the kernel as numElements
+
+ TBuf devMem(alpaka::allocBuf(setup.devAcc, extent));
+
+ alpaka::WorkDivMembers const workDiv(
+ alpaka::getValidWorkDiv(
+ setup.devAcc,
+ extent,
+ elementsPerThread,
+ false,
+ alpaka::GridBlockExtentSubDivRestrictions::Unrestricted));
+
+ ConstantInitFunctor constantInitFunctor(constant);
+
+ alpaka::exec(
+ setup.queueAcc, // the initialization kernel is enqueued on the accelerator queue of the setup
+ workDiv,
+ constantInitFunctor,
+ alpaka::getPtrNative(devMem),
+ linSize);
+
+ return devMem;
+ }
+} // namespace vikunja::bench
diff --git a/test/benchmarks/reduce/CMakeLists.txt b/test/benchmarks/reduce/CMakeLists.txt
new file mode 100644
index 0000000..c6a882e
--- /dev/null
+++ b/test/benchmarks/reduce/CMakeLists.txt
@@ -0,0 +1,47 @@
+# Copyright 2022 Simeon Ehrig
+#
+# This file is part of vikunja.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+cmake_minimum_required(VERSION 3.18)
+
+set(_TARGET_NAME_VIKUNJA_REDUCE "bench_vikunja_reduce")
+
+alpaka_add_executable(
+ ${_TARGET_NAME_VIKUNJA_REDUCE}
+ bench_vikunja_reduce.cpp
+ )
+
+target_link_libraries(${_TARGET_NAME_VIKUNJA_REDUCE}
+ PRIVATE
+ vikunja::testSetup
+ vikunja::benchSetup
+ vikunja::internalvikunja
+)
+
+add_test(NAME ${_TARGET_NAME_VIKUNJA_REDUCE} COMMAND ${_TARGET_NAME_VIKUNJA_REDUCE} ${_VIKUNJA_TEST_OPTIONS})
+# avoid running the benchmarks in parallel
+set_tests_properties(${_TARGET_NAME_VIKUNJA_REDUCE} PROPERTIES RUN_SERIAL TRUE)
+
+
+if(VIKUNJA_ENABLE_CUDA_THRUST_BENCHMARKS)
+ set(_TARGET_NAME_THRUST_REDUCE "bench_thrust_reduce")
+
+ alpaka_add_executable(
+ ${_TARGET_NAME_THRUST_REDUCE}
+ bench_thrust_reduce.cpp
+ )
+
+ target_link_libraries(${_TARGET_NAME_THRUST_REDUCE}
+ PRIVATE
+ vikunja::testSetup
+ vikunja::benchSetup
+ vikunja::internalvikunja
+ )
+
+ add_test(NAME ${_TARGET_NAME_THRUST_REDUCE} COMMAND ${_TARGET_NAME_THRUST_REDUCE} ${_VIKUNJA_TEST_OPTIONS})
+ set_tests_properties(${_TARGET_NAME_THRUST_REDUCE} PROPERTIES RUN_SERIAL TRUE)
+endif()
diff --git a/test/benchmarks/reduce/bench_thrust_reduce.cpp b/test/benchmarks/reduce/bench_thrust_reduce.cpp
new file mode 100644
index 0000000..463ace0
--- /dev/null
+++ b/test/benchmarks/reduce/bench_thrust_reduce.cpp
@@ -0,0 +1,61 @@
+/* Copyright 2022 Simeon Ehrig
+ *
+ * This file is part of vikunja.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include
+#include
+
+#include
+#include
+
+template
+inline void reduce_benchmark(int size) // reduces the values 1..size with thrust::reduce, checks the sum, then benchmarks the call
+{
+ std::vector hostMemInput(size);
+ for(int i = 0; i < size; ++i)
+ {
+ hostMemInput[i] = static_cast(i) + static_cast(1); // element i holds the value i + 1
+ }
+
+ thrust::device_vector devMemInput(hostMemInput);
+
+ TData result = thrust::reduce(devMemInput.begin(), devMemInput.end(), static_cast(0));
+
+ TData expected_result = (static_cast(size) * (static_cast(size) + 1)) / static_cast(2); // closed form of 1 + 2 + ... + size; NOTE(review): presumably computed in TData, which would exceed a 32-bit int for the larger sizes - confirm
+ // verify that thrust reduce works with this problem size
+ REQUIRE(expected_result == Approx(result));
+
+ // reset the result so that the check after the benchmark only passes if the benchmark body really ran
+ result = static_cast(0);
+
+ BENCHMARK("reduce thrust")
+ {
+ return result = thrust::reduce(devMemInput.begin(), devMemInput.end(), static_cast(0));
+ };
+
+ REQUIRE(expected_result == Approx(result));
+}
+
+TEMPLATE_TEST_CASE("benchmark reduce", "[benchmark][reduce][thrust]", int, float, double)
+{
+ using Data = TestType; // element type of the current template instantiation
+
+ if constexpr(std::is_same_v)
+ {
+ reduce_benchmark(GENERATE(100, 100'000, 1'270'000, 1'600'000));
+ }
+ else if constexpr(std::is_same_v)
+ {
+ // the problem size 1'270'000 is left out for this type because of rounding errors
+ reduce_benchmark(GENERATE(100, 100'000, 2'000'000));
+ }
+ else if constexpr(std::is_same_v)
+ {
+ reduce_benchmark(GENERATE(100, 100'000, 1'270'000, 2'000'000));
+ }
+}
diff --git a/test/benchmarks/reduce/bench_vikunja_reduce.cpp b/test/benchmarks/reduce/bench_vikunja_reduce.cpp
new file mode 100644
index 0000000..2fb4913
--- /dev/null
+++ b/test/benchmarks/reduce/bench_vikunja_reduce.cpp
@@ -0,0 +1,101 @@
+/* Copyright 2022 Simeon Ehrig
+ *
+ * This file is part of vikunja.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+#include
+
+template
+inline void reduce_benchmark(TIdx size) // reduces the values 1..size with vikunja's deviceReduce, checks the sum, then benchmarks the call
+{
+ using Setup = vikunja::test::TestAlpakaSetup<
+ alpaka::DimInt<1u>, // dim
+ TIdx, // Idx
+ alpaka::AccCpuSerial, // host type
+ alpaka::ExampleDefaultAcc, // device type
+ alpaka::Blocking // queue type
+ >;
+ using Vec = alpaka::Vec;
+
+ INFO((vikunja::test::print_acc_info(size)));
+
+ Setup setup;
+ Vec extent = Vec::all(static_cast(size));
+
+ auto devMemInput = vikunja::bench::allocate_mem_iota(
+ setup,
+ extent,
+ static_cast(1), // first value
+ static_cast(1) // increment
+ );
+ TData* devMemInputPtrBegin = alpaka::getPtrNative(devMemInput);
+ TData* devMemInputPtrEnd = devMemInputPtrBegin + size;
+
+ auto devMemOutput = alpaka::allocBuf(setup.devAcc, extent); // NOTE(review): this buffer is never read or written below - candidate for removal
+ TData* devMemOutputPtrBegin = alpaka::getPtrNative(devMemOutput); // NOTE(review): unused in this function
+
+ auto functor = [] ALPAKA_FN_HOST_ACC(TData const i, TData const j) -> TData { return i + j; };
+
+ TData result = vikunja::reduce::deviceReduce(
+ setup.devAcc,
+ setup.devHost,
+ setup.queueAcc,
+ devMemInputPtrBegin,
+ devMemInputPtrEnd,
+ functor);
+
+ TData expected_result = (extent.prod() * (extent.prod() + static_cast(1)) / static_cast(2)); // closed form of 1 + 2 + ... + size
+
+ // verify that vikunja reduce works with this problem size
+ REQUIRE(expected_result == Approx(result));
+
+ // reset the result so that the check after the benchmark only passes if the benchmark body really ran
+ result = static_cast(0);
+
+ BENCHMARK("reduce vikunja")
+ {
+ return result = vikunja::reduce::deviceReduce(
+ setup.devAcc,
+ setup.devHost,
+ setup.queueAcc,
+ devMemInputPtrBegin,
+ devMemInputPtrEnd,
+ functor);
+ };
+
+ REQUIRE(expected_result == Approx(result));
+}
+
+TEMPLATE_TEST_CASE("benchmark reduce", "[benchmark][reduce][vikunja]", int, float, double)
+{
+ using Data = TestType; // element type of the current template instantiation
+ using Idx = std::uint64_t; // index type used for the problem size
+
+ if constexpr(std::is_same_v)
+ {
+ reduce_benchmark(GENERATE(100, 100'000, 1'270'000, 1'600'000));
+ }
+ else if constexpr(std::is_same_v)
+ {
+ // the problem size 1'270'000 is left out for this type because of precision errors
+ reduce_benchmark(GENERATE(100, 100'000, 2'000'000));
+ }
+ else if constexpr(std::is_same_v)
+ {
+ reduce_benchmark(GENERATE(100, 100'000, 1'270'000, 2'000'000));
+ }
+}
diff --git a/test/benchmarks/transform/CMakeLists.txt b/test/benchmarks/transform/CMakeLists.txt
new file mode 100644
index 0000000..5693714
--- /dev/null
+++ b/test/benchmarks/transform/CMakeLists.txt
@@ -0,0 +1,46 @@
+# Copyright 2022 Simeon Ehrig
+#
+# This file is part of vikunja.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+cmake_minimum_required(VERSION 3.18)
+
+set(_TARGET_NAME_VIKUNJA_TRANSFORM "bench_vikunja_transform")
+
+alpaka_add_executable(
+ ${_TARGET_NAME_VIKUNJA_TRANSFORM}
+ bench_vikunja_transform.cpp
+ )
+
+target_link_libraries(${_TARGET_NAME_VIKUNJA_TRANSFORM}
+ PRIVATE
+ vikunja::testSetup
+ vikunja::benchSetup
+ vikunja::internalvikunja
+)
+
+add_test(NAME ${_TARGET_NAME_VIKUNJA_TRANSFORM} COMMAND ${_TARGET_NAME_VIKUNJA_TRANSFORM} ${_VIKUNJA_TEST_OPTIONS})
+# avoid running the benchmarks in parallel
+set_tests_properties(${_TARGET_NAME_VIKUNJA_TRANSFORM} PROPERTIES RUN_SERIAL TRUE)
+
+if(VIKUNJA_ENABLE_CUDA_THRUST_BENCHMARKS)
+ set(_TARGET_NAME_THRUST_TRANSFORM "bench_thrust_transform")
+
+ alpaka_add_executable(
+ ${_TARGET_NAME_THRUST_TRANSFORM}
+ bench_thrust_transform.cpp
+ )
+
+ target_link_libraries(${_TARGET_NAME_THRUST_TRANSFORM}
+ PRIVATE
+ vikunja::testSetup
+ vikunja::benchSetup
+ vikunja::internalvikunja
+ )
+
+ add_test(NAME ${_TARGET_NAME_THRUST_TRANSFORM} COMMAND ${_TARGET_NAME_THRUST_TRANSFORM} ${_VIKUNJA_TEST_OPTIONS})
+ set_tests_properties(${_TARGET_NAME_THRUST_TRANSFORM} PROPERTIES RUN_SERIAL TRUE)
+endif()
diff --git a/test/benchmarks/transform/bench_thrust_transform.cpp b/test/benchmarks/transform/bench_thrust_transform.cpp
new file mode 100644
index 0000000..ed86842
--- /dev/null
+++ b/test/benchmarks/transform/bench_thrust_transform.cpp
@@ -0,0 +1,56 @@
+/* Copyright 2022 Simeon Ehrig
+ *
+ * This file is part of vikunja.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include
+
+#include
+#include
+
+template
+inline void transform_benchmark(int size)
+{
+ std::vector hostMemInput(size);
+ for(int i = 0; i < size; ++i)
+ {
+ hostMemInput[i] = static_cast(i) + static_cast(1);
+ }
+
+ thrust::device_vector devMemInput(hostMemInput);
+ thrust::device_vector devMemOutput(size);
+
+ auto functor = [] __device__(TData const i) -> TData { return 2 * i; };
+ thrust::transform(devMemInput.begin(), devMemInput.end(), devMemOutput.begin(), functor);
+
+ std::vector hostMemOutput(size);
+ thrust::copy(devMemOutput.begin(), devMemOutput.end(), hostMemOutput.begin());
+
+ for(int i = 0; i < size; ++i)
+ {
+ TData expected_result = static_cast(2) * static_cast(i + 1);
+ REQUIRE(expected_result == Approx(hostMemOutput[i]));
+ }
+ // honeypot to check that the function call in the benchmark block has not been removed by the optimizer
+ hostMemOutput[0] = static_cast(42);
+
+ BENCHMARK("transform thrust")
+ {
+ return thrust::transform(devMemInput.begin(), devMemInput.end(), devMemOutput.begin(), functor);
+ };
+
+ thrust::copy(devMemOutput.begin(), devMemOutput.end(), hostMemOutput.begin());
+
+ REQUIRE(static_cast(2) == Approx(hostMemOutput[0]));
+}
+
+TEMPLATE_TEST_CASE("benchmark transform", "[benchmark][transform][thrust]", int, float, double)
+{
+ using Data = TestType; // element type of the current template instantiation
+
+ transform_benchmark(GENERATE(100, 100'000, 1'270'000, 2'000'000)); // one benchmark run per problem size
+}
diff --git a/test/benchmarks/transform/bench_vikunja_transform.cpp b/test/benchmarks/transform/bench_vikunja_transform.cpp
new file mode 100644
index 0000000..2c71e3b
--- /dev/null
+++ b/test/benchmarks/transform/bench_vikunja_transform.cpp
@@ -0,0 +1,96 @@
+/* Copyright 2022 Simeon Ehrig
+ *
+ * This file is part of vikunja.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+template
+inline void transform_benchmark(TIdx size)
+{
+ using Setup = vikunja::test::TestAlpakaSetup<
+ alpaka::DimInt<1u>, // dim
+ TIdx, // Idx
+ alpaka::AccCpuSerial, // host type
+ alpaka::ExampleDefaultAcc, // device type
+ alpaka::Blocking // queue type
+ >;
+ using Vec = alpaka::Vec;
+
+ INFO((vikunja::test::print_acc_info(size)));
+
+ Setup setup;
+ Vec extent = Vec::all(static_cast(size));
+
+ auto devMemInput = vikunja::bench::allocate_mem_iota(
+ setup,
+ extent,
+ static_cast(1), // first value
+ static_cast(1) // increment
+ );
+ TData* devMemInputPtrBegin = alpaka::getPtrNative(devMemInput);
+ TData* devMemInputPtrEnd = devMemInputPtrBegin + size;
+
+ auto devMemOutput = alpaka::allocBuf(setup.devAcc, extent);
+ TData* devMemOutputPtrBegin = alpaka::getPtrNative(devMemOutput);
+
+ auto hostMemOutput = alpaka::allocBuf(setup.devHost, extent);
+ TData* hostMemOutputPtrBegin = alpaka::getPtrNative(hostMemOutput);
+
+ auto functor = [] ALPAKA_FN_HOST_ACC(TData const i) -> TData { return 2 * i; };
+
+ vikunja::transform::deviceTransform(
+ setup.devAcc,
+ setup.queueAcc,
+ devMemInputPtrBegin,
+ devMemInputPtrEnd,
+ devMemOutputPtrBegin,
+ functor);
+
+ alpaka::memcpy(setup.queueAcc, hostMemOutput, devMemOutput, extent);
+
+ for(auto i = static_cast(0); i < size; ++i)
+ {
+ TData expected_result = static_cast(2) * static_cast(i + 1);
+ REQUIRE(expected_result == Approx(hostMemOutputPtrBegin[i]));
+ }
+
+ // honeypot to check that the function call in the benchmark block has not been removed by the optimizer
+ hostMemOutputPtrBegin[0] = static_cast(42);
+
+
+ BENCHMARK("transform vikunja")
+ {
+ return vikunja::transform::deviceTransform(
+ setup.devAcc,
+ setup.queueAcc,
+ devMemInputPtrBegin,
+ devMemInputPtrEnd,
+ devMemOutputPtrBegin,
+ functor);
+ };
+
+ alpaka::memcpy(setup.queueAcc, hostMemOutput, devMemOutput, extent);
+
+ REQUIRE(static_cast(2) == Approx(hostMemOutputPtrBegin[0]));
+}
+
+TEMPLATE_TEST_CASE("benchmark transform", "[benchmark][transform][vikunja]", int, float, double)
+{
+ using Data = TestType; // element type of the current template instantiation
+ using Idx = std::uint64_t; // index type used for the problem size
+
+ transform_benchmark(GENERATE(100, 100'000, 1'270'000, 2'000'000)); // one benchmark run per problem size
+}
diff --git a/test/include/vikunja/test/utility.hpp b/test/include/vikunja/test/utility.hpp
index b9baaf1..d375cbd 100644
--- a/test/include/vikunja/test/utility.hpp
+++ b/test/include/vikunja/test/utility.hpp
@@ -16,6 +16,13 @@
#include
+#define REQUIRE_MESSAGE(cond, msg) \
+ do \
+ { \
+ INFO(msg); \
+ REQUIRE(cond); \
+ } while((void) 0, 0)
+
namespace vikunja
{
namespace test