Skip to content

Commit

Permalink
add benchmark helper function allocate_mem_constant()
Browse files Browse the repository at this point in the history
  • Loading branch information
SimeonEhrig committed Jan 25, 2022
1 parent 7e1a2e0 commit 1ae302e
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 0 deletions.
33 changes: 33 additions & 0 deletions test/benchmarks/helper/test_bench_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,36 @@ TEMPLATE_TEST_CASE("allocate_mem_iota different increment", "[iota]", int, float
REQUIRE_MESSAGE(expected_result == hostMemPtr[i], "failed with index: " + std::to_string(i));
}
}

// Checks that `vikunja::bench::allocate_mem_constant` returns a buffer in which
// every element equals the requested constant. The buffer is copied to the host
// and compared element by element, for several sizes, constants and data types.
// NOTE(review): the "[iota]" tag looks copied from the iota tests above —
// confirm whether a dedicated tag is intended.
TEMPLATE_TEST_CASE("allocate_mem_constant", "[iota]", int, float, double)
{
    using Data = TestType;
    using Setup = vikunja::test::TestAlpakaSetup<
        alpaka::DimInt<1u>, // dim
        int, // Idx
        alpaka::AccCpuSerial, // host type
        alpaka::ExampleDefaultAcc, // device type
        alpaka::Blocking // queue type
        >;
    using Idx = Setup::Idx;
    using Vec = alpaka::Vec<Setup::Dim, Idx>;

    // The order of the GENERATE calls is kept: size first, then the constant.
    Idx size = GENERATE(1, 10, 3045, 2'000'000);
    Data constant = GENERATE(0, 1, 45, -42);

    INFO((vikunja::test::print_acc_info<Setup::Dim>(size)));
    INFO("constant: " + std::to_string(constant));

    Setup setup;
    Vec const extent = Vec::all(size);

    // Buffer under test (created by the helper) and a host buffer of the same extent.
    auto devMem = vikunja::bench::allocate_mem_constant<Data>(setup, extent, constant);
    auto hostMem = alpaka::allocBuf<Data, Idx>(setup.devHost, extent);
    Data const* const hostData = alpaka::getPtrNative(hostMem);

    // Copy the result into the host buffer so it can be inspected.
    alpaka::memcpy(setup.queueAcc, hostMem, devMem, extent);

    // Every element has to hold the constant.
    Data const* const hostEnd = hostData + size;
    for(Data const* elem = hostData; elem != hostEnd; ++elem)
    {
        REQUIRE(constant == *elem);
    }
}
94 changes: 94 additions & 0 deletions test/benchmarks/include/vikunja/bench/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,98 @@ namespace vikunja::bench

return devMem;
}

//! Kernel functor that writes one constant value into each element of a vector.
//!
//! \tparam TData Type of each element.
template<typename TData>
class ConstantInitFunctor
{
private:
    //! Value that is written to every element.
    TData const m_constant;

public:
    //! Creates a functor that fills a vector with `constant`.
    //!
    //! \param constant Value of all elements.
    ConstantInitFunctor(TData const constant) : m_constant(constant)
    {
    }

    //! Writes the constant to each element of the output vector.
    //!
    //! \tparam TAcc The accelerator environment to be executed on.
    //! \tparam TIdx The index type.
    //! \param acc The accelerator to be executed on.
    //! \param output The destination vector.
    //! \param numElements The number of elements.
    ALPAKA_NO_HOST_ACC_WARNING
    template<typename TAcc, typename TIdx>
    ALPAKA_FN_ACC auto operator()(TAcc const& acc, TData* const output, TIdx const& numElements) const -> void
    {
        static_assert(alpaka::Dim<TAcc>::value == 1, "The ConstantInitFunctor expects 1-dimensional indices!");

        // Each thread handles `threadElemExtent` consecutive elements, starting at
        // `gridThreadIdx * threadElemExtent`.
        TIdx const gridThreadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
        TIdx const threadElemExtent(alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]);
        TIdx const threadFirstElemIdx(gridThreadIdx * threadElemExtent);

        // Threads whose first element lies beyond the vector have nothing to do.
        if(threadFirstElemIdx < numElements)
        {
            // Calculate the number of elements for this thread.
            // The result is uniform for all but the last thread.
            TIdx const threadLastElemIdx(threadFirstElemIdx + threadElemExtent);
            // Clip the upper bound so the last thread does not write past the end.
            TIdx const threadLastElemIdxClipped(alpaka::math::min(acc, numElements, threadLastElemIdx));

            for(TIdx i(threadFirstElemIdx); i < threadLastElemIdxClipped; ++i)
            {
                output[i] = m_constant;
            }
        }
    }
};

//! Allocates memory and initialises each value with a constant value.
//! The allocation is done with `setup.devAcc`.
//!
//! \tparam TData Data type of the memory buffer.
//! \tparam TSetup Fully specialized type of `vikunja::test::TestAlpakaSetup`.
//! \tparam Type of the extent.
//! \tparam TBuf Type of the alpaka memory buffer.
//! \param setup Instance of `vikunja::test::TestAlpakaSetup`. The `setup.devAcc` and `setup.queueDev` is used
//! for allocation and initialization of the the memory.
//! \param extent Size of the memory buffer. Needs to be 1 dimensional.
//! \param begin Value of the constant.
template<
typename TData,
typename TSetup,
typename TExtent,
typename TBuf = alpaka::Buf<typename TSetup::DevAcc, TData, alpaka::DimInt<1u>, typename TSetup::Idx>>
TBuf allocate_mem_constant(TSetup& setup, TExtent const& extent, TData const constant)
{
// TODO: test also 2 and 3 dimensional memory
static_assert(TExtent::Dim::value == 1);

// TODO: optimize utilization for CPU backends
typename TSetup::Idx const elementsPerThread = 1;
typename TSetup::Idx linSize = extent.prod();

TBuf devMem(alpaka::allocBuf<TData, typename TSetup::Idx>(setup.devAcc, extent));

alpaka::WorkDivMembers<typename TSetup::Dim, typename TSetup::Idx> const workDiv(
alpaka::getValidWorkDiv<typename TSetup::Acc>(
setup.devAcc,
extent,
elementsPerThread,
false,
alpaka::GridBlockExtentSubDivRestrictions::Unrestricted));

ConstantInitFunctor constantInitFunctor(constant);

alpaka::exec<typename TSetup::Acc>(
setup.queueAcc,
workDiv,
constantInitFunctor,
alpaka::getPtrNative(devMem),
linSize);

return devMem;
}
} // namespace vikunja::bench

0 comments on commit 1ae302e

Please sign in to comment.