Skip to content

Commit

Permalink
Merge pull request #1558 from evoskuil/master
Browse files Browse the repository at this point in the history
Enable vector parse and vector k-addition.
  • Loading branch information
evoskuil authored Dec 4, 2024
2 parents 4a0fb22 + 9aa6b0d commit f464f46
Show file tree
Hide file tree
Showing 13 changed files with 237 additions and 202 deletions.
12 changes: 6 additions & 6 deletions include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,25 @@ template <size_t Word, size_t Lanes>
INLINE auto CLASS::
pack(const xblock_t<Lanes>& xblock) NOEXCEPT
{
using xword = to_extended<word_t, Lanes>;
using xword_t = to_extended<word_t, Lanes>;

if constexpr (Lanes == 2)
{
return byteswap<word_t>(set<xword>(
return byteswap<word_t>(set<xword_t>(
xblock[0][Word],
xblock[1][Word]));
}
else if constexpr (Lanes == 4)
{
return byteswap<word_t>(set<xword>(
return byteswap<word_t>(set<xword_t>(
xblock[0][Word],
xblock[1][Word],
xblock[2][Word],
xblock[3][Word]));
}
else if constexpr (Lanes == 8)
{
return byteswap<word_t>(set<xword>(
return byteswap<word_t>(set<xword_t>(
xblock[0][Word],
xblock[1][Word],
xblock[2][Word],
Expand All @@ -68,7 +68,7 @@ pack(const xblock_t<Lanes>& xblock) NOEXCEPT
}
else if constexpr (Lanes == 16)
{
return byteswap<word_t>(set<xword>(
return byteswap<word_t>(set<xword_t>(
xblock[ 0][Word],
xblock[ 1][Word],
xblock[ 2][Word],
Expand Down Expand Up @@ -181,7 +181,7 @@ vector_schedule_sequential_compress(state_t& state, iblocks_t& blocks) NOEXCEPT
constexpr auto lanes = capacity<xWord, word_t>;
static_assert(is_valid_lanes<lanes>);

if constexpr (have<xWord>())
if constexpr (have<xWord>)
{
if (blocks.size() >= lanes)
{
Expand Down
45 changes: 20 additions & 25 deletions include/bitcoin/system/impl/hash/sha/algorithm_konstant.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -50,43 +50,38 @@ template<size_t Round, typename xWord>
INLINE void CLASS::
vector_konstant(wbuffer_t<xWord>& wbuffer) NOEXCEPT
{
constexpr auto r = Round;
constexpr auto s = SHA::word_bits;
constexpr auto lanes = capacity<xWord, word_t>;
constexpr auto r = Round * lanes;

if constexpr (lanes == 2)
if constexpr (lanes == 16)
{
wbuffer[Round] = f::add<s>(wbuffer[Round], set<xWord>(
K::get[r + 1], K::get[r + 0]));
}
else if constexpr (lanes == 4)
{
wbuffer[Round] = f::add<s>(wbuffer[Round], set<xWord>(
K::get[r + 3], K::get[r + 2], K::get[r + 1], K::get[r + 0]));
K::get[r + 0], K::get[r + 1], K::get[r + 2], K::get[r + 3],
K::get[r + 4], K::get[r + 5], K::get[r + 6], K::get[r + 7],
K::get[r + 8], K::get[r + 9], K::get[r + 10], K::get[r + 11],
K::get[r + 12], K::get[r + 13], K::get[r + 14], K::get[r + 15]));
}
else if constexpr (lanes == 8)
{
wbuffer[Round] = f::add<s>(wbuffer[Round], set<xWord>(
K::get[r + 7], K::get[r + 6], K::get[r + 5], K::get[r + 4],
K::get[r + 3], K::get[r + 2], K::get[r + 1], K::get[r + 0]));
K::get[r + 0], K::get[r + 1], K::get[r + 2], K::get[r + 3],
K::get[r + 4], K::get[r + 5], K::get[r + 6], K::get[r + 7]));
}
else if constexpr (lanes == 16)
else if constexpr (lanes == 4)
{
wbuffer[Round] = f::add<s>(wbuffer[Round], set<xWord>(
K::get[r + 15], K::get[r + 14], K::get[r + 13], K::get[r + 12],
K::get[r + 11], K::get[r + 10], K::get[r + 9], K::get[r + 8],
K::get[r + 7], K::get[r + 6], K::get[r + 5], K::get[r + 4],
K::get[r + 3], K::get[r + 2], K::get[r + 1], K::get[r + 0]));
K::get[r + 0], K::get[r + 1], K::get[r + 2], K::get[r + 3]));
}
}

TEMPLATE
void CLASS::
vector_konstant(buffer_t& buffer) NOEXCEPT
{
if constexpr (use_x512)
if constexpr (have_lanes<word_t, 16>)
{
auto& wbuffer = array_cast<xint512_t>(buffer);
auto& wbuffer = array_cast<to_extended<word_t, 16>>(buffer);
vector_konstant<0>(wbuffer);
vector_konstant<1>(wbuffer);
vector_konstant<2>(wbuffer);
Expand All @@ -97,9 +92,9 @@ vector_konstant(buffer_t& buffer) NOEXCEPT
vector_konstant<4>(wbuffer);
}
}
else if constexpr (use_x256)
else if constexpr (have_lanes<word_t, 8>)
{
auto& wbuffer = array_cast<xint256_t>(buffer);
auto& wbuffer = array_cast<to_extended<word_t, 8>>(buffer);
vector_konstant<0>(wbuffer);
vector_konstant<1>(wbuffer);
vector_konstant<2>(wbuffer);
Expand All @@ -115,9 +110,9 @@ vector_konstant(buffer_t& buffer) NOEXCEPT
vector_konstant<9>(wbuffer);
}
}
else if constexpr (use_x128)
else if constexpr (have_lanes<word_t, 4>)
{
auto& wbuffer = array_cast<xint128_t>(buffer);
auto& wbuffer = array_cast<to_extended<word_t, 4>>(buffer);
vector_konstant<0>(wbuffer);
vector_konstant<1>(wbuffer);
vector_konstant<2>(wbuffer);
Expand Down Expand Up @@ -262,10 +257,10 @@ konstant(buffer_t& buffer) NOEXCEPT
{
konstant_(buffer);
}
////else if constexpr (vector)
////{
//// vector_konstant(buffer);
////}
else if constexpr (vector && !with_clang)
{
vector_konstant(buffer);
}
else
{
konstant_(buffer);
Expand Down
2 changes: 1 addition & 1 deletion include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ merkle_hash_vector(idigests_t& digests, iblocks_t& blocks) NOEXCEPT
constexpr auto lanes = capacity<xWord, word_t>;
static_assert(is_valid_lanes<lanes>);

if constexpr (have<xWord>())
if constexpr (have<xWord>)
{
if (blocks.size() >= lanes)
{
Expand Down
184 changes: 133 additions & 51 deletions include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -58,24 +58,51 @@ input(buffer_t& buffer, const block_t& block) NOEXCEPT
}
else if constexpr (bc::is_little_endian)
{
// TODO: evaluate 4/8/16 lane optimization using byteswap.
const auto& in = array_cast<word_t>(block);
buffer[0] = native_from_big_end(in[0]);
buffer[1] = native_from_big_end(in[1]);
buffer[2] = native_from_big_end(in[2]);
buffer[3] = native_from_big_end(in[3]);
buffer[4] = native_from_big_end(in[4]);
buffer[5] = native_from_big_end(in[5]);
buffer[6] = native_from_big_end(in[6]);
buffer[7] = native_from_big_end(in[7]);
buffer[8] = native_from_big_end(in[8]);
buffer[9] = native_from_big_end(in[9]);
buffer[10] = native_from_big_end(in[10]);
buffer[11] = native_from_big_end(in[11]);
buffer[12] = native_from_big_end(in[12]);
buffer[13] = native_from_big_end(in[13]);
buffer[14] = native_from_big_end(in[14]);
buffer[15] = native_from_big_end(in[15]);
if constexpr (have_lanes<word_t, 16> && !with_clang)
{
using xword_t = to_extended<word_t, 16>;
const auto& in = array_cast<xword_t>(block);
auto& out = array_cast<xword_t>(buffer);
out[0] = byteswap<word_t>(in[0]);
}
else if constexpr (have_lanes<word_t, 8> && !with_clang)
{
using xword_t = to_extended<word_t, 8>;
const auto& in = array_cast<xword_t>(block);
auto& out = array_cast<xword_t>(buffer);
out[0] = byteswap<word_t>(in[0]);
out[1] = byteswap<word_t>(in[1]);
}
else if constexpr (have_lanes<word_t, 4> && !with_clang)
{
using xword_t = to_extended<word_t, 4>;
const auto& in = array_cast<xword_t>(block);
auto& out = array_cast<xword_t>(buffer);
out[0] = byteswap<word_t>(in[0]);
out[1] = byteswap<word_t>(in[1]);
out[2] = byteswap<word_t>(in[2]);
out[3] = byteswap<word_t>(in[3]);
}
else
{
const auto& in = array_cast<word_t>(block);
buffer[0] = native_from_big_end(in[0]);
buffer[1] = native_from_big_end(in[1]);
buffer[2] = native_from_big_end(in[2]);
buffer[3] = native_from_big_end(in[3]);
buffer[4] = native_from_big_end(in[4]);
buffer[5] = native_from_big_end(in[5]);
buffer[6] = native_from_big_end(in[6]);
buffer[7] = native_from_big_end(in[7]);
buffer[8] = native_from_big_end(in[8]);
buffer[9] = native_from_big_end(in[9]);
buffer[10] = native_from_big_end(in[10]);
buffer[11] = native_from_big_end(in[11]);
buffer[12] = native_from_big_end(in[12]);
buffer[13] = native_from_big_end(in[13]);
buffer[14] = native_from_big_end(in[14]);
buffer[15] = native_from_big_end(in[15]);
}
}
else
{
Expand All @@ -92,7 +119,6 @@ input_left(buffer_t& buffer, const half_t& half) NOEXCEPT

if (std::is_constant_evaluated())
{
// TODO: evaluate 4/8 lane optimization using byteswap.
constexpr auto size = SHA::word_bytes;
from_big<0 * size>(buffer.at(0), half);
from_big<1 * size>(buffer.at(1), half);
Expand All @@ -105,15 +131,33 @@ input_left(buffer_t& buffer, const half_t& half) NOEXCEPT
}
else if constexpr (bc::is_little_endian)
{
const auto& in = array_cast<word>(half);
buffer[0] = native_from_big_end(in[0]);
buffer[1] = native_from_big_end(in[1]);
buffer[2] = native_from_big_end(in[2]);
buffer[3] = native_from_big_end(in[3]);
buffer[4] = native_from_big_end(in[4]);
buffer[5] = native_from_big_end(in[5]);
buffer[6] = native_from_big_end(in[6]);
buffer[7] = native_from_big_end(in[7]);
if constexpr (have_lanes<word_t, 8> && !with_clang)
{
using xword_t = to_extended<word_t, 8>;
const auto& in = array_cast<xword_t>(half);
auto& out = array_cast<xword_t>(buffer);
out[0] = byteswap<word_t>(in[0]);
}
else if constexpr (have_lanes<word_t, 4> && !with_clang)
{
using xword_t = to_extended<word_t, 4>;
const auto& in = array_cast<xword_t>(half);
auto& out = array_cast<xword_t>(buffer);
out[0] = byteswap<word_t>(in[0]);
out[1] = byteswap<word_t>(in[1]);
}
else
{
const auto& in = array_cast<word>(half);
buffer[0] = native_from_big_end(in[0]);
buffer[1] = native_from_big_end(in[1]);
buffer[2] = native_from_big_end(in[2]);
buffer[3] = native_from_big_end(in[3]);
buffer[4] = native_from_big_end(in[4]);
buffer[5] = native_from_big_end(in[5]);
buffer[6] = native_from_big_end(in[6]);
buffer[7] = native_from_big_end(in[7]);
}
}
else
{
Expand Down Expand Up @@ -141,16 +185,33 @@ input_right(buffer_t& buffer, const half_t& half) NOEXCEPT
}
else if constexpr (bc::is_little_endian)
{
// TODO: evaluate 4/8 lane optimization using byteswap.
const auto& in = array_cast<word>(half);
buffer[8] = native_from_big_end(in[0]);
buffer[9] = native_from_big_end(in[1]);
buffer[10] = native_from_big_end(in[2]);
buffer[11] = native_from_big_end(in[3]);
buffer[12] = native_from_big_end(in[4]);
buffer[13] = native_from_big_end(in[5]);
buffer[14] = native_from_big_end(in[6]);
buffer[15] = native_from_big_end(in[7]);
if constexpr (have_lanes<word_t, 8> && !with_clang)
{
using xword_t = to_extended<word_t, 8>;
const auto& in = array_cast<xword_t>(half);
auto& out = array_cast<xword_t>(buffer);
out[1] = byteswap<word_t>(in[0]);
}
else if constexpr (have_lanes<word_t, 4> && !with_clang)
{
using xword_t = to_extended<word_t, 4>;
const auto& in = array_cast<xword_t>(half);
auto& out = array_cast<xword_t>(buffer);
out[2] = byteswap<word_t>(in[0]);
out[3] = byteswap<word_t>(in[1]);
}
else
{
const auto& in = array_cast<word>(half);
buffer[8] = native_from_big_end(in[0]);
buffer[9] = native_from_big_end(in[1]);
buffer[10] = native_from_big_end(in[2]);
buffer[11] = native_from_big_end(in[3]);
buffer[12] = native_from_big_end(in[4]);
buffer[13] = native_from_big_end(in[5]);
buffer[14] = native_from_big_end(in[6]);
buffer[15] = native_from_big_end(in[7]);
}
}
else
{
Expand Down Expand Up @@ -185,30 +246,51 @@ output(const state_t& state) NOEXCEPT
}
else if constexpr (bc::is_little_endian)
{
if constexpr (SHA::strength == 160)
if constexpr (SHA::strength != 160)
{
return array_cast<byte_t>(state_t
if constexpr (have_lanes<word_t, 8> && !with_clang)
{
native_to_big_end(state[0]),
native_to_big_end(state[1]),
native_to_big_end(state[2]),
native_to_big_end(state[3]),
native_to_big_end(state[4])
});
using xword_t = to_extended<word_t, 8>;
const auto& in = array_cast<xword_t>(state);
return array_cast<byte_t>(wstate_t<xword_t>
{
byteswap<word_t>(in[0])
});
}
else if constexpr (have_lanes<word_t, 4> && !with_clang)
{
using xword_t = to_extended<word_t, 4>;
const auto& in = array_cast<xword_t>(state);
return array_cast<byte_t>(wstate_t<xword_t>
{
byteswap<word_t>(in[0]),
byteswap<word_t>(in[1])
});
}
else
{
return array_cast<byte_t>(state_t
{
native_to_big_end(state[0]),
native_to_big_end(state[1]),
native_to_big_end(state[2]),
native_to_big_end(state[3]),
native_to_big_end(state[4]),
native_to_big_end(state[5]),
native_to_big_end(state[6]),
native_to_big_end(state[7])
});
}
}
else
{
// TODO: evaluate 4/8 lane optimization using byteswap.
return array_cast<byte_t>(state_t
{
native_to_big_end(state[0]),
native_to_big_end(state[1]),
native_to_big_end(state[2]),
native_to_big_end(state[3]),
native_to_big_end(state[4]),
native_to_big_end(state[5]),
native_to_big_end(state[6]),
native_to_big_end(state[7])
native_to_big_end(state[4])
});
}
}
Expand Down
2 changes: 1 addition & 1 deletion include/bitcoin/system/impl/hash/sha/algorithm_sigma.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ TEMPLATE
void CLASS::
schedule_sigma(buffer_t& buffer) NOEXCEPT
{
if constexpr (SHA::strength != 160 && have_lanes<word_t, 8>())
if constexpr (SHA::strength != 160 && have_lanes<word_t, 8>)
{
prepare8<16>(buffer);
prepare8<24>(buffer);
Expand Down
Loading

0 comments on commit f464f46

Please sign in to comment.