From 044f5c5345a78dd19ce678c99fd11552c69b1eef Mon Sep 17 00:00:00 2001
From: "John M. Schanck"
Date: Sat, 23 Jan 2021 15:40:05 -0500
Subject: [PATCH] Removing GeMSS

---
 gemss/VERSION | 1 -
 gemss/checkout.sh | 67 -
 gemss/meta/crypto_sign_gemss-128_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-192_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-256_META.yml | 8 -
 .../meta/crypto_sign_gemss-blue-128_META.yml | 8 -
 .../meta/crypto_sign_gemss-blue-192_META.yml | 8 -
 .../meta/crypto_sign_gemss-blue-256_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-red-128_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-red-192_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-red-256_META.yml | 8 -
 gemss/package.sh | 661 ---
 ...entation_sign_GeMSS128_include_add_gf2nx.h | 30 -
 ...mplementation_sign_GeMSS128_include_arch.h | 192 -
 ...Implementation_sign_GeMSS128_include_bit.h | 383 --
 ...ign_GeMSS128_include_chooseRootHFE_gf2nx.h | 12 -
 ...ntation_sign_GeMSS128_include_conv_gf2nx.h | 12 -
 ...entation_sign_GeMSS128_include_div_gf2nx.h | 41 -
 ...ion_sign_GeMSS128_include_dotProduct_gf2.h | 77 -
 ...on_sign_GeMSS128_include_findRoots_gf2nx.h | 22 -
 ...sign_GeMSS128_include_frobeniusMap_gf2nx.h | 21 -
 ...plementation_sign_GeMSS128_include_gf2nx.h | 12 -
 ...mplementation_sign_GeMSS128_include_hash.h | 346 --
 ...mplementation_sign_GeMSS128_include_init.h | 32 -
 ...plementation_sign_GeMSS128_include_macro.h | 33 -
 ...mentation_sign_GeMSS128_include_mul_gf2n.h | 16 -
 ...mentation_sign_GeMSS128_include_mul_gf2x.h | 4798 -----------------
 ...ion_sign_GeMSS128_include_parameters_HFE.h | 13 -
 ...mentation_sign_GeMSS128_include_rem_gf2n.h | 202 -
 ...ementation_sign_GeMSS128_include_signHFE.h | 23 -
 ...ntation_sign_GeMSS128_include_simd_intel.h | 342 --
 ...mentation_sign_GeMSS128_include_sqr_gf2n.h | 20 -
 ...mentation_sign_GeMSS128_include_sqr_gf2x.h | 372 --
 ...ntation_sign_GeMSS128_include_tools_gf2m.h | 42 -
 ...ntation_sign_GeMSS128_include_tools_gf2n.h | 52 -
 ...tation_sign_GeMSS128_include_tools_gf2nv.h | 40 -
 ...ntation_sign_GeMSS128_include_tools_gf2v.h | 26 -
 ...sign_GeMSS128_src_changeVariablesMQS_gf2.c | 89 -
 ...on_sign_GeMSS128_src_chooseRootHFE_gf2nx.c | 141 -
 ...ementation_sign_GeMSS128_src_convMQS_gf2.c | 40 -
 ...lementation_sign_GeMSS128_src_convMQ_gf2.c | 405 --
 ...lementation_sign_GeMSS128_src_conv_gf2nx.c | 12 -
 ...ation_sign_GeMSS128_src_determinantn_gf2.c | 66 -
 ...tion_sign_GeMSS128_src_determinantnv_gf2.c | 66 -
 ...plementation_sign_GeMSS128_src_div_gf2nx.c | 62 -
 ...tion_sign_GeMSS128_src_evalMQSnocst8_gf2.c | 39 -
 ..._sign_GeMSS128_src_evalMQSnocst8_quo_gf2.c | 129 -
 ...mentation_sign_GeMSS128_src_evalMQSv_gf2.c | 68 -
 ...n_sign_GeMSS128_src_findRootsSplit_gf2nx.c | 74 -
 ...tation_sign_GeMSS128_src_findRoots_gf2nx.c | 142 -
 ...ion_sign_GeMSS128_src_frobeniusMap_gf2nx.c | 84 -
 ...ation_sign_GeMSS128_src_genSecretMQS_gf2.c | 415 --
 ...ation_sign_GeMSS128_src_initMatrixId_gf2.c | 140 -
 ...ntation_sign_GeMSS128_src_invMatrixn_gf2.c | 94 -
 ...tation_sign_GeMSS128_src_invMatrixnv_gf2.c | 94 -
 ...mplementation_sign_GeMSS128_src_inv_gf2n.c | 28 -
 ...on_sign_GeMSS128_src_mixEquationsMQS_gf2.c | 20 -
 ...mplementation_sign_GeMSS128_src_mul_gf2n.c | 213 -
 ...mplementation_sign_GeMSS128_src_mul_gf2x.c | 211 -
 ...plementation_sign_GeMSS128_src_predicate.c | 57 -
 ...ntation_sign_GeMSS128_src_randMatrix_gf2.c | 397 --
 ...mplementation_sign_GeMSS128_src_rem_gf2n.c | 9 -
 ...ed_Implementation_sign_GeMSS128_src_sign.c | 94 -
 ...Implementation_sign_GeMSS128_src_signHFE.c | 220 -
 ...tation_sign_GeMSS128_src_sign_keypairHFE.c | 199 -
...mentation_sign_GeMSS128_src_sign_openHFE.c | 41 - ...plementation_sign_GeMSS128_src_sort_gf2n.c | 37 - ...mplementation_sign_GeMSS128_src_sqr_gf2n.c | 234 - ...tion_sign_GeMSS128_src_vecMatProduct_gf2.c | 261 - ...entation_sign_GeMSS128_include_add_gf2nx.h | 30 - ...mplementation_sign_GeMSS128_include_arch.h | 131 - ...Implementation_sign_GeMSS128_include_bit.h | 322 -- ...ign_GeMSS128_include_chooseRootHFE_gf2nx.h | 12 - ...ntation_sign_GeMSS128_include_conv_gf2nx.h | 12 - ...entation_sign_GeMSS128_include_div_gf2nx.h | 41 - ...ion_sign_GeMSS128_include_dotProduct_gf2.h | 77 - ...on_sign_GeMSS128_include_findRoots_gf2nx.h | 22 - ...sign_GeMSS128_include_frobeniusMap_gf2nx.h | 12 - ...plementation_sign_GeMSS128_include_gf2nx.h | 12 - ...mplementation_sign_GeMSS128_include_hash.h | 346 -- ...mplementation_sign_GeMSS128_include_init.h | 32 - ...plementation_sign_GeMSS128_include_macro.h | 33 - ...mentation_sign_GeMSS128_include_mul_gf2n.h | 16 - ...ion_sign_GeMSS128_include_parameters_HFE.h | 13 - ...ementation_sign_GeMSS128_include_signHFE.h | 23 - ...mentation_sign_GeMSS128_include_sqr_gf2n.h | 20 - ...ntation_sign_GeMSS128_include_tools_gf2m.h | 42 - ...ntation_sign_GeMSS128_include_tools_gf2n.h | 52 - ...tation_sign_GeMSS128_include_tools_gf2nv.h | 40 - ...ntation_sign_GeMSS128_include_tools_gf2v.h | 26 - ...sign_GeMSS128_src_changeVariablesMQS_gf2.c | 33 - ...on_sign_GeMSS128_src_chooseRootHFE_gf2nx.c | 141 - ...ementation_sign_GeMSS128_src_convMQS_gf2.c | 40 - ...lementation_sign_GeMSS128_src_convMQ_gf2.c | 405 -- ...lementation_sign_GeMSS128_src_conv_gf2nx.c | 12 - ...ation_sign_GeMSS128_src_determinantn_gf2.c | 66 - ...tion_sign_GeMSS128_src_determinantnv_gf2.c | 66 - ...plementation_sign_GeMSS128_src_div_gf2nx.c | 62 - ...tion_sign_GeMSS128_src_evalMQSnocst8_gf2.c | 39 - ..._sign_GeMSS128_src_evalMQSnocst8_quo_gf2.c | 129 - ...mentation_sign_GeMSS128_src_evalMQSv_gf2.c | 45 - ...n_sign_GeMSS128_src_findRootsSplit_gf2nx.c | 74 - ...tation_sign_GeMSS128_src_findRoots_gf2nx.c | 142 - ...ion_sign_GeMSS128_src_frobeniusMap_gf2nx.c | 21 - ...ation_sign_GeMSS128_src_genSecretMQS_gf2.c | 322 -- ...ation_sign_GeMSS128_src_initMatrixId_gf2.c | 140 - ...ntation_sign_GeMSS128_src_invMatrixn_gf2.c | 94 - ...tation_sign_GeMSS128_src_invMatrixnv_gf2.c | 94 - ...on_sign_GeMSS128_src_mixEquationsMQS_gf2.c | 20 - ...mplementation_sign_GeMSS128_src_mul_gf2n.c | 213 - ...plementation_sign_GeMSS128_src_predicate.c | 57 - ...ntation_sign_GeMSS128_src_randMatrix_gf2.c | 397 -- ...ce_Implementation_sign_GeMSS128_src_sign.c | 94 - ...Implementation_sign_GeMSS128_src_signHFE.c | 220 - ...tation_sign_GeMSS128_src_sign_keypairHFE.c | 200 - ...mentation_sign_GeMSS128_src_sign_openHFE.c | 41 - ...mplementation_sign_GeMSS128_src_sqr_gf2n.c | 223 - ...tion_sign_GeMSS128_src_vecMatProduct_gf2.c | 258 - gemss/update_patches.sh | 47 - 119 files changed, 17150 deletions(-) delete mode 100644 gemss/VERSION delete mode 100755 gemss/checkout.sh delete mode 100644 gemss/meta/crypto_sign_gemss-128_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-192_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-256_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-blue-128_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-blue-192_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-blue-256_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-red-128_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-red-192_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-red-256_META.yml delete 
mode 100755 gemss/package.sh delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_add_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_arch.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_bit.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_conv_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_div_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_dotProduct_gf2.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_hash.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_init.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_macro.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2n.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2x.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_parameters_HFE.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_rem_gf2n.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_signHFE.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_simd_intel.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2n.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2x.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2m.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2n.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2nv.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2v.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQS_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQ_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_conv_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantn_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantnv_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_div_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSnocst8_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSnocst8_quo_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c delete mode 100644 
gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRoots_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_frobeniusMap_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_genSecretMQS_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixn_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_inv_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2x.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_predicate.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_randMatrix_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_rem_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sign.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_signHFE.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sign_keypairHFE.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sign_openHFE.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sort_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sqr_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_add_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_arch.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_bit.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_conv_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_div_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_dotProduct_gf2.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_hash.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_init.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_macro.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_mul_gf2n.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_parameters_HFE.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_signHFE.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_sqr_gf2n.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2m.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2n.h 
delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2nv.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2v.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQS_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQ_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_conv_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantn_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantnv_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_div_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSnocst8_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSnocst8_quo_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRoots_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_frobeniusMap_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_genSecretMQS_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixn_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_mul_gf2n.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_predicate.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_randMatrix_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_sign.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_signHFE.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_sign_keypairHFE.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_sign_openHFE.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_sqr_gf2n.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c delete mode 100755 gemss/update_patches.sh diff --git a/gemss/VERSION b/gemss/VERSION deleted file mode 100644 index 4078569..0000000 --- a/gemss/VERSION +++ /dev/null @@ -1 +0,0 @@ -GeMSS-Round2_V2.a diff --git a/gemss/checkout.sh b/gemss/checkout.sh deleted file mode 100755 index 890fb64..0000000 --- a/gemss/checkout.sh +++ /dev/null @@ -1,67 +0,0 @@ -PYTHON=/usr/bin/python3 - -BASE=`dirname $0` -BASE=`cd ${BASE} && pwd` - -VERSION=$(cat ${BASE}/VERSION) -V1=upstream -V2=upstream-patched - -ARCHIVE=${VERSION/.a/.zip} - -PATCHES=${BASE}/patches -SCRIPTS=${BASE}/scripts - -cd ${BASE} - -if [ -e "${V1}" ] -then - read -p "${V1} directory already exists. Delete it? 
" yn - if [ "${yn:-n}" != "y" ] - then - exit -1 - fi - rm -rf ${V1} -fi - -if [ -e "${V2}" ] -then - read -p "${V2} directory already exists. Delete it? " yn - if [ "${yn:-n}" != "y" ] - then - exit -1 - fi - rm -rf ${V2} -fi - -if [ ! -f ${BASE}/${ARCHIVE} ] -then - wget -P ${BASE} http://www-polsys.lip6.fr/~ryckeghem/packages/${ARCHIVE} -fi -unzip -qq -d ${BASE} ${BASE}/${ARCHIVE} - -mv ${VERSION} ${V1} -mkdir -p ${V2} - - -# De-duplicate files with symlinks. All patches are applied to -# Reference_Implementation/GeMSS-128 and Optimized_Implementation/GeMSS-128 - -( cd ${V1}/Reference_Implementation/sign/ -for X in */*/* -do - IFS=/ read PARAM SUBDIR FILE <<< $X - if [ ${PARAM} == 'GeMSS128' ]; then continue; fi - if [ ${FILE} == 'choice_crypto.h' ]; then continue; fi - ln -sf ../../GeMSS128/${SUBDIR}/${FILE} ${PARAM}/${SUBDIR}/${FILE} -done -) - -cp -rp ${V1}/* ${V2} - -( cd ${V2} -for X in ${PATCHES}/* -do - patch -p1 < ${X} -done -) diff --git a/gemss/meta/crypto_sign_gemss-128_META.yml b/gemss/meta/crypto_sign_gemss-128_META.yml deleted file mode 100644 index 23df5e3..0000000 --- a/gemss/meta/crypto_sign_gemss-128_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-128 -type: signature -claimed-nist-level: 1 -length-public-key: 352188 -length-secret-key: 16 -length-signature: 33 -nistkat-sha256: a60bb56ac878e85fd716ed5c8a003cb44034410509ae9140ddb4fa8c4ce248bb -testvectors-sha256: b207250643b2e76732f99aa91f06129f17cfa26567d127d432fb759179f93953 diff --git a/gemss/meta/crypto_sign_gemss-192_META.yml b/gemss/meta/crypto_sign_gemss-192_META.yml deleted file mode 100644 index 5aa8035..0000000 --- a/gemss/meta/crypto_sign_gemss-192_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-192 -type: signature -claimed-nist-level: 3 -length-public-key: 1237964 -length-secret-key: 24 -length-signature: 52 -nistkat-sha256: 47c4ad0a28de204c77d44c85e9e578689a3a7c490c9d3d3bdbc544cb7bc087b2 -testvectors-sha256: cb5ffcd708f16700e2a46355b0d6121096ff785ddc5143d9a603441428ee3049 diff --git a/gemss/meta/crypto_sign_gemss-256_META.yml b/gemss/meta/crypto_sign_gemss-256_META.yml deleted file mode 100644 index e423e1c..0000000 --- a/gemss/meta/crypto_sign_gemss-256_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-256 -type: signature -claimed-nist-level: 5 -length-public-key: 3040700 -length-secret-key: 32 -length-signature: 72 -nistkat-sha256: ac118f8b8e554be7b0a3df2e541a8dc6e42324a44d519ab69cb125068f64333b -testvectors-sha256: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa diff --git a/gemss/meta/crypto_sign_gemss-blue-128_META.yml b/gemss/meta/crypto_sign_gemss-blue-128_META.yml deleted file mode 100644 index 11f4a08..0000000 --- a/gemss/meta/crypto_sign_gemss-blue-128_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-blue-128 -type: signature -claimed-nist-level: 1 -length-public-key: 363609 -length-secret-key: 16 -length-signature: 34 -nistkat-sha256: 728f19ede55490cfa7d34de2e645a96a39dca2dda38d712dfb77fa5142c85ad8 -testvectors-sha256: 8e62484fd253f76fb0d90d0819fd7ba46ec17ca9d7afb6adedd514fe7d1456db diff --git a/gemss/meta/crypto_sign_gemss-blue-192_META.yml b/gemss/meta/crypto_sign_gemss-blue-192_META.yml deleted file mode 100644 index 272164c..0000000 --- a/gemss/meta/crypto_sign_gemss-blue-192_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-blue-192 -type: signature -claimed-nist-level: 3 -length-public-key: 1264117 -length-secret-key: 24 -length-signature: 53 -nistkat-sha256: 4832164471d1a7ea4d73c65c83d256b2ec402c4110838fc564cde2b3f05472d8 -testvectors-sha256: 
1e738fa80841ac0386ce47b275ec482d4c881093105ac9bd78b60cbbd9eba314 diff --git a/gemss/meta/crypto_sign_gemss-blue-256_META.yml b/gemss/meta/crypto_sign_gemss-blue-256_META.yml deleted file mode 100644 index 4f35f86..0000000 --- a/gemss/meta/crypto_sign_gemss-blue-256_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-blue-256 -type: signature -claimed-nist-level: 5 -length-public-key: 3087963 -length-secret-key: 32 -length-signature: 74 -nistkat-sha256: ae7777b16435db97b3b7d8e5bf86e321ba2c44b8841a7042ea43ea77b5a6fc87 -testvectors-sha256: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa diff --git a/gemss/meta/crypto_sign_gemss-red-128_META.yml b/gemss/meta/crypto_sign_gemss-red-128_META.yml deleted file mode 100644 index 72b90ef..0000000 --- a/gemss/meta/crypto_sign_gemss-red-128_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-red-128 -type: signature -claimed-nist-level: 1 -length-public-key: 375213 -length-secret-key: 16 -length-signature: 36 -nistkat-sha256: adfd43ea1924dbe539aed44da0e216587afecd5feeff0c82a67abb30671160ca -testvectors-sha256: 4244f2dde66719767fa3ff7cb1dc2fbf9de738edef9c4d474feba70ad6b07cce diff --git a/gemss/meta/crypto_sign_gemss-red-192_META.yml b/gemss/meta/crypto_sign_gemss-red-192_META.yml deleted file mode 100644 index 5e964ad..0000000 --- a/gemss/meta/crypto_sign_gemss-red-192_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-red-192 -type: signature -claimed-nist-level: 3 -length-public-key: 1290543 -length-secret-key: 24 -length-signature: 55 -nistkat-sha256: a3edfab4e1387318d720589d7173a5df5221b611d4aac70ac1f9c41e31da2223 -testvectors-sha256: b6b18406f3be5e845454ce73ab6d11221cf21b18ec098db16d6e227d3a7a6185 diff --git a/gemss/meta/crypto_sign_gemss-red-256_META.yml b/gemss/meta/crypto_sign_gemss-red-256_META.yml deleted file mode 100644 index 97c3408..0000000 --- a/gemss/meta/crypto_sign_gemss-red-256_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-red-256 -type: signature -claimed-nist-level: 5 -length-public-key: 3135591 -length-secret-key: 32 -length-signature: 75 -nistkat-sha256: 624ec7629d3a835fc86a2f4cc1cb2386666b19c0f0c3df9b06f7006edb10726b -testvectors-sha256: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa diff --git a/gemss/package.sh b/gemss/package.sh deleted file mode 100755 index ce975d9..0000000 --- a/gemss/package.sh +++ /dev/null @@ -1,661 +0,0 @@ -#!/bin/bash - -BASE=`dirname $0` -BASE=`cd ${BASE} && pwd` -cd ${BASE} - -VERSION=$(cat ${BASE}/VERSION) -ARCHIVE=${VERSION/.a/.zip} - -BUILD=${BASE}/build -BUILD_UPSTREAM=${BUILD}/upstream -BUILD_CRYPTO_SIGN=${BUILD}/crypto_sign -BUILD_TEST=${BUILD}/test -BUILD_DFILE=${BUILD}/dfile - -SCRIPTS=${BASE}/scripts - -# some useful sed arguments -GRAB=( -e '/\#define/!d' -e '/\#define _.*_H$/d' -e 's/\(define .*\)U$/\1/' ) -STRAIGHTEN_IF=( -e :a -e '/#if.*\\$/N; s/\\\n//; ta' ) -STRAIGHTEN_DEF=( -e :a -e '/#define.*\\$/N; s/\\\n//; ta' ) - -set -eu - -function task { - echo -e "[ ]" $1 -} - -function endtask { - echo -e "\e[1A[x]" -} - -function cleanup { - rm -rf ${BUILD_UPSTREAM} -} -trap cleanup EXIT - -if [ -e "${BUILD_CRYPTO_SIGN}" ] -then - read -p "${BUILD_CRYPTO_SIGN} directory already exists. Delete it? " yn - if [ "${yn:-n}" != "y" ] - then - exit -1 - fi - rm -rf ${BUILD_CRYPTO_SIGN} ${BUILD_TEST} -fi -mkdir -p ${BUILD_CRYPTO_SIGN} ${BUILD_TEST} ${BUILD_DFILE} - -if [ ! 
-f ${BASE}/${ARCHIVE} ] -then - wget -P ${BASE} http://www-polsys.lip6.fr/~ryckeghem/packages/${ARCHIVE} -fi - -task "Unpacking ${ARCHIVE}" -( unzip -qq -d ${BUILD} ${BASE}/${ARCHIVE} -mv ${BUILD}/${VERSION}* ${BUILD_UPSTREAM} -cd ${BUILD_UPSTREAM} -for X in {Ref*,Opt*}/sign/*/*/* -do - IFS=/ read IMPL SIGN PARAM SUB FILE <<< ${X} - if [ ${PARAM} == 'GeMSS128' ]; then continue; fi - if [ ${FILE} == 'choice_crypto.h' ]; then continue; fi - ln -sf ../../GeMSS128/${SUB}/${FILE} ${X} -done ) -endtask - -task 'Applying patches to upstream source code' -( cd ${BUILD_UPSTREAM} - -for X in ${BASE}/patches/* -do - patch -s -p1 < ${X} -done ) -endtask - -task 'Copying files' -( for COLOR in '' 'Blue' 'Red' -do - for SECURITY in 128 192 256 - do - INPARAM=${COLOR}GeMSS${SECURITY} - OUTPARAM=gemss-$(echo $COLOR | tr [:upper:] [:lower:])-${SECURITY} - OUTPARAM=${OUTPARAM/--/-} - mkdir -p ${BUILD_CRYPTO_SIGN}/${OUTPARAM}/{clean,avx2} - cp -Lp --no-preserve=mode ${BUILD_UPSTREAM}/Reference_Implementation/sign/${INPARAM}/*/*.{c,h} ${BUILD_CRYPTO_SIGN}/${OUTPARAM}/clean/ - cp -Lp --no-preserve=mode ${BUILD_UPSTREAM}/Optimized_Implementation/sign/${INPARAM}/*/*.{c,h} ${BUILD_CRYPTO_SIGN}/${OUTPARAM}/avx2/ - for F in PQCgenKAT_sign.c debug.h KAT_int.c hash.c KAT_int.h prefix_name.h\ - randombytes_FIPS.c randombytes_FIPS.h randombytes.h rem_gf2x.h rng.c rng.h - do - rm -f ${BUILD_CRYPTO_SIGN}/${OUTPARAM}/*/$F - done - sed -i -s '/include "KAT_int.h"/d' ${BUILD_CRYPTO_SIGN}/*/*/*.h - done -done ) -endtask - -task 'Removing ifdefs' -( -UNIFDEFOPTS="-B -k -m \ --D__x86_64__ -U__cplusplus \ --U__AVX2__ -U__AVX__ -U__PCLMUL__ -U__POPCNT__ -U__SSE2__ -U__SSE4_1__ -U__SSE__ -U__SSSE3__ \ --DENABLED_GF2X=0 \ --UKAT_INT -USUPERCOP -UTEST_LIBRARY \ --UBlueGeMSS -UDualModeMS -UFGeMSS -UGeMSS -UMQSOFT_REF -UQUARTZ -UQUARTZ_V1 -URedGeMSS \ --DENABLED_SEED_SK=1 -DGEN_MINUS_VINEGARS=1 -DITOH -DInnerMode -UUNIQUE_ROOT \ --DNB_BITS_UINT=64" - -# All of the compilation decisions are set by choice_crypto.h -sed -s -i "${STRAIGHTEN_IF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/choice_crypto.h -unifdef ${UNIFDEFOPTS} -DBlueGeMSS ${BUILD_CRYPTO_SIGN}/gemss-blue-{128,192,256}/*/choice_crypto.h || true -unifdef ${UNIFDEFOPTS} -DGeMSS ${BUILD_CRYPTO_SIGN}/gemss-{128,192,256}/*/choice_crypto.h || true -unifdef ${UNIFDEFOPTS} -DRedGeMSS ${BUILD_CRYPTO_SIGN}/gemss-red-{128,192,256}/*/choice_crypto.h || true - -# We'll build without libgf2x -sed -i 's/ENABLED_GF2X 1/ENABLED_GF2X 0/' ${BUILD_CRYPTO_SIGN}/*/*/arch.h - -for X in ${BUILD_CRYPTO_SIGN}/*/*/choice_crypto.h -do - ( cd $(dirname ${X}) - PARAM=$(echo ${X} | awk -F/ '{print $(NF-2)}' ) - IMPL=$(echo ${X} | awk -F/ '{print $(NF-1)}' ) - DFILE=${BUILD_DFILE}/${PARAM} - - cat choice_crypto.h | sed "${GRAB[@]}" > ${DFILE} - - if [ ${IMPL} == "avx2" ] - then - UNIFDEFOPTS="${UNIFDEFOPTS} -DENABLED_AVX2 -DENABLED_SSE -DENABLED_POPCNT -DENABLED_PCLMUL \ - -DENABLED_PCLMUL_AVX2 -DENABLED_PCLMUL_SSE2 -DENABLED_AVX -DENABLED_SSE2 \ - -DENABLED_SSE4_1 -DENABLED_SSSE3" - else - UNIFDEFOPTS="${UNIFDEFOPTS} -UENABLED_SSE -UENABLED_POPCNT -UENABLED_PCLMUL \ - -UENABLED_PCLMUL_AVX2 -UENABLED_PCLMUL_SSE2 -UENABLED_AVX -UENABLED_SSE2 \ - -UENABLED_SSE4_1 -UENABLED_SSSE3" - fi - - # Remove line breaks in multi-line #if statements. 
- sed -s -i "${STRAIGHTEN_IF[@]}" config_gf2n.h sign_keypairHFE.c rem_gf2n.c rem_gf2n.h - - unifdef ${UNIFDEFOPTS} -f ${DFILE} parameters_HFE.h || true - cat parameters_HFE.h | sed "${GRAB[@]}" >> ${DFILE} - - unifdef ${UNIFDEFOPTS} -f ${DFILE} arch.h || true - cat arch.h | sed "${STRAIGHTEN_DEF[@]}" | sed "${GRAB[@]}" >> ${DFILE} - - unifdef ${UNIFDEFOPTS} -f ${DFILE} chooseRootHFE_gf2nx.h || true - cat chooseRootHFE_gf2nx.h | sed "${GRAB[@]}" >> ${DFILE} - - FILE=config_gf2n.h - unifdef ${UNIFDEFOPTS} -f ${DFILE} ${FILE} || true - grep "#define K[123]" ${FILE} | sed "${GRAB[@]}" >> ${DFILE} - unifdef ${UNIFDEFOPTS} -UK2 -UK3 -f ${DFILE} ${FILE} || true - grep "#define __.*NOMIAL_GF2N__" ${FILE} | sed "${GRAB[@]}" >> ${DFILE} - unifdef ${UNIFDEFOPTS} -U__TRINOMIAL_GF2N__ -U__PENTANOMIAL_GF2N__ -f ${DFILE} ${FILE} || true - UNIFDEFOPTS="${UNIFDEFOPTS} -UK2 -UK3 -U__TRINOMIAL_GF2N__ -U__PENTANOMIAL_GF2N__" - - FILE=config_HFE.h - unifdef ${UNIFDEFOPTS} -f ${DFILE} ${FILE} || true - cat ${FILE} | sed "${GRAB[@]}" >> ${DFILE} - - FILE=frobeniusMap_gf2nx.h - unifdef ${UNIFDEFOPTS} -f ${DFILE} ${FILE} || true - grep "#define.*II" ${FILE} | sed "${GRAB[@]}" >> ${DFILE} - - K=$(grep 'define K [0-9]' ${DFILE} | awk '{print $(NF)}') - N=$(grep 'define HFEn [0-9]' ${DFILE} | awk '{print $(NF)}') - V=$(grep 'define HFEv [0-9]' ${DFILE} | awk '{print $(NF)}') - DELTA=$(grep 'define HFEDELTA [0-9]' ${DFILE} | awk '{print $(NF)}') - HFEDEG=$(grep 'define HFEDeg [0-9]' ${DFILE} | awk '{print $(NF)}') - HFEDEGI=$(grep 'define HFEDegI [0-9]' ${DFILE} | awk '{print $(NF)}') - HFEDEGJ=$(grep 'define HFEDegJ [0-9]' ${DFILE} | awk '{print $(NF)}') - NB_ITE=$(grep 'define NB_ITE [0-9]' ${DFILE} | awk '{print $(NF)}') - II=$(grep 'define II [0-9]' ${DFILE} | awk '{print $(NF)}') - - NR=$((${N}%64)) - NQ=$((${N}/64)) - NR8=$((${N}%8)) - NQ8=$((${N}/8)) - NB_WORD_MUL=$(((2*(${N}-1))/64+1)) - NB_WORD_MMUL=$(((2*(${N}-1))/64+1)) - - VR=$((${V}%64)) - VQ=$((${V}/64)) - VR8=$((${V}%8)) - VQ8=$((${V}/8)) - - M=$((${N}-${DELTA})) - MR=$((${M}%64)) - MQ=$((${M}/64)) - MR8=$((${M}%8)) - MQ8=$((${M}/8)) - - NV=$((${N}+${V})) - NVR=$((${NV}%64)) - NVQ=$((${NV}/64)) - NVR8=$((${NV}%8)) - NVQ8=$((${NV}/8)) - - NB_MONOMIAL_PK=$(((${NV}*(${NV}+1))/2+1)) - - HFENq=$((${NB_MONOMIAL_PK}/64)) - HFENq8=$((${NB_MONOMIAL_PK}/8)) - HFENr=$((${NB_MONOMIAL_PK}%64)) - HFENr8=$((${NB_MONOMIAL_PK}%8)) - HFENr8c=$(((8-(${NB_MONOMIAL_PK}%8))%8)) - - LOST_BITS=$(((${MR8}-1)*${HFENr8c})) - - NB_WORD_GFqn=${NQ} - [ ${NR} -ne 0 ] && ((NB_WORD_GFqn+=1)) - - NB_WORD_GFqv=${VQ} - [ ${VR} -ne 0 ] && ((NB_WORD_GFqv+=1)) - - NB_WORD_GF2nv=${NVQ} - [ ${NVR} -ne 0 ] && ((NB_WORD_GF2nv+=1)) - - NB_BYTES_GFqm=${MQ8} - [ ${MR8} -ne 0 ] && ((NB_BYTES_GFqm+=1)) - - NB_WORD_GF2m=${MQ} - [ ${MR} -ne 0 ] && ((NB_WORD_GF2m+=1)) - - NB_WORD_GF2nvm=$((${NB_WORD_GF2nv}-${NB_WORD_GF2m})) - [ ${MR} -ne 0 ] && ((NB_WORD_GF2nvm+=1)) - - [ $((${DELTA}+${V})) -lt $((8-${MR8})) ] && - VAL_BITS_M=$((${DELTA}+${V})) || - VAL_BITS_M=$((8-${MR8})) - - echo "#define HFENq ${HFENq}" >> ${DFILE} - echo "#define HFENq8 ${HFENq8}" >> ${DFILE} - echo "#define HFENr ${HFENr}" >> ${DFILE} - echo "#define HFENr8 ${HFENr8}" >> ${DFILE} - echo "#define HFENr8c ${HFENr8c}" >> ${DFILE} - echo "#define HFEm ${M}" >> ${DFILE} - echo "#define HFEmq ${MQ}" >> ${DFILE} - echo "#define HFEmq8 ${MQ8}" >> ${DFILE} - echo "#define HFEmr ${MR}" >> ${DFILE} - echo "#define HFEmr8 ${MR8}" >> ${DFILE} - echo "#define HFEn ${N}" >> ${DFILE} - echo "#define HFEnr ${NR}" >> ${DFILE} - echo "#define HFEnr8 
$((${N}%8))" >> ${DFILE} - echo "#define HFEnv ${NV}" >> ${DFILE} - echo "#define HFEnvr ${NVR}" >> ${DFILE} - echo "#define HFEnvr8 ${NVR8}" >> ${DFILE} - echo "#define HFEnvrm1 $(((${NV}-1)%64))" >> ${DFILE} - echo "#define HFEv ${V}" >> ${DFILE} - echo "#define HFEvr ${VR}" >> ${DFILE} - echo "#define HFEvr8 ${VR8}" >> ${DFILE} - echo "#define KI ${NR}" >> ${DFILE} - echo "#define LAST_ROW_Q $(((${NV}-${LOST_BITS})/64))" >> ${DFILE} - echo "#define LAST_ROW_R $(((${NV}-${LOST_BITS})%64))" >> ${DFILE} - echo "#define LOST_BITS ${LOST_BITS}" >> ${DFILE} - echo "#define NB_MONOMIAL_PK ${NB_MONOMIAL_PK}" >> ${DFILE} - echo "#define NB_WHOLE_BLOCKS $(((${NV}-(64-((${NB_MONOMIAL_PK}-${LOST_BITS}-${NVR})%64))%64)>>6))" >> ${DFILE} - echo "#define NB_WORD_GF2m ${NB_WORD_GF2m}" >> ${DFILE} - echo "#define NB_WORD_GF2m_TMP ${NB_WORD_GF2m}" >> ${DFILE} - echo "#define NB_WORD_GF2nv ${NB_WORD_GF2nv}" >> ${DFILE} - echo "#define NB_WORD_GF2nv_TMP ${NB_WORD_GF2nv}" >> ${DFILE} - echo "#define NB_WORD_GF2nvm ${NB_WORD_GF2nvm}" >> ${DFILE} - echo "#define NB_WORD_GFqn ${NB_WORD_GFqn}" >> ${DFILE} - echo "#define NB_WORD_GFqn_TMP ${NB_WORD_GFqn}" >> ${DFILE} - echo "#define NB_WORD_GFqv ${NB_WORD_GFqv}" >> ${DFILE} - echo "#define NB_WORD_GFqv_TMP ${NB_WORD_GFqv}" >> ${DFILE} - echo "#define NB_WORD_MMUL ${NB_WORD_MMUL}" >> ${DFILE} - echo "#define NB_WORD_MMUL_TMP ${NB_WORD_MMUL}" >> ${DFILE} - echo "#define NB_WORD_MUL ${NB_WORD_MUL}" >> ${DFILE} - echo "#define REM_MACRO 0" >> ${DFILE} - echo "#define SIZE_ALIGNED_GFqm 0" >> ${DFILE} - echo "#define SIZE_ALIGNED_GFqn 0" >> ${DFILE} - - FILE=gf2nx.h - sed -i "s/(HFEDeg&1U\?)/($((${HFEDEG}%2)))/" ${FILE} - unifdef ${UNIFDEFOPTS} -f ${DFILE} ${FILE} || true - grep 'define ENABLED_REMOVE_ODD_DEGREE' ${FILE} >> ${DFILE} - - # Done extracting defines. Do a full pass to simplify later work. - - unifdef ${UNIFDEFOPTS} -f ${DFILE} *.{c,h} || true - - # Clean up some arithmetic that unifdef cannot handle. 
- - FILE=chooseRootHFE_gf2nx.h - sed -i 's/FIRST_ROOT+DETERMINIST_ROOT+QUARTZ_ROOT/1/' ${FILE} - - FILE=bit.h - sed -i 's/if (NB_BITS_UINT==64U)/if (1)/' ${FILE} - - FILE=convMQ_gf2.c - VAL=$((((${NB_MONOMIAL_PK}-${LOST_BITS}+7)/8)%8)) - sed -i "s/((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7/${VAL}/" ${FILE} - - FILE=evalMQSv_gf2.c - NB_WORD_EQ=${NQ} - [ ${NR} -ne 0 ] && ((NB_WORD_EQ+=1)) - unifdef ${UNIFDEFOPTS} -DNB_VAR=${V} -DNB_VARq=${VQ} -DNB_VARr=${VR} -DNB_EQr=${NR}\ - -DNB_WORD_EQ_TMP=${NB_WORD_EQ} -DNB_WORD_EQ=${NB_WORD_EQ}\ - -DHYBRID_FUNCTIONS=0\ - -f ${DFILE} ${FILE} || true - - FILE=evalMQSnocst8_gf2.c - NB_BYTES_EQ=${MQ8} - [ ${MR} -ne 0 ] && ((NB_BYTES_EQ+=1)) - NB_WORD_EQ_NOCST8=$(((${NB_BYTES_EQ}+7)/8)) - LEN_UNROLLED_64=$(grep 'define LEN_UNROLLED_64' evalMQSnocst8_gf2.c | awk '{print $(NF)}') - sed -i "s/NB_EQ&63/${MR}/" ${FILE} - sed -i "s/LEN_UNROLLED_64<<1/$((${LEN_UNROLLED_64}*2))/" evalMQSnocst8_gf2.c - unifdef ${UNIFDEFOPTS} -DNB_VAR=${NV} -DNB_VARq=${NVQ} -DNB_VARr=${NVR} -DNB_EQr=${MR}\ - -DNB_BYTES_EQ=${NB_BYTES_EQ} -DNB_WORD_EQ=$((NB_WORD_EQ_NOCST8))\ - -DLEN_UNROLLED_64=${LEN_UNROLLED_64}\ - -f ${DFILE} ${FILE} || true - - FILE=evalMQSnocst8_quo_gf2.c - [ ${MQ8} == 0 ] && NB_EQ=${M} || NB_EQ=$((8*${MQ8})) - NB_EQR=$((${NB_EQ}%8)) - NB_BYTES_EQ=$((${NB_EQ}/8)) - [ ${NB_EQR} -ne 0 ] && ((NB_BYTES_EQ+=1)) - NB_WORD_EQ=$(((${NB_BYTES_EQ}+7)/8)) - sed -i "s/NB_EQ&63/$((${NB_EQ}%64))/" ${FILE} - LEN_UNROLLED_64=$(grep 'define LEN_UNROLLED_64' ${FILE} | awk '{print $(NF)}') - sed -i "s/LEN_UNROLLED_64<<1/$((${LEN_UNROLLED_64}*2))/" ${FILE} - unifdef ${UNIFDEFOPTS} -DNB_VAR=${NV} -DNB_VARq=${NVQ} -DNB_VARr=${NVR}\ - -DNB_EQq_orig=${MQ8} -DNB_EQr=$((${NB_EQ}%8)) \ - -DNB_BYTES_EQ=${NB_BYTES_EQ} -DNB_WORD_EQ=${NB_WORD_EQ} \ - -DLEN_UNROLLED_64=${LEN_UNROLLED_64} \ - -f ${DFILE} ${FILE} || true - - FILE=frobeniusMap_gf2nx.c - [ ${IMPL} == "avx2" ] && - sed -i "s/(HFEn-HFEDegI)%II/$(((${N} - ${HFEDEGI})%${II}))/" ${FILE} - - FILE=frobeniusMap_gf2nx.h - [ ${IMPL} == "avx2" ] && - sed -i "s/((HFEDeg%POW_II)?1:0)/$(((${HFEDEG}%(1<<${II}))?1:0))/" ${FILE} - - FILE=inv_gf2n.c - sed -i "s/HFEn&63/$((${N}%64))/" ${FILE} - - FILE=mixEquationsMQS_gf2.c - sed -i "s/NB_BYTES_GFqm&7/$((${NB_BYTES_GFqm}%8))/" ${FILE} - - FILE=parameters_HFE.h - sed -i "s/K<<1/$((2*${K}))/" ${FILE} - sed -i "s/(1U< -#include - -#define CRYPTO_ALGNAME \"${PARAM}\" - -#define CRYPTO_SECRETKEYBYTES ${SIZE_SK} -#define CRYPTO_PUBLICKEYBYTES ${SIZE_PK} -#define CRYPTO_BYTES ${SIZE_SIGN} - -int crypto_sign_keypair(uint8_t *pk, uint8_t *sk); -int crypto_sign(uint8_t *sm, size_t *smlen, const uint8_t *msg, size_t len, const uint8_t *sk); -int crypto_sign_open(uint8_t *m, size_t *mlen, const uint8_t *sm, size_t smlen, const uint8_t *pk); -int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk); -int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk); - -#endif -" > api.h -) -done ) -endtask - -task 'Sorting #includes' -( for PARAM in gemss-{,blue-,red-}{128,192,256} -do - for IMPL in clean avx2 - do - for F in ${BUILD_CRYPTO_SIGN}/${PARAM}/${IMPL}/*.h - do - START=$(grep -n -m 1 '^\s*#include' ${F} | cut -d: -f1) - if [ x${START} == x ]; then continue; fi - GUARD=$(head -n $((${START}-1)) ${F}) - INCL1=$(grep '^\s*#include \"' ${F} | sed 's/^\s*//' | LC_ALL=C sort -u) - INCL2=$(grep '^\s*#include <' ${F} | sed 's/^\s*//' | LC_ALL=C sort -u) - REST=$(tail -n+$((${START}+1)) ${F} | sed '/^\s*#include/d') - echo 
"${GUARD}\n${INCL1}\n${INCL2}\n${REST}" | sed 's/\\n/\n/g' > ${F} - done - for F in ${BUILD_CRYPTO_SIGN}/${PARAM}/${IMPL}/*.c - do - INCL1=$(grep '^\s*#include \"' ${F} | sed 's/^\s*//' | LC_ALL=C sort -u) - INCL2=$(grep '^\s*#include <' ${F} | sed 's/^\s*//' | LC_ALL=C sort -u) - REST=$(sed '/^\s*#include/d' ${F}) - echo "${INCL1}\n${INCL2}\n${REST}" | sed 's/\\n/\n/g' > ${F} - done - done -done ) -endtask - -#MANIFEST=${BUILD_TEST}/duplicate_consistency -#mkdir -p ${MANIFEST} -#task "Preparing for duplicate consistency" -#( cd ${MANIFEST} -#for P1 in gemss-{,blue-,red-}{128,192,256} -#do -# for OUT in clean avx2 -# do -# sha1sum ${BUILD_CRYPTO_SIGN}/${P1}/${OUT}/*.{h,c} > ${P1}_${OUT}.xxx -# done -#done -#) -#endtask -# -#( cd ${MANIFEST} -#for P1 in gemss-{,blue-,red-}{128,192,256} -#do -# for OUT in clean avx2 -# do -# task "${P1}/${OUT} duplicate consistency" -# echo "\ -#consistency_checks:" > ${P1}_${OUT}.yml -# for P2 in gemss-{,blue-,red-}{128,192,256} -# do -# for IN in clean avx2 -# do -# if ([ "${P1}" == "${P2}" ] && [ "${IN}" == "${OUT}" ]) || [ "${P1}" \> "${P2}" ]; then continue; fi -# echo "\ -#- source: -# scheme: ${P2} -# implementation: ${IN} -# files:" >> ${P1}_${OUT}.yml -# for HASH in $(cat ${P2}_${IN}.xxx | cut -d ' ' -f 1) -# do -# X=$(grep $HASH ${P1}_${OUT}.xxx | cut -d ' ' -f 3) -# if [ x${X} != 'x' ] -# then -# [ -e ${BUILD_CRYPTO_SIGN}/${P2}/${OUT}/$(basename $X) ] && \ -# echo "\ -# - $(basename $X)" >> ${P1}_${OUT}.yml -# fi -# done -# done -# done -# endtask -# done -#done -#) -#rm -rf ${MANIFEST}/*.xxx - -task 'Namespacing' -( - -# Fix definitions that need namespacing but are split over multiple lines -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/conv_gf2nx.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/convMQS_gf2.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/evalMQShybrid_gf2.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/evalMQnocst_gf2.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/avx2/frobeniusMap_gf2nx.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/avx2/genCanonicalBasis_gf2n.h - -# GeMSS has its own namespacing macro. We'll delete it and do it our way. -sed -i -s '/include "prefix_name.h"/d' ${BUILD_CRYPTO_SIGN}/*/*/*.h -sed -i -s 's/^\s*\(int\|void\|gf2\|UINT\|uint64_t\|unsigned int\)\s\+PREFIX_NAME(\([^)]*\))/\1 \2/' ${BUILD_CRYPTO_SIGN}/*/*/*.{h,c} - -# Insert hooks for namespacing. These will be removed later. 
-sed -i -s 's/^\s*\(int\|void\|gf2\|UINT\|uint64_t\|unsigned int\)\s\+\([^(]*\)(/#define \2 CRYPTO_NAMESPACE(\2)\n&/' ${BUILD_CRYPTO_SIGN}/*/*/*.h -sed -i -s '/#define.*PREFIX_NAME/d' ${BUILD_CRYPTO_SIGN}/*/*/*.h - -for PARAM in gemss-{,blue-,red-}{128,192,256} -do - for IMPL in clean avx2 - do - ( cd ${BUILD_CRYPTO_SIGN}/${PARAM}/${IMPL} - NAMESPACE=$(echo PQCLEAN_${PARAM//-/}_${IMPL} | tr [:lower:] [:upper:]) - for X in $(grep CRYPTO_NAMESPACE *.{c,h} | cut -f2 -d' ' | sort -u); do - sed -i -s "s/\([^a-zA-Z_]\)${X}\([^a-zA-Z\._]\|$\)/\1${NAMESPACE}_${X}\2/g" *.c *.h - done - sed -i -s '/CRYPTO_NAMESPACE/d' *.{c,h} - sed -i -s "s/CRYPTO_/${NAMESPACE}_CRYPTO_/" *.{c,h} - sed -i "s/API_H/${NAMESPACE}_API_H/" api.h - - sed -i -s "s/f_/${NAMESPACE}_/" tools_gf2n.h tools_gf2m.h hash.h - ) - done -done ) -endtask - -task 'Copying metadata' -( # Makefiles and other metadata -for PARAM in gemss-{,blue-,red-}{128,192,256} -do - ( cd ${BUILD_CRYPTO_SIGN}/${PARAM}/ - - #echo "Public Domain" > clean/LICENSE - #cp -Lp clean/LICENSE avx2/LICENSE - cp -Lp ${BASE}/meta/crypto_sign_${PARAM}_META.yml META.yml - echo "\ -principal-submitters: - - A. Casanova - - J.-C. Faugere - - G. Macario-Rat - - J. Patarin - - L. Perret - - J. Ryckeghem -implementations: - - name: clean - version: ${VERSION} - - name: avx2 - version: ${VERSION} - supported_platforms: - - architecture: x86_64 - operating_systems: - - Linux - - Darwin - required_flags: - - avx2" >> META.yml - - echo "\ -# This Makefile can be used with GNU Make or BSD Make - -LIB=lib${PARAM}_clean.a -HEADERS=$(basename -a clean/*.h | tr '\n' ' ') -OBJECTS=$(basename -a clean/*.c | sed 's/\.c/.o/' | tr '\n' ' ') - -CFLAGS=-O3 -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common \$(EXTRAFLAGS) - -all: \$(LIB) - -%.o: %.c \$(HEADERS) - \$(CC) \$(CFLAGS) -c -o \$@ $< - -\$(LIB): \$(OBJECTS) - \$(AR) -r \$@ \$(OBJECTS) - -clean: - \$(RM) \$(OBJECTS) - \$(RM) \$(LIB)" > clean/Makefile - -echo "\ -# This Makefile can be used with Microsoft Visual Studio's nmake using the command: -# nmake /f Makefile.Microsoft_nmake - -LIBRARY=lib${PARAM}_clean.lib -OBJECTS=$(basename -a clean/*.c | sed 's/\.c/.obj/' | tr '\n' ' ') - -CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX - -all: \$(LIBRARY) - -# Make sure objects are recompiled if headers change. -\$(OBJECTS): *.h - -\$(LIBRARY): \$(OBJECTS) - LIB.EXE /NOLOGO /WX /OUT:\$@ \$** - -clean: - -DEL \$(OBJECTS) - -DEL \$(LIBRARY)" > clean/Makefile.Microsoft_nmake - -echo "\ -# This Makefile can be used with GNU Make or BSD Make - -LIB=lib${PARAM}_avx2.a -HEADERS=$(basename -a avx2/*.h | tr '\n' ' ') -OBJECTS=$(basename -a avx2/*.c | sed 's/\.c/.o/' | tr '\n' ' ') - -CFLAGS=-O3 -mavx2 -mbmi -mpclmul -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common \$(EXTRAFLAGS) - -all: \$(LIB) - -%.o: %.s \$(HEADERS) - \$(AS) -o \$@ $< - -%.o: %.c \$(HEADERS) - \$(CC) \$(CFLAGS) -c -o \$@ $< - -\$(LIB): \$(OBJECTS) - \$(AR) -r \$@ \$(OBJECTS) - -clean: - \$(RM) \$(OBJECTS) - \$(RM) \$(LIB)" > avx2/Makefile - - ) -done ) -endtask - -task 'Styling' -astyle \ - --style=google \ - --indent=spaces \ - --indent-preproc-define \ - --indent-preproc-cond \ - --pad-oper \ - --pad-comma \ - --pad-header \ - --align-pointer=name \ - --add-braces \ - --convert-tabs \ - --mode=c \ - --suffix=none \ - ${BUILD_CRYPTO_SIGN}/*/*/*.{c,h} >/dev/null -endtask - -task "Removing 256-bit implementations. 
See https://github.com/PQClean/PQClean/pull/326#issuecomment-700374801" -rm -rf ${BUILD_CRYPTO_SIGN}/*256 -endtask - -# Package -task "Packaging pqclean-gemss-$(date +"%Y%m%d").tar.gz" -tar czf ${BASE}/pqclean-gemss-$(date +"%Y%m%d").tar.gz -C ${BUILD} crypto_sign test -endtask - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_add_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_add_gf2nx.h deleted file mode 100644 index 788dfc5..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_add_gf2nx.h +++ /dev/null @@ -1,30 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/add_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/add_gf2nx.h -@@ -14,7 +14,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define add2_gf2nx(res,A,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]^=(A)[i];\ - } -@@ -30,7 +30,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define copy_gf2nx(res,A,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]=(A)[i];\ - } -@@ -45,7 +45,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define set0_gf2nx(res,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]=0;\ - } - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_arch.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_arch.h deleted file mode 100644 index a6280bf..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_arch.h +++ /dev/null @@ -1,192 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/arch.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/arch.h -@@ -6,6 +6,8 @@ - #include - #include "macro.h" - -+#include -+ - - /****************** uintXX_t for compatibility ******************/ - -@@ -26,18 +28,6 @@ - #define ZERO8 ((uint8_t)0) - #define ONE8 ((uint8_t)1) - --/* 0x... */ --#define PRINT_X64(a) printf("0x%"PRIx64,a); --#define PRINT_X32(a) printf("0x%"PRIx32,a); --#define PRINT_X16(a) printf("0x%"PRIx16,a); --#define PRINT_X8(a) printf("0x%"PRIx8,a); --/* ... */ --#define PRINT_U64(a) printf("%"PRIx64,a); --#define PRINT_U32(a) printf("%"PRIx32,a); --#define PRINT_U16(a) printf("%"PRIx16,a); --#define PRINT_U8(a) printf("%"PRIx8,a); -- -- - /****************** Definition of an UINT ******************/ - - /* XXX For the moment, this parameter cannot be modified. XXX */ -@@ -81,9 +71,6 @@ - /** Print an UINT. */ - #define PRINT_UINT(a) CONCAT(PRINT_X,NB_BITS_UINT)(a); - -- -- -- - /** A reserved variable to do a for loop on a buffer of UINT. 
*/ - #define RESERVED_VARIABLE reserved_variable - -@@ -103,6 +90,35 @@ - } \ - } - -+/** Load a UINT from unsigned char * **/ -+ -+#define LOAD_UINT(a, p) \ -+ (a) = (p)[7]; (a) <<= 8;\ -+ (a) |= (p)[6]; (a) <<= 8;\ -+ (a) |= (p)[5]; (a) <<= 8;\ -+ (a) |= (p)[4]; (a) <<= 8;\ -+ (a) |= (p)[3]; (a) <<= 8;\ -+ (a) |= (p)[2]; (a) <<= 8;\ -+ (a) |= (p)[1]; (a) <<= 8;\ -+ (a) |= (p)[0]; -+ -+#define LOAD_UINT_ARRAY(a, p, N) \ -+ FOR_LOOP(LOAD_UINT((a)[RESERVED_VARIABLE], &(p)[8*RESERVED_VARIABLE]), (N)) -+ -+/** Store a UINT to an unsigned char * **/ -+#define STORE_UINT(p, a) \ -+ (p)[0] = ((a) >> 0x00) & 0xff; \ -+ (p)[1] = ((a) >> 0x08) & 0xff; \ -+ (p)[2] = ((a) >> 0x10) & 0xff; \ -+ (p)[3] = ((a) >> 0x18) & 0xff; \ -+ (p)[4] = ((a) >> 0x20) & 0xff; \ -+ (p)[5] = ((a) >> 0x28) & 0xff; \ -+ (p)[6] = ((a) >> 0x30) & 0xff; \ -+ (p)[7] = ((a) >> 0x38) & 0xff; -+ -+#define STORE_UINT_ARRAY(a, p, N) \ -+ FOR_LOOP(STORE_UINT(&(p)[8*RESERVED_VARIABLE], (a)[RESERVED_VARIABLE]), (N)) -+ - - - /****************** C++ compatibility ******************/ -@@ -160,60 +176,44 @@ - - - #ifdef __SSE__ -- #include -- /** To use sse. */ - #define ENABLED_SSE - #endif - - #ifdef __SSE2__ -- #include -- /** To use sse2. */ - #define ENABLED_SSE2 - #endif - - #ifdef __SSSE3__ -- #include -- /** To use ssse3. */ - #define ENABLED_SSSE3 - #endif - - #ifdef __SSE4_1__ -- #include -- /** To use sse4.1. */ - #define ENABLED_SSE4_1 - #endif - - #ifdef __AVX__ -- #include -- /** To use avx. */ - #define ENABLED_AVX - #endif - - #ifdef __AVX2__ -- /** To use avx2. */ - #define ENABLED_AVX2 - #endif - - #if (defined(__PCLMUL__) && defined(ENABLED_SSE)) -- #include -- /** To use multiplication in binary field with PCLMULQDQ and sse. */ - #define ENABLED_PCLMUL - #endif - - #if (defined(ENABLED_PCLMUL) && defined(ENABLED_SSE2)) -- #include - /** To use multiplication in binary field with PCLMULQDQ and sse2. */ - #define ENABLED_PCLMUL_SSE2 - #endif - - #if (defined(ENABLED_PCLMUL) && defined(ENABLED_AVX2)) -- #include - /** To use multiplication in binary field with PCLMULQDQ and avx2. */ - #define ENABLED_PCLMUL_AVX2 - #endif - - #ifdef __POPCNT__ -- #include - /** Improve the computation of the number of bits set to 1 in a 64-bit - * or 32-bit integer. */ - #define ENABLED_POPCNT -@@ -237,7 +237,7 @@ - /** Verify if the allocation by malloc or calloc succeeds. - * Exit in the failure case. */ - #define VERIFY_ALLOC(p) \ -- if(!p) \ -+ if(!(p)) \ - {\ - exit(ERROR_ALLOC);\ - } -@@ -245,7 +245,7 @@ - /** Verify if the allocation by malloc or calloc succeeds. - * Return ERROR_ALLOC in the failure case. */ - #define VERIFY_ALLOC_RET(p) \ -- if(!p) \ -+ if(!(p)) \ - {\ - return(ERROR_ALLOC);\ - } -@@ -272,7 +272,7 @@ - p=(type)_mm_malloc((nmemb)*(size),16); - #else - #define ALIGNED16_MALLOC(p,type,nmemb,size) \ -- if(posix_memalign((void**)(&p),16,(nmemb)*(size)))\ -+ if(posix_memalign((void**)(&(p)),16,(nmemb)*(size)))\ - {\ - exit(1);\ - } -@@ -280,7 +280,7 @@ - - #define ALIGNED16_CALLOC(p,type,nmemb,size) \ - ALIGNED16_MALLOC(p,type,nmemb,size);\ -- memset((void*)p,0,(nmemb)*(size)) -+ memset((void*)(p),0,(nmemb)*(size)) - - - /** Align the data on 32 bytes, useful for avx. 
*/ -@@ -291,7 +291,7 @@ - p=(type)_mm_malloc((nmemb)*(size),32); - #else - #define ALIGNED32_MALLOC(p,type,nmemb,size) \ -- if(posix_memalign((void**)(&p),32,(nmemb)*(size)))\ -+ if(posix_memalign((void**)(&(p)),32,(nmemb)*(size)))\ - {\ - exit(1);\ - } -@@ -299,7 +299,7 @@ - - #define ALIGNED32_CALLOC(p,type,nmemb,size) \ - ALIGNED32_MALLOC(p,type,nmemb,size);\ -- memset((void*)p,0,(nmemb)*(size)); -+ memset((void*)(p),0,(nmemb)*(size)); - - - #define NO_ALIGNED_MALLOC(p,type,nmemb,size) p=(type)malloc((nmemb)*(size)); - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_bit.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_bit.h deleted file mode 100644 index e36ad1b..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_bit.h +++ /dev/null @@ -1,383 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/bit.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/bit.h -@@ -8,18 +8,8 @@ - - /* Tools for the bits manipulation */ - -- --/* (2^k) - 1, k<64, and -1 for k=0 */ --#define mask64(k) ((k)?(ONE64<<(k))-ONE64:MONE64) -- --/* (2^k) - 1, k<32, and -1 for k=0 */ --#define mask32(k) ((k)?(ONE32<<(k))-ONE32:MONE32) -- --#define maskUINT(k) ((k)?(UINT_1<<(k))-UINT_1:UINT_M1) -- -- - /** The i-th bit of the UINT val. */ --#define ITHBIT(val,i) ((val>>i)&UINT_1) -+#define ITHBIT(val,i) (((val)>>(i))&UINT_1) - - - /** Compute the MSB position of one UINT. */ -@@ -27,14 +17,14 @@ - Output: res the MSB position of U. If U is zero, res=0 - */ - #define MSB_SP(res,U,j) \ -- res=0;\ -+ (res)=0;\ - /* Search the MSB position of one word */\ -- for(j=NB_BITS_UINT>>1;j!=0;j>>=1) \ -+ for((j)=NB_BITS_UINT>>1;(j)!=0;(j)>>=1) \ - {\ -- if((U)>>(res^j))\ -+ if((U)>>((res)^(j)))\ - {\ - /* To remember the choice of the high part */\ -- res^=j;\ -+ (res)^=(j);\ - }\ - } - -@@ -43,15 +33,15 @@ - Output: res the MSB position of U. 
If U is zero, res=0 - */ - #define MSB_MP(res,U,i,j,nb_word) \ -- i=nb_word-1;\ -+ (i)=(nb_word)-1;\ - /* Search the first word different from zero */\ -- while(i&&(!U[i])) \ -+ while((i)&&(!(U)[i])) \ - {\ -- --i;\ -+ --(i);\ - }\ - /* Search the MSB of one word */\ -- MSB_SP(res,U[i],j);\ -- res^=i<>32); -+ (n)=POPCNT_U32(n)+POPCNT_U32(n>>32); - #endif - - #define COUNTBITS32_POP(n) \ -- n=POPCNT_U32(n); -+ (n)=POPCNT_U32(n); - - #define ORBITS64_POP(n) \ - COUNTBITS64_POP(n); \ - /* The result is in {0,1,...,64} */\ -- n+=63;\ -+ (n)+=63;\ - /* Now, the result is in {63,64,...,127} */\ -- n>>=6;\ -+ (n)>>=6;\ - /* Now, the result is in {0,1,...,1} */ - - - #define NORBITS64_POP(n) \ -- --n;\ -+ --(n);\ - COUNTBITS64_POP(n); \ - /* If n=0, then Hamming_weight(n-1)==64, else Hamming_weight(n-1)<64 */\ - /* Now, the result is in {64,0,...,63} */\ -- n>>=6;\ -+ (n)>>=6;\ - /* Now, the result is in {1,0,...,0} */ - - - #define NORBITS64_POP2(n) \ - COUNTBITS64_POP(n); \ - /* The result is in {0,1,...,64} */\ -- --n;\ -+ --(n);\ - /* Now, the result is in {2^{64}-1,0,...,63} */\ -- n>>=63;\ -+ (n)>>=63;\ - /* Now, the result is in {1,0,...,0} */ - - - #define XORBITS64_POP(n) \ - COUNTBITS64_POP(n); \ -- n&=ONE64; -+ (n)&=ONE64; - #endif - - -@@ -120,18 +110,18 @@ - - /* 5 logical operations */ - #define ORBITS64_SHORT(n) \ -- n|=n << 32U;\ -- n>>=32U;\ -- n+=((uint64_t)0xFFFFFFFF);\ -- n>>=32U; -+ (n)|=(n) << 32U;\ -+ (n)>>=32U;\ -+ (n)+=((uint64_t)0xFFFFFFFF);\ -+ (n)>>=32U; - - - /* 5 logical operations */ - #define NORBITS64_SHORT(n) \ -- n|=n << 32U;\ -- n>>=32U;\ -- --n;\ -- n>>=63U; -+ (n)|=(n) << 32U;\ -+ (n)>>=32U;\ -+ --(n);\ -+ (n)>>=63U; - - - /* The third fastest method, based on the variable-precision SWAR algorithm */ -@@ -141,95 +131,95 @@ - - /* 12 logical operations */ - #define COUNTBITS64_SWAR(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -- n=(((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)=((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) >> 56U; - - - /* 13 logical operations */ - #define ORBITS64_SWAR(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ - /* We change ((n/(2^56))+63)/64 in (n+63*(2^56))/(2^62) */\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) + ((uint64_t)0x3F00000000000000)) >> 62U; - - - /* 13 logical operations */ - #define NORBITS64_SWAR(n) \ -- --n;\ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ --(n);\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101))) >> 62U; - - - /* 13 logical operations */ - #define 
NORBITS64_SWAR2(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ - /* We remark that ({0,1,...,63}+255)<<56 ={255<<56,0<<56,...,62<<56}*2^56*/\ - /* So, the 63-th bit is 1 iff Hamming_weight(n)=0 */\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) + ((uint64_t)0xFF00000000000000)) >> 63U; - - - /* Slow, 13 logical operations */ - #define XORBITS64_SWAR2(n) \ - COUNTBITS64_SWAR(n); \ -- n&=ONE64; -+ (n)&=ONE64; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS64_SWAR(n) \ - /* +*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+***+***+***+***+***+***+***+***+***+***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000+000+000+000+000+000+000+000+000+000+000+000+000 */\ - /* Then, we sum the 16 bits and store them in the bits 63 to 67. */\ - /* So the 63-th bit in the bit of parity. */\ -- n=((n & ((uint64_t)0x8888888888888888)) *((uint64_t)0x1111111111111111))\ -+ (n)=(((n) & ((uint64_t)0x8888888888888888)) *((uint64_t)0x1111111111111111))\ - >> 63; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS32_SWAR(n) \ - /* +*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+***+***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000+000+000+000+000 */\ - /* Then, we sum the 8 bits and store them in the bits 31 to 34. */\ - /* So the 31-th bit in the bit of parity. */\ -- n=((n & ((uint32_t)0x88888888)) *((uint32_t)0x11111111)) >> 31; -+ (n)=(((n) & ((uint32_t)0x88888888)) *((uint32_t)0x11111111)) >> 31; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS16_SWAR(n) \ - /* +*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000 */\ - /* Then, we sum the 4 bits and store them in the bits 15 to 17. */\ - /* So the 15-th bit in the bit of parity. */\ -- n=((n & ((uint16_t)0x8888)) *((uint16_t)0x1111)) >> 15; -+ (n)=(((n) & ((uint16_t)0x8888)) *((uint16_t)0x1111)) >> 15; - - - /* A special algorithm with 5 logical operations */ - #define XORBITS8_SWAR(n) \ - /* +*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +0+0+0+0 */\ - /* Then, we sum the 4 bits and store them in the bits 7 to 9. */\ - /* So the 15-th bit in the bit of parity. 
*/\ -- n=((n & ((uint8_t)0xAA)) *((uint8_t)0x55)) >> 7; -+ (n)=(((n) & ((uint8_t)0xAA)) *((uint8_t)0x55)) >> 7; - - - /* The slowest method, based on the "dichotomic xor/or" */ -@@ -238,55 +228,55 @@ - /* A generic method using the dichotomic principle */ - #define ORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n|=n>>RESERVED_VARIABLE)\ -- n&=UINT_1; -+ (n)|=(n)>>RESERVED_VARIABLE)\ -+ (n)&=UINT_1; - - - #define NORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n|=n>>RESERVED_VARIABLE)\ -- n=~n;\ -- n&=UINT_1; -+ (n)|=(n)>>RESERVED_VARIABLE)\ -+ (n)=~(n);\ -+ (n)&=UINT_1; - - - #define XORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n^=n>>RESERVED_VARIABLE)\ -- n&=UINT_1; -+ (n)^=(n)>>RESERVED_VARIABLE)\ -+ (n)&=UINT_1; - - - /* 13 logical operations */ - #define ORBITS64_DICHO(n) \ -- n|=n >> 32U;\ -- n|=n >> 16U;\ -- n|=n >> 8U;\ -- n|=n >> 4U;\ -- n|=n >> 2U;\ -- n|=n >> 1U;\ -- n&=ONE64; -+ (n)|=(n) >> 32U;\ -+ (n)|=(n) >> 16U;\ -+ (n)|=(n) >> 8U;\ -+ (n)|=(n) >> 4U;\ -+ (n)|=(n) >> 2U;\ -+ (n)|=(n) >> 1U;\ -+ (n)&=ONE64; - - - /* 14 logical operations */ - #define NORBITS64_DICHO(n) \ -- n|=n >> 32U;\ -- n|=n >> 16U;\ -- n|=n >> 8U;\ -- n|=n >> 4U;\ -- n|=n >> 2U;\ -- n|=n >> 1U;\ -- n=~n;\ -- n&=ONE64; -+ (n)|=(n) >> 32U;\ -+ (n)|=(n) >> 16U;\ -+ (n)|=(n) >> 8U;\ -+ (n)|=(n) >> 4U;\ -+ (n)|=(n) >> 2U;\ -+ (n)|=(n) >> 1U;\ -+ (n)=~(n);\ -+ (n)&=ONE64; - - - /* 13 logical operations */ - #define XORBITS64_DICHO(n) \ -- n^=n >> 32U;\ -- n^=n >> 16U;\ -- n^=n >> 8U;\ -- n^=n >> 4U;\ -- n^=n >> 2U;\ -- n^=n >> 1U;\ -- n&=ONE64; -+ (n)^=(n) >> 32U;\ -+ (n)^=(n) >> 16U;\ -+ (n)^=(n) >> 8U;\ -+ (n)^=(n) >> 4U;\ -+ (n)^=(n) >> 2U;\ -+ (n)^=(n) >> 1U;\ -+ (n)&=ONE64; - - - /* Choose the best method */ -@@ -311,22 +301,10 @@ - #endif - - --#if (NB_BITS_UINT==64U) -- #define COUNTBITS_UINT CONCAT(COUNTBITS,NB_BITS_UINT) -- #define ORBITS_UINT CONCAT( ORBITS,NB_BITS_UINT) -- #define NORBITS_UINT CONCAT( NORBITS,NB_BITS_UINT) -- #define XORBITS_UINT CONCAT( XORBITS,NB_BITS_UINT) --#elif defined(MQSOFT_REF) -- #define COUNTBITS_UINT COUNTBITS64_SWAR -- #define ORBITS_UINT(n) ORBITS(n,NB_BITS_UINT) -- #define NORBITS_UINT(n) NORBITS(n,NB_BITS_UINT) -- #define XORBITS_UINT(n) XORBITS(n,NB_BITS_UINT) --#else -- #define COUNTBITS_UINT COUNTBITS64_SWAR -- #define ORBITS_UINT(n) ORBITS(n,NB_BITS_UINT) -- #define NORBITS_UINT(n) NORBITS(n,NB_BITS_UINT) -- #define XORBITS_UINT(n) XORBITS(n,NB_BITS_UINT) --#endif -+#define COUNTBITS_UINT COUNTBITS64 -+#define ORBITS_UINT ORBITS64 -+#define NORBITS_UINT NORBITS64 -+#define XORBITS_UINT XORBITS64 - - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h deleted file mode 100644 index cdf0268..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/chooseRootHFE_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/chooseRootHFE_gf2nx.h -@@ -30,7 +30,7 @@ - #include "gf2nx.h" - - int PREFIX_NAME(chooseRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - #define chooseRootHFE_gf2nx PREFIX_NAME(chooseRootHFE_gf2nx) - #endif - diff --git 
a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_conv_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_conv_gf2nx.h deleted file mode 100644 index 76647dd..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_conv_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/conv_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/conv_gf2nx.h -@@ -10,7 +10,7 @@ - - - void PREFIX_NAME(convHFEpolynomialSparseToDense_gf2nx)(gf2nx F_dense, -- const complete_sparse_monic_gf2nx F); -+ complete_sparse_monic_gf2nx F); - #define convHFEpolynomialSparseToDense_gf2nx \ - PREFIX_NAME(convHFEpolynomialSparseToDense_gf2nx) - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_div_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_div_gf2nx.h deleted file mode 100644 index 2d7c011..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_div_gf2nx.h +++ /dev/null @@ -1,41 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/div_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/div_gf2nx.h -@@ -24,13 +24,13 @@ - - - unsigned int PREFIX_NAME(div_r_HFE_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(div_r_HFE_cstdeg_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(div_r_HFE_cst_gf2nx)(gf2nx poly, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - #define div_r_HFE_gf2nx PREFIX_NAME(div_r_HFE_gf2nx) - #define div_r_HFE_cstdeg_gf2nx PREFIX_NAME(div_r_HFE_cstdeg_gf2nx) -@@ -39,16 +39,16 @@ - - #if ENABLED_REMOVE_ODD_DEGREE - void PREFIX_NAME(divsqr_r_HFE_cstdeg_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(divsqr_r_HFE_cst_gf2nx)(gf2nx poly, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - #define divsqr_r_HFE_cstdeg_gf2nx PREFIX_NAME(divsqr_r_HFE_cstdeg_gf2nx) - #define divsqr_r_HFE_cst_gf2nx PREFIX_NAME(divsqr_r_HFE_cst_gf2nx) - #else -- #define divsqr_r_HFE_cstdeg_gf2nx PREFIX_NAME(div_r_HFE_cstdeg_gf2nx) -- #define divsqr_r_HFE_cst_gf2nx PREFIX_NAME(div_r_HFE_cst_gf2nx) -+ #define divsqr_r_HFE_cstdeg_gf2nx div_r_HFE_cstdeg_gf2nx -+ #define divsqr_r_HFE_cst_gf2nx div_r_HFE_cst_gf2nx - #endif - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_dotProduct_gf2.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_dotProduct_gf2.h deleted file mode 100644 index 566feaf..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_dotProduct_gf2.h +++ /dev/null @@ -1,77 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/dotProduct_gf2.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/dotProduct_gf2.h -@@ -10,50 +10,50 @@ - - /* Dot product of vector of bits */ - #define DOTPRODUCT(res,a,b,SIZE) \ -- res=(a)[0]&(b)[0];\ -+ (res)=(a)[0]&(b)[0];\ - FOR_LOOP_COMPLETE(1,RESERVED_VARIABLE<(SIZE),++RESERVED_VARIABLE,\ -- res^=(a)[RESERVED_VARIABLE]&(b)[RESERVED_VARIABLE])\ -+ (res)^=(a)[RESERVED_VARIABLE]&(b)[RESERVED_VARIABLE])\ - XORBITS_UINT(res); - - - /* Inlined version */ - #define DOTPRODUCT1(res,a,b) \ -- res=(a)[0]&(b)[0];\ 
-+ (res)=(a)[0]&(b)[0];\ - XORBITS_UINT(res); - - #define DOTPRODUCT2(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ - XORBITS_UINT(res); - - #define DOTPRODUCT3(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ - XORBITS_UINT(res); - - #define DOTPRODUCT4(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ - XORBITS_UINT(res); - - #define DOTPRODUCT5(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -- res^=(a)[4]&(b)[4];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ -+ (res)^=(a)[4]&(b)[4];\ - XORBITS_UINT(res); - - #define DOTPRODUCT6(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -- res^=(a)[4]&(b)[4];\ -- res^=(a)[5]&(b)[5];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ -+ (res)^=(a)[4]&(b)[4];\ -+ (res)^=(a)[5]&(b)[5];\ - XORBITS_UINT(res); - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h deleted file mode 100644 index 56bb54c..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h +++ /dev/null @@ -1,22 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/findRoots_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/findRoots_gf2nx.h -@@ -19,14 +19,14 @@ - convHFEpolynomialSparseToDense_gf2nx(poly2,F);\ - /* Initialize to F-U */\ - add2_gf2n(poly2,U);\ -- l=gcd_gf2nx(&i,poly2,d2,poly,l); -+ (l)=gcd_gf2nx(&(i),poly2,d2,poly,l); - - --int PREFIX_NAME(findRootsHFE_gf2nx)(vec_gf2n* roots, -- const complete_sparse_monic_gf2nx F, -+int PREFIX_NAME(findRootsHFE_gf2nx)(vec_gf2n roots, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - int PREFIX_NAME(findUniqRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - #define findRootsHFE_gf2nx PREFIX_NAME(findRootsHFE_gf2nx) - #define findUniqRootHFE_gf2nx PREFIX_NAME(findUniqRootHFE_gf2nx) - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h deleted file mode 100644 index 41e1286..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h +++ /dev/null @@ -1,21 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/frobeniusMap_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/frobeniusMap_gf2nx.h -@@ -9,7 +9,7 @@ - #include "gf2nx.h" - - --unsigned int PREFIX_NAME(frobeniusMap_HFE_gf2nx)(gf2nx Xqn, const -+unsigned int PREFIX_NAME(frobeniusMap_HFE_gf2nx)(gf2nx Xqn, - complete_sparse_monic_gf2nx F, cst_gf2n U); - #define frobeniusMap_HFE_gf2nx PREFIX_NAME(frobeniusMap_HFE_gf2nx) - -@@ -87,7 +87,7 @@ - #define KX (HFEDeg-KP) - - --unsigned int PREFIX_NAME(frobeniusMap_multisqr_HFE_gf2nx)(gf2nx Xqn, const -+unsigned int PREFIX_NAME(frobeniusMap_multisqr_HFE_gf2nx)(gf2nx Xqn, - complete_sparse_monic_gf2nx F, cst_gf2n U); - #define 
frobeniusMap_multisqr_HFE_gf2nx \ - PREFIX_NAME(frobeniusMap_multisqr_HFE_gf2nx) - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_gf2nx.h deleted file mode 100644 index 03eec7c..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/gf2nx.h -@@ -119,7 +119,7 @@ - - /* A structure with a special list to find the exponents of the monomials */ - typedef struct { -- cst_sparse_monic_gf2nx poly; -+ UINT poly[NB_UINT_HFEPOLY]; - /* List of the successive differences of the exponents of the monomials of - poly multiplied by NB_WORD_GFqn */ - unsigned int L[NB_COEFS_HFEPOLY]; - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_hash.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_hash.h deleted file mode 100644 index e2e9b51..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_hash.h +++ /dev/null @@ -1,346 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/hash.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/hash.h -@@ -1,311 +1,40 @@ - #ifndef _HASH_H - #define _HASH_H - --#include "arch.h" - #include "choice_crypto.h" --#include "parameters_HFE.h" --#include "predicate.h" --#include "init.h" -- -- --/******************************************************************/ --/****************** Choice of the hash functions ******************/ --/******************************************************************/ -- -- --/* Choice of the hash function */ --/* The user is allowed to switch between SHA2 and SHA3 */ --#if (defined(QUARTZ)||defined(QUARTZ_V1)) -- #define CHOICE_HASH_SHA1 --#elif 0 -- #define CHOICE_HASH_SHA2 --#else -- #define CHOICE_HASH_SHA3 --#endif -- -- --/******************************************************************/ --/******************** Enable the hash functions *******************/ --/******************************************************************/ -- -- --/* Use of third libraries */ --/* The user is allowed to switch between OpenSSL and XKCP */ --/* The user can define several macros, while several SHA3 are not defined -- (if several SHA3 are defined, XKCP has priority). */ --#ifdef CHOICE_HASH_SHA1 -- #define ENABLED_SHA1_OPENSSL --#endif --#if defined(CHOICE_HASH_SHA2) -- #define ENABLED_SHA2_OPENSSL --#endif --#if defined(CHOICE_HASH_SHA3) -- /* XKCP is constant-time and faster than OpenSSL */ -- #define ENABLED_SHA3_XKCP -- /* #define ENABLED_SHA3_OPENSSL */ --#endif -- -- --#define ENABLED_SHAKE_XKCP --/* #define ENABLED_TUPLEHASH_XKCP XXX Disabled XXX */ -- -- --/******************************************************************/ --/***************** Include for the hash functions *****************/ --/******************************************************************/ -- -- --/* We minimize the numbers of #include to decrease the dependencies with the -- third libraries. 
*/ --#if (defined(ENABLED_SHA1_OPENSSL)||defined(ENABLED_SHA2_OPENSSL)) -- #include --#endif -- -- --#ifdef ENABLED_SHA2_OPENSSL -- #include "randombytes.h" -- -- #if ENABLED_OPENSSL_FIPS -- #include -- #include -- #endif --#endif -- -- --#ifdef ENABLED_SHA3_OPENSSL -- #include -- #include "prefix_name.h" -- int PREFIX_NAME(sha3_256)(unsigned char *output, const unsigned char *m, -- size_t len); -- int PREFIX_NAME(sha3_384)(unsigned char *output, const unsigned char *m, -- size_t len); -- int PREFIX_NAME(sha3_512)(unsigned char *output, const unsigned char *m, -- size_t len); -- #define sha3_256 PREFIX_NAME(sha3_256) -- #define sha3_384 PREFIX_NAME(sha3_384) -- #define sha3_512 PREFIX_NAME(sha3_512) --#endif -- -- --#if (defined(ENABLED_SHA3_XKCP)||defined(ENABLED_SHAKE_XKCP)) -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --#ifdef ENABLED_SHAKE_XKCP -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --#ifdef ENABLED_TUPLEHASH_XKCP -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --/******************************************************************/ --/**************** Macro to call the hash functions ****************/ --/******************************************************************/ -- -- --#define SHA1_OPENSSL(output,m,len) SHA1(m,len,output) --#define SHA256_OPENSSL(output,m,len) SHA256(m,len,output) --#define SHA384_OPENSSL(output,m,len) SHA384(m,len,output) --#define SHA512_OPENSSL(output,m,len) SHA512(m,len,output) -- --#define SHA256_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA256_OPENSSL(output,m,len); --#define SHA384_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA384_OPENSSL(output,m,len); --#define SHA512_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA512_OPENSSL(output,m,len); -- --/* Format: SHA3_*(output,m,len) */ --#if 0 -- #define SHA3_256_XKCP SHA3_256 -- #define SHA3_384_XKCP SHA3_384 -- #define SHA3_512_XKCP SHA3_512 --#else -- /* SHA3_* is inlined from SimpleFIPS202.c */ -- #define SHA3_256_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(1088, 512, m, len, 0x06, output, 32) -- #define SHA3_384_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(832, 768, m, len, 0x06, output, 48) -- #define SHA3_512_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(576, 1024, m, len, 0x06, output, 64) --#endif -- --/* Format: SHAKE*(output,outputByteLen,input,inputByteLen) */ --#if 0 -- #define SHAKE128_XKCP SHAKE128 -- #define SHAKE256_XKCP SHAKE256 --#else -- /* SHAKE* is inlined from SimpleFIPS202.c */ -- #define SHAKE128_XKCP(output,outputByteLen,m,len) \ -- KeccakWidth1600_Sponge(1344, 256, m, len, 0x1F, output, outputByteLen) -- #define SHAKE256_XKCP(output,outputByteLen,m,len) \ -- KeccakWidth1600_Sponge(1088, 512, m, len, 0x1F, output, outputByteLen) --#endif -- --/* To call with: -- Keccak_HashInstance hashInstance; -- Keccak_HashIUF_SHAKE*_XKCP(&hashInstance,data,databitlen); -- And after a call to Keccak_HashIUF_SHAKE*_XKCP, to use one or several times: -- Keccak_HashSqueeze(&hashInstance,output,outputbitlen); -- XXX Here, length in bits XXX --*/ --#define Keccak_HashIUF_SHAKE128_XKCP(hashInstance,data,databitlen) \ -- Keccak_HashInitialize_SHAKE128(hashInstance);\ -- 
Keccak_HashUpdate(hashInstance,data,databitlen);\ -- Keccak_HashFinal(hashInstance,0); --#define Keccak_HashIUF_SHAKE256_XKCP(hashInstance,data,databitlen) \ -- Keccak_HashInitialize_SHAKE256(hashInstance);\ -- Keccak_HashUpdate(hashInstance,data,databitlen);\ -- Keccak_HashFinal(hashInstance,0); -- --#define TUPLEHASH128_XKCP TupleHash128 --#define TUPLEHASH256_XKCP TupleHash256 -- -- --/************************************************************************/ --/* Macro to call the hash functions corresponding to the security level */ --/************************************************************************/ -+#include "fips202.h" - -+#define CHOICE_HASH_SHA3 - - /* Choice of the hash function */ - #if (K<=128) -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA256_OPENSSL_FIPS -- #else -- #define SHA2 SHA256_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_256_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_256 -- #endif --#elif (K<=192) -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA384_OPENSSL_FIPS -- #else -- #define SHA2 SHA384_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_384_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_384 -- #endif --#else -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA512_OPENSSL_FIPS -- #else -- #define SHA2 SHA512_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_512_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_512 -- #endif --#endif -- -- --/* Choice of SHAKE and TupleHash */ --#if (K<=128) -- #define SHAKE SHAKE128_XKCP -- #define Keccak_HashIUF_SHAKE Keccak_HashIUF_SHAKE128_XKCP -- #define TUPLEHASH TUPLEHASH128_XKCP --#else -- #define SHAKE SHAKE256_XKCP -- #define Keccak_HashIUF_SHAKE Keccak_HashIUF_SHAKE256_XKCP -- #define TUPLEHASH TUPLEHASH256_XKCP --#endif -- -- --/******************************************************************/ --/******** Macro to call the chosen hash function of MQsoft ********/ --/******************************************************************/ -- -- --#ifdef CHOICE_HASH_SHA1 -- #define HASH SHA1 --#elif defined(CHOICE_HASH_SHA2) -- #define HASH SHA2 --#else -- #define HASH SHA3 --#endif -- -- --/******************************************************************/ --/************************** Other tools ***************************/ --/******************************************************************/ -- -- --BEGIN_EXTERNC -- /* For KeccakWidth1600_Sponge */ -- #include --END_EXTERNC -- -- --#if (K<=80) -- #define SIZE_DIGEST 20 -- #define SIZE_DIGEST_UINT 3 --#elif (K<=128) -- #define SIZE_DIGEST 32 -- #define SIZE_DIGEST_UINT 4 -- #define SIZE_2_DIGEST 64 -- #define EQUALHASH_NOCST ISEQUAL4_NOCST -- #define COPYHASH COPY4 -+ #define HASH sha3_256 -+ #define SHAKE shake128 -+ #define SIZE_DIGEST 32 -+ #define SIZE_DIGEST_UINT 4 -+ #define SIZE_2_DIGEST 64 -+ #define EQUALHASH_NOCST ISEQUAL4_NOCST -+ #define COPYHASH COPY4 - #elif (K<=192) -- #define SIZE_DIGEST 48 -- #define SIZE_DIGEST_UINT 6 -- #define SIZE_2_DIGEST 96 -- #define EQUALHASH_NOCST ISEQUAL6_NOCST -- #define COPYHASH COPY6 --#else -- #define SIZE_DIGEST 64 -- #define SIZE_DIGEST_UINT 8 -- #define SIZE_2_DIGEST 128 -- #define EQUALHASH_NOCST ISEQUAL8_NOCST -- #define COPYHASH COPY8 -+ #define HASH sha3_384 -+ #define SHAKE shake256 -+ #define SIZE_DIGEST 48 -+ #define SIZE_DIGEST_UINT 6 -+ #define SIZE_2_DIGEST 96 -+ #define EQUALHASH_NOCST ISEQUAL6_NOCST -+ #define COPYHASH COPY6 -+#else -+ #define HASH sha3_512 -+ #define SHAKE shake256 -+ 
#define SIZE_DIGEST 64 -+ #define SIZE_DIGEST_UINT 8 -+ #define SIZE_2_DIGEST 128 -+ #define EQUALHASH_NOCST ISEQUAL8_NOCST -+ #define COPYHASH COPY8 - #endif - -- - #define EQUALHASH(a,b) f_ISEQUAL(a,b,SIZE_DIGEST_UINT) - -- --/* XXX Bytes XXX */ - #define expandSeed(output,outputByteLen,seed,seedByteLen) \ - SHAKE(output,outputByteLen,seed,seedByteLen) - -@@ -313,6 +42,4 @@ - #define expandSeedIUF Keccak_HashIUF_SHAKE - #define expandSeedSqueeze Keccak_HashSqueeze - -- - #endif -- - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_init.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_init.h deleted file mode 100644 index 25b6547..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_init.h +++ /dev/null @@ -1,32 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/init.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/init.h -@@ -116,23 +116,23 @@ - - #define SET1_2(c) \ - SET1_1(c);\ -- SET0_1(c+1); -+ SET0_1((c)+1); - - #define SET1_3(c) \ - SET1_1(c);\ -- SET0_2(c+1); -+ SET0_2((c)+1); - - #define SET1_4(c) \ - SET1_1(c);\ -- SET0_3(c+1); -+ SET0_3((c)+1); - - #define SET1_5(c) \ - SET1_1(c);\ -- SET0_4(c+1); -+ SET0_4((c)+1); - - #define SET1_6(c) \ - SET1_1(c);\ -- SET0_5(c+1); -+ SET0_5((c)+1); - - - #endif - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_macro.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_macro.h deleted file mode 100644 index 213fc47..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_macro.h +++ /dev/null @@ -1,33 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/macro.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/macro.h -@@ -7,29 +7,5 @@ - /** This macro permits to concat the names. */ - #define CONCAT(a,b) CONCAT2(a,b) - -- --/** Print a name as a string. */ --#define PRINTF_NAME(name) puts(#name); --#define PRINTF_NAME1(name) PRINTF_NAME(name) --#define PRINTF_NAME2(name) PRINTF_NAME1(name) -- -- --/** Compute Floor(a/b) with a and b positive integers, a can be zero. */ --#define DIV_FLOOR(a,b) ((a)/(b)) --#define DIV_CEIL1(a,b) (((a)/(b))+(((a)%(b))?1:0)) --/* Faster but overflow if (a+b-1) >= 2^x for x=size_of_the_type_in_bits */ --#define DIV_CEIL2(a,b) (((a)+(b)-1)/(b)) --/* Faster but incorrect only when a == 0 and b>1 */ --#define DIV_CEIL3(a,b) ((((a)-1)/(b))+1) --/** Compute Ceiling(a/b) with a and b positive integers, a can be zero. */ --#define DIV_CEIL DIV_CEIL2 -- -- --/** Return the minimum. */ --#define MINI(a,b) (((a)<(b))?(a):(b)) --/** Return the maximum. 
*/ --#define MAXI(a,b) (((a)>(b))?(a):(b)) -- -- - #endif - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2n.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2n.h deleted file mode 100644 index d580554..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2n.h +++ /dev/null @@ -1,16 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/mul_gf2n.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/mul_gf2n.h -@@ -79,11 +79,7 @@ - - - /* Function mul in GF(2^x), then modular reduction */ --#define MUL_THEN_REM_GF2N void \ -- PREFIX_NAME(mul_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn], \ -- const uint64_t B[NB_WORD_GFqn]) --MUL_THEN_REM_GF2N; -+void PREFIX_NAME(mul_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn], const uint64_t B[NB_WORD_GFqn]); - #define mul_then_rem_gf2n PREFIX_NAME(mul_then_rem_gf2n) - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2x.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2x.h deleted file mode 100644 index d2d6c57..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2x.h +++ /dev/null @@ -1,4798 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/mul_gf2x.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/mul_gf2x.h -@@ -76,9 +76,9 @@ - /* Classical, 189 = 31×6+3 instructions */ - #define MUL32_NO_UNROLLED_NO_SIMD_GF2X(C,A,B,i) \ - (C)=(-((B)&ONE64))&(A);\ -- for(i=1;i<32;++i)\ -+ for((i)=1;(i)<32;++(i))\ - {\ -- (C)^=((-(((B)>>i)&ONE64))&(A))<>(i))&ONE64))&(A))<<(i);\ - } - - -@@ -123,9 +123,9 @@ - (C)=(-((B)&ONE64))&(A);\ - /* Optimization: the '&1' is removed */\ - (C)^=((-((B)>>63))&(A))<<63;\ -- for(i=1;i<63;++i)\ -+ for((i)=1;(i)<63;++(i))\ - {\ -- (C)^=((-(((B)>>i)&ONE64))&(A))<>(i))&ONE64))&(A))<<(i);\ - } - - -@@ -203,14 +203,14 @@ - #define MUL64_NO_UNROLLED_NO_SIMD_GF2X(C,A,B,i,tmp) \ - (C)[0]=(-((B)&ONE64))&(A);\ - /* Optimization: the '&1' is removed */\ -- tmp=((-((B)>>63))&(A));\ -- (C)[0]^=tmp<<63;\ -- (C)[1]=tmp>>1;\ -- for(i=1;i<63;++i)\ -+ (tmp)=((-((B)>>63))&(A));\ -+ (C)[0]^=(tmp)<<63;\ -+ (C)[1]=(tmp)>>1;\ -+ for((i)=1;(i)<63;++(i))\ - {\ -- tmp=((-(((B)>>i)&ONE64))&(A));\ -- (C)[0]^=tmp<>(64-i);\ -+ (tmp)=((-(((B)>>(i))&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<(i);\ -+ (C)[1]^=(tmp)>>(64-(i));\ - } - - -@@ -218,197 +218,197 @@ - #define MUL64_NO_SIMD_GF2X(C,A,B,tmp) \ - (C)[0]=(-((B)&ONE64))&(A);\ - /* Optimization: the '&1' is removed */\ -- tmp=((-((B)>>63))&(A));\ -- (C)[0]^=tmp<<63;\ -- (C)[1]=tmp>>1;\ --\ -- tmp=((-(((B)>>1)&ONE64))&(A));\ -- (C)[0]^=tmp<<1;\ -- (C)[1]^=tmp>>63;\ -- tmp=((-(((B)>>2)&ONE64))&(A));\ -- (C)[0]^=tmp<<2;\ -- (C)[1]^=tmp>>62;\ -- tmp=((-(((B)>>3)&ONE64))&(A));\ -- (C)[0]^=tmp<<3;\ -- (C)[1]^=tmp>>61;\ -- tmp=((-(((B)>>4)&ONE64))&(A));\ -- (C)[0]^=tmp<<4;\ -- (C)[1]^=tmp>>60;\ -- tmp=((-(((B)>>5)&ONE64))&(A));\ -- (C)[0]^=tmp<<5;\ -- (C)[1]^=tmp>>59;\ -- tmp=((-(((B)>>6)&ONE64))&(A));\ -- (C)[0]^=tmp<<6;\ -- (C)[1]^=tmp>>58;\ -- tmp=((-(((B)>>7)&ONE64))&(A));\ -- (C)[0]^=tmp<<7;\ -- (C)[1]^=tmp>>57;\ -- tmp=((-(((B)>>8)&ONE64))&(A));\ -- (C)[0]^=tmp<<8;\ -- (C)[1]^=tmp>>56;\ -- tmp=((-(((B)>>9)&ONE64))&(A));\ -- (C)[0]^=tmp<<9;\ -- (C)[1]^=tmp>>55;\ -- tmp=((-(((B)>>10)&ONE64))&(A));\ -- (C)[0]^=tmp<<10;\ -- (C)[1]^=tmp>>54;\ -- tmp=((-(((B)>>11)&ONE64))&(A));\ -- (C)[0]^=tmp<<11;\ -- (C)[1]^=tmp>>53;\ -- 
tmp=((-(((B)>>12)&ONE64))&(A));\ -- (C)[0]^=tmp<<12;\ -- (C)[1]^=tmp>>52;\ -- tmp=((-(((B)>>13)&ONE64))&(A));\ -- (C)[0]^=tmp<<13;\ -- (C)[1]^=tmp>>51;\ -- tmp=((-(((B)>>14)&ONE64))&(A));\ -- (C)[0]^=tmp<<14;\ -- (C)[1]^=tmp>>50;\ -- tmp=((-(((B)>>15)&ONE64))&(A));\ -- (C)[0]^=tmp<<15;\ -- (C)[1]^=tmp>>49;\ -- tmp=((-(((B)>>16)&ONE64))&(A));\ -- (C)[0]^=tmp<<16;\ -- (C)[1]^=tmp>>48;\ -- tmp=((-(((B)>>17)&ONE64))&(A));\ -- (C)[0]^=tmp<<17;\ -- (C)[1]^=tmp>>47;\ -- tmp=((-(((B)>>18)&ONE64))&(A));\ -- (C)[0]^=tmp<<18;\ -- (C)[1]^=tmp>>46;\ -- tmp=((-(((B)>>19)&ONE64))&(A));\ -- (C)[0]^=tmp<<19;\ -- (C)[1]^=tmp>>45;\ -- tmp=((-(((B)>>20)&ONE64))&(A));\ -- (C)[0]^=tmp<<20;\ -- (C)[1]^=tmp>>44;\ -- tmp=((-(((B)>>21)&ONE64))&(A));\ -- (C)[0]^=tmp<<21;\ -- (C)[1]^=tmp>>43;\ -- tmp=((-(((B)>>22)&ONE64))&(A));\ -- (C)[0]^=tmp<<22;\ -- (C)[1]^=tmp>>42;\ -- tmp=((-(((B)>>23)&ONE64))&(A));\ -- (C)[0]^=tmp<<23;\ -- (C)[1]^=tmp>>41;\ -- tmp=((-(((B)>>24)&ONE64))&(A));\ -- (C)[0]^=tmp<<24;\ -- (C)[1]^=tmp>>40;\ -- tmp=((-(((B)>>25)&ONE64))&(A));\ -- (C)[0]^=tmp<<25;\ -- (C)[1]^=tmp>>39;\ -- tmp=((-(((B)>>26)&ONE64))&(A));\ -- (C)[0]^=tmp<<26;\ -- (C)[1]^=tmp>>38;\ -- tmp=((-(((B)>>27)&ONE64))&(A));\ -- (C)[0]^=tmp<<27;\ -- (C)[1]^=tmp>>37;\ -- tmp=((-(((B)>>28)&ONE64))&(A));\ -- (C)[0]^=tmp<<28;\ -- (C)[1]^=tmp>>36;\ -- tmp=((-(((B)>>29)&ONE64))&(A));\ -- (C)[0]^=tmp<<29;\ -- (C)[1]^=tmp>>35;\ -- tmp=((-(((B)>>30)&ONE64))&(A));\ -- (C)[0]^=tmp<<30;\ -- (C)[1]^=tmp>>34;\ -- tmp=((-(((B)>>31)&ONE64))&(A));\ -- (C)[0]^=tmp<<31;\ -- (C)[1]^=tmp>>33;\ --\ -- tmp=((-(((B)>>32)&ONE64))&(A));\ -- (C)[0]^=tmp<<32;\ -- (C)[1]^=tmp>>32;\ -- tmp=((-(((B)>>33)&ONE64))&(A));\ -- (C)[0]^=tmp<<33;\ -- (C)[1]^=tmp>>31;\ -- tmp=((-(((B)>>34)&ONE64))&(A));\ -- (C)[0]^=tmp<<34;\ -- (C)[1]^=tmp>>30;\ -- tmp=((-(((B)>>35)&ONE64))&(A));\ -- (C)[0]^=tmp<<35;\ -- (C)[1]^=tmp>>29;\ -- tmp=((-(((B)>>36)&ONE64))&(A));\ -- (C)[0]^=tmp<<36;\ -- (C)[1]^=tmp>>28;\ -- tmp=((-(((B)>>37)&ONE64))&(A));\ -- (C)[0]^=tmp<<37;\ -- (C)[1]^=tmp>>27;\ -- tmp=((-(((B)>>38)&ONE64))&(A));\ -- (C)[0]^=tmp<<38;\ -- (C)[1]^=tmp>>26;\ -- tmp=((-(((B)>>39)&ONE64))&(A));\ -- (C)[0]^=tmp<<39;\ -- (C)[1]^=tmp>>25;\ -- tmp=((-(((B)>>40)&ONE64))&(A));\ -- (C)[0]^=tmp<<40;\ -- (C)[1]^=tmp>>24;\ -- tmp=((-(((B)>>41)&ONE64))&(A));\ -- (C)[0]^=tmp<<41;\ -- (C)[1]^=tmp>>23;\ -- tmp=((-(((B)>>42)&ONE64))&(A));\ -- (C)[0]^=tmp<<42;\ -- (C)[1]^=tmp>>22;\ -- tmp=((-(((B)>>43)&ONE64))&(A));\ -- (C)[0]^=tmp<<43;\ -- (C)[1]^=tmp>>21;\ -- tmp=((-(((B)>>44)&ONE64))&(A));\ -- (C)[0]^=tmp<<44;\ -- (C)[1]^=tmp>>20;\ -- tmp=((-(((B)>>45)&ONE64))&(A));\ -- (C)[0]^=tmp<<45;\ -- (C)[1]^=tmp>>19;\ -- tmp=((-(((B)>>46)&ONE64))&(A));\ -- (C)[0]^=tmp<<46;\ -- (C)[1]^=tmp>>18;\ -- tmp=((-(((B)>>47)&ONE64))&(A));\ -- (C)[0]^=tmp<<47;\ -- (C)[1]^=tmp>>17;\ -- tmp=((-(((B)>>48)&ONE64))&(A));\ -- (C)[0]^=tmp<<48;\ -- (C)[1]^=tmp>>16;\ -- tmp=((-(((B)>>49)&ONE64))&(A));\ -- (C)[0]^=tmp<<49;\ -- (C)[1]^=tmp>>15;\ -- tmp=((-(((B)>>50)&ONE64))&(A));\ -- (C)[0]^=tmp<<50;\ -- (C)[1]^=tmp>>14;\ -- tmp=((-(((B)>>51)&ONE64))&(A));\ -- (C)[0]^=tmp<<51;\ -- (C)[1]^=tmp>>13;\ -- tmp=((-(((B)>>52)&ONE64))&(A));\ -- (C)[0]^=tmp<<52;\ -- (C)[1]^=tmp>>12;\ -- tmp=((-(((B)>>53)&ONE64))&(A));\ -- (C)[0]^=tmp<<53;\ -- (C)[1]^=tmp>>11;\ -- tmp=((-(((B)>>54)&ONE64))&(A));\ -- (C)[0]^=tmp<<54;\ -- (C)[1]^=tmp>>10;\ -- tmp=((-(((B)>>55)&ONE64))&(A));\ -- (C)[0]^=tmp<<55;\ -- (C)[1]^=tmp>>9;\ -- tmp=((-(((B)>>56)&ONE64))&(A));\ -- (C)[0]^=tmp<<56;\ -- (C)[1]^=tmp>>8;\ -- tmp=((-(((B)>>57)&ONE64))&(A));\ -- 
(C)[0]^=tmp<<57;\ -- (C)[1]^=tmp>>7;\ -- tmp=((-(((B)>>58)&ONE64))&(A));\ -- (C)[0]^=tmp<<58;\ -- (C)[1]^=tmp>>6;\ -- tmp=((-(((B)>>59)&ONE64))&(A));\ -- (C)[0]^=tmp<<59;\ -- (C)[1]^=tmp>>5;\ -- tmp=((-(((B)>>60)&ONE64))&(A));\ -- (C)[0]^=tmp<<60;\ -- (C)[1]^=tmp>>4;\ -- tmp=((-(((B)>>61)&ONE64))&(A));\ -- (C)[0]^=tmp<<61;\ -- (C)[1]^=tmp>>3;\ -- tmp=((-(((B)>>62)&ONE64))&(A));\ -- (C)[0]^=tmp<<62;\ -- (C)[1]^=tmp>>2; -+ (tmp)=((-((B)>>63))&(A));\ -+ (C)[0]^=(tmp)<<63;\ -+ (C)[1]=(tmp)>>1;\ -+\ -+ (tmp)=((-(((B)>>1)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<1;\ -+ (C)[1]^=(tmp)>>63;\ -+ (tmp)=((-(((B)>>2)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<2;\ -+ (C)[1]^=(tmp)>>62;\ -+ (tmp)=((-(((B)>>3)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<3;\ -+ (C)[1]^=(tmp)>>61;\ -+ (tmp)=((-(((B)>>4)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<4;\ -+ (C)[1]^=(tmp)>>60;\ -+ (tmp)=((-(((B)>>5)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<5;\ -+ (C)[1]^=(tmp)>>59;\ -+ (tmp)=((-(((B)>>6)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<6;\ -+ (C)[1]^=(tmp)>>58;\ -+ (tmp)=((-(((B)>>7)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<7;\ -+ (C)[1]^=(tmp)>>57;\ -+ (tmp)=((-(((B)>>8)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<8;\ -+ (C)[1]^=(tmp)>>56;\ -+ (tmp)=((-(((B)>>9)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<9;\ -+ (C)[1]^=(tmp)>>55;\ -+ (tmp)=((-(((B)>>10)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<10;\ -+ (C)[1]^=(tmp)>>54;\ -+ (tmp)=((-(((B)>>11)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<11;\ -+ (C)[1]^=(tmp)>>53;\ -+ (tmp)=((-(((B)>>12)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<12;\ -+ (C)[1]^=(tmp)>>52;\ -+ (tmp)=((-(((B)>>13)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<13;\ -+ (C)[1]^=(tmp)>>51;\ -+ (tmp)=((-(((B)>>14)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<14;\ -+ (C)[1]^=(tmp)>>50;\ -+ (tmp)=((-(((B)>>15)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<15;\ -+ (C)[1]^=(tmp)>>49;\ -+ (tmp)=((-(((B)>>16)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<16;\ -+ (C)[1]^=(tmp)>>48;\ -+ (tmp)=((-(((B)>>17)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<17;\ -+ (C)[1]^=(tmp)>>47;\ -+ (tmp)=((-(((B)>>18)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<18;\ -+ (C)[1]^=(tmp)>>46;\ -+ (tmp)=((-(((B)>>19)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<19;\ -+ (C)[1]^=(tmp)>>45;\ -+ (tmp)=((-(((B)>>20)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<20;\ -+ (C)[1]^=(tmp)>>44;\ -+ (tmp)=((-(((B)>>21)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<21;\ -+ (C)[1]^=(tmp)>>43;\ -+ (tmp)=((-(((B)>>22)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<22;\ -+ (C)[1]^=(tmp)>>42;\ -+ (tmp)=((-(((B)>>23)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<23;\ -+ (C)[1]^=(tmp)>>41;\ -+ (tmp)=((-(((B)>>24)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<24;\ -+ (C)[1]^=(tmp)>>40;\ -+ (tmp)=((-(((B)>>25)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<25;\ -+ (C)[1]^=(tmp)>>39;\ -+ (tmp)=((-(((B)>>26)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<26;\ -+ (C)[1]^=(tmp)>>38;\ -+ (tmp)=((-(((B)>>27)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<27;\ -+ (C)[1]^=(tmp)>>37;\ -+ (tmp)=((-(((B)>>28)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<28;\ -+ (C)[1]^=(tmp)>>36;\ -+ (tmp)=((-(((B)>>29)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<29;\ -+ (C)[1]^=(tmp)>>35;\ -+ (tmp)=((-(((B)>>30)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<30;\ -+ (C)[1]^=(tmp)>>34;\ -+ (tmp)=((-(((B)>>31)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<31;\ -+ (C)[1]^=(tmp)>>33;\ -+\ -+ (tmp)=((-(((B)>>32)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<32;\ -+ (C)[1]^=(tmp)>>32;\ -+ (tmp)=((-(((B)>>33)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<33;\ -+ (C)[1]^=(tmp)>>31;\ -+ (tmp)=((-(((B)>>34)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<34;\ -+ (C)[1]^=(tmp)>>30;\ -+ (tmp)=((-(((B)>>35)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<35;\ -+ (C)[1]^=(tmp)>>29;\ -+ (tmp)=((-(((B)>>36)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<36;\ -+ (C)[1]^=(tmp)>>28;\ -+ (tmp)=((-(((B)>>37)&ONE64))&(A));\ -+ 
(C)[0]^=(tmp)<<37;\ -+ (C)[1]^=(tmp)>>27;\ -+ (tmp)=((-(((B)>>38)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<38;\ -+ (C)[1]^=(tmp)>>26;\ -+ (tmp)=((-(((B)>>39)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<39;\ -+ (C)[1]^=(tmp)>>25;\ -+ (tmp)=((-(((B)>>40)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<40;\ -+ (C)[1]^=(tmp)>>24;\ -+ (tmp)=((-(((B)>>41)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<41;\ -+ (C)[1]^=(tmp)>>23;\ -+ (tmp)=((-(((B)>>42)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<42;\ -+ (C)[1]^=(tmp)>>22;\ -+ (tmp)=((-(((B)>>43)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<43;\ -+ (C)[1]^=(tmp)>>21;\ -+ (tmp)=((-(((B)>>44)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<44;\ -+ (C)[1]^=(tmp)>>20;\ -+ (tmp)=((-(((B)>>45)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<45;\ -+ (C)[1]^=(tmp)>>19;\ -+ (tmp)=((-(((B)>>46)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<46;\ -+ (C)[1]^=(tmp)>>18;\ -+ (tmp)=((-(((B)>>47)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<47;\ -+ (C)[1]^=(tmp)>>17;\ -+ (tmp)=((-(((B)>>48)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<48;\ -+ (C)[1]^=(tmp)>>16;\ -+ (tmp)=((-(((B)>>49)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<49;\ -+ (C)[1]^=(tmp)>>15;\ -+ (tmp)=((-(((B)>>50)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<50;\ -+ (C)[1]^=(tmp)>>14;\ -+ (tmp)=((-(((B)>>51)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<51;\ -+ (C)[1]^=(tmp)>>13;\ -+ (tmp)=((-(((B)>>52)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<52;\ -+ (C)[1]^=(tmp)>>12;\ -+ (tmp)=((-(((B)>>53)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<53;\ -+ (C)[1]^=(tmp)>>11;\ -+ (tmp)=((-(((B)>>54)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<54;\ -+ (C)[1]^=(tmp)>>10;\ -+ (tmp)=((-(((B)>>55)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<55;\ -+ (C)[1]^=(tmp)>>9;\ -+ (tmp)=((-(((B)>>56)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<56;\ -+ (C)[1]^=(tmp)>>8;\ -+ (tmp)=((-(((B)>>57)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<57;\ -+ (C)[1]^=(tmp)>>7;\ -+ (tmp)=((-(((B)>>58)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<58;\ -+ (C)[1]^=(tmp)>>6;\ -+ (tmp)=((-(((B)>>59)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<59;\ -+ (C)[1]^=(tmp)>>5;\ -+ (tmp)=((-(((B)>>60)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<60;\ -+ (C)[1]^=(tmp)>>4;\ -+ (tmp)=((-(((B)>>61)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<61;\ -+ (C)[1]^=(tmp)>>3;\ -+ (tmp)=((-(((B)>>62)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<62;\ -+ (C)[1]^=(tmp)>>2; - - - /* Karatsuba, 1205 = 505*2+189+6 instructions */ -@@ -422,8 +422,8 @@ - /* C[0] = C0 - C[1] = C0^C1^C2 - C[2] = C1^C2 */\ -- AA=(A)[0]^(A)[1];\ -- BB=(B)[0]^(B)[1];\ -+ (AA)=(A)[0]^(A)[1];\ -+ (BB)=(B)[0]^(B)[1];\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[1]^=RESERVED_BUF2[0];\ - (C)[2]^=RESERVED_BUF2[1];} -@@ -442,8 +442,8 @@ - C[1] = C0^C1^C2 - C[2] = C1^C2^C3 - C[3] = C3 */\ -- AA=(A)[0]^(A)[1];\ -- BB=(B)[0]^(B)[1];\ -+ (AA)=(A)[0]^(A)[1];\ -+ (BB)=(B)[0]^(B)[1];\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[1]^=RESERVED_BUF2[0];\ - (C)[2]^=RESERVED_BUF2[1];} -@@ -475,20 +475,20 @@ - C[2] = (C0^C1^C2)^(C3^C4) - C[3] = (C1^C2)^(C3^C4) - C[4] = C3^C4 */\ -- AA=(A)[0]^(A)[1];\ -- BB=(B)[0]^(B)[1];\ -+ (AA)=(A)[0]^(A)[1];\ -+ (BB)=(B)[0]^(B)[1];\ - /* (A0+A1)*(B0+B1) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[1]^=RESERVED_BUF2[0];\ - (C)[2]^=RESERVED_BUF2[1];\ -- AA=(A)[1]^(A)[2];\ -- BB=(B)[1]^(B)[2];\ -+ (AA)=(A)[1]^(A)[2];\ -+ (BB)=(B)[1]^(B)[2];\ - /* (A1+A2)*(B1+B2) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[3]^=RESERVED_BUF2[0];\ - (C)[4]^=RESERVED_BUF2[1];\ -- AA=(A)[0]^(A)[2];\ -- BB=(B)[0]^(B)[2];\ -+ (AA)=(A)[0]^(A)[2];\ -+ (BB)=(B)[0]^(B)[2];\ - /* (A0+A2)*(B0+B2) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[2]^=RESERVED_BUF2[0];\ -@@ -524,20 +524,20 @@ - C[3] = (C1^C2)^(C3^C4^C5) - C[4] = (C3^C4)^C5 - C[5] = C5 */\ 
-- AA=(A)[0]^(A)[1];\ -- BB=(B)[0]^(B)[1];\ -+ (AA)=(A)[0]^(A)[1];\ -+ (BB)=(B)[0]^(B)[1];\ - /* (A0+A1)*(B0+B1) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[1]^=RESERVED_BUF2[0];\ - (C)[2]^=RESERVED_BUF2[1];\ -- AA=(A)[1]^(A)[2];\ -- BB=(B)[1]^(B)[2];\ -+ (AA)=(A)[1]^(A)[2];\ -+ (BB)=(B)[1]^(B)[2];\ - /* (A1+A2)*(B1+B2) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[3]^=RESERVED_BUF2[0];\ - (C)[4]^=RESERVED_BUF2[1];\ -- AA=(A)[0]^(A)[2];\ -- BB=(B)[0]^(B)[2];\ -+ (AA)=(A)[0]^(A)[2];\ -+ (BB)=(B)[0]^(B)[2];\ - /* (A0+A2)*(B0+B2) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[2]^=RESERVED_BUF2[0];\ -@@ -998,15 +998,15 @@ - /* ~ 2*8 = 16 instructions */ - /* Classical, 31 = 9+10+12 instructions */ - #define PMUL16_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one16) \ -- RA=PSET1_16(A0);\ -- RB=PSET_16((B0)>>14,(B0)>>10,(B0)>>6,(B0)>>2,(B0)>>12,(B0)>>8,(B0)>>4,B0);\ -+ (RA)=PSET1_16(A0);\ -+ (RB)=PSET_16((B0)>>14,(B0)>>10,(B0)>>6,(B0)>>2,(B0)>>12,(B0)>>8,(B0)>>4,B0);\ - \ -- CL=PAND_(RA,PMASK16_ONE(PAND_(RB,one16),one16));\ -- M=PAND_(RA,PMASK16_ONE(PAND_(PSRLI_16(RB,1),one16),one16));\ -+ (CL)=PAND_(RA,PMASK16_ONE(PAND_(RB,one16),one16));\ -+ (M)=PAND_(RA,PMASK16_ONE(PAND_(PSRLI_16(RB,1),one16),one16));\ - PXOR1_2(CL,PSLLI_16(M,1));\ -- CH=PSRLI_16(M,15);\ -+ (CH)=PSRLI_16(M,15);\ - \ -- C1=PXOR_(PUNPACKLO_16(CL,CH),PSLLI_32(PUNPACKHI_16(CL,CH),2));\ -+ (C1)=PXOR_(PUNPACKLO_16(CL,CH),PSLLI_32(PUNPACKHI_16(CL,CH),2));\ - PXOR1_2(C1,PSRLI_128(C1,7));\ - PXOR1_2(C1,PSLLI_32(PSRLI_128(C1,4),4));\ - PAND1_2(C1,PSET_32(0,0,0,MONE32)); -@@ -1015,37 +1015,37 @@ - /* ~ 4*6 = 24 instructions */ - /* Classical, 33 = 5+21+7 instructions */ - #define PMUL16_WS_CLAS0_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one32) \ -- RA=PSET1_32(A0);\ -- RB=PSET_32((B0)>>12,(B0)>>4,(B0)>>8,B0);\ -+ (RA)=PSET1_32(A0);\ -+ (RB)=PSET_32((B0)>>12,(B0)>>4,(B0)>>8,B0);\ - \ -- CL=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32));\ -+ (CL)=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,1));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,2));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,3));\ - \ -- CH=PXOR_(CL,PSRLI_128(PSLLI_32(CL,4),8));\ -- C1=PXOR_(CH,PSRLI_128(CH,3));\ -+ (CH)=PXOR_(CL,PSRLI_128(PSLLI_32(CL,4),8));\ -+ (C1)=PXOR_(CH,PSRLI_128(CH,3));\ - PAND1_2(C1,PSET_32(0,0,0,MONE32)); - - - /* ~ 8*8 = 64 instructions */ - /* Classical, 72 = 5+59+8 instructions */ - #define PMUL32_NO_UNROLLED_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one32,i) \ -- RA=PSET1_32(A0);\ -- RB=PSET_32((B0)>>24,(B0)>>8,(B0)>>16,B0);\ -+ (RA)=PSET1_32(A0);\ -+ (RB)=PSET_32((B0)>>24,(B0)>>8,(B0)>>16,B0);\ - \ -- CL=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -- CH=PSETZERO();\ -- for(i=1;i<8;++i)\ -+ (CL)=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -+ (CH)=PSETZERO();\ -+ for((i)=1;(i)<8;++(i))\ - {\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,i),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,i),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,i));\ -- PXOR1_2(CH,PSRLI_32(M,32-i));\ -+ PXOR1_2(CH,PSRLI_32(M,32-(i)));\ - }\ -- C1=PXOR_(PUNPACKLO_32(CL,CH),PSLLI_128(PUNPACKHI_32(CL,CH),1));\ -+ (C1)=PXOR_(PUNPACKLO_32(CL,CH),PSLLI_128(PUNPACKHI_32(CL,CH),1));\ - 
PXOR1_2(C1,PSRLI_128(C1,6));\ - PAND1_2(C1,PSET_64(0,MONE64)); - -@@ -1054,34 +1054,34 @@ - /* Classical, 71 = 5+58+8 instructions */ - /* The fastest method */ - #define PMUL32_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one32) \ -- RA=PSET1_32(A0);\ -- RB=PSET_32((B0)>>24,(B0)>>8,(B0)>>16,B0);\ -+ (RA)=PSET1_32(A0);\ -+ (RB)=PSET_32((B0)>>24,(B0)>>8,(B0)>>16,B0);\ - \ -- CL=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32));\ -+ (CL)=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,1));\ -- CH=PSRLI_32(M,31);\ -+ (CH)=PSRLI_32(M,31);\ - \ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,2));\ - PXOR1_2(CH,PSRLI_32(M,30));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,3));\ - PXOR1_2(CH,PSRLI_32(M,29));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,4),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,4),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,4));\ - PXOR1_2(CH,PSRLI_32(M,28));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,5),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,5),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,5));\ - PXOR1_2(CH,PSRLI_32(M,27));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,6),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,6),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,6));\ - PXOR1_2(CH,PSRLI_32(M,26));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,7),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,7),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,7));\ - PXOR1_2(CH,PSRLI_32(M,25));\ - \ -- C1=PXOR_(PUNPACKLO_32(CL,CH),PSLLI_128(PUNPACKHI_32(CL,CH),1));\ -+ (C1)=PXOR_(PUNPACKLO_32(CL,CH),PSLLI_128(PUNPACKHI_32(CL,CH),1));\ - PXOR1_2(C1,PSRLI_128(C1,6));\ - PAND1_2(C1,PSET_64(0,MONE64)); - -@@ -1091,11 +1091,11 @@ - #define PMUL32_WS_KAR_CLAS16_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM,one16) \ - PMUL16_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one16);\ - PMUL16_WS_CLAS_GF2X(M,(A0)>>16,(B0)>>16,RA,RB,CL,CH,M,one16);\ -- CM=PXOR_(C1,M);\ -+ (CM)=PXOR_(C1,M);\ - PXOR1_2(C1,PSLLI_128(M,4));\ - PMUL16_WS_CLAS_GF2X(M,(A0)^((A0)>>16),(B0)^((B0)>>16),RA,RB,CL,CH,M,one16);\ - \ -- M=PXOR_(CM,M);\ -+ (M)=PXOR_(CM,M);\ - PXOR1_2(C1,PSLLI_128(M,2)); - - -@@ -1104,165 +1104,165 @@ - #define PMUL32_WS_KAR_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM,one32) \ - PMUL16_WS_CLAS0_GF2X(C1,(A0)&MONE16,B0,RA,RB,CL,CH,M,one32);\ - PMUL16_WS_CLAS0_GF2X(M,((A0)>>16)&MONE16,(B0)>>16,RA,RB,CL,CH,M,one32);\ -- CM=PXOR_(C1,M);\ -+ (CM)=PXOR_(C1,M);\ - PXOR1_2(C1,PSLLI_128(M,4));\ - PMUL16_WS_CLAS0_GF2X(M,((A0)^((A0)>>16))&MONE16,(B0)^((B0)>>16),\ - RA,RB,CL,CH,M,one32);\ - \ -- M=PXOR_(CM,M);\ -+ (M)=PXOR_(CM,M);\ - PXOR1_2(C1,PSLLI_128(M,2)); - - - /* ~ 32*8 = 256 instructions */ - /* Classical, 258 = 3+251+4 instructions */ - #define PMUL64_NO_UNROLLED_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one64,i) \ -- RA=PSET1_64(A0);\ -- RB=PSET_64((B0)>>32,B0);\ -+ (RA)=PSET1_64(A0);\ -+ (RB)=PSET_64((B0)>>32,B0);\ - \ -- CL=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -- CH=PSETZERO();\ -- for(i=1;i<32;++i)\ -+ (CL)=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -+ (CH)=PSETZERO();\ -+ for((i)=1;(i)<32;++(i))\ - {\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64));\ -+ 
(M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,i));\ -- PXOR1_2(CH,PSRLI_64(M,64-i));\ -+ PXOR1_2(CH,PSRLI_64(M,64-(i)));\ - }\ -- C1=PXOR_(PUNPACKLO_64(CL,CH),PSLLI_128(PUNPACKHI_64(CL,CH),4)); -+ (C1)=PXOR_(PUNPACKLO_64(CL,CH),PSLLI_128(PUNPACKHI_64(CL,CH),4)); - - - /* ~ 32*8 = 256 instructions */ - /* Classical, 257 = 3+250+4 instructions */ - #define PMUL64_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one64) \ -- RA=PSET1_64(A0);\ -- RB=PSET_64((B0)>>32,B0);\ -+ (RA)=PSET1_64(A0);\ -+ (RB)=PSET_64((B0)>>32,B0);\ - \ -- CL=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64));\ -+ (CL)=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,1));\ -- CH=PSRLI_64(M,63);\ -+ (CH)=PSRLI_64(M,63);\ - \ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,2));\ - PXOR1_2(CH,PSRLI_64(M,62));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,3));\ - PXOR1_2(CH,PSRLI_64(M,61));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,4));\ - PXOR1_2(CH,PSRLI_64(M,60));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,5));\ - PXOR1_2(CH,PSRLI_64(M,59));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,6));\ - PXOR1_2(CH,PSRLI_64(M,58));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,7));\ - PXOR1_2(CH,PSRLI_64(M,57));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,8));\ - PXOR1_2(CH,PSRLI_64(M,56));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,9));\ - PXOR1_2(CH,PSRLI_64(M,55));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,10));\ - PXOR1_2(CH,PSRLI_64(M,54));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,11));\ - PXOR1_2(CH,PSRLI_64(M,53));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,12));\ - PXOR1_2(CH,PSRLI_64(M,52));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,13));\ - PXOR1_2(CH,PSRLI_64(M,51));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,14));\ - PXOR1_2(CH,PSRLI_64(M,50));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,15));\ - 
PXOR1_2(CH,PSRLI_64(M,49));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,16));\ - PXOR1_2(CH,PSRLI_64(M,48));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,17));\ - PXOR1_2(CH,PSRLI_64(M,47));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,18));\ - PXOR1_2(CH,PSRLI_64(M,46));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,19));\ - PXOR1_2(CH,PSRLI_64(M,45));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,20));\ - PXOR1_2(CH,PSRLI_64(M,44));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,21));\ - PXOR1_2(CH,PSRLI_64(M,43));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,22));\ - PXOR1_2(CH,PSRLI_64(M,42));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,23));\ - PXOR1_2(CH,PSRLI_64(M,41));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,24));\ - PXOR1_2(CH,PSRLI_64(M,40));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,25));\ - PXOR1_2(CH,PSRLI_64(M,39));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,26));\ - PXOR1_2(CH,PSRLI_64(M,38));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,27));\ - PXOR1_2(CH,PSRLI_64(M,37));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,28));\ - PXOR1_2(CH,PSRLI_64(M,36));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,29));\ - PXOR1_2(CH,PSRLI_64(M,35));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,30));\ - PXOR1_2(CH,PSRLI_64(M,34));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,31));\ - PXOR1_2(CH,PSRLI_64(M,33));\ - \ -- C1=PXOR_(PUNPACKLO_64(CL,CH),PSLLI_128(PUNPACKHI_64(CL,CH),4)); -+ (C1)=PXOR_(PUNPACKLO_64(CL,CH),PSLLI_128(PUNPACKHI_64(CL,CH),4)); - - - /* ~ 16*13 = 208 instructions */ - /* Classical, 220 = 7+199+14 instructions */ - #define PMUL64_NO_UNROLLED_WS_CLAS2_GF2X(C1,A0,B0,RA,RAM,RB,CL,CH,CLM,CHM,M,\ - one32,i) \ -- RA=PSET_32((A0)>>32,(A0)>>32,A0,A0);\ -- RAM=PSET_32(A0,A0,(A0)>>32,(A0)>>32);\ -- 
RB=PSET_32((B0)>>48,(B0)>>32,(B0)>>16,B0);\ --\ -- C1=PMASK32_ONE(PAND_(RB,one32),one32);\ -- CL=PAND_(RA,C1);\ -- CH=PSETZERO();\ -- CLM=PAND_(RAM,C1);\ -- CHM=PSETZERO();\ -- for(i=1;i<16;++i)\ -+ (RA)=PSET_32((A0)>>32,(A0)>>32,A0,A0);\ -+ (RAM)=PSET_32(A0,A0,(A0)>>32,(A0)>>32);\ -+ (RB)=PSET_32((B0)>>48,(B0)>>32,(B0)>>16,B0);\ -+\ -+ (C1)=PMASK32_ONE(PAND_(RB,one32),one32);\ -+ (CL)=PAND_(RA,C1);\ -+ (CH)=PSETZERO();\ -+ (CLM)=PAND_(RAM,C1);\ -+ (CHM)=PSETZERO();\ -+ for((i)=1;(i)<16;++(i))\ - {\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,i),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,i),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,i));\ -- PXOR1_2(CH,PSRLI_32(M,32-i));\ -- M=PAND_(RAM,C1);\ -+ PXOR1_2(CH,PSRLI_32(M,32-(i)));\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,i));\ -- PXOR1_2(CHM,PSRLI_32(M,32-i));\ -+ PXOR1_2(CHM,PSRLI_32(M,32-(i)));\ - }\ - \ -- RAM=PXOR_(PUNPACKLO_32(CLM,CHM),PUNPACKHI_32(CLM,CHM));\ -- CLM=PUNPACKLO_32(CL,CH);\ -- CHM=PUNPACKHI_32(CL,CH);\ -- C1=PXOR_(PUNPACKLO_64(CLM,CHM),PSLLI_128(PUNPACKHI_64(CLM,CHM),2));\ -+ (RAM)=PXOR_(PUNPACKLO_32(CLM,CHM),PUNPACKHI_32(CLM,CHM));\ -+ (CLM)=PUNPACKLO_32(CL,CH);\ -+ (CHM)=PUNPACKHI_32(CL,CH);\ -+ (C1)=PXOR_(PUNPACKLO_64(CLM,CHM),PSLLI_128(PUNPACKHI_64(CLM,CHM),2));\ - PXOR1_2(C1,\ - PSLLI_128(PAND_(PXOR_(RAM,PSRLI_128(RAM,6)),PSET_64(0,MONE64)),4)); - -@@ -1271,124 +1271,124 @@ - /* Classical, 219 = 7+198+14 instructions */ - /* The fastest method */ - #define PMUL64_WS_CLAS2_GF2X(C1,A0,B0,RA,RAM,RB,CL,CH,CLM,CHM,M,one32) \ -- RA=PSET_32((A0)>>32,(A0)>>32,A0,A0);\ -- RAM=PSET_32(A0,A0,(A0)>>32,(A0)>>32);\ -- RB=PSET_32((B0)>>48,(B0)>>32,(B0)>>16,B0);\ --\ -- C1=PMASK32_ONE(PAND_(RB,one32),one32);\ -- CL=PAND_(RA,C1);\ -- CLM=PAND_(RAM,C1);\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (RA)=PSET_32((A0)>>32,(A0)>>32,A0,A0);\ -+ (RAM)=PSET_32(A0,A0,(A0)>>32,(A0)>>32);\ -+ (RB)=PSET_32((B0)>>48,(B0)>>32,(B0)>>16,B0);\ -+\ -+ (C1)=PMASK32_ONE(PAND_(RB,one32),one32);\ -+ (CL)=PAND_(RA,C1);\ -+ (CLM)=PAND_(RAM,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,1));\ -- CH=PSRLI_32(M,31);\ -- M=PAND_(RAM,C1);\ -+ (CH)=PSRLI_32(M,31);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,1));\ -- CHM=PSRLI_32(M,31);\ -+ (CHM)=PSRLI_32(M,31);\ - \ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,2));\ - PXOR1_2(CH,PSRLI_32(M,30));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,2));\ - PXOR1_2(CHM,PSRLI_32(M,30));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,3));\ - PXOR1_2(CH,PSRLI_32(M,29));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,3));\ - PXOR1_2(CHM,PSRLI_32(M,29));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,4),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,4),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,4));\ - PXOR1_2(CH,PSRLI_32(M,28));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,4));\ - PXOR1_2(CHM,PSRLI_32(M,28));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,5),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,5),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,5));\ - 
PXOR1_2(CH,PSRLI_32(M,27));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,5));\ - PXOR1_2(CHM,PSRLI_32(M,27));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,6),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,6),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,6));\ - PXOR1_2(CH,PSRLI_32(M,26));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,6));\ - PXOR1_2(CHM,PSRLI_32(M,26));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,7),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,7),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,7));\ - PXOR1_2(CH,PSRLI_32(M,25));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,7));\ - PXOR1_2(CHM,PSRLI_32(M,25));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,8),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,8),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,8));\ - PXOR1_2(CH,PSRLI_32(M,24));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,8));\ - PXOR1_2(CHM,PSRLI_32(M,24));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,9),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,9),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,9));\ - PXOR1_2(CH,PSRLI_32(M,23));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,9));\ - PXOR1_2(CHM,PSRLI_32(M,23));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,10),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,10),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,10));\ - PXOR1_2(CH,PSRLI_32(M,22));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,10));\ - PXOR1_2(CHM,PSRLI_32(M,22));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,11),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,11),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,11));\ - PXOR1_2(CH,PSRLI_32(M,21));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,11));\ - PXOR1_2(CHM,PSRLI_32(M,21));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,12),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,12),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,12));\ - PXOR1_2(CH,PSRLI_32(M,20));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,12));\ - PXOR1_2(CHM,PSRLI_32(M,20));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,13),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,13),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,13));\ - PXOR1_2(CH,PSRLI_32(M,19));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,13));\ - PXOR1_2(CHM,PSRLI_32(M,19));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,14),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,14),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,14));\ - PXOR1_2(CH,PSRLI_32(M,18));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,14));\ - PXOR1_2(CHM,PSRLI_32(M,18));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,15),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,15),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,15));\ - PXOR1_2(CH,PSRLI_32(M,17));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,15));\ - PXOR1_2(CHM,PSRLI_32(M,17));\ - \ -- RAM=PXOR_(PUNPACKLO_32(CLM,CHM),PUNPACKHI_32(CLM,CHM));\ -- CLM=PUNPACKLO_32(CL,CH);\ -- CHM=PUNPACKHI_32(CL,CH);\ -- 
C1=PXOR_(PUNPACKLO_64(CLM,CHM),PSLLI_128(PUNPACKHI_64(CLM,CHM),2));\ -+ (RAM)=PXOR_(PUNPACKLO_32(CLM,CHM),PUNPACKHI_32(CLM,CHM));\ -+ (CLM)=PUNPACKLO_32(CL,CH);\ -+ (CHM)=PUNPACKHI_32(CL,CH);\ -+ (C1)=PXOR_(PUNPACKLO_64(CLM,CHM),PSLLI_128(PUNPACKHI_64(CLM,CHM),2));\ - PXOR1_2(C1,\ - PSLLI_128(PAND_(PXOR_(RAM,PSRLI_128(RAM,6)),PSET_64(0,MONE64)),4)); - -@@ -1397,12 +1397,12 @@ - #define PMUL64_WS_KAR_KAR32_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM,CM1,CM2,one32) \ - PMUL32_WS_KAR_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM1,one32);\ - PMUL32_WS_KAR_GF2X(M,(A0)>>32,(B0)>>32,RA,RB,CL,CH,CM1,CM2,one32);\ -- CM=PXOR_(C1,M);\ -+ (CM)=PXOR_(C1,M);\ - PXOR1_2(C1,PSLLI_128(M,8));\ - PMUL32_WS_KAR_GF2X(M,(A0)^((A0)>>32),(B0)^((B0)>>32),\ - RA,RB,CL,CH,CM1,CM2,one32);\ - \ -- M=PXOR_(CM,M);\ -+ (M)=PXOR_(CM,M);\ - PXOR1_2(C1,PSLLI_128(M,4)); - - -@@ -1411,11 +1411,11 @@ - #define PMUL64_WS_KAR_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM,one32) \ - PMUL32_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one32);\ - PMUL32_WS_CLAS_GF2X(M,(A0)>>32,(B0)>>32,RA,RB,CL,CH,M,one32);\ -- CM=PXOR_(C1,M);\ -+ (CM)=PXOR_(C1,M);\ - PXOR1_2(C1,PSLLI_128(M,8));\ - PMUL32_WS_CLAS_GF2X(M,(A0)^((A0)>>32),(B0)^((B0)>>32),RA,RB,CL,CH,M,one32);\ - \ -- M=PXOR_(CM,M);\ -+ (M)=PXOR_(CM,M);\ - PXOR1_2(C1,PSLLI_128(M,4)); - - -@@ -1424,223 +1424,223 @@ - /* Classical, 511 = 2+507+2 instructions */ - #define PMUL64x2_NO_UNROLLED_WS_CLAS_GF2X(C1,C2,A0,A1,B0,B1,RA,RB,CL,CH,M,\ - one64,i) \ -- RA=PSET_64(A1,A0);\ -- RB=PSET_64(B1,B0);\ -+ (RA)=PSET_64(A1,A0);\ -+ (RB)=PSET_64(B1,B0);\ - \ -- CL=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -- CH=PSETZERO();\ -- for(i=1;i<64;++i)\ -+ (CL)=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -+ (CH)=PSETZERO();\ -+ for((i)=1;(i)<64;++(i))\ - {\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,i));\ -- PXOR1_2(CH,PSRLI_64(M,64-i));\ -+ PXOR1_2(CH,PSRLI_64(M,64-(i)));\ - }\ -- C1=PUNPACKLO_64(CL,CH);\ -- C2=PUNPACKHI_64(CL,CH); -+ (C1)=PUNPACKLO_64(CL,CH);\ -+ (C2)=PUNPACKHI_64(CL,CH); - - - /* ~ 64*8 = 512 instructions */ - /* Classical, 509 = 2+505+2 instructions */ - #define PMUL64x2_WS_CLAS_GF2X(C1,C2,A0,A1,B0,B1,RA,RB,M,one64) \ -- RA=PSET_64(A1,A0);\ -- RB=PSET_64(B1,B0);\ -+ (RA)=PSET_64(A1,A0);\ -+ (RB)=PSET_64(B1,B0);\ - \ -- CL=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64));\ -+ (CL)=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,1));\ -- CH=PSRLI_64(M,63);\ -+ (CH)=PSRLI_64(M,63);\ - \ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,2));\ - PXOR1_2(CH,PSRLI_64(M,62));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,3));\ - PXOR1_2(CH,PSRLI_64(M,61));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,4));\ - PXOR1_2(CH,PSRLI_64(M,60));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,5));\ - PXOR1_2(CH,PSRLI_64(M,59));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64));\ - 
PXOR1_2(CL,PSLLI_64(M,6));\ - PXOR1_2(CH,PSRLI_64(M,58));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,7));\ - PXOR1_2(CH,PSRLI_64(M,57));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,8));\ - PXOR1_2(CH,PSRLI_64(M,56));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,9));\ - PXOR1_2(CH,PSRLI_64(M,55));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,10));\ - PXOR1_2(CH,PSRLI_64(M,54));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,11));\ - PXOR1_2(CH,PSRLI_64(M,53));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,12));\ - PXOR1_2(CH,PSRLI_64(M,52));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,13));\ - PXOR1_2(CH,PSRLI_64(M,51));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,14));\ - PXOR1_2(CH,PSRLI_64(M,50));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,15));\ - PXOR1_2(CH,PSRLI_64(M,49));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,16));\ - PXOR1_2(CH,PSRLI_64(M,48));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,17));\ - PXOR1_2(CH,PSRLI_64(M,47));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,18));\ - PXOR1_2(CH,PSRLI_64(M,46));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,19));\ - PXOR1_2(CH,PSRLI_64(M,45));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,20));\ - PXOR1_2(CH,PSRLI_64(M,44));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,21));\ - PXOR1_2(CH,PSRLI_64(M,43));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,22));\ - PXOR1_2(CH,PSRLI_64(M,42));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,23));\ - PXOR1_2(CH,PSRLI_64(M,41));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,24));\ - PXOR1_2(CH,PSRLI_64(M,40));\ -- 
M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,25));\ - PXOR1_2(CH,PSRLI_64(M,39));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,26));\ - PXOR1_2(CH,PSRLI_64(M,38));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,27));\ - PXOR1_2(CH,PSRLI_64(M,37));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,28));\ - PXOR1_2(CH,PSRLI_64(M,36));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,29));\ - PXOR1_2(CH,PSRLI_64(M,35));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,30));\ - PXOR1_2(CH,PSRLI_64(M,34));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,31));\ - PXOR1_2(CH,PSRLI_64(M,33));\ - \ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,32),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,32),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,32));\ - PXOR1_2(CH,PSRLI_64(M,32));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,33),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,33),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,33));\ - PXOR1_2(CH,PSRLI_64(M,31));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,34),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,34),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,34));\ - PXOR1_2(CH,PSRLI_64(M,30));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,35),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,35),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,35));\ - PXOR1_2(CH,PSRLI_64(M,29));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,36),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,36),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,36));\ - PXOR1_2(CH,PSRLI_64(M,28));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,37),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,37),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,37));\ - PXOR1_2(CH,PSRLI_64(M,27));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,38),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,38),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,38));\ - PXOR1_2(CH,PSRLI_64(M,26));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,39),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,39),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,39));\ - PXOR1_2(CH,PSRLI_64(M,25));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,40),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,40),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,40));\ - PXOR1_2(CH,PSRLI_64(M,24));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,41),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,41),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,41));\ - PXOR1_2(CH,PSRLI_64(M,23));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,42),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,42),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,42));\ - PXOR1_2(CH,PSRLI_64(M,22));\ -- 
M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,43),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,43),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,43));\ - PXOR1_2(CH,PSRLI_64(M,21));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,44),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,44),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,44));\ - PXOR1_2(CH,PSRLI_64(M,20));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,45),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,45),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,45));\ - PXOR1_2(CH,PSRLI_64(M,19));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,46),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,46),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,46));\ - PXOR1_2(CH,PSRLI_64(M,18));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,47),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,47),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,47));\ - PXOR1_2(CH,PSRLI_64(M,17));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,48),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,48),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,48));\ - PXOR1_2(CH,PSRLI_64(M,16));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,49),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,49),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,49));\ - PXOR1_2(CH,PSRLI_64(M,15));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,50),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,50),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,50));\ - PXOR1_2(CH,PSRLI_64(M,14));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,51),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,51),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,51));\ - PXOR1_2(CH,PSRLI_64(M,13));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,52),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,52),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,52));\ - PXOR1_2(CH,PSRLI_64(M,12));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,53),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,53),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,53));\ - PXOR1_2(CH,PSRLI_64(M,11));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,54),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,54),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,54));\ - PXOR1_2(CH,PSRLI_64(M,10));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,55),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,55),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,55));\ - PXOR1_2(CH,PSRLI_64(M,9));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,56),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,56),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,56));\ - PXOR1_2(CH,PSRLI_64(M,8));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,57),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,57),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,57));\ - PXOR1_2(CH,PSRLI_64(M,7));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,58),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,58),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,58));\ - PXOR1_2(CH,PSRLI_64(M,6));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,59),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,59),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,59));\ - PXOR1_2(CH,PSRLI_64(M,5));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,60),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,60),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,60));\ - PXOR1_2(CH,PSRLI_64(M,4));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,61),one64),one64));\ -+ 
(M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,61),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,61));\ - PXOR1_2(CH,PSRLI_64(M,3));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,62),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,62),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,62));\ - PXOR1_2(CH,PSRLI_64(M,2));\ - /* Optimization: the '&1' is removed */\ -- M=PAND_(RA,PMASK64_ONE(PSRLI_64(RB,63),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PSRLI_64(RB,63),one64));\ - PXOR1_2(CL,PSLLI_64(M,63));\ - PXOR1_2(CH,PSRLI_64(M,1));\ - \ -- C1=PUNPACKLO_64(CL,CH);\ -- C2=PUNPACKHI_64(CL,CH);\ -+ (C1)=PUNPACKLO_64(CL,CH);\ -+ (C2)=PUNPACKHI_64(CL,CH);\ - - - /* Karatsuba, 525 = 223*2+71*1+8 instructions */ -@@ -1660,29 +1660,29 @@ - /* The fastest classical method */ - #define PMUL128_NO_UNROLLED_WS_CLAS_GF2X(C1,C2,A,B,RA,RAM,RB,CL,CH,CLM,CHM,\ - one64,i) \ -- RA=PSET_64((A)[1],(A)[0]);\ -- RAM=PSET_64((A)[0],(A)[1]);\ -- RB=PSET_64((B)[1],(B)[0]);\ --\ -- C1=PMASK64_ONE(PAND_(RB,one64),one64);\ -- CL=PAND_(RA,C1);\ -- CH=PSETZERO();\ -- CLM=PAND_(RAM,C1);\ -- CHM=PSETZERO();\ -- for(i=1;i<64;++i)\ -+ (RA)=PSET_64((A)[1],(A)[0]);\ -+ (RAM)=PSET_64((A)[0],(A)[1]);\ -+ (RB)=PSET_64((B)[1],(B)[0]);\ -+\ -+ (C1)=PMASK64_ONE(PAND_(RB,one64),one64);\ -+ (CL)=PAND_(RA,C1);\ -+ (CH)=PSETZERO();\ -+ (CLM)=PAND_(RAM,C1);\ -+ (CHM)=PSETZERO();\ -+ for((i)=1;(i)<64;++(i))\ - {\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,i));\ -- PXOR1_2(CH,PSRLI_64(C2,64-i));\ -- C2=PAND_(RAM,C1);\ -+ PXOR1_2(CH,PSRLI_64(C2,64-(i)));\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,i));\ -- PXOR1_2(CHM,PSRLI_64(C2,64-i));\ -+ PXOR1_2(CHM,PSRLI_64(C2,64-(i)));\ - }\ -- C1=PUNPACKLO_64(CL,CH);\ -- C2=PUNPACKHI_64(CL,CH);\ -+ (C1)=PUNPACKLO_64(CL,CH);\ -+ (C2)=PUNPACKHI_64(CL,CH);\ - \ -- RAM=PXOR_(PUNPACKLO_64(CLM,CHM),PUNPACKHI_64(CLM,CHM));\ -+ (RAM)=PXOR_(PUNPACKLO_64(CLM,CHM),PUNPACKHI_64(CLM,CHM));\ - PXOR1_2(C1,PLSHIFT64(RAM));\ - PXOR1_2(C2,PRSHIFT64(RAM)); - -@@ -1690,462 +1690,462 @@ - /* ~ 64*13 = 832 instructions */ - /* Classical, 832 = 3+820+9 instructions */ - #define PMUL128_WS_CLAS_GF2X(C1,C2,A,B,RA,RAM,RB,CL,CH,CLM,CHM,one64) \ -- RA=PSET_64((A)[1],(A)[0]);\ -- RAM=PSET_64((A)[0],(A)[1]);\ -- RB=PSET_64((B)[1],(B)[0]);\ --\ -- C1=PMASK64_ONE(PAND_(RB,one64),one64);\ -- CL=PAND_(RA,C1);\ -- CLM=PAND_(RAM,C1);\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (RA)=PSET_64((A)[1],(A)[0]);\ -+ (RAM)=PSET_64((A)[0],(A)[1]);\ -+ (RB)=PSET_64((B)[1],(B)[0]);\ -+\ -+ (C1)=PMASK64_ONE(PAND_(RB,one64),one64);\ -+ (CL)=PAND_(RA,C1);\ -+ (CLM)=PAND_(RAM,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,1));\ -- CH=PSRLI_64(C2,63);\ -- C2=PAND_(RAM,C1);\ -+ (CH)=PSRLI_64(C2,63);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,1));\ -- CHM=PSRLI_64(C2,63);\ -+ (CHM)=PSRLI_64(C2,63);\ - \ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,2));\ - PXOR1_2(CH,PSRLI_64(C2,62));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,2));\ - PXOR1_2(CHM,PSRLI_64(C2,62));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,3));\ - 
PXOR1_2(CH,PSRLI_64(C2,61));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,3));\ - PXOR1_2(CHM,PSRLI_64(C2,61));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,4));\ - PXOR1_2(CH,PSRLI_64(C2,60));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,4));\ - PXOR1_2(CHM,PSRLI_64(C2,60));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,5));\ - PXOR1_2(CH,PSRLI_64(C2,59));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,5));\ - PXOR1_2(CHM,PSRLI_64(C2,59));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,6));\ - PXOR1_2(CH,PSRLI_64(C2,58));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,6));\ - PXOR1_2(CHM,PSRLI_64(C2,58));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,7));\ - PXOR1_2(CH,PSRLI_64(C2,57));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,7));\ - PXOR1_2(CHM,PSRLI_64(C2,57));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,8));\ - PXOR1_2(CH,PSRLI_64(C2,56));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,8));\ - PXOR1_2(CHM,PSRLI_64(C2,56));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,9));\ - PXOR1_2(CH,PSRLI_64(C2,55));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,9));\ - PXOR1_2(CHM,PSRLI_64(C2,55));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,10));\ - PXOR1_2(CH,PSRLI_64(C2,54));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,10));\ - PXOR1_2(CHM,PSRLI_64(C2,54));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,11));\ - PXOR1_2(CH,PSRLI_64(C2,53));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,11));\ - PXOR1_2(CHM,PSRLI_64(C2,53));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,12));\ - PXOR1_2(CH,PSRLI_64(C2,52));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,12));\ - PXOR1_2(CHM,PSRLI_64(C2,52));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,13));\ - PXOR1_2(CH,PSRLI_64(C2,51));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,13));\ - PXOR1_2(CHM,PSRLI_64(C2,51));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ 
(C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,14));\ - PXOR1_2(CH,PSRLI_64(C2,50));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,14));\ - PXOR1_2(CHM,PSRLI_64(C2,50));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,15));\ - PXOR1_2(CH,PSRLI_64(C2,49));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,15));\ - PXOR1_2(CHM,PSRLI_64(C2,49));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,16));\ - PXOR1_2(CH,PSRLI_64(C2,48));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,16));\ - PXOR1_2(CHM,PSRLI_64(C2,48));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,17));\ - PXOR1_2(CH,PSRLI_64(C2,47));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,17));\ - PXOR1_2(CHM,PSRLI_64(C2,47));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,18));\ - PXOR1_2(CH,PSRLI_64(C2,46));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,18));\ - PXOR1_2(CHM,PSRLI_64(C2,46));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,19));\ - PXOR1_2(CH,PSRLI_64(C2,45));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,19));\ - PXOR1_2(CHM,PSRLI_64(C2,45));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,20));\ - PXOR1_2(CH,PSRLI_64(C2,44));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,20));\ - PXOR1_2(CHM,PSRLI_64(C2,44));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,21));\ - PXOR1_2(CH,PSRLI_64(C2,43));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,21));\ - PXOR1_2(CHM,PSRLI_64(C2,43));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,22));\ - PXOR1_2(CH,PSRLI_64(C2,42));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,22));\ - PXOR1_2(CHM,PSRLI_64(C2,42));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,23));\ - PXOR1_2(CH,PSRLI_64(C2,41));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,23));\ - PXOR1_2(CHM,PSRLI_64(C2,41));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,24));\ - PXOR1_2(CH,PSRLI_64(C2,40));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - 
PXOR1_2(CLM,PSLLI_64(C2,24));\ - PXOR1_2(CHM,PSRLI_64(C2,40));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,25));\ - PXOR1_2(CH,PSRLI_64(C2,39));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,25));\ - PXOR1_2(CHM,PSRLI_64(C2,39));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,26));\ - PXOR1_2(CH,PSRLI_64(C2,38));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,26));\ - PXOR1_2(CHM,PSRLI_64(C2,38));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,27));\ - PXOR1_2(CH,PSRLI_64(C2,37));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,27));\ - PXOR1_2(CHM,PSRLI_64(C2,37));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,28));\ - PXOR1_2(CH,PSRLI_64(C2,36));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,28));\ - PXOR1_2(CHM,PSRLI_64(C2,36));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,29));\ - PXOR1_2(CH,PSRLI_64(C2,35));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,29));\ - PXOR1_2(CHM,PSRLI_64(C2,35));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,30));\ - PXOR1_2(CH,PSRLI_64(C2,34));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,30));\ - PXOR1_2(CHM,PSRLI_64(C2,34));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,31));\ - PXOR1_2(CH,PSRLI_64(C2,33));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,31));\ - PXOR1_2(CHM,PSRLI_64(C2,33));\ - \ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,32),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,32),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,32));\ - PXOR1_2(CH,PSRLI_64(C2,32));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,32));\ - PXOR1_2(CHM,PSRLI_64(C2,32));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,33),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,33),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,33));\ - PXOR1_2(CH,PSRLI_64(C2,31));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,33));\ - PXOR1_2(CHM,PSRLI_64(C2,31));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,34),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,34),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,34));\ - PXOR1_2(CH,PSRLI_64(C2,30));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,34));\ - PXOR1_2(CHM,PSRLI_64(C2,30));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,35),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,35),one64),one64);\ -+ 
(C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,35));\ - PXOR1_2(CH,PSRLI_64(C2,29));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,35));\ - PXOR1_2(CHM,PSRLI_64(C2,29));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,36),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,36),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,36));\ - PXOR1_2(CH,PSRLI_64(C2,28));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,36));\ - PXOR1_2(CHM,PSRLI_64(C2,28));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,37),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,37),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,37));\ - PXOR1_2(CH,PSRLI_64(C2,27));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,37));\ - PXOR1_2(CHM,PSRLI_64(C2,27));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,38),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,38),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,38));\ - PXOR1_2(CH,PSRLI_64(C2,26));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,38));\ - PXOR1_2(CHM,PSRLI_64(C2,26));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,39),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,39),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,39));\ - PXOR1_2(CH,PSRLI_64(C2,25));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,39));\ - PXOR1_2(CHM,PSRLI_64(C2,25));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,40),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,40),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,40));\ - PXOR1_2(CH,PSRLI_64(C2,24));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,40));\ - PXOR1_2(CHM,PSRLI_64(C2,24));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,41),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,41),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,41));\ - PXOR1_2(CH,PSRLI_64(C2,23));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,41));\ - PXOR1_2(CHM,PSRLI_64(C2,23));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,42),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,42),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,42));\ - PXOR1_2(CH,PSRLI_64(C2,22));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,42));\ - PXOR1_2(CHM,PSRLI_64(C2,22));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,43),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,43),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,43));\ - PXOR1_2(CH,PSRLI_64(C2,21));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,43));\ - PXOR1_2(CHM,PSRLI_64(C2,21));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,44),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,44),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,44));\ - PXOR1_2(CH,PSRLI_64(C2,20));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,44));\ - PXOR1_2(CHM,PSRLI_64(C2,20));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,45),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,45),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,45));\ - PXOR1_2(CH,PSRLI_64(C2,19));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,45));\ - PXOR1_2(CHM,PSRLI_64(C2,19));\ -- 
C1=PMASK64_ONE(PAND_(PSRLI_64(RB,46),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,46),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,46));\ - PXOR1_2(CH,PSRLI_64(C2,18));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,46));\ - PXOR1_2(CHM,PSRLI_64(C2,18));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,47),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,47),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,47));\ - PXOR1_2(CH,PSRLI_64(C2,17));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,47));\ - PXOR1_2(CHM,PSRLI_64(C2,17));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,48),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,48),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,48));\ - PXOR1_2(CH,PSRLI_64(C2,16));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,48));\ - PXOR1_2(CHM,PSRLI_64(C2,16));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,49),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,49),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,49));\ - PXOR1_2(CH,PSRLI_64(C2,15));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,49));\ - PXOR1_2(CHM,PSRLI_64(C2,15));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,50),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,50),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,50));\ - PXOR1_2(CH,PSRLI_64(C2,14));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,50));\ - PXOR1_2(CHM,PSRLI_64(C2,14));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,51),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,51),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,51));\ - PXOR1_2(CH,PSRLI_64(C2,13));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,51));\ - PXOR1_2(CHM,PSRLI_64(C2,13));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,52),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,52),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,52));\ - PXOR1_2(CH,PSRLI_64(C2,12));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,52));\ - PXOR1_2(CHM,PSRLI_64(C2,12));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,53),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,53),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,53));\ - PXOR1_2(CH,PSRLI_64(C2,11));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,53));\ - PXOR1_2(CHM,PSRLI_64(C2,11));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,54),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,54),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,54));\ - PXOR1_2(CH,PSRLI_64(C2,10));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,54));\ - PXOR1_2(CHM,PSRLI_64(C2,10));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,55),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,55),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,55));\ - PXOR1_2(CH,PSRLI_64(C2,9));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,55));\ - PXOR1_2(CHM,PSRLI_64(C2,9));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,56),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,56),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,56));\ - 
PXOR1_2(CH,PSRLI_64(C2,8));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,56));\ - PXOR1_2(CHM,PSRLI_64(C2,8));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,57),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,57),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,57));\ - PXOR1_2(CH,PSRLI_64(C2,7));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,57));\ - PXOR1_2(CHM,PSRLI_64(C2,7));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,58),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,58),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,58));\ - PXOR1_2(CH,PSRLI_64(C2,6));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,58));\ - PXOR1_2(CHM,PSRLI_64(C2,6));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,59),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,59),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,59));\ - PXOR1_2(CH,PSRLI_64(C2,5));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,59));\ - PXOR1_2(CHM,PSRLI_64(C2,5));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,60),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,60),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,60));\ - PXOR1_2(CH,PSRLI_64(C2,4));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,60));\ - PXOR1_2(CHM,PSRLI_64(C2,4));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,61),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,61),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,61));\ - PXOR1_2(CH,PSRLI_64(C2,3));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,61));\ - PXOR1_2(CHM,PSRLI_64(C2,3));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,62),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,62),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,62));\ - PXOR1_2(CH,PSRLI_64(C2,2));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,62));\ - PXOR1_2(CHM,PSRLI_64(C2,2));\ - /* Optimization: the '&1' is removed */\ -- C1=PMASK64_ONE(PSRLI_64(RB,63),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PSRLI_64(RB,63),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,63));\ - PXOR1_2(CH,PSRLI_64(C2,1));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,63));\ - PXOR1_2(CHM,PSRLI_64(C2,1));\ - \ -- C1=PUNPACKLO_64(CL,CH);\ -- C2=PUNPACKHI_64(CL,CH);\ -+ (C1)=PUNPACKLO_64(CL,CH);\ -+ (C2)=PUNPACKHI_64(CL,CH);\ - \ -- RAM=PXOR_(PUNPACKLO_64(CLM,CHM),PUNPACKHI_64(CLM,CHM));\ -+ (RAM)=PXOR_(PUNPACKLO_64(CLM,CHM),PUNPACKHI_64(CLM,CHM));\ - PXOR1_2(C1,PLSHIFT64(RAM));\ - PXOR1_2(C2,PRSHIFT64(RAM)); - -@@ -2197,9 +2197,9 @@ - /* A2*B2 */\ - PMUL32_WS_CLAS_GF2X(z3,(A)[2],(B)[2],RA,RB,CL,CH,M,one32);\ - \ -- res1=PXOR_(z1,z2);\ -- res2=PXOR_(z3,z2);\ -- z2=PXOR_(res1,z3);\ -+ (res1)=PXOR_(z1,z2);\ -+ (res2)=PXOR_(z3,z2);\ -+ (z2)=PXOR_(res1,z3);\ - /* C[0] = C0 - C[1] = C1^(C0^C2) - C[2] = C2^(C1^C3)^C0^C4 -@@ -2233,9 +2233,9 @@ - /* A2*B2 */\ - PMUL64_WS_KAR_GF2X(z3,(A)[2],(B)[2],RA,RB,CL,CH,M,CM,one32);\ - \ -- res1=PXOR_(z1,z2);\ -- res2=PXOR_(z3,z2);\ -- z2=PXOR_(res1,z3);\ -+ (res1)=PXOR_(z1,z2);\ -+ (res2)=PXOR_(z3,z2);\ -+ (z2)=PXOR_(res1,z3);\ - /* C[0] = C0 - C[1] = C1^(C0^C2) - C[2] = C2^(C1^C3)^C0^C4 -@@ -2287,7 +2287,7 @@ - PMUL128_WS_KAR_GF2X(C3,C4,(A)+2,(B)+2,RA,RB,CL,CH,M,MM,CM,one32);\ - \ - PXOR1_2(C3,C2);\ -- C2=PXOR_(C3,C1);\ -+ 
(C2)=PXOR_(C3,C1);\ - PXOR1_2(C3,C4);\ - \ - RESERVED_BUF4[0]=(A)[0]^(A)[2];\ -@@ -2309,7 +2309,7 @@ - PMUL160_WS_KAR6_GF2X(C3,C4,C5,(A)+2,(B)+2,RA,RB,CL,CH,M,MM,CM,\ - res1,res2,one32)\ - PXOR1_2(C3,C2);\ -- C2=PXOR_(C3,C1);\ -+ (C2)=PXOR_(C3,C1);\ - PXOR1_2(C3,C4);\ - PXOR1_2(C4,C5);\ - \ -@@ -2335,7 +2335,7 @@ - PMUL192_WS_KAR6_GF2X(C3,C4,C5,(A)+2,(B)+2,RA,RB,CL,CH,M,MM,CM,\ - res1,res2,one32)\ - PXOR1_2(C3,C2);\ -- C2=PXOR_(C3,C1);\ -+ (C2)=PXOR_(C3,C1);\ - PXOR1_2(C3,C4);\ - PXOR1_2(C4,C5);\ - \ -@@ -2418,11 +2418,11 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32)\ - PMUL160_WS_KAR6_GF2X(C5,C6,C7,(A)+4,(B)+4,RA,RB,CL,CH,M,M2,CM,\ - CM1,CM2,one32)\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ - \ - RESERVED_BUF8[0]=(A)[0]^(A)[4];\ - RESERVED_BUF8[1]=(A)[1]^(A)[5];\ -@@ -2481,11 +2481,11 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32)\ - PMUL192_WS_KAR6_GF2X(C5,C6,C7,(A)+4,(B)+4,RA,RB,CL,CH,M,M2,CM,\ - CM1,CM2,one32)\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ - \ - RESERVED_BUF8[0]=(A)[0]^(A)[4];\ - RESERVED_BUF8[1]=(A)[1]^(A)[5];\ -@@ -2511,12 +2511,12 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32);\ - PMUL224_WS_KAR_GF2X(C5,C6,C7,C8,(A)+4,(B)+4,\ - RA,RB,CL,CH,M,MM,CM,M3,M4,one32);\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -- C6=PXOR_(C6,C8);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ -+ (C6)=PXOR_(C6,C8);\ - \ - RESERVED_BUF8[0]=(A)[0]^(A)[4];\ - RESERVED_BUF8[1]=(A)[1]^(A)[5];\ -@@ -2542,12 +2542,12 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32);\ - PMUL256_WS_KAR_GF2X(C5,C6,C7,C8,(A)+4,(B)+4,\ - RA,RB,CL,CH,M,MM,CM,M3,M4,one32);\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -- C6=PXOR_(C6,C8);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ -+ (C6)=PXOR_(C6,C8);\ - \ - RESERVED_BUF8[0]=(A)[0]^(A)[4];\ - RESERVED_BUF8[1]=(A)[1]^(A)[5];\ -@@ -2573,13 +2573,13 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32)\ - PMUL288_WS_KAR_GF2X(C5,C6,C7,C8,C9,(A)+4,(B)+4,RA,RB,CL,CH,M,MM,CM,\ - M1,M2,M3,res1,res2,one32)\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -- C6=PXOR_(C6,C8);\ -- C7=PXOR_(C7,C9);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ -+ (C6)=PXOR_(C6,C8);\ -+ (C7)=PXOR_(C7,C9);\ - \ - RESERVED_BUF10[0]=(A)[0]^(A)[4];\ - RESERVED_BUF10[1]=(A)[1]^(A)[5];\ -@@ -2608,13 +2608,13 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32)\ - PMUL320_WS_KAR_GF2X(C5,C6,C7,C8,C9,(A)+4,(B)+4,RA,RB,CL,CH,M,MM,CM,\ - M1,M2,M3,res1,res2,one32)\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -- C6=PXOR_(C6,C8);\ -- C7=PXOR_(C7,C9);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ -+ 
(C6)=PXOR_(C6,C8);\ -+ (C7)=PXOR_(C7,C9);\ - \ - RESERVED_BUF10[0]=(A)[0]^(A)[4];\ - RESERVED_BUF10[1]=(A)[1]^(A)[5];\ -@@ -2729,62 +2729,62 @@ - /* Classical: 4 mul64, 5 other instructions */ - #define PCLMUL128_WS_CLAS_GF2X(z1,z2,x,y,sum,res_low,res_high) \ - /* X^0 */\ -- res_low=PCLMUL(x,y,0);\ -+ (res_low)=PCLMUL(x,y,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x, y, 1);\ -- z2=PCLMUL(x, y, 0x10);\ -- res_high=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x, y, 1);\ -+ (z2)=PCLMUL(x, y, 0x10);\ -+ (res_high)=PXOR_(z1,z2);\ - \ - /* mid2_low: x^64 ... x^127 */\ -- sum=PLSHIFT64(res_high);\ -+ (sum)=PLSHIFT64(res_high);\ - /* mid2_low + L */\ -- z1=PXOR_(res_low,sum);\ -+ (z1)=PXOR_(res_low,sum);\ - \ - /* X^128 */\ -- res_low=PCLMUL(x,y,0x11);\ -+ (res_low)=PCLMUL(x,y,0x11);\ - \ - /* mid2_high: x^128 ... x^191 */\ -- sum=PRSHIFT64(res_high);\ -+ (sum)=PRSHIFT64(res_high);\ - /* mid2_high + H */\ -- z2=PXOR_(res_low,sum); -+ (z2)=PXOR_(res_low,sum); - - - /* Classical: 4 mul64, 7 other instructions */ - #define PCLMUL128_ADD_CLAS_GF2X(z3,z4,z1,z2,x,y,sum,res_low,res_high) \ - /* X^0 */\ -- res_low=PCLMUL(x,y,0);\ -+ (res_low)=PCLMUL(x,y,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x, y, 1);\ -- z2=PCLMUL(x, y, 0x10);\ -- res_high=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x, y, 1);\ -+ (z2)=PCLMUL(x, y, 0x10);\ -+ (res_high)=PXOR_(z1,z2);\ - \ - /* mid2_low: x^64 ... x^127 */\ -- sum=PLSHIFT64(res_high);\ -+ (sum)=PLSHIFT64(res_high);\ - /* mid2_low + L */\ -- z3^=PXOR_(res_low,sum);\ -+ (z3)^=PXOR_(res_low,sum);\ - \ - /* X^128 */\ -- res_low=PCLMUL(x,y,0x11);\ -+ (res_low)=PCLMUL(x,y,0x11);\ - \ - /* mid2_high: x^128 ... x^191 */\ -- sum=PRSHIFT64(res_high);\ -+ (sum)=PRSHIFT64(res_high);\ - /* mid2_high + H */\ -- z4^=PXOR_(res_low,sum); -+ (z4)^=PXOR_(res_low,sum); - - - /* Karatsuba: 3 mul64, 10 other instructions */ - #define PCLMUL128_WS_KAR_GF2X(z1,z2,x,y,sum,res_low,res_high) \ - /* X^0 */\ -- z1=PCLMUL(x,y,0);\ -+ (z1)=PCLMUL(x,y,0);\ - /* X^128 */\ -- z2=PCLMUL(x,y,0x11);\ -+ (z2)=PCLMUL(x,y,0x11);\ - \ -- res_low=PXOR_(x,PRSHIFT64(x));\ -- res_high=PXOR_(y,PRSHIFT64(y));\ -+ (res_low)=PXOR_(x,PRSHIFT64(x));\ -+ (res_high)=PXOR_(y,PRSHIFT64(y));\ - \ -- sum=PCLMUL(res_low,res_high,0);\ -+ (sum)=PCLMUL(res_low,res_high,0);\ - PXOR1_2(sum,z1);\ - PXOR1_2(sum,z2);\ - \ -@@ -2795,14 +2795,14 @@ - /* Karatsuba: 3 mul64, 12 other instructions */ - #define PCLMUL128_ADD_KAR_GF2X(z3,z4,z1,z2,x,y,sum,res_low,res_high) \ - /* X^0 */\ -- z1=PCLMUL(x,y,0);\ -+ (z1)=PCLMUL(x,y,0);\ - /* X^128 */\ -- z2=PCLMUL(x,y,0x11);\ -+ (z2)=PCLMUL(x,y,0x11);\ - \ -- res_low=PXOR_(x,PRSHIFT64(x));\ -- res_high=PXOR_(y,PRSHIFT64(y));\ -+ (res_low)=PXOR_(x,PRSHIFT64(x));\ -+ (res_high)=PXOR_(y,PRSHIFT64(y));\ - \ -- sum=PCLMUL(res_low,res_high,0);\ -+ (sum)=PCLMUL(res_low,res_high,0);\ - PXOR1_2(sum,z1);\ - PXOR1_2(sum,z2);\ - \ -@@ -2815,50 +2815,50 @@ - /* Classical: 9 mul64, 1 PMIDDLE, 9 other instructions */ - #define PCLMUL192_WS_CLAS_GF2X(z3,z1,z2,x1,x2,y1,y2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ 
(sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PCLMUL(x1, y2, 1);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PCLMUL(x1, y2, 1);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- res1=PCLMUL(x2,y2,0);\ -- res2=PRSHIFT64(sum);\ -- z2=PXOR_(res1,res2); -+ (res1)=PCLMUL(x2,y2,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,res2); - - - /* Karatsuba: 6 mul64, 1 PMIDDLE, 19 other instructions */ - #define PCLMUL192_WS_KAR_GF2X(z1,z2,z3,x1,x2,y1,y2,sum,res1,res2) \ - {__m128i u31;\ - /* A0*B0 */\ -- z1=PCLMUL(x1,y1,0);\ -+ (z1)=PCLMUL(x1,y1,0);\ - /* A1*B1 */\ -- z2=PCLMUL(x1,y1,0x11);\ -+ (z2)=PCLMUL(x1,y1,0x11);\ - /* A2*B2 */\ -- z3=PCLMUL(x2,y2,0);\ -+ (z3)=PCLMUL(x2,y2,0);\ - \ -- res1=PXOR_(z1,z2);\ -- res2=PXOR_(z3,z2);\ -- z2=PXOR_(res1,z3);\ -+ (res1)=PXOR_(z1,z2);\ -+ (res2)=PXOR_(z3,z2);\ -+ (z2)=PXOR_(res1,z3);\ - /* C[0] = C0 - C[1] = C1^(C0^C2) - C[2] = C2^(C1^C3)^C0^C4 -@@ -2866,9 +2866,9 @@ - C[4] = C4^(C5^C3) - C[5] = C5 */\ - /* (A2 A2) */\ -- u31=PSHUFFLE_32_1010(x2);\ -+ (u31)=PSHUFFLE_32_1010(x2);\ - /* (B2 B2) */\ -- sum=PSHUFFLE_32_1010(y2);\ -+ (sum)=PSHUFFLE_32_1010(y2);\ - /* (A2 A2) ^ (A0 A1) */\ - PXOR1_2(u31,x1);\ - /* (B2 B2) ^ (B0 B1) */\ -@@ -2890,13 +2890,13 @@ - #define PCLMUL192_WS_KAR256_GF2X(z1,z2,z3,x1,x2,y1,y2,sum,res1,res2) \ - {__m128i x,y,u31,u32;\ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ -- z3=PCLMUL(x2,y2,0);\ -+ (z3)=PCLMUL(x2,y2,0);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z3,z2);\ -- z2=PXOR_(z3,z1);\ -+ (z2)=PXOR_(z3,z1);\ - \ - PCLMUL128_ADD_GF2X(z2,z3,u31,u32,x,y,sum,res1,res2);} - -@@ -2904,108 +2904,108 @@ - /* Classical: 16 mul64, 2 PMIDDLE, 15 other instructions */ - #define PCLMUL256_WS_CLAS_GF2X(z3,z4,z1,z2,x1,x2,y1,y2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(y1, x2, 0x11);\ -- z2=PCLMUL(y2, x2, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0x11);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x11);\ -+ (z2)=PCLMUL(y2, x2, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^320 */\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PCLMUL(x2, y2, 
0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- res1=PCLMUL(x2,y2,0x11);\ -- sum=PRSHIFT64(res2);\ -- z2=PXOR_(res1,sum); -+ (res1)=PCLMUL(x2,y2,0x11);\ -+ (sum)=PRSHIFT64(res2);\ -+ (z2)=PXOR_(res1,sum); - - - /* Classical: 16 mul64, 2 PMIDDLE, 19 other instructions */ - /* xor the res to z3,z4,z5,z6 */ - #define PCLMUL256_ADD_CLAS_GF2X(z3,z4,z5,z6,z1,z2,x1,x2,y1,y2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3^=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)^=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)^=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(y1, x2, 0x11);\ -- z2=PCLMUL(y2, x2, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0x11);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x11);\ -+ (z2)=PCLMUL(y2, x2, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^320 */\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)^=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- res1=PCLMUL(x2,y2,0x11);\ -- sum=PRSHIFT64(res2);\ -- z6^=PXOR_(res1,sum); -+ (res1)=PCLMUL(x2,y2,0x11);\ -+ (sum)=PRSHIFT64(res2);\ -+ (z6)^=PXOR_(res1,sum); - - - /* Karatsuba: 3 mul128, 5 other instructions */ -@@ -3014,11 +3014,11 @@ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL128_WS_GF2X(z3,z4,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z3,z2);\ -- z2=PXOR_(z3,z1);\ -+ (z2)=PXOR_(z3,z1);\ - PXOR1_2(z3,z4);\ - \ - PCLMUL128_ADD_GF2X(z2,z3,u41,u42,x,y,sum,res1,res2);} -@@ -3030,8 +3030,8 @@ - PCLMUL128_WS_GF2X(u41,u42,x1,y1,sum,res1,res2);\ - PCLMUL128_WS_GF2X(z5,z6,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z1,u41);\ - PXOR1_2(z4,z6);\ -@@ -3049,11 +3049,11 @@ - PCLMUL128_WS_KAR_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL128_WS_KAR_GF2X(z3,z4,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z3,z2);\ -- z2=PXOR_(z3,z1);\ -+ (z2)=PXOR_(z3,z1);\ - PXOR1_2(z3,z4);\ - \ - PCLMUL128_ADD_KAR_GF2X(z2,z3,u41,u42,x,y,sum,res1,res2);} -@@ -3065,8 +3065,8 @@ - PCLMUL128_WS_KAR_GF2X(u41,u42,x1,y1,sum,res1,res2);\ - 
PCLMUL128_WS_KAR_GF2X(z5,z6,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z1,u41);\ - PXOR1_2(z4,z6);\ -@@ -3081,77 +3081,77 @@ - /* Classical: 25 mul64, 3 PMIDDLE, 23 other instructions */ - #define PCLMUL320_WS_CLAS_GF2X(z3,z4,z5,z1,z2,x1,x2,x3,y1,y2,y3,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y3, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- z1=PCLMUL(x2, y3, 0);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 1);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 1);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^512 */\ -- res1=PCLMUL(x3,y3,0);\ -- res2=PRSHIFT64(sum);\ -- z2=PXOR_(res1,res2); -+ (res1)=PCLMUL(x3,y3,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,res2); - - - /* Classical: 25 mul64, 3 PMIDDLE, 28 other instructions */ -@@ -3159,77 +3159,77 @@ - #define PCLMUL320_ADD_CLAS_GF2X(z3,z4,z5,z6,z7,z1,z2,x1,x2,x3,y1,y2,y3,\ - sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- 
res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3^=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)^=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)^=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y3, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)^=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- z1=PCLMUL(x2, y3, 0);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 1);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 1);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z6^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z6)^=PXOR_(res1,z2);\ - \ - /* X^512 */\ -- res1=PCLMUL(x3,y3,0);\ -- res2=PRSHIFT64(sum);\ -- z7^=PXOR_(res1,res2); -+ (res1)=PCLMUL(x3,y3,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z7)^=PXOR_(res1,res2); - - - /* Karatsuba: 2 mul192, 1 mul128, 9 other instructions */ -@@ -3238,11 +3238,11 @@ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL192_WS_GF2X(z3,z4,z5,x2,x3,y2,y3,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x2);\ -- y1m=PXOR_(y1,y2);\ -+ (x1m)=PXOR_(x1,x2);\ -+ (y1m)=PXOR_(y1,y2);\ - \ - PXOR1_2(z3,z2);\ -- z2=PXOR_(z3,z1);\ -+ (z2)=PXOR_(z3,z1);\ - PXOR1_2(z3,z4);\ - PXOR1_2(z4,z5);\ - \ -@@ -3261,11 +3261,11 @@ - PCLMUL128_WS_GF2X(z6,z7,x1,y1,sum,res1,res2);\ - PCLMUL192_WS_GF2X(R1,R2,R3,x2,x3,y2,y3,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x2);\ -- y1m=PXOR_(y1,y2);\ -+ (x1m)=PXOR_(x1,x2);\ -+ (y1m)=PXOR_(y1,y2);\ - \ - PXOR1_2(R1,z7);\ -- z7=PXOR_(R1,z6);\ -+ (z7)=PXOR_(R1,z6);\ - PXOR1_2(R1,R2);\ - PXOR1_2(R2,R3);\ - \ -@@ -3286,104 +3286,104 @@ - #define 
PCLMUL384_WS_CLAS_GF2X(z3,z4,z5,z6,z1,z2,x1,x2,x3,y1,y2,y3,\ - sum,res1,res2)\ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 0x10);\ -- z2=PCLMUL(x1, y3, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y2, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 1);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y3, 0x10);\ -+ (z2)=PCLMUL(x1, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 1);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- z1=PCLMUL(x1, y3, 0x11);\ -- z2=PCLMUL(x2, y3, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x11);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x3, y2, 0);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x3, y1, 0x11);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x1, y3, 0x11);\ -+ (z2)=PCLMUL(x2, y3, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x11);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x3, y2, 0);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x11);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 0x10);\ -- z2=PCLMUL(x2, y3, 1);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x3, y2, 1);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 0x10);\ -+ (z2)=PCLMUL(x2, y3, 1);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x3, y2, 1);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z6=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z6)=PXOR_(res1,z2);\ - \ - /* X^512 */\ -- z1=PCLMUL(x2, y3, 0x11);\ -- z2=PCLMUL(x3, y3, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0x11);\ -- 
res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y3, 0x11);\ -+ (z2)=PCLMUL(x3, y3, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^576 */\ -- z1=PCLMUL(x3, y3, 0x10);\ -- z2=PCLMUL(x3, y3, 1);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 0x10);\ -+ (z2)=PCLMUL(x3, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^640 */\ -- res1=PCLMUL(x3,y3,0x11);\ -- sum=PRSHIFT64(res2);\ -- z2=PXOR_(res1,sum); -+ (res1)=PCLMUL(x3,y3,0x11);\ -+ (sum)=PRSHIFT64(res2);\ -+ (z2)=PXOR_(res1,sum); - - - /* Karatsuba: 3 mul192, 4 PMIDDLE, 18 other instructions */ -@@ -3392,10 +3392,10 @@ - {__m128i x1m,x2m,y1m,y2m,R1,R2,R3;\ - PCLMUL192_WS_GF2X(z1,z2,z3,x1,x2,y1,y2,sum,res1,res2);\ - \ -- x1m=PMIDDLE(x2,x3);\ -- x2m=PRSHIFT64(x3);\ -- y1m=PMIDDLE(y2,y3);\ -- y2m=PRSHIFT64(y3);\ -+ (x1m)=PMIDDLE(x2,x3);\ -+ (x2m)=PRSHIFT64(x3);\ -+ (y1m)=PMIDDLE(y2,y3);\ -+ (y2m)=PRSHIFT64(y3);\ - \ - PCLMUL192_WS_GF2X(z4,z5,z6,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ -@@ -3419,135 +3419,135 @@ - #define PCLMUL448_WS_CLAS_GF2X(z3,z4,z5,z6,z7,z1,z2,x1,x2,x3,x4,y1,y2,y3,y4,\ - sum,res1,res2)\ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 0x10);\ -- z2=PCLMUL(x1, y3, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y2, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 1);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y3, 0x10);\ -+ (z2)=PCLMUL(x1, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 1);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)=PXOR_(res1,z2);\ - \ 
- /* X^384 */\ -- z1=PCLMUL(x1, y4, 0);\ -- z2=PCLMUL(x1, y3, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y3, 0);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x3, y1, 0x11);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x4, y1, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x1, y4, 0);\ -+ (z2)=PCLMUL(x1, y3, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x3, y1, 0x11);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x4, y1, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x1, y4, 1);\ -- z2=PCLMUL(x2, y3, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x2, y3, 1);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x3, y2, 1);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x4, y1, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y4, 1);\ -+ (z2)=PCLMUL(x2, y3, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x2, y3, 1);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x3, y2, 1);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x4, y1, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z6=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z6)=PXOR_(res1,z2);\ - \ - /* X^512 */\ -- z1=PCLMUL(x2, y4, 0);\ -- z2=PCLMUL(x2, y3, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y3, 0);\ -- res1=PXOR_(res2,z1);\ -- z2=PCLMUL(x3, y2, 0x11);\ -- res2=PXOR_(res1,z2);\ -- z1=PCLMUL(x4, y2, 0);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y4, 0);\ -+ (z2)=PCLMUL(x2, y3, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z2)=PCLMUL(x3, y2, 0x11);\ -+ (res2)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x4, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^576 */\ -- z1=PCLMUL(x2, y4, 1);\ -- z2=PCLMUL(x3, y3, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y3, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x4, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y4, 1);\ -+ (z2)=PCLMUL(x3, y3, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x4, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z7=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z7)=PXOR_(res1,z2);\ - \ - /* X^640 */\ -- z1=PCLMUL(x3, y4, 0);\ -- z2=PCLMUL(x3, y3, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x4, y3, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x3, y4, 0);\ -+ (z2)=PCLMUL(x3, y3, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x4, y3, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^704 */\ -- z1=PCLMUL(x3, y4, 1);\ -- z2=PCLMUL(x4, y3, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y4, 1);\ -+ (z2)=PCLMUL(x4, y3, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^768 */\ -- res1=PCLMUL(x4,y4,0);\ -- res2=PRSHIFT64(sum);\ -- z2=PXOR_(res1,res2); -+ (res1)=PCLMUL(x4,y4,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,res2); - - - /* Karatsuba: 2 mul256, 1 mul192, 9 other instructions */ -@@ -3557,16 +3557,16 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL192_WS_GF2X(z5,z6,z7,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- 
z5=PXOR_(z5,z7);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ - \ - PCLMUL256_ADD_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);} - -@@ -3578,17 +3578,17 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL256_WS_GF2X(z5,z6,z7,z8,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ - \ - PCLMUL256_ADD_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);} - -@@ -3600,17 +3600,17 @@ - PCLMUL256_WS_KAR2_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL256_WS_KAR2_GF2X(z5,z6,z7,z8,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ - \ - PCLMUL256_ADD_KAR2_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);} - -@@ -3622,18 +3622,18 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL320_WS_GF2X(z5,z6,z7,z8,z9,x3,x4,x5,y3,y4,y5,sum,res1,res2);\ - \ -- x11m=PXOR_(x1,x3);\ -- x22m=PXOR_(x2,x4);\ -- y11m=PXOR_(y1,y3);\ -- y22m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -- z7=PXOR_(z7,z9);\ -+ (x11m)=PXOR_(x1,x3);\ -+ (x22m)=PXOR_(x2,x4);\ -+ (y11m)=PXOR_(y1,y3);\ -+ (y22m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ -+ (z7)=PXOR_(z7,z9);\ - \ - PCLMUL320_ADD_GF2X(z3,z4,z5,z6,z7,t1,t2,x11m,x22m,x5,y11m,y22m,y5,\ - sum,res1,res2);} -@@ -3683,54 +3683,54 @@ - - /* 1 mul64 */ - #define PCLMUL64_GF2X(C,x,y,z,pos) \ -- z=PCLMUL(x, y, pos);\ -+ (z)=PCLMUL(x, y, pos);\ - PSTORE128(C,z); - - - /* Classical: 4 mul64, 5 other instructions */ - #define PCLMUL128_CLAS_FINAL(FINAL_STORE,C,x,y,z1,z2,sum,res_low,res_high) \ - /* X^0 */\ -- res_low=PCLMUL(x,y,0);\ -+ (res_low)=PCLMUL(x,y,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x, y, 1);\ -- z2=PCLMUL(x, y, 0x10);\ -- res_high=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x, y, 1);\ -+ (z2)=PCLMUL(x, y, 0x10);\ -+ (res_high)=PXOR_(z1,z2);\ - \ - /* mid2_low: x^64 ... x^127 */\ -- sum=PLSHIFT64(res_high);\ -+ (sum)=PLSHIFT64(res_high);\ - /* mid2_low + L */\ -- z1=PXOR_(res_low,sum);\ -+ (z1)=PXOR_(res_low,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- res_low=PCLMUL(x,y,0x11);\ -+ (res_low)=PCLMUL(x,y,0x11);\ - \ - /* mid2_high: x^128 ... 
x^191 */\ -- sum=PRSHIFT64(res_high);\ -+ (sum)=PRSHIFT64(res_high);\ - /* mid2_high + H */\ -- z2=PXOR_(res_low,sum);\ -+ (z2)=PXOR_(res_low,sum);\ - FINAL_STORE; - - - #define PCLMUL96_CLAS_GF2X(C,x,y,z1,z2,sum,res_low,res_high) \ -- PCLMUL128_CLAS_FINAL(PSTOREL(C+2,z2),C,x,y,z1,z2,sum,res_low,res_high) -+ PCLMUL128_CLAS_FINAL(PSTOREL((C)+2,z2),C,x,y,z1,z2,sum,res_low,res_high) - #define PCLMUL128_CLAS_GF2X(C,x,y,z1,z2,sum,res_low,res_high) \ -- PCLMUL128_CLAS_FINAL(PSTORE128(C+2,z2),C,x,y,z1,z2,sum,res_low,res_high) -+ PCLMUL128_CLAS_FINAL(PSTORE128((C)+2,z2),C,x,y,z1,z2,sum,res_low,res_high) - - - /* Karatsuba: 3 mul64, 10 other instructions */ - #define PCLMUL128_KAR_FINAL(FINAL_STORE,C,x,y,z1,z2,sum,res_low,res_high) \ - /* X^0 */\ -- z1=PCLMUL(x,y,0);\ -+ (z1)=PCLMUL(x,y,0);\ - /* X^128 */\ -- z2=PCLMUL(x,y,0x11);\ -+ (z2)=PCLMUL(x,y,0x11);\ - \ -- res_low=PXOR_(x,PRSHIFT64(x));\ -- res_high=PXOR_(y,PRSHIFT64(y));\ -+ (res_low)=PXOR_(x,PRSHIFT64(x));\ -+ (res_high)=PXOR_(y,PRSHIFT64(y));\ - \ - /* X^64 */\ -- sum=PCLMUL(res_low,res_high,0);\ -+ (sum)=PCLMUL(res_low,res_high,0);\ - PXOR1_2(sum,z1);\ - PXOR1_2(sum,z2);\ - \ -@@ -3742,52 +3742,52 @@ - - - #define PCLMUL96_KAR_GF2X(C,x,y,z1,z2,sum,res_low,res_high) \ -- PCLMUL128_KAR_FINAL(PSTOREL(C+2,z2),C,x,y,z1,z2,sum,res_low,res_high) -+ PCLMUL128_KAR_FINAL(PSTOREL((C)+2,z2),C,x,y,z1,z2,sum,res_low,res_high) - #define PCLMUL128_KAR_GF2X(C,x,y,z1,z2,sum,res_low,res_high) \ -- PCLMUL128_KAR_FINAL(PSTORE128(C+2,z2),C,x,y,z1,z2,sum,res_low,res_high) -+ PCLMUL128_KAR_FINAL(PSTORE128((C)+2,z2),C,x,y,z1,z2,sum,res_low,res_high) - - - /* Classical: 9 mul64, 1 PMIDDLE, 9 other instructions */ - #define PCLMUL192_CLAS_FINAL(FINAL_STORE,C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PCLMUL(x1, y2, 1);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PCLMUL(x1, y2, 1);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- res1=PCLMUL(x2,y2,0);\ -- z1=PRSHIFT64(sum);\ -- z2=PXOR_(res1,z1);\ -+ (res1)=PCLMUL(x2,y2,0);\ -+ (z1)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,z1);\ - FINAL_STORE; - - - #define PCLMUL160_CLAS_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL192_CLAS_FINAL(PSTOREL(C+4,z2),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) -+ PCLMUL192_CLAS_FINAL(PSTOREL((C)+4,z2),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) - #define PCLMUL192_CLAS_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL192_CLAS_FINAL(PSTORE128(C+4,z2),C,x1,x2,y1,y2,\ -+ PCLMUL192_CLAS_FINAL(PSTORE128((C)+4,z2),C,x1,x2,y1,y2,\ - z1,z2,sum,res1,res2) - - -@@ -3795,15 +3795,15 @@ - #define PCLMUL192_KAR_FINAL(FINAL_STORE,C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ - {__m128i u31,u333;\ - /* A0*B0 */\ -- z1=PCLMUL(x1,y1,0);\ -+ (z1)=PCLMUL(x1,y1,0);\ - /* 
A1*B1 */\ -- z2=PCLMUL(x1,y1,0x11);\ -+ (z2)=PCLMUL(x1,y1,0x11);\ - /* A2*B2 */\ -- u333=PCLMUL(x2,y2,0);\ -+ (u333)=PCLMUL(x2,y2,0);\ - \ -- res1=PXOR_(z1,z2);\ -- res2=PXOR_(u333,z2);\ -- z2=PXOR_(res1,u333);\ -+ (res1)=PXOR_(z1,z2);\ -+ (res2)=PXOR_(u333,z2);\ -+ (z2)=PXOR_(res1,u333);\ - /* C[0] = C0 - C[1] = C1^(C0^C2) - C[2] = C2^(C1^C3)^C0^C4 -@@ -3811,9 +3811,9 @@ - C[4] = C4^(C5^C3) - C[5] = C5 */\ - /* (A2 A2) */\ -- u31=PSHUFFLE_32_1010(x2);\ -+ (u31)=PSHUFFLE_32_1010(x2);\ - /* (B2 B2) */\ -- sum=PSHUFFLE_32_1010(y2);\ -+ (sum)=PSHUFFLE_32_1010(y2);\ - /* (A2 A2) ^ (A0 A1) */\ - PXOR1_2(u31,x1);\ - /* (B2 B2) ^ (B0 B1) */\ -@@ -3832,77 +3832,77 @@ - \ - PSTORE128(C,z1);\ - FINAL_STORE;\ -- PSTORE128(C+2,z2);} -+ PSTORE128((C)+2,z2);} - - - #define PCLMUL160_KAR_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL192_KAR_FINAL(PSTOREL(C+4,u333),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) -+ PCLMUL192_KAR_FINAL(PSTOREL((C)+4,u333),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) - #define PCLMUL192_KAR_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL192_KAR_FINAL(PSTORE128(C+4,u333),C,x1,x2,y1,y2,\ -+ PCLMUL192_KAR_FINAL(PSTORE128((C)+4,u333),C,x1,x2,y1,y2,\ - z1,z2,sum,res1,res2) - - - /* Classical: 16 mul64, 2 PMIDDLE, 15 other instructions */ - #define PCLMUL256_CLAS_FINAL(FINAL_STORE,C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- z1=PCLMUL(y1, x2, 0x11);\ -- z2=PCLMUL(y2, x2, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0x11);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x11);\ -+ (z2)=PCLMUL(y2, x2, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^320 */\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+4,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+4,z1);\ - \ - /* X^384 */\ -- res1=PCLMUL(x2,y2,0x11);\ -- z1=PRSHIFT64(res2);\ -- z2=PXOR_(res1,z1);\ -+ (res1)=PCLMUL(x2,y2,0x11);\ -+ (z1)=PRSHIFT64(res2);\ -+ (z2)=PXOR_(res1,z1);\ - FINAL_STORE; - - - #define PCLMUL224_CLAS_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL256_CLAS_FINAL(PSTOREL(C+6,z2),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) -+ 
PCLMUL256_CLAS_FINAL(PSTOREL((C)+6,z2),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) - #define PCLMUL256_CLAS_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL256_CLAS_FINAL(PSTORE128(C+6,z2),C,x1,x2,y1,y2,\ -+ PCLMUL256_CLAS_FINAL(PSTORE128((C)+6,z2),C,x1,x2,y1,y2,\ - z1,z2,sum,res1,res2) - - -@@ -3912,25 +3912,25 @@ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL128_WS_GF2X(u43,u44,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(u43,z2);\ -- z2=PXOR_(u43,z1);\ -+ (z2)=PXOR_(u43,z1);\ - PXOR1_2(u43,u44);\ - \ - PCLMUL128_ADD_GF2X(z2,u43,u41,u42,x,y,sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,u43);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,u43);\ - FINAL_STORE;} - - - #define PCLMUL224_KAR_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL256_KAR_FINAL(PSTOREL(C+6,u44),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) -+ PCLMUL256_KAR_FINAL(PSTOREL((C)+6,u44),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) - #define PCLMUL256_KAR_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL256_KAR_FINAL(PSTORE128(C+6,u44),C,x1,x2,y1,y2,z1,z2,\ -+ PCLMUL256_KAR_FINAL(PSTORE128((C)+6,u44),C,x1,x2,y1,y2,z1,z2,\ - sum,res1,res2) - - -@@ -3938,89 +3938,89 @@ - #define PCLMUL320_CLAS_FINAL(FINAL_STORE,C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+4,z1);\ -+ (z1)=PCLMUL(x1, y3, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ 
(z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+4,z1);\ - \ - /* X^384 */\ -- z1=PCLMUL(x2, y3, 0);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 1);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+6,z1);\ -+ (z1)=PCLMUL(x2, y3, 1);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+6,z1);\ - \ - /* X^512 */\ -- res1=PCLMUL(x3,y3,0);\ -- z1=PRSHIFT64(sum);\ -- z2=PXOR_(res1,z1);\ -+ (res1)=PCLMUL(x3,y3,0);\ -+ (z1)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,z1);\ - FINAL_STORE; - - - #define PCLMUL288_CLAS_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL320_CLAS_FINAL(PSTOREL(C+8,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL320_CLAS_FINAL(PSTOREL((C)+8,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - #define PCLMUL320_CLAS_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL320_CLAS_FINAL(PSTORE128(C+8,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL320_CLAS_FINAL(PSTORE128((C)+8,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - - -@@ -4031,11 +4031,11 @@ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL192_WS_GF2X(u53,u54,u55,x2,x3,y2,y3,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x2);\ -- y1m=PXOR_(y1,y2);\ -+ (x1m)=PXOR_(x1,x2);\ -+ (y1m)=PXOR_(y1,y2);\ - \ - PXOR1_2(u53,z2);\ -- z2=PXOR_(u53,z1);\ -+ (z2)=PXOR_(u53,z1);\ - PXOR1_2(u53,u54);\ - PXOR1_2(u54,u55);\ - \ -@@ -4046,17 +4046,17 @@ - PXOR1_2(u54,R3);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,u53);\ -- PSTORE128(C+6,u54);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,u53);\ -+ PSTORE128((C)+6,u54);\ - FINAL_STORE;} - - - #define PCLMUL288_KAR_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL320_KAR_FINAL(PSTOREL(C+8,u55),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL320_KAR_FINAL(PSTOREL((C)+8,u55),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - #define PCLMUL320_KAR_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL320_KAR_FINAL(PSTORE128(C+8,u55),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL320_KAR_FINAL(PSTORE128((C)+8,u55),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - - -@@ -4064,117 +4064,117 @@ - #define PCLMUL384_CLAS_FINAL(FINAL_STORE,C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ 
(z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 0x10);\ -- z2=PCLMUL(x1, y3, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y2, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 1);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+4,z1);\ -+ (z1)=PCLMUL(x1, y3, 0x10);\ -+ (z2)=PCLMUL(x1, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+4,z1);\ - \ - /* X^384 */\ -- z1=PCLMUL(x1, y3, 0x11);\ -- z2=PCLMUL(x2, y3, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x11);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x3, y2, 0);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x3, y1, 0x11);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x1, y3, 0x11);\ -+ (z2)=PCLMUL(x2, y3, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x11);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x3, y2, 0);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x11);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 0x10);\ -- z2=PCLMUL(x2, y3, 1);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x3, y2, 1);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+6,z1);\ -+ (z1)=PCLMUL(x2, y3, 0x10);\ -+ (z2)=PCLMUL(x2, y3, 1);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x3, y2, 1);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+6,z1);\ - \ - /* X^512 */\ -- z1=PCLMUL(x2, y3, 0x11);\ -- z2=PCLMUL(x3, y3, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0x11);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y3, 0x11);\ -+ (z2)=PCLMUL(x3, y3, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^576 */\ -- z1=PCLMUL(x3, y3, 0x10);\ -- z2=PCLMUL(x3, y3, 1);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+8,z1);\ -+ (z1)=PCLMUL(x3, y3, 0x10);\ -+ (z2)=PCLMUL(x3, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+8,z1);\ - \ - /* X^640 */\ -- res1=PCLMUL(x3,y3,0x11);\ -- z1=PRSHIFT64(res2);\ -- z2=PXOR_(res1,z1);\ -+ (res1)=PCLMUL(x3,y3,0x11);\ -+ (z1)=PRSHIFT64(res2);\ -+ (z2)=PXOR_(res1,z1);\ - FINAL_STORE; - - - #define PCLMUL352_CLAS_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL384_CLAS_FINAL(PSTOREL(C+10,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL384_CLAS_FINAL(PSTOREL((C)+10,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - #define 
PCLMUL384_CLAS_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL384_CLAS_FINAL(PSTORE128(C+10,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL384_CLAS_FINAL(PSTORE128((C)+10,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - - -@@ -4184,10 +4184,10 @@ - {__m128i x1m,x2m,y1m,y2m,R1,R2,R3,u63,u64,u65,u66;\ - PCLMUL192_WS_GF2X(z1,z2,u63,x1,x2,y1,y2,sum,res1,res2);\ - \ -- x1m=PMIDDLE(x2,x3);\ -- x2m=PRSHIFT64(x3);\ -- y1m=PMIDDLE(y2,y3);\ -- y2m=PRSHIFT64(y3);\ -+ (x1m)=PMIDDLE(x2,x3);\ -+ (x2m)=PRSHIFT64(x3);\ -+ (y1m)=PMIDDLE(y2,y3);\ -+ (y2m)=PRSHIFT64(y3);\ - \ - PCLMUL192_WS_GF2X(u64,u65,u66,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ -@@ -4207,18 +4207,18 @@ - PXOR1_2(u64,PMIDDLE(R2,R3));\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,u63);\ -- PSTORE128(C+6,u64);\ -- PSTORE128(C+8,u65);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,u63);\ -+ PSTORE128((C)+6,u64);\ -+ PSTORE128((C)+8,u65);\ - FINAL_STORE;} - - - #define PCLMUL352_KAR_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL384_KAR_FINAL(PSTOREL(C+10,u66),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL384_KAR_FINAL(PSTOREL((C)+10,u66),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - #define PCLMUL384_KAR_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL384_KAR_FINAL(PSTORE128(C+10,u66),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL384_KAR_FINAL(PSTORE128((C)+10,u66),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - - -@@ -4226,150 +4226,150 @@ - #define PCLMUL448_CLAS_FINAL(FINAL_STORE,C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,\ - sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 0x10);\ -- z2=PCLMUL(x1, y3, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y2, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 1);\ -- res2=PXOR_(z1,z2);\ --\ -- 
z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+4,z1);\ -+ (z1)=PCLMUL(x1, y3, 0x10);\ -+ (z2)=PCLMUL(x1, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+4,z1);\ - \ - /* X^384 */\ -- z1=PCLMUL(x1, y4, 0);\ -- z2=PCLMUL(x1, y3, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y3, 0);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x3, y1, 0x11);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x4, y1, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x1, y4, 0);\ -+ (z2)=PCLMUL(x1, y3, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x3, y1, 0x11);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x4, y1, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x1, y4, 1);\ -- z2=PCLMUL(x2, y3, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x2, y3, 1);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x3, y2, 1);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x4, y1, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+6,z1);\ -+ (z1)=PCLMUL(x1, y4, 1);\ -+ (z2)=PCLMUL(x2, y3, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x2, y3, 1);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x3, y2, 1);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x4, y1, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+6,z1);\ - \ - /* X^512 */\ -- z1=PCLMUL(x2, y4, 0);\ -- z2=PCLMUL(x2, y3, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y3, 0);\ -- res1=PXOR_(res2,z1);\ -- z2=PCLMUL(x3, y2, 0x11);\ -- res2=PXOR_(res1,z2);\ -- z1=PCLMUL(x4, y2, 0);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y4, 0);\ -+ (z2)=PCLMUL(x2, y3, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z2)=PCLMUL(x3, y2, 0x11);\ -+ (res2)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x4, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^576 */\ -- z1=PCLMUL(x2, y4, 1);\ -- z2=PCLMUL(x3, y3, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y3, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x4, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+8,z1);\ -+ (z1)=PCLMUL(x2, y4, 1);\ -+ (z2)=PCLMUL(x3, y3, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x4, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+8,z1);\ - \ - /* X^640 */\ -- z1=PCLMUL(x3, y4, 0);\ -- z2=PCLMUL(x3, y3, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x4, y3, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x3, y4, 0);\ -+ (z2)=PCLMUL(x3, y3, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x4, y3, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^704 */\ -- z1=PCLMUL(x3, y4, 1);\ -- z2=PCLMUL(x4, y3, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+10,z1);\ -+ (z1)=PCLMUL(x3, y4, 1);\ -+ (z2)=PCLMUL(x4, y3, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ 
(z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+10,z1);\ - \ - /* X^768 */\ -- res1=PCLMUL(x4,y4,0);\ -- res2=PRSHIFT64(sum);\ -- z2=PXOR_(res1,res2);\ -+ (res1)=PCLMUL(x4,y4,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,res2);\ - FINAL_STORE; - - - #define PCLMUL416_CLAS_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL448_CLAS_FINAL(PSTOREL(C+12,z2),C,x1,x2,x3,x4,\ -+ PCLMUL448_CLAS_FINAL(PSTOREL((C)+12,z2),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - #define PCLMUL448_CLAS_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL448_CLAS_FINAL(PSTORE128(C+12,z2),C,x1,x2,x3,x4,\ -+ PCLMUL448_CLAS_FINAL(PSTORE128((C)+12,z2),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - -@@ -4380,34 +4380,34 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL192_WS_GF2X(z5,z6,z7,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ - \ - PCLMUL256_ADD_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,z3);\ -- PSTORE128(C+6,z4);\ -- PSTORE128(C+8,z5);\ -- PSTORE128(C+10,z6);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,z3);\ -+ PSTORE128((C)+6,z4);\ -+ PSTORE128((C)+8,z5);\ -+ PSTORE128((C)+10,z6);\ - FINAL_STORE;} - - - #define PCLMUL416_KAR_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL448_KAR_FINAL(PSTOREL(C+12,z7),C,x1,x2,x3,x4,\ -+ PCLMUL448_KAR_FINAL(PSTOREL((C)+12,z7),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - #define PCLMUL448_KAR_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL448_KAR_FINAL(PSTORE128(C+12,z7),C,x1,x2,x3,x4,\ -+ PCLMUL448_KAR_FINAL(PSTORE128((C)+12,z7),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - -@@ -4418,36 +4418,36 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL256_WS_GF2X(z5,z6,z7,z8,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ - \ - PCLMUL256_ADD_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,z3);\ -- PSTORE128(C+6,z4);\ -- PSTORE128(C+8,z5);\ -- PSTORE128(C+10,z6);\ -- PSTORE128(C+12,z7);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,z3);\ -+ PSTORE128((C)+6,z4);\ -+ PSTORE128((C)+8,z5);\ -+ PSTORE128((C)+10,z6);\ -+ PSTORE128((C)+12,z7);\ - FINAL_STORE;} - - - #define PCLMUL480_KAR_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL512_KAR_FINAL(PSTOREL(C+14,z8),C,x1,x2,x3,x4,\ -+ PCLMUL512_KAR_FINAL(PSTOREL((C)+14,z8),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - #define PCLMUL512_KAR_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL512_KAR_FINAL(PSTORE128(C+14,z8),C,x1,x2,x3,x4,\ -+ PCLMUL512_KAR_FINAL(PSTORE128((C)+14,z8),C,x1,x2,x3,x4,\ - 
y1,y2,y3,y4,z1,z2,sum,res1,res2) - - -@@ -4458,36 +4458,36 @@ - PCLMUL256_WS_KAR2_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL256_WS_KAR2_GF2X(z5,z6,z7,z8,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ - \ - PCLMUL256_ADD_KAR2_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,z3);\ -- PSTORE128(C+6,z4);\ -- PSTORE128(C+8,z5);\ -- PSTORE128(C+10,z6);\ -- PSTORE128(C+12,z7);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,z3);\ -+ PSTORE128((C)+6,z4);\ -+ PSTORE128((C)+8,z5);\ -+ PSTORE128((C)+10,z6);\ -+ PSTORE128((C)+12,z7);\ - FINAL_STORE;} - - - #define PCLMUL480_KAR2_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL512_KAR2_FINAL(PSTOREL(C+14,z8),C,x1,x2,x3,x4,\ -+ PCLMUL512_KAR2_FINAL(PSTOREL((C)+14,z8),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - #define PCLMUL512_KAR2_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL512_KAR2_FINAL(PSTORE128(C+14,z8),C,x1,x2,x3,x4,\ -+ PCLMUL512_KAR2_FINAL(PSTORE128((C)+14,z8),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - -@@ -4498,39 +4498,39 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL320_WS_GF2X(z5,z6,z7,z8,z9,x3,x4,x5,y3,y4,y5,sum,res1,res2);\ - \ -- x11m=PXOR_(x1,x3);\ -- x22m=PXOR_(x2,x4);\ -- y11m=PXOR_(y1,y3);\ -- y22m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -- z7=PXOR_(z7,z9);\ -+ (x11m)=PXOR_(x1,x3);\ -+ (x22m)=PXOR_(x2,x4);\ -+ (y11m)=PXOR_(y1,y3);\ -+ (y22m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ -+ (z7)=PXOR_(z7,z9);\ - \ - PCLMUL320_ADD_GF2X(z3,z4,z5,z6,z7,t1,t2,x11m,x22m,x5,y11m,y22m,y5,\ - sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,z3);\ -- PSTORE128(C+6,z4);\ -- PSTORE128(C+8,z5);\ -- PSTORE128(C+10,z6);\ -- PSTORE128(C+12,z7);\ -- PSTORE128(C+14,z8);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,z3);\ -+ PSTORE128((C)+6,z4);\ -+ PSTORE128((C)+8,z5);\ -+ PSTORE128((C)+10,z6);\ -+ PSTORE128((C)+12,z7);\ -+ PSTORE128((C)+14,z8);\ - FINAL_STORE;} - - - #define PCLMUL544_KAR_GF2X(C,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,z1,z2,sum,res1,res2)\ -- PCLMUL576_KAR_FINAL(PSTOREL(C+16,z9),C,x1,x2,x3,x4,x5,\ -+ PCLMUL576_KAR_FINAL(PSTOREL((C)+16,z9),C,x1,x2,x3,x4,x5,\ - y1,y2,y3,y4,y5,z1,z2,sum,res1,res2) - - #define PCLMUL576_KAR_GF2X(C,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,z1,z2,sum,res1,res2)\ -- PCLMUL576_KAR_FINAL(PSTORE128(C+16,z9),C,x1,x2,x3,x4,x5,\ -+ PCLMUL576_KAR_FINAL(PSTORE128((C)+16,z9),C,x1,x2,x3,x4,x5,\ - y1,y2,y3,y4,y5,z1,z2,sum,res1,res2) - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_parameters_HFE.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_parameters_HFE.h deleted file mode 100644 index 4db7d2f..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_parameters_HFE.h +++ /dev/null @@ -1,13 +0,0 @@ ---- 
upstream/Optimized_Implementation/sign/GeMSS128/include/parameters_HFE.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/parameters_HFE.h -@@ -11,9 +11,6 @@ - #define GFq 2U - #define Log2_q 1 - /* For HFE, the previous parameter is necessarily 2. */ -- -- /** This type stores an element of GF(q). */ -- typedef unsigned char gf2; - #endif - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_rem_gf2n.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_rem_gf2n.h deleted file mode 100644 index 92284d6..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_rem_gf2n.h +++ /dev/null @@ -1,202 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/rem_gf2n.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/rem_gf2n.h -@@ -7,7 +7,6 @@ - #include "gf2x.h" - #include "tools_gf2n.h" - #include "macro.h" --#include "rem_gf2x.h" - #include "rem5_gf2n.h" - - -@@ -36,112 +35,84 @@ - - - /* Automatic choice of REM_GF2N */ -- --#if(NB_WORD_GFqn==1) -- #ifdef __TRINOMIAL_GF2N__ -- /* Example: REM64_TRINOMIAL_GF2X(P,Pol,HFEn,K3mod64,Q,R,MASK_GF2n) */ -- #if(K3==1) -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ --_TRINOMIAL_K31_GF2X(P,Pol,HFEn,K3mod64,Q,R,MASK_GF2n)) -- #else -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ --_TRINOMIAL_GF2X(P,Pol,HFEn,K3mod64,Q,R,MASK_GF2n)) -- #endif -- #endif -- -- #ifdef __PENTANOMIAL_GF2N__ -- #if (HFEn!=64) -- /* Example: REM64_PENTANOMIAL_GF2X(P,Pol,HFEn,K1,K2,K3mod64, -- Q,R,MASK_GF2n) */ -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ --_PENTANOMIAL_GF2X(P,Pol,HFEn,K1,K2,K3mod64,Q,R,MASK_GF2n)) -- #else -- /* HFEn == 64 */ -- #define REM_GF2N(P,Pol,Q,R) \ -- REM64_PENTANOMIAL_K64_GF2X(P,Pol,64,K1,K2,K3mod64,R) -- #endif -- #endif -- --#elif(NB_WORD_GFqn==2) -- #if (HFEn<97) -- #ifdef __TRINOMIAL_GF2N__ -- #define REM_GF2N(P,Pol,Q,R) REM96_TRINOMIAL_GF2X(P,Pol,\ --K3mod64,KI,Q,R,MASK_GF2n) -- #endif -- -- #ifdef __PENTANOMIAL_GF2N__ -- #define REM_GF2N(P,Pol,Q,R) REM96_PENTANOMIAL_GF2X(P,Pol,\ --K1,K2,K3mod64,KI,Q,R,MASK_GF2n) -- #endif -- #else -- #ifdef __TRINOMIAL_GF2N__ -- #define REM_GF2N(P,Pol,Q,R) REM128_TRINOMIAL_GF2X(P,Pol,\ --K3mod64,KI,KI64,Q,R,MASK_GF2n) -- #endif -- -- #ifdef __PENTANOMIAL_GF2N__ -- #if (HFEnr) -- #define REM_GF2N(P,Pol,Q,R) REM128_PENTANOMIAL_GF2X(P,Pol,\ --K1,K2,K3mod64,KI,KI64,Q,R,MASK_GF2n) -- #else -- /* HFEn == 128 */ -- #define REM_GF2N(P,Pol,Q,R) \ -- REM128_PENTANOMIAL_K128_GF2X(P,Pol,K1,K2,K3mod64,R) -- #endif -- #endif -- #endif -- --#else -- #ifdef __TRINOMIAL_GF2N__ -- #if ((HFEn>256)&&(HFEn<289)&&(K3>32)&&(K3<64)) -- #define REM_GF2N(P,Pol,Q,R) REM288_SPECIALIZED_TRINOMIAL_GF2X(P,Pol\ --,K3,KI,KI64,K364,Q,R,MASK_GF2n) -- #elif (HFEn==313) -- #define REM_GF2N(P,Pol,Q,R) REM320_SPECIALIZED_TRINOMIAL_GF2X(\ --P,Pol,K3mod64,KI,KI64,K364,Q,R,MASK_GF2n) -- #elif (HFEn==354) -- #define REM_GF2N(P,Pol,Q,R) REM384_SPECIALIZED_TRINOMIAL_GF2X(\ --P,Pol,K3mod64,KI,KI64,K364,Q,R,MASK_GF2n) -- #elif (HFEn==358) -- #define REM_GF2N(P,Pol,Q,R) REM384_SPECIALIZED358_TRINOMIAL_GF2X(\ --P,Pol,K3mod64,KI,KI64,K364,Q,R,MASK_GF2n) -- #elif (HFEn==402) -- #define REM_GF2N(P,Pol,Q,R) REM402_SPECIALIZED_TRINOMIAL_GF2X(\ --P,Pol,K3mod64,KI,KI64,K364,Q,R,MASK_GF2n) -- #else -- /* Example: REM192_TRINOMIAL_GF2X(P,Pol,K3mod64,KI,KI64,K364mod64, -- Q,R,MASK_GF2n) */ -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ 
--_TRINOMIAL_GF2X(P,Pol,K3mod64,KI,KI64,K364mod64,Q,R,MASK_GF2n)) -- #endif -- #endif -- -- #ifdef __PENTANOMIAL_GF2N__ -- #if ((HFEn==312)&&(K3==128)) -- #define REM_GF2N(P,Pol,Q,R) REM312_PENTANOMIAL_K3_IS_128_GF2X(\ --P,Pol,K1,K2,,KI,KI64,K164,K264,,Q,R,MASK_GF2n) -- #elif ((HFEn==448)&&(K3==64)) -- #define REM_GF2N(P,Pol,Q,R) REM448_PENTANOMIAL_K448_K3_IS_64_GF2X(\ --P,Pol,K1,K2,,K164,K264,,R) -- #elif ((HFEn==544)&&(K3==128)) -- #define REM_GF2N(P,Pol,Q,R) REM544_PENTANOMIAL_K3_IS_128_GF2X(\ --P,Pol,K1,K2,,KI,KI64,K164,K264,,Q,R,MASK_GF2n) -- #elif (HFEnr) -- /* Example: REM192_PENTANOMIAL_GF2X(P,Pol,K1,K2,K3mod64,KI,KI64, -- K164,K264,K364mod64,Q,R,MASK_GF2n) */ -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ --_PENTANOMIAL_GF2X(P,Pol,K1,K2,K3mod64,KI,KI64,K164,K264,K364mod64,Q,R,\ --MASK_GF2n)) -- #else -- /* HFEn == NB_WORD_GFqn*64 */ -- /* Example: REM192_PENTANOMIAL_K192_GF2X(P,Pol,K1,K2,K3mod64,\ -- K164,K264,K364mod64,R) */ -- #define REM_GF2N_TMP CONCAT(CONCAT_NB_BITS_MMUL_SUP(CONCAT(\ --CONCAT_NB_BITS_MMUL_SUP(REM),_PENTANOMIAL_K)),_GF2X) -- #define REM_GF2N(P,Pol,Q,R) \ -- REM_GF2N_TMP(P,Pol,K1,K2,K3mod64,K164,K264,K364mod64,R) -- #endif -- #endif -+#if (HFEn==174||HFEn==175||HFEn==177) -+/* Assumes KI >= K3, which it is for {Blue,Red,}GeMSS128 */ -+#define REM_GF2N(P,Pol,Q,R) \ -+ (Q)[0]=((Pol)[2]>>(KI))^((Pol)[3]<<(KI64));\ -+ (Q)[1]=((Pol)[3]>>(KI))^((Pol)[4]<<(KI64));\ -+ (Q)[2]=((Pol)[4]>>(KI))^((Pol)[5]<<(KI64));\ -+ XOR3(P,Pol,Q);\ -+ (P)[0]^=(Q)[0]<<(K3);\ -+ (P)[1]^=((Q)[0]>>(K364))^((Q)[1]<<(K3));\ -+ (P)[2]^=((Q)[1]>>(K364))^((Q)[2]<<(K3));\ -+ (R)=(Q)[2]>>((KI)-(K3));\ -+ (P)[0]^=(R);\ -+ (P)[0]^=(R)<<(K3);\ -+ (P)[2]&=(MASK_GF2n); -+ -+#elif (HFEn==265||HFEn==266) -+/* Assumes KI < K3, which it is for {Blue,Red,}GeMSS192 */ -+#define REM_GF2N(P,Pol,Q,R)\ -+ (Q)[0]=((Pol)[4]>>(KI))^((Pol)[5]<<(KI64));\ -+ (Q)[1]=((Pol)[5]>>(KI))^((Pol)[6]<<(KI64));\ -+ (Q)[2]=((Pol)[6]>>(KI))^((Pol)[7]<<(KI64));\ -+ (Q)[3]=((Pol)[7]>>(KI))^((Pol)[8]<<(KI64));\ -+ (Q)[4]=((Pol)[8]>>(KI));\ -+ XOR5(P,Pol,Q);\ -+ (P)[0]^=(Q)[0]<<(K3);\ -+ (P)[1]^=((Q)[0]>>(K364))^((Q)[1]<<(K3));\ -+ (P)[2]^=((Q)[1]>>(K364))^((Q)[2]<<(K3));\ -+ (P)[3]^=((Q)[2]>>(K364))^((Q)[3]<<(K3));\ -+ (P)[4]^=((Q)[3]>>(K364))^((Q)[4]<<(K3));\ -+ /* 64-((K364)+(KI)) == ((K3)-(KI)) */\ -+ (R)=((Q)[3]>>((K364)+(KI)))^((Q)[4]<<((K3)-(KI)));\ -+ (P)[0]^=(R);\ -+ (P)[0]^=(R)<<(K3);\ -+ /* This row is the unique difference with REM288_TRINOMIAL_GF2X */\ -+ (P)[1]^=(R)>>(K364);\ -+ (P)[4]&=(MASK_GF2n); -+ -+#elif (HFEn==354) -+#define REM_GF2N(P,Pol,Q,R) \ -+ {uint64_t R2;\ -+ (Q)[0]=((Pol)[5]>>(KI))^((Pol)[6]<<(KI64));\ -+ (Q)[1]=((Pol)[6]>>(KI))^((Pol)[7]<<(KI64));\ -+ (Q)[2]=((Pol)[7]>>(KI))^((Pol)[8]<<(KI64));\ -+ (Q)[3]=((Pol)[8]>>(KI))^((Pol)[9]<<(KI64));\ -+ (Q)[4]=((Pol)[9]>>(KI))^((Pol)[10]<<(KI64));\ -+ (Q)[5]=((Pol)[10]>>(KI))^((Pol)[11]<<(KI64));\ -+ XOR6(P,Pol,Q);\ -+ /* 64-((K364)+(KI)) == ((K3mod64)-(KI)) */\ -+ (R)=((Q)[3]>>((K364)+(KI)))^((Q)[4]<<((K3mod64)-(KI)));\ -+ (P)[0]^=(R);\ -+ (R2)=((Q)[4]>>((K364)+(KI)))^((Q)[5]<<((K3mod64)-(KI)));\ -+ (P)[1]^=(R2);\ -+ (P)[1]^=((R)^(Q)[0])<<(K3mod64);\ -+ (P)[2]^=(((R)^(Q)[0])>>(K364))^((R2^(Q)[1])<<(K3mod64));\ -+ (P)[3]^=((R2^(Q)[1])>>(K364))^((Q)[2]<<(K3mod64));\ -+ (P)[4]^=((Q)[2]>>(K364))^((Q)[3]<<(K3mod64));\ -+ (P)[5]^=(Q)[3]>>(K364);\ -+ (P)[5]&=(MASK_GF2n);} -+ -+#elif (HFEn==358) -+#define REM_GF2N(P,Pol,Q,R) \ -+ (Q)[0]=((Pol)[5]>>(KI))^((Pol)[6]<<(KI64));\ -+ (Q)[1]=((Pol)[6]>>(KI))^((Pol)[7]<<(KI64));\ -+ 
(Q)[2]=((Pol)[7]>>(KI))^((Pol)[8]<<(KI64));\ -+ (Q)[3]=((Pol)[8]>>(KI))^((Pol)[9]<<(KI64));\ -+ (Q)[4]=((Pol)[9]>>(KI))^((Pol)[10]<<(KI64));\ -+ (Q)[5]=((Pol)[10]>>(KI))^((Pol)[11]<<(KI64));\ -+ /* 64-((k364)+(KI)) == ((K3mod64)-(KI)) */\ -+ (R)=((Q)[4]>>((K364)+(KI)))^((Q)[5]<<((K3mod64)-(KI)));\ -+ (Q)[0]^=(R);\ -+ XOR6(P,Pol,Q);\ -+ (P)[0]^=(Q)[0]<<(K3mod64);\ -+ (P)[1]^=((Q)[0]>>(K364))^((Q)[1]<<(K3mod64));\ -+ (P)[2]^=((Q)[1]>>(K364))^((Q)[2]<<(K3mod64));\ -+ (P)[3]^=((Q)[2]>>(K364))^((Q)[3]<<(K3mod64));\ -+ (P)[4]^=((Q)[3]>>(K364))^((Q)[4]<<(K3mod64));\ -+ (P)[5]^=((Q)[4]>>(K364));\ -+ (P)[5]&=(MASK_GF2n); - #endif - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_signHFE.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_signHFE.h deleted file mode 100644 index 5117bb5..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_signHFE.h +++ /dev/null @@ -1,23 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/signHFE.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/signHFE.h -@@ -7,6 +7,7 @@ - #include "gf2nx.h" - #include "config_HFE.h" - #include "matrix_gf2.h" -+#include "sizes_HFE.h" - #include - - -@@ -30,7 +31,10 @@ - #endif - - #if ENABLED_SEED_SK -- UINT *sk_uncomp; -+ UINT sk_uncomp[NB_UINT_HFEVPOLY -+ +(LTRIANGULAR_NV_SIZE<<1) -+ +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t -+ +MATRIXnv_SIZE+MATRIXn_SIZE]; - #endif - } secret_key_HFE; - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_simd_intel.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_simd_intel.h deleted file mode 100644 index 0343356..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_simd_intel.h +++ /dev/null @@ -1,342 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/simd_intel.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/simd_intel.h -@@ -16,7 +16,7 @@ - #define PSHUFFLE_PS(A,B,i) \ - ((__m128i)_mm_shuffle_ps((__m128)(A),(__m128)(B),i)) - -- #define PMOVE_PS(A,B) ((__m128i)_mm_move_ss((__m128)A,(__m128)B)) -+ #define PMOVE_PS(A,B) ((__m128i)_mm_move_ss((__m128)(A),(__m128)(B))) - #endif - - -@@ -200,67 +200,67 @@ - - #define PINIT192(x1,x2,A) \ - PINIT128(x1,A);\ -- PINIT64(x2,A+2); -+ PINIT64(x2,(A)+2); - - #define PINIT256(x1,x2,A) \ - PINIT128(x1,A);\ -- PINIT128(x2,A+2); -+ PINIT128(x2,(A)+2); - - #define PINIT320(x1,x2,x3,A) \ - PINIT256(x1,x2,A);\ -- PINIT64(x3,A+4); -+ PINIT64(x3,(A)+4); - - #define PINIT384(x1,x2,x3,A) \ - PINIT256(x1,x2,A);\ -- PINIT128(x3,A+4); -+ PINIT128(x3,(A)+4); - - #define PINIT448(x1,x2,x3,x4,A) \ - PINIT256(x1,x2,A);\ -- PINIT192(x3,x4,A+4); -+ PINIT192(x3,x4,(A)+4); - - #define PINIT512(x1,x2,x3,x4,A) \ - PINIT256(x1,x2,A);\ -- PINIT256(x3,x4,A+4); -+ PINIT256(x3,x4,(A)+4); - - #define PINIT576(x1,x2,x3,x4,x5,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT64(x5,A+8); -+ PINIT64(x5,(A)+8); - - #define PINIT640(x1,x2,x3,x4,x5,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT128(x5,A+8); -+ PINIT128(x5,(A)+8); - - #define PINIT704(x1,x2,x3,x4,x5,x6,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT192(x5,x6,A+8); -+ PINIT192(x5,x6,(A)+8); - - #define PINIT768(x1,x2,x3,x4,x5,x6,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT256(x5,x6,A+8); -+ PINIT256(x5,x6,(A)+8); - - #define PINIT832(x1,x2,x3,x4,x5,x6,x7,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT320(x5,x6,x7,A+8); -+ PINIT320(x5,x6,x7,(A)+8); - - #define PINIT896(x1,x2,x3,x4,x5,x6,x7,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT384(x5,x6,x7,A+8); -+ 
PINIT384(x5,x6,x7,(A)+8); - - #define PINIT960(x1,x2,x3,x4,x5,x6,x7,x8,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT448(x5,x6,x7,x8,A+8); -+ PINIT448(x5,x6,x7,x8,(A)+8); - - #define PINIT1024(x1,x2,x3,x4,x5,x6,x7,x8,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT512(x5,x6,x7,x8,A+8); -+ PINIT512(x5,x6,x7,x8,(A)+8); - - #define PINIT1088(x1,x2,x3,x4,x5,x6,x7,x8,x9,A) \ - PINIT1024(x1,x2,x3,x4,x5,x6,x7,x8,A);\ -- PINIT64(x9,A+16); -+ PINIT64(x9,(A)+16); - - #define PINIT1152(x1,x2,x3,x4,x5,x6,x7,x8,x9,A) \ - PINIT1024(x1,x2,x3,x4,x5,x6,x7,x8,A);\ -- PINIT128(x9,A+16); -+ PINIT128(x9,(A)+16); - - - /* Store */ -@@ -268,67 +268,67 @@ - - #define PSTORE192(A,x1,x2) \ - PSTORE128(A,x1);\ -- PSTOREL(A+2,x2); -+ PSTOREL((A)+2,x2); - - #define PSTORE256(A,x1,x2) \ - PSTORE128(A,x1);\ -- PSTORE128(A+2,x2); -+ PSTORE128((A)+2,x2); - - #define PSTORE320(A,x1,x2,x3) \ - PSTORE256(A,x1,x2);\ -- PSTOREL(A+4,x3); -+ PSTOREL((A)+4,x3); - - #define PSTORE384(A,x1,x2,x3) \ - PSTORE256(A,x1,x2);\ -- PSTORE128(A+4,x3); -+ PSTORE128((A)+4,x3); - - #define PSTORE448(A,x1,x2,x3,x4) \ - PSTORE256(A,x1,x2);\ -- PSTORE192(A+4,x3,x4); -+ PSTORE192((A)+4,x3,x4); - - #define PSTORE512(A,x1,x2,x3,x4) \ - PSTORE256(A,x1,x2);\ -- PSTORE256(A+4,x3,x4); -+ PSTORE256((A)+4,x3,x4); - - #define PSTORE576(A,x1,x2,x3,x4,x5) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTOREL(A+8,x5); -+ PSTOREL((A)+8,x5); - - #define PSTORE640(A,x1,x2,x3,x4,x5) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE128(A+8,x5); -+ PSTORE128((A)+8,x5); - - #define PSTORE704(A,x1,x2,x3,x4,x5,x6) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE192(A+8,x5,x6); -+ PSTORE192((A)+8,x5,x6); - - #define PSTORE768(A,x1,x2,x3,x4,x5,x6) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE256(A+8,x5,x6); -+ PSTORE256((A)+8,x5,x6); - - #define PSTORE832(A,x1,x2,x3,x4,x5,x6,x7) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE320(A+8,x5,x6,x7); -+ PSTORE320((A)+8,x5,x6,x7); - - #define PSTORE896(A,x1,x2,x3,x4,x5,x6,x7) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE384(A+8,x5,x6,x7); -+ PSTORE384((A)+8,x5,x6,x7); - - #define PSTORE960(A,x1,x2,x3,x4,x5,x6,x7,x8) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE448(A+8,x5,x6,x7,x8); -+ PSTORE448((A)+8,x5,x6,x7,x8); - - #define PSTORE1024(A,x1,x2,x3,x4,x5,x6,x7,x8) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE512(A+8,x5,x6,x7,x8); -+ PSTORE512((A)+8,x5,x6,x7,x8); - - #define PSTORE1088(A,x1,x2,x3,x4,x5,x6,x7,x8,x9) \ - PSTORE1024(A,x1,x2,x3,x4,x5,x6,x7,x8);\ -- PSTOREL(A+16,x9); -+ PSTOREL((A)+16,x9); - - #define PSTORE1152(A,x1,x2,x3,x4,x5,x6,x7,x8,x9) \ - PSTORE1024(A,x1,x2,x3,x4,x5,x6,x7,x8);\ -- PSTORE128(A+16,x9); -+ PSTORE128((A)+16,x9); - - - /* Bitwise operator */ -@@ -566,65 +566,65 @@ - x=PCVT_256(PLOADU(A)); - - #define VPINIT192(x,A) \ -- x=VPINSERT_128(PCVT_256(PLOADU(A)),PLOADL(A+2)); -+ x=VPINSERT_128(PCVT_256(PLOADU(A)),PLOADL((A)+2)); - - #define VPINIT256(x,A) x=VPLOADU(A); - - #define VPINIT320(x1,x2,A) \ - VPINIT256(x1,A);\ -- VPINIT64(x2,A+4); -+ VPINIT64(x2,(A)+4); - - #define VPINIT384(x1,x2,A) \ - VPINIT256(x1,A);\ -- VPINIT128(x2,A+4); -+ VPINIT128(x2,(A)+4); - - #define VPINIT448(x1,x2,A) \ - VPINIT256(x1,A);\ -- VPINIT192(x2,A+4); -+ VPINIT192(x2,(A)+4); - - #define VPINIT512(x1,x2,A) \ - VPINIT256(x1,A);\ -- VPINIT256(x2,A+4); -+ VPINIT256(x2,(A)+4); - - #define VPINIT576(x1,x2,x3,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT64(x3,A+8); -+ VPINIT64(x3,(A)+8); - - #define VPINIT640(x1,x2,x3,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT128(x3,A+8); -+ VPINIT128(x3,(A)+8); - - #define VPINIT704(x1,x2,x3,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT192(x3,A+8); -+ VPINIT192(x3,(A)+8); - - #define 
VPINIT768(x1,x2,x3,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT256(x3,A+8); -+ VPINIT256(x3,(A)+8); - - #define VPINIT832(x1,x2,x3,x4,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT320(x3,x4,A+8); -+ VPINIT320(x3,x4,(A)+8); - - #define VPINIT896(x1,x2,x3,x4,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT384(x3,x4,A+8); -+ VPINIT384(x3,x4,(A)+8); - - #define VPINIT960(x1,x2,x3,x4,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT448(x3,x4,A+8); -+ VPINIT448(x3,x4,(A)+8); - - #define VPINIT1024(x1,x2,x3,x4,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT512(x3,x4,A+8); -+ VPINIT512(x3,x4,(A)+8); - - #define VPINIT1088(x1,x2,x3,x4,x5,A) \ - VPINIT1024(x1,x2,x3,x4,A);\ -- VPINIT64(x5,A+16); -+ VPINIT64(x5,(A)+16); - - #define VPINIT1152(x1,x2,x3,x4,x5,A) \ - VPINIT1024(x1,x2,x3,x4,A);\ -- VPINIT128(x5,A+16); -+ VPINIT128(x5,(A)+16); - - - /* Store */ -@@ -636,65 +636,65 @@ - - #define VPSTORE192(A,x) \ - PSTOREU(A,VPCVT_128(x));\ -- PSTOREL(A+2,VPEXTRACT128(x)); -+ PSTOREL((A)+2,VPEXTRACT128(x)); - - #define VPSTORE256 VPSTOREU - - #define VPSTORE320(A,x1,x2) \ - VPSTORE256(A,x1);\ -- VPSTORE64(A+4,x2); -+ VPSTORE64((A)+4,x2); - - #define VPSTORE384(A,x1,x2) \ - VPSTORE256(A,x1);\ -- VPSTORE128(A+4,x2); -+ VPSTORE128((A)+4,x2); - - #define VPSTORE448(A,x1,x2) \ - VPSTORE256(A,x1);\ -- VPSTORE192(A+4,x2); -+ VPSTORE192((A)+4,x2); - - #define VPSTORE512(A,x1,x2) \ - VPSTORE256(A,x1);\ -- VPSTORE256(A+4,x2); -+ VPSTORE256((A)+4,x2); - - #define VPSTORE576(A,x1,x2,x3) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE64(A+8,x3); -+ VPSTORE64((A)+8,x3); - - #define VPSTORE640(A,x1,x2,x3) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE128(A+8,x3); -+ VPSTORE128((A)+8,x3); - - #define VPSTORE704(A,x1,x2,x3) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE192(A+8,x3); -+ VPSTORE192((A)+8,x3); - - #define VPSTORE768(A,x1,x2,x3) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE256(A+8,x3); -+ VPSTORE256((A)+8,x3); - - #define VPSTORE832(A,x1,x2,x3,x4) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE320(A+8,x3,x4); -+ VPSTORE320((A)+8,x3,x4); - - #define VPSTORE896(A,x1,x2,x3,x4) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE384(A+8,x3,x4); -+ VPSTORE384((A)+8,x3,x4); - - #define VPSTORE960(A,x1,x2,x3,x4) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE448(A+8,x3,x4); -+ VPSTORE448((A)+8,x3,x4); - - #define VPSTORE1024(A,x1,x2,x3,x4) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE512(A+8,x3,x4); -+ VPSTORE512((A)+8,x3,x4); - - #define VPSTORE1088(A,x1,x2,x3,x4,x5) \ - VPSTORE1024(A,x1,x2,x3,x4);\ -- VPSTORE64(A+16,x5); -+ VPSTORE64((A)+16,x5); - - #define VPSTORE1152(A,x1,x2,x3,x4,x5) \ - VPSTORE1024(A,x1,x2,x3,x4);\ -- VPSTORE128(A+16,x5); -+ VPSTORE128((A)+16,x5); - #endif - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2n.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2n.h deleted file mode 100644 index 1292bde..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2n.h +++ /dev/null @@ -1,20 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/sqr_gf2n.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/sqr_gf2n.h -@@ -92,14 +92,8 @@ - - - /* Function sqr in GF(2^x), then modular reduction */ --#define SQR_THEN_REM_GF2N void \ -- PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn]) --#define SQR_NOCST_THEN_REM_GF2N void \ -- PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn]) --SQR_THEN_REM_GF2N; --SQR_NOCST_THEN_REM_GF2N; -+void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]); -+void 
PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]); - #define sqr_then_rem_gf2n PREFIX_NAME(sqr_then_rem_gf2n) - #define sqr_nocst_then_rem_gf2n PREFIX_NAME(sqr_nocst_then_rem_gf2n) - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2x.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2x.h deleted file mode 100644 index ff71b18..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2x.h +++ /dev/null @@ -1,372 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/sqr_gf2x.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/sqr_gf2x.h -@@ -66,7 +66,7 @@ - SQR64_NO_SIMD_GF2X(C,(A)[0]); - - #define SQR128_NO_SIMD_GF2X(C,A) \ -- SQR64_NO_SIMD_GF2X(C+2,(A)[1]);\ -+ SQR64_NO_SIMD_GF2X((C)+2,(A)[1]);\ - SQR64_NO_SIMD_GF2X(C,(A)[0]); - - #define SQR160_NO_SIMD_GF2X(C,A) \ -@@ -74,15 +74,15 @@ - SQR128_NO_SIMD_GF2X(C,A); - - #define SQR192_NO_SIMD_GF2X(C,A) \ -- SQR64_NO_SIMD_GF2X(C+4,(A)[2]);\ -+ SQR64_NO_SIMD_GF2X((C)+4,(A)[2]);\ - SQR128_NO_SIMD_GF2X(C,A); - - #define SQR224_NO_SIMD_GF2X(C,A) \ -- SQR96_NO_SIMD_GF2X(C+4,A+2);\ -+ SQR96_NO_SIMD_GF2X((C)+4,(A)+2);\ - SQR128_NO_SIMD_GF2X(C,A); - - #define SQR256_NO_SIMD_GF2X(C,A) \ -- SQR128_NO_SIMD_GF2X(C+4,A+2);\ -+ SQR128_NO_SIMD_GF2X((C)+4,(A)+2);\ - SQR128_NO_SIMD_GF2X(C,A); - - #define SQR288_NO_SIMD_GF2X(C,A) \ -@@ -90,31 +90,31 @@ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR320_NO_SIMD_GF2X(C,A) \ -- SQR64_NO_SIMD_GF2X(C+8,(A)[4]);\ -+ SQR64_NO_SIMD_GF2X((C)+8,(A)[4]);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR352_NO_SIMD_GF2X(C,A) \ -- SQR96_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR96_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR384_NO_SIMD_GF2X(C,A) \ -- SQR128_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR128_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR416_NO_SIMD_GF2X(C,A) \ -- SQR160_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR160_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR448_NO_SIMD_GF2X(C,A) \ -- SQR192_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR192_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR480_NO_SIMD_GF2X(C,A) \ -- SQR224_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR224_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR512_NO_SIMD_GF2X(C,A) \ -- SQR256_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR256_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR544_NO_SIMD_GF2X(C,A) \ -@@ -122,7 +122,7 @@ - SQR512_NO_SIMD_GF2X(C,A); - - #define SQR576_NO_SIMD_GF2X(C,A) \ -- SQR64_NO_SIMD_GF2X(C+16,(A)[8]);\ -+ SQR64_NO_SIMD_GF2X((C)+16,(A)[8]);\ - SQR512_NO_SIMD_GF2X(C,A); - - -@@ -177,25 +177,25 @@ - - /* 11 instructions */ - #define PSQR64_LO_GF2X(C1,A1,RA,R1) \ -- R1=PUNPACKLO_8(A1,RA);\ -- C1=PAND_(R1,PSET1_8(0xF));\ -- R1=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -- C1=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); -+ (R1)=PUNPACKLO_8(A1,RA);\ -+ (C1)=PAND_(R1,PSET1_8(0xF));\ -+ (R1)=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -+ (C1)=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); - - /* 11 instructions */ - #define PSQR64_HI_GF2X(C1,A1,RA,R1) \ -- R1=PUNPACKHI_8(A1,RA);\ -- C1=PAND_(R1,PSET1_8(0xF));\ -- R1=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -- C1=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); -+ (R1)=PUNPACKHI_8(A1,RA);\ -+ (C1)=PAND_(R1,PSET1_8(0xF));\ -+ (R1)=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -+ (C1)=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); - - /* 12 instructions */ 
- #define PSQR64_GF2X(C1,A1,RA,R1) \ - RA=PSRLI_16(A1,4);\ -- R1=PUNPACKLO_8(A1,RA);\ -- C1=PAND_(R1,PSET1_8(0xF));\ -- R1=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -- C1=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); -+ (R1)=PUNPACKLO_8(A1,RA);\ -+ (C1)=PAND_(R1,PSET1_8(0xF));\ -+ (R1)=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -+ (C1)=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); - - #define PSQR128_GF2X(C1,C2,A1,RA,R1) \ - RA=PSRLI_16(A1,4);\ -@@ -282,53 +282,53 @@ - - - #define PSQR_INIT_SHUFFLE_GF2X(M,T) \ -- M=PSET1_8(0x0F);\ -- T=PSET_64((uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100); -+ (M)=PSET1_8(0x0F);\ -+ (T)=PSET_64((uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100); - - /* 6 instructions */ - #define PSQR64_SHUFFLE_V1_GF2X(E0,A128,M,T) \ - {__m128i C0,C1,D0,D1;\ -- C0=PAND_(A128,M);\ -- C1=PAND_(PSRLI_16(A128,4),M);\ -+ (C0)=PAND_(A128,M);\ -+ (C1)=PAND_(PSRLI_16(A128,4),M);\ - \ -- D0=PSHUFFLE_8(T,C0);\ -- D1=PSHUFFLE_8(T,C1);\ -+ (D0)=PSHUFFLE_8(T,C0);\ -+ (D1)=PSHUFFLE_8(T,C1);\ - \ -- E0=PUNPACKLO_8(D0,D1);} -+ (E0)=PUNPACKLO_8(D0,D1);} - - /* 4 instructions, faster than PSQR64_SHUFFLE_V1_GF2X */ - #define PSQR64_SHUFFLE_GF2X(E0,A128,M,T) \ - {__m128i C0,D0;\ -- E0=PSRLI_16(A128,4);\ -- C0=PUNPACKLO_8(A128,E0);\ -- D0=PAND_(C0,M);\ -- E0=PSHUFFLE_8(T,D0);} -+ (E0)=PSRLI_16(A128,4);\ -+ (C0)=PUNPACKLO_8(A128,E0);\ -+ (D0)=PAND_(C0,M);\ -+ (E0)=PSHUFFLE_8(T,D0);} - - /* 7 instructions */ - #define PSQR128_SHUFFLE_V1_GF2X(E0,E1,A128,M,T) \ - {__m128i C0,C1,D0,D1;\ -- E0=PSRLI_16(A128,4);\ -+ (E0)=PSRLI_16(A128,4);\ - \ -- C0=PUNPACKLO_8(A128,E0);\ -- C1=PUNPACKHI_8(A128,E0);\ -+ (C0)=PUNPACKLO_8(A128,E0);\ -+ (C1)=PUNPACKHI_8(A128,E0);\ - \ -- D0=PAND_(C0,M);\ -- D1=PAND_(C1,M);\ -+ (D0)=PAND_(C0,M);\ -+ (D1)=PAND_(C1,M);\ - \ -- E0=PSHUFFLE_8(T,D0);\ -- E1=PSHUFFLE_8(T,D1);} -+ (E0)=PSHUFFLE_8(T,D0);\ -+ (E1)=PSHUFFLE_8(T,D1);} - - /* 7 instructions, faster than PSQR128_SHUFFLE_V1_GF2X */ - #define PSQR128_SHUFFLE_GF2X(E0,E1,A128,M,T) \ - {__m128i C0,C1,D0,D1;\ -- C0=PAND_(A128,M);\ -- C1=PAND_(PSRLI_16(A128,4),M);\ -+ (C0)=PAND_(A128,M);\ -+ (C1)=PAND_(PSRLI_16(A128,4),M);\ - \ -- D0=PSHUFFLE_8(T,C0);\ -- D1=PSHUFFLE_8(T,C1);\ -+ (D0)=PSHUFFLE_8(T,C0);\ -+ (D1)=PSHUFFLE_8(T,C1);\ - \ -- E0=PUNPACKLO_8(D0,D1);\ -- E1=PUNPACKHI_8(D0,D1);} -+ (E0)=PUNPACKLO_8(D0,D1);\ -+ (E1)=PUNPACKHI_8(D0,D1);} - - /* General macros */ - #define PSQR192_SHUFFLE_GF2X(E1,E2,E3,A1,A2,M,T) \ -@@ -403,58 +403,58 @@ - - - #define VPSQR_INIT_SHUFFLE_GF2X(M,T) \ -- M=VPSET1_8(0x0F);\ -- T=VPSET_64((uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100,\ -+ (M)=VPSET1_8(0x0F);\ -+ (T)=VPSET_64((uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100,\ - (uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100); - - /* 4 instructions */ - #define VPSQR64_SHUFFLE_GF2X(E0,A256,M,T) \ - {__m256i B1,C0,D0;\ -- B1=VPSRLI_16(A256,4);\ -- C0=VPUNPACKLO_8(A256,B1);\ -- D0=VPAND_(C0,M);\ -- E0=VPSHUFFLE_8(T,D0);} -+ (B1)=VPSRLI_16(A256,4);\ -+ (C0)=VPUNPACKLO_8(A256,B1);\ -+ (D0)=VPAND_(C0,M);\ -+ (E0)=VPSHUFFLE_8(T,D0);} - - /* 5 instructions */ - #define VPSQR128_SHUFFLE_GF2X(E0,A256,M,T) \ - {__m256i B0,B1,C0,D0;\ -- B0=VPPERMUTE4x64(A256,0xD8);\ -- B1=VPSRLI_16(B0,4);\ -- C0=VPUNPACKLO_8(B0,B1);\ -- D0=VPAND_(C0,M);\ -- E0=VPSHUFFLE_8(T,D0);} -+ (B0)=VPPERMUTE4x64(A256,0xD8);\ -+ (B1)=VPSRLI_16(B0,4);\ -+ (C0)=VPUNPACKLO_8(B0,B1);\ -+ (D0)=VPAND_(C0,M);\ -+ (E0)=VPSHUFFLE_8(T,D0);} - - /* unpack after */ - /* 9 instructions */ - #define 
VPSQR256_SHUFFLE_V1_GF2X(E0,E1,A256,M,T) \ - {__m256i B0,B1,C0,C1,D0,D1;\ -- B0=VPAND_(A256,M);\ -- B1=VPAND_(VPSRLI_16(A256,4),M);\ -+ (B0)=VPAND_(A256,M);\ -+ (B1)=VPAND_(VPSRLI_16(A256,4),M);\ - \ -- C0=VPSHUFFLE_8(T,B0);\ -- C1=VPSHUFFLE_8(T,B1);\ -+ (C0)=VPSHUFFLE_8(T,B0);\ -+ (C1)=VPSHUFFLE_8(T,B1);\ - \ -- D0=VPUNPACKLO_8(C0,C1);\ -- D1=VPUNPACKHI_8(C0,C1);\ -+ (D0)=VPUNPACKLO_8(C0,C1);\ -+ (D1)=VPUNPACKHI_8(C0,C1);\ - \ -- E0=VPPERMUTE2x128(D0,D1,0x20);\ -- E1=VPPERMUTE2x128(D0,D1,0x31);} -+ (E0)=VPPERMUTE2x128(D0,D1,0x20);\ -+ (E1)=VPPERMUTE2x128(D0,D1,0x31);} - - /* unpack before */ - /* 8 instructions, faster than VPSQR256_SHUFFLE_V1_GF2X */ - #define VPSQR256_SHUFFLE_GF2X(E0,E1,A256,M,T) \ - {__m256i B0,B1,C0,C1,D0,D1;\ -- B0=VPPERMUTE4x64(A256,0xD8);\ -- B1=VPSRLI_16(B0,4);\ -+ (B0)=VPPERMUTE4x64(A256,0xD8);\ -+ (B1)=VPSRLI_16(B0,4);\ - \ -- C0=VPUNPACKLO_8(B0,B1);\ -- C1=VPUNPACKHI_8(B0,B1);\ -+ (C0)=VPUNPACKLO_8(B0,B1);\ -+ (C1)=VPUNPACKHI_8(B0,B1);\ - \ -- D0=VPAND_(C0,M);\ -- D1=VPAND_(C1,M);\ -+ (D0)=VPAND_(C0,M);\ -+ (D1)=VPAND_(C1,M);\ - \ -- E0=VPSHUFFLE_8(T,D0);\ -- E1=VPSHUFFLE_8(T,D1);} -+ (E0)=VPSHUFFLE_8(T,D0);\ -+ (E1)=VPSHUFFLE_8(T,D1);} - - #define VPSQR192_SHUFFLE_GF2X VPSQR256_SHUFFLE_GF2X - -@@ -615,74 +615,74 @@ - /* 2 pclmul */ - #define SQR96_PCLMUL_GF2X(C,x,z) \ - SQR64_PCLMUL_GF2X(C,x,z,0);\ -- SQR64LOW_TAB_PCLMUL_GF2X(C+2,x,17); -+ SQR64LOW_TAB_PCLMUL_GF2X((C)+2,x,17); - - #define SQR128_PCLMUL_GF2X(C,x,z) \ - SQR64_PCLMUL_GF2X(C,x,z,0);\ -- SQR64_PCLMUL_GF2X(C+2,x,z,17); -+ SQR64_PCLMUL_GF2X((C)+2,x,z,17); - - /* 3 pclmul */ - #define SQR160_PCLMUL_GF2X(C,x1,x2,z) \ - SQR128_PCLMUL_GF2X(C,x1,z);\ -- SQR64LOW_TAB_PCLMUL_GF2X(C+4,x2,0); -+ SQR64LOW_TAB_PCLMUL_GF2X((C)+4,x2,0); - - #define SQR192_PCLMUL_GF2X(C,x1,x2,z) \ - SQR128_PCLMUL_GF2X(C,x1,z);\ -- SQR64_PCLMUL_GF2X(C+4,x2,z,0); -+ SQR64_PCLMUL_GF2X((C)+4,x2,z,0); - - /* 4 pclmul */ - #define SQR224_PCLMUL_GF2X(C,x1,x2,z) \ - SQR128_PCLMUL_GF2X(C,x1,z);\ -- SQR96_PCLMUL_GF2X(C+4,x2,z); -+ SQR96_PCLMUL_GF2X((C)+4,x2,z); - - #define SQR256_PCLMUL_GF2X(C,x1,x2,z) \ - SQR128_PCLMUL_GF2X(C,x1,z);\ -- SQR128_PCLMUL_GF2X(C+4,x2,z); -+ SQR128_PCLMUL_GF2X((C)+4,x2,z); - - /* 5 pclmul */ - #define SQR288_PCLMUL_GF2X(C,x1,x2,x3,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR64LOW_TAB_PCLMUL_GF2X(C+8,x3,0); -+ SQR64LOW_TAB_PCLMUL_GF2X((C)+8,x3,0); - - #define SQR320_PCLMUL_GF2X(C,x1,x2,x3,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR64_PCLMUL_GF2X(C+8,x3,z,0); -+ SQR64_PCLMUL_GF2X((C)+8,x3,z,0); - - /* 6 pclmul */ - #define SQR352_PCLMUL_GF2X(C,x1,x2,x3,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR96_PCLMUL_GF2X(C+8,x3,z); -+ SQR96_PCLMUL_GF2X((C)+8,x3,z); - - #define SQR384_PCLMUL_GF2X(C,x1,x2,x3,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR128_PCLMUL_GF2X(C+8,x3,z); -+ SQR128_PCLMUL_GF2X((C)+8,x3,z); - - /* 7 pclmul */ - #define SQR416_PCLMUL_GF2X(C,x1,x2,x3,x4,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR160_PCLMUL_GF2X(C+8,x3,x4,z); -+ SQR160_PCLMUL_GF2X((C)+8,x3,x4,z); - - #define SQR448_PCLMUL_GF2X(C,x1,x2,x3,x4,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR192_PCLMUL_GF2X(C+8,x3,x4,z); -+ SQR192_PCLMUL_GF2X((C)+8,x3,x4,z); - - /* 8 pclmul */ - #define SQR480_PCLMUL_GF2X(C,x1,x2,x3,x4,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR224_PCLMUL_GF2X(C+8,x3,x4,z); -+ SQR224_PCLMUL_GF2X((C)+8,x3,x4,z); - - #define SQR512_PCLMUL_GF2X(C,x1,x2,x3,x4,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR256_PCLMUL_GF2X(C+8,x3,x4,z); -+ SQR256_PCLMUL_GF2X((C)+8,x3,x4,z); - - /* 9 pclmul */ - #define 
SQR544_PCLMUL_GF2X(C,x1,x2,x3,x4,x5,z) \ - SQR512_PCLMUL_GF2X(C,x1,x2,x3,x4,z);\ -- SQR64LOW_TAB_PCLMUL_GF2X(C+16,x5,0); -+ SQR64LOW_TAB_PCLMUL_GF2X((C)+16,x5,0); - - #define SQR576_PCLMUL_GF2X(C,x1,x2,x3,x4,x5,z) \ - SQR512_PCLMUL_GF2X(C,x1,x2,x3,x4,z);\ -- SQR64_PCLMUL_GF2X(C+16,x5,z,0); -+ SQR64_PCLMUL_GF2X((C)+16,x5,z,0); - - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2m.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2m.h deleted file mode 100644 index 9a9fd27..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2m.h +++ /dev/null @@ -1,42 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/tools_gf2m.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/tools_gf2m.h -@@ -25,12 +25,13 @@ - /* The number of word that an element of GF(2^m) needs */ - #if (HFEmr) - #define NB_WORD_GF2m_TMP (HFEmq+1) -+ /* Mask to truncate the last word */ -+ #define MASK_GF2m ((UINT_1<<(HFEmr))-UINT_1) - #else - #define NB_WORD_GF2m_TMP HFEmq -+ #define MASK_GF2m UINT_M1 - #endif - --/* Mask to truncate the last word */ --#define MASK_GF2m maskUINT(HFEmr) - - #define HFEmq8 (HFEm>>3) - #define HFEmr8 (HFEm&7U) -@@ -75,19 +76,18 @@ - - #define isEqual_gf2m(a,b) f_ISEQUAL(a,b,NB_WORD_GF2m) - -+#define set0_gf2m(c) SET0((unsigned char *)(c),8*NB_WORD_GF2m) -+#define xorLoadMask1_gf2m(res,a,b) XORLOADMASK1((unsigned char *)(res),(unsigned char *)(a),b,8*NB_WORD_GF2m) -+ - #if (NB_WORD_GF2m<7) - #define add_gf2m CONCAT(CONCAT_NB_WORD_GF2m_SUP(ADD),_GF2X) - #define add2_gf2m CONCAT(CONCAT_NB_WORD_GF2m_SUP(ADD),_2_GF2X) - #define copy_gf2m CONCAT_NB_WORD_GF2m_SUP(COPY) -- #define set0_gf2m CONCAT_NB_WORD_GF2m_SUP(SET0_) -- #define xorLoadMask1_gf2m CONCAT_NB_WORD_GF2m_SUP(XORLOADMASK1_) - #define dotProduct_gf2_m CONCAT_NB_WORD_GF2m_SUP(DOTPRODUCT) - #else - #define add_gf2m(a,b,c) ADD_GF2X(a,b,c,NB_WORD_GF2m); - #define add2_gf2m(a,b) ADD_2_GF2X(a,b,NB_WORD_GF2m); - #define copy_gf2m(c,a) COPY(c,a,NB_WORD_GF2m) -- #define set0_gf2m(c) SET0(c,NB_WORD_GF2m) -- #define xorLoadMask1_gf2m(res,a,b) XORLOADMASK1(res,a,b,NB_WORD_GF2m) - #define dotProduct_gf2_m(res,a,b) DOTPRODUCT(res,a,b,NB_WORD_GF2m) - #endif - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2n.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2n.h deleted file mode 100644 index 2103631..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2n.h +++ /dev/null @@ -1,52 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/tools_gf2n.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/tools_gf2n.h -@@ -52,13 +52,13 @@ - /* The number of word that an element of GF(2^n) needs */ - #if (HFEnr) - #define NB_WORD_GFqn_TMP (HFEnq+1) -+ /* Mask for arithmetic in GF(2^n) */ -+ #define MASK_GF2n ((UINT_1<<(HFEnr))-UINT_1) - #else - #define NB_WORD_GFqn_TMP HFEnq -+ #define MASK_GF2n UINT_M1 - #endif - --/* Mask for arithmetic in GF(2^n) */ --#define MASK_GF2n maskUINT(HFEnr) -- - #define HFEnr8 (HFEn&7) - #define MASK8_GF2n ((1U<>3) - #define HFEnvr8 (HFEnv&7) - #define MASK8_GF2nv ((1U<>3)+((HFEvr8)?1:0)) - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c deleted file mode 100644 index b74cbfa..0000000 --- 
a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c +++ /dev/null @@ -1,89 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/changeVariablesMQS_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/changeVariablesMQS_gf2.c -@@ -26,16 +26,14 @@ - */ - int PREFIX_NAME(changeVariablesMQS_simd_gf2)(mqsnv_gf2n MQS, cst_GLnv_gf2 S) - { -- UINT tmp[NB_WORD_GFqn]; -- mqsnv_gf2n MQS2, MQS2_cp; -+ UINT tmp[NB_WORD_GFqn]={0}; -+ /* Tmp matrix (n+v)*(n+v) of quadratic terms to compute S*Q */ -+ UINT MQS2[HFEnv*HFEnv*NB_WORD_GFqn]={0}; -+ UINT *MQS2_cp; - cst_mqsnv_gf2n MQS_cpi,MQS_cpj; - cst_GLnv_gf2 S_cpi,S_cpj; - unsigned int i,j; - -- /* Tmp matrix (n+v)*(n+v) of quadratic terms to compute S*Q */ -- MQS2=(UINT*)malloc(HFEnv*HFEnv*NB_WORD_GFqn*sizeof(UINT)); -- VERIFY_ALLOC_RET(MQS2); -- - /* To avoid the constant of MQS */ - MQS+=NB_WORD_GFqn; - -@@ -129,8 +127,6 @@ - S_cpj+=NB_WORD_GF2nv; - } - -- -- free(MQS2); - return 0; - } - -@@ -142,7 +138,7 @@ - - /* Compute a dot product with one word of S */ - #define LOOPKR(START,NB_IT) \ -- for(kr=START;kr -+ hash[i*SIZE_DIGEST_UINT+j])) - { -- j=0; -- while((j -- hash[i*SIZE_DIGEST_UINT+j])) -- { -- ind=i; -- } -+ ind=i; - } -+ } - -- /* We choose the corresponding root */ -- copy_gf2n(root,roots+ind*NB_WORD_GFqn); -- -- free(hash); -- #else -+ /* We choose the corresponding root */ -+ copy_gf2n(root,roots+ind*NB_WORD_GFqn); - -- /* Sort the roots */ -- sort_gf2n(roots,l); -+ free(hash); -+ #else - -- #if FIRST_ROOT -- /* Choose the first root */ -- copy_gf2n(root,roots); -- #elif DETERMINIST_ROOT -- /* Choose a root with a determinist hash */ -- HASH((unsigned char*)hash, -- (unsigned char*)U,NB_BYTES_GFqn); -- copy_gf2n(root,roots+(hash[0]%l)*NB_WORD_GFqn); -- #endif -+ /* Sort the roots */ -+ sort_gf2n(roots,l); -+ -+ #if FIRST_ROOT -+ /* Choose the first root */ -+ copy_gf2n(root,roots); -+ #elif DETERMINIST_ROOT -+ /* Choose a root with a determinist hash */ -+ HASH((unsigned char*)hash, -+ (unsigned char*)U,NB_BYTES_GFqn); -+ copy_gf2n(root,roots+(hash[0]%l)*NB_WORD_GFqn); - #endif -- } -- free(roots); -- return l; -+ #endif - } -+ return l; - #endif - } - #endif - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQS_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQS_gf2.c deleted file mode 100644 index 18970d1..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQS_gf2.c +++ /dev/null @@ -1,40 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/convMQS_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/convMQS_gf2.c -@@ -108,8 +108,7 @@ - unsigned int j; - - #if HFEmr8 -- uint8_t *pk_U=(uint8_t*)malloc(HFEmr8*NB_BYTES_EQUATION -- *sizeof(uint8_t)); -+ uint8_t pk_U[HFEmr8*NB_BYTES_EQUATION]={0}; - - convMQS_one_to_last_mr8_equations_gf2(pk_U,pk); - for(j=0;j>(j*HFENr8c)))<=LOST_BITS;--j,++k) -+ for(j=HFEnv-1;j>=(int)LOST_BITS;--j,++k) - { - pk2[k>>3]^=((pk[nb_bits>>3]>>(nb_bits&7))&ONE8)<<(k&7); - nb_bits+=j; -@@ -135,10 +135,15 @@ - */ - UINT PREFIX_NAME(convMQ_last_uncompressL_gf2)(uint64_t* pk2, const uint8_t* pk) - { -- const uint64_t *pk64; - unsigned int iq,ir,k,nb_bits; -+ uint64_t t1, t2; -+ const uint8_t *pk64 = pk; -+ #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) -+ const uint8_t *pk_end; -+ uint64_t end; -+ unsigned int l; -+ #endif - -- pk64=(uint64_t*)pk; - - nb_bits=1; - /* For each row */ -@@ -150,29 +155,34 @@ - { - for(k=0;k>(nb_bits&63)) -- 
^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>(nb_bits&63))^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63))^(t2<<(64-(nb_bits&63))); - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } -- pk64+=iq+1; -+ pk64+=8*(iq+1); - pk2+=iq+1; - nb_bits+=(iq+1)<<6; - } -@@ -205,29 +218,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>3)&7) -- uint8_t *pk_end; -- uint64_t end; -- unsigned int l; -- #endif -- - #if LAST_ROW_R -- ir=LAST_ROW_R; - if(nb_bits&63) - { - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) -@@ -257,15 +268,18 @@ - - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - - #if (NB_WHOLE_BLOCKS>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -274,12 +288,13 @@ - pk2[k]^=end<<(64-(nb_bits&63)); - pk2[k+1]=end>>(nb_bits&63); - #else -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - -- if(((nb_bits&63)+ir)>64) -+ if(((nb_bits&63)+LAST_ROW_R)>64) - { - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -290,14 +305,18 @@ - #else - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -- if(((nb_bits&63)+ir)>64) -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); -+ if(((nb_bits&63)+LAST_ROW_R)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - #endif - } else -@@ -305,11 +324,12 @@ - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) - for(k=0;k>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -318,7 +338,8 @@ - #else - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - #endif - } -@@ -328,13 +349,16 
@@ - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) - for(k=0;k<(iq-1);++k) - { -- pk2[k]=(pk64[k]>>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -343,15 +367,18 @@ - #else - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - #endif - } else - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>(nb_bits&63))^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63))^(t2<<(64-(nb_bits&63))); - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } -- pk64+=iq+1; -+ pk64+=8*(iq+1); - pk2+=iq+1; - nb_bits+=(iq+1)<<6; - } -@@ -450,29 +486,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>ir)&1);\ -+ pivot=1+~((((*S_cpj)>>ir)&1));\ - LOOPK; - - -@@ -44,7 +44,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=1;\ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)});\ - \ - /* Here, the pivot is 1 if S is invertible */\ -@@ -158,7 +163,7 @@ - algorithm. 
*/\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)});\ - \ - /* Next row */\ -@@ -214,7 +219,7 @@ - det_i&=(*S_cpi)>>ir; - #endif - -- return det_i; -+ return (gf2) det_i; - } - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantnv_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantnv_gf2.c deleted file mode 100644 index 00c7462..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantnv_gf2.c +++ /dev/null @@ -1,66 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/determinantnv_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/determinantnv_gf2.c -@@ -26,7 +26,7 @@ - - #define ADDROW(LOOPK) \ - /* pivot */\ -- pivot=-(((*S_cpj)>>ir)&1);\ -+ pivot=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK; - - -@@ -44,7 +44,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=1;\ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)});\ - \ - /* Here, the pivot is 1 if S is invertible */\ -@@ -158,7 +163,7 @@ - algorithm. */\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)});\ - \ - /* Next row */\ -@@ -214,7 +219,7 @@ - det_i&=(*S_cpi)>>ir; - #endif - -- return det_i; -+ return (gf2) det_i; - } - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_div_gf2nx.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_div_gf2nx.c deleted file mode 100644 index e52f791..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_div_gf2nx.c +++ /dev/null @@ -1,62 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/div_gf2nx.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/div_gf2nx.c -@@ -120,8 +120,11 @@ - } - - leading_coef=A+da*NB_WORD_GFqn; -- i=(db<<1)-da; -- i=MAXI(0,(int)i); -+ i=0; -+ if(2*db > da) -+ { -+ i = 2*db - da; -+ } - res=A+(da-db+i)*NB_WORD_GFqn; - - for(;i>=1;\ - }\ - }\ -- for(;jr>=1;\ - } - -- -- - #if (LEN_UNROLLED_64==1) - #define LOOPJR_UNROLLED_64 LOOPJR_NOCST_64 - #else - - #define LOOPJR_UNROLLED_64(START,NB_IT) \ -- for(jr=START;jr<(NB_IT-LEN_UNROLLED_64+1);jr+=LEN_UNROLLED_64)\ -+ for(jr=(START);jr<((NB_IT)-LEN_UNROLLED_64+1);jr+=LEN_UNROLLED_64)\ - {\ - for(h=0;h>=1;\ - }\ - }\ -- for(;jr>=1;\ -@@ -172,7 +150,7 @@ - #endif - - /* Constant cst_pk */ -- COPY_64bits_variables(c,(const UINT*)pk); -+ LOAD_UINT_ARRAY(c, pk, NB_WORD_EQ) - pk+=NB_BYTES_EQ; - - /* for each row of the quadratic matrix of pk, excepted the last block */ -@@ -186,7 +164,7 @@ - /* for each column of the quadratic matrix of pk */ - - /* xj=xi=1 */ -- XOR_ELEM(c,(const UINT*)pk); -+ XOR_ELEM(c,pk); - pk+=NB_BYTES_EQ; - - xj=xi>>1; -@@ -222,7 +200,7 @@ - /* for each column of the quadratic matrix of pk */ - - /* xj=xi=1 */ -- XOR_ELEM(c,(const UINT*)pk); -+ XOR_ELEM(c,pk); - pk+=NB_BYTES_EQ; - - xj=xi>>1; - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c deleted file mode 100644 index d70638a..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c +++ /dev/null @@ -1,68 +0,0 @@ ---- 
upstream/Optimized_Implementation/sign/GeMSS128/src/evalMQSv_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/evalMQSv_gf2.c -@@ -109,18 +109,6 @@ - /**************************************************************************/ - - --#if NB_VARr -- #define REM_X \ -- xi=m[i];\ -- for(j=0;j>j)&UINT_1);\ -- } --#else -- #define REM_X --#endif -- -- - - - /* Input: -@@ -139,16 +127,18 @@ - unsigned int i,j,k; - - /* Compute one time all -((xi>>1)&UINT_1) */ -+ i=0; - k=0; -- for(i=0;i>j)&UINT_1); -+ x[k]=(1+~((xi>>j)&UINT_1)); -+ ++k; - } -+ ++i; - } -- REM_X; - - /* Constant cst_pk */ - COPY_64bits_variables(c,pk); -@@ -187,16 +177,18 @@ - unsigned int i,j,k; - - /* Compute one time all -((xi>>1)&UINT_1) */ -+ i=0; - k=0; -- for(i=0;i>j)&UINT_1); -+ x[k]=(1+~((xi>>j)&UINT_1)); -+ ++k; - } -+ ++i; - } -- REM_X; - - /* Constant cst_pk */ - COPY_64bits_variables(c,pk); - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c deleted file mode 100644 index ef785f0..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c +++ /dev/null @@ -1,74 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/findRootsSplit_gf2nx.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/findRootsSplit_gf2nx.c -@@ -41,7 +41,15 @@ - i=1; - /* (2^i) < deg does not require modular reduction by f */ - #if(HFEn<33) -- const unsigned int min=(deg<(1U< HFE_odd_degree) j=1; -+ #endif -+ /* Here a_vec = row i */ - a_vecj=alpha_vec+j*(HFEn-1)*NB_WORD_GFqn; - for(;j HFE_odd_degree) j=1; - #endif -+ /* Here a_vec = row i */ -+ a_vecj=alpha_vec+j*(HFEn-1)*NB_WORD_GFqn; -+ #if HFEDegJ -+ for(; j=HFEDegI)) -- for(j=0;j=HFEDegI)) -+ for(j=0;j2) - /* Vector with linear terms of F */ -- UINT* F_lin; -- -- F_lin=(UINT*)calloc((HFEDegI+1)*(HFEv+1)*NB_WORD_GFqn,sizeof(UINT)); -- VERIFY_ALLOC_RET(F_lin); -+ UINT F_lin[(HFEDegI+1)*(HFEv+1)*NB_WORD_GFqn]={0}; - - F_cp=F+MQv_GFqn_SIZE; - -@@ -828,13 +799,10 @@ - #if PRECOMPUTED_CBASIS - static cst_vec_gf2n alpha_vec=cbasis_v; - #else -- vec_gf2n alpha_vec; -- - /* Matrix in GF(2^n) with HFEn-1 rows and (HFEDegI+1) columns */ - /* calloc is useful when it initialises a multiple precision element - to 1 */ -- alpha_vec=(UINT*)calloc(SIZE_ROW*(HFEn-1)*NB_WORD_GFqn,sizeof(UINT)); -- VERIFY_ALLOC_RET(alpha_vec); -+ UINT alpha_vec[SIZE_ROW*(HFEn-1)*NB_WORD_GFqn]={0}; - - genCanonicalBasisVertical_gf2n(alpha_vec); - #endif -@@ -848,10 +816,7 @@ - - /* Precompute an other table */ - #if(HFEDeg>2) -- UINT* buf; -- buf=(UINT*)calloc(HFEDegI*HFEn*NB_WORD_GFqn,sizeof(UINT)); -- VERIFY_ALLOC_RET(buf); -- -+ UINT buf[HFEDegI*HFEn*NB_WORD_GFqn]={0}; - special_buffer(buf,F,alpha_vec); - #endif - -@@ -1045,12 +1010,6 @@ - /* k becomes k+1 */ - a_vec_k+=SIZE_ROW*NB_WORD_GFqn; - } -- free(buf); -- free(F_lin); -- #endif -- -- #if (!PRECOMPUTED_CBASIS) -- free(alpha_vec); - #endif - - /* MQS with v vinegar variables */ - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c deleted file mode 100644 index c5078f8..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c +++ /dev/null @@ -1,140 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/initMatrixId_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/initMatrixId_gf2.c -@@ -1,59 
+1,93 @@ - #include "initMatrixId_gf2.h" - -- --#define INITMATID_GF2(NAME,MATRIX_SIZE,nq,nr,NB_WORD_n) \ --void NAME(Mn_gf2 S)\ --{\ -- UINT bit_ir;\ -- unsigned int iq,ir;\ --\ -- /* Initialize to 0 */\ -- for(iq=0;iq>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK;\ - LOOPKINV; - -@@ -49,7 +49,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=UINT_1;\ -- for(ir=0;ir>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask);\ - }\ - \ -@@ -133,7 +133,6 @@ - LOOPIR(HFEnr-1,SWAP_WORD(*S_cpj,*S_cpi),*S_cpj^=*S_cpi&mask); - - /* Step 2 */ -- bit_ir=UINT_1<<(HFEnr-1); - LOOPIR_DOWN_TO_UP(HFEnr); - #else - /* To begin to last row */ -@@ -178,16 +177,16 @@ - } - - #define LOOPIR_CST(NB_IT) \ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)\ - xorLoadMask1_gf2n(Sinv_cpi,Sinv_cpj,mask);\ - });\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)\ - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask);\ - });\ -@@ -314,7 +313,7 @@ - Sinv_cpj+=NB_WORD_GFqn; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -343,7 +342,7 @@ - Sinv_cpj+=NB_WORD_GFqn; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -381,7 +380,7 @@ - for(j=0;j>6])>>(j&63U))&1U); -+ mask=(1+~(((U[j>>6])>>(j&63U))&1U)); - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask); - - /* next row */ - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c deleted file mode 100644 index 5fe3bf2..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c +++ /dev/null @@ -1,94 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/invMatrixnv_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/invMatrixnv_gf2.c -@@ -28,7 +28,7 @@ - - #define ADDROW(LOOPK,LOOPKINV) \ - /* pivot */\ -- mask=-(((*S_cpj)>>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK;\ - LOOPKINV; - -@@ -49,7 +49,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=UINT_1;\ -- for(ir=0;ir>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask);\ - }\ - \ -@@ -133,7 +133,6 @@ - LOOPIR(HFEnvr-1,SWAP_WORD(*S_cpj,*S_cpi),*S_cpj^=*S_cpi&mask); - - /* Step 2 */ -- bit_ir=UINT_1<<(HFEnvr-1); - LOOPIR_DOWN_TO_UP(HFEnvr); - #else - /* To begin to last row */ -@@ -178,16 +177,16 @@ - } - - #define LOOPIR_CST(NB_IT) \ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)\ - xorLoadMask1_gf2nv(Sinv_cpi,Sinv_cpj,mask);\ - });\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)\ - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask);\ - });\ -@@ -315,7 +314,7 @@ - Sinv_cpj+=NB_WORD_GF2nv; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -344,7 +343,7 @@ - Sinv_cpj+=NB_WORD_GF2nv; - L_cpj+=(j>>6)+1; 
- -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -382,7 +381,7 @@ - for(j=0;j>6])>>(j&63U))&1U); -+ mask=(1+~(((U[j>>6])>>(j&63U))&1U)); - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask); - - /* next row */ - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_inv_gf2n.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_inv_gf2n.c deleted file mode 100644 index bac490f..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_inv_gf2n.c +++ /dev/null @@ -1,28 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/inv_gf2n.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/inv_gf2n.c -@@ -3,7 +3,6 @@ - #include "add_gf2n.h" - #include "mul_gf2n.h" - #include "sqr_gf2n.h" --#include "rem_gf2x.h" - #include "rem_gf2n.h" - #include "ITMIA_addchains.h" - -@@ -222,7 +221,7 @@ - /* res=res * res^(2^((HFEn-1)>>i)) */\ - /* res^(2^((HFEn-1)>>i)) */\ - PINIT_GF2N_(r_128_copy,r2_128_copy,r3_128_copy,r4_128_copy,r5_128_copy,r);\ -- for(j=0;j>(i<<3); -+ pk[i]=(uint8_t)(pk_last[NB_WORD_GF2m-1]>>(i<<3)); - } - #endif - } - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2n.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2n.c deleted file mode 100644 index be95968..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2n.c +++ /dev/null @@ -1,213 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/mul_gf2n.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/mul_gf2n.c -@@ -28,7 +28,7 @@ - for(i=0;i>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A< -+#include - #include - - #include "sign_keypairHFE.h" -@@ -15,11 +14,7 @@ - * @param[out] sk The secret-key. - * @return Zero if the function runs correctly, non-zero else. - */ --#if SUPERCOP --int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) --#else --int PREFIX_NAME(crypto_sign_keypair)(unsigned char *pk, unsigned char *sk) --#endif -+int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) - { - return sign_keypairHFE(pk,(UINT*)sk); - } -@@ -34,18 +29,14 @@ - * @param[in] sk The secret-key. - * @return Zero if the function runs correctly, non-zero else. - */ --#if SUPERCOP - int crypto_sign( --#else --int PREFIX_NAME(crypto_sign)( --#endif -- unsigned char *sm, unsigned long long *smlen, -- const unsigned char *m, unsigned long long mlen, -- const unsigned char *sk) -+ uint8_t *sm, size_t *smlen, -+ const uint8_t *m, size_t mlen, -+ const uint8_t *sk) - { - *smlen=mlen+CRYPTO_BYTES; -- memcpy(sm+CRYPTO_BYTES,m,(size_t)mlen); -- return signHFE(sm,m,(size_t)mlen,(UINT*)sk); -+ memmove(sm+CRYPTO_BYTES,m,mlen); -+ return signHFE(sm,m,mlen,(UINT*)sk); - } - - -@@ -58,21 +49,35 @@ - * @param[in] pk The public-key. - * @return Zero if the function runs correctly, non-zero else. 
- */ --#if SUPERCOP - int crypto_sign_open( --#else --int PREFIX_NAME(crypto_sign_open)( --#endif -- unsigned char *m, unsigned long long *mlen, -- const unsigned char *sm, unsigned long long smlen, -- const unsigned char *pk) -+ uint8_t *m, size_t *mlen, -+ const uint8_t *sm, size_t smlen, -+ const uint8_t *pk) - { - int result; - *mlen=smlen-CRYPTO_BYTES; - result=sign_openHFE(sm+CRYPTO_BYTES,(size_t)(*mlen),sm,pk); - /* For compatibily with SUPERCOP, the memcpy is done only after sign_open */ -- memcpy(m,sm+CRYPTO_BYTES,(size_t)(*mlen)); -+ memmove(m,sm+CRYPTO_BYTES,(size_t)(*mlen)); - return result; - } - - -+int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk) -+{ -+ int result; -+ *siglen = CRYPTO_BYTES; -+ result = signHFE(sig,m,mlen,(UINT*)sk); -+ return result; -+} -+ -+int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) -+{ -+ int result; -+ if (siglen == CRYPTO_BYTES) { -+ result = sign_openHFE(m,mlen,sig,pk); -+ } else { -+ result = -1; -+ } -+ return result; -+} - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_signHFE.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_signHFE.c deleted file mode 100644 index 6b3611c..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_signHFE.c +++ /dev/null @@ -1,220 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/signHFE.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/signHFE.c -@@ -83,8 +83,10 @@ - for(k1=1;k1sk_uncomp,UINT*,NB_UINT_HFEVPOLY -- +(LTRIANGULAR_NV_SIZE<<1) -- +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t -- +MATRIXnv_SIZE+MATRIXn_SIZE,sizeof(UINT)); - expandSeed((uint8_t*)(sk_HFE->sk_uncomp),(NB_UINT_HFEVPOLY - +(LTRIANGULAR_NV_SIZE<<1) - +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t)<<3, -@@ -313,7 +311,8 @@ - { - #if HFEv - cst_sparse_monic_gf2nx F_HFEv; -- UINT* F; -+ sparse_monic_gf2nx F; -+ sparse_monic_gf2nx F_cp; - unsigned int i; - #endif - -@@ -333,13 +332,10 @@ - #endif - - #if HFEv -+ F=sk_HFE->F_struct.poly; - F_HFEv=sk_HFE->F_HFEv; - -- ALIGNED_GFqn_MALLOC(F,UINT*,NB_UINT_HFEPOLY,sizeof(UINT)); -- VERIFY_ALLOC_RET(F); -- - #if (HFEDeg>1) -- UINT *F_cp; - unsigned int j; - - /* X^(2^0) */ -@@ -351,11 +347,11 @@ - { - /* Copy i quadratic terms */ - -+ j=0; - #if ENABLED_REMOVE_ODD_DEGREE -- for(j=(((1U<HFE_odd_degree) ++j; - #endif -+ for(;jHFE_odd_degree) ++j; - #endif -+ for(;jF_struct.poly=F; - #else - sk_HFE->F_struct.poly=sk_HFE->F_HFEv; - #endif -@@ -447,7 +442,7 @@ - #endif - - int nb_root; -- secret_key_HFE sk_HFE; -+ secret_key_HFE sk_HFE={0}; - - #if(HFEv) - UINT* F; -@@ -666,9 +661,6 @@ - if(nb_root<0) - { - /* Error from chooseRootHFE */ -- #if HFEv -- ALIGNED_GFqn_FREE(F); -- #endif - return nb_root; - } - -@@ -677,7 +669,7 @@ - /* Add the v bits to DR */ - #if HFEnr - DR[NB_WORD_GFqn-1]^=V[0]<>(64-HFEnr))^(V[i+1]<>(64-HFEnr); - #endif - #else -- for(i=0;i>(64-HFEnr))^(V[i+1]<>(64-HFEnr); - #endif - #else -- for(i=0;i1)) - uint64_t val; -@@ -578,10 +576,6 @@ - #endif - - -- #if (FORMAT_HYBRID_CPK8&&EVAL_HYBRID_CPK8_UNCOMP&&HFEmr8) -- free(pk_tmp); -- #endif -- - return ret; - } - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sort_gf2n.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sort_gf2n.c deleted file mode 100644 index 7337b7c..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sort_gf2n.c +++ /dev/null @@ -1,37 +0,0 @@ ---- 
upstream/Optimized_Implementation/sign/GeMSS128/src/sort_gf2n.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/sort_gf2n.c -@@ -111,7 +111,7 @@ - - /* The power of 2 before l, which is 1<>=1;\ -@@ -22,11 +22,11 @@ - - /* for a block of bits of vec */ - #define LOOPIR_N(NB_IT) \ -- for(ir=0;ir>6;iq>ir;\ -- LOOPIR_START(NB_BITS_UINT);\ -- ir=0;\ -- }\ -- /* the last block */\ -- REM_START(LOOPIR_START);\ -+void PREFIX_NAME(vecMatProductv_64)(vecn_gf2 res, cst_vecn_gf2 vec, cst_Mn_gf2 S) { -+ cst_Mn_gf2 S_cp; -+ UINT bit_ir, vec_ir; -+ unsigned int iq,ir; -+ -+ /* initialization of res to 0 */ -+ set0_gf2n(res); -+ -+ S_cp=S; -+ /* for each bit of vec excepted the last block */ -+ iq = 0; -+ ir = 0; -+ while(NB_BITS_UINT*iq + ir < HFEv) -+ { -+ bit_ir = vec[iq]; -+ for(ir=0;(NB_BITS_UINT*iq + ir < HFEv) && (ir>=1; -+ } -+ ++iq; -+ } -+} -+ -+void PREFIX_NAME(vecMatProductn_64)(vecn_gf2 res, cst_vecn_gf2 vec, cst_Mn_gf2 S) { -+ cst_Mn_gf2 S_cp; -+ UINT bit_ir, vec_ir; -+ unsigned int iq,ir; -+ -+ /* initialization of res to 0 */ -+ set0_gf2n(res); -+ -+ S_cp=S; -+ /* for each bit of vec excepted the last block */ -+ for(iq=0;iq>6;iq>ir; -+ LOOPIR_START_N(NB_BITS_UINT); -+ ir=0; -+ } -+ /* the last block */ -+ REM_START_NV(LOOPIR_START_N); - } - - --VECMATPROD_START(PREFIX_NAME(vecMatProductnvn_start_64),set0_gf2n, -- LOOPIR_START_N,REM_START_NV,HFEnvq) --/* --VECMATPROD_START(PREFIX_NAME(vecMatProductn_start_64),set0_gf2n, -- LOOPIR_START_N,REM_START_N,HFEnq) --*/ - - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_add_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_add_gf2nx.h deleted file mode 100644 index 64621a5..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_add_gf2nx.h +++ /dev/null @@ -1,30 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/add_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/add_gf2nx.h -@@ -14,7 +14,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define add2_gf2nx(res,A,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]^=(A)[i];\ - } -@@ -30,7 +30,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define copy_gf2nx(res,A,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]=(A)[i];\ - } -@@ -45,7 +45,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define set0_gf2nx(res,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]=0;\ - } - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_arch.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_arch.h deleted file mode 100644 index 915c6ad..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_arch.h +++ /dev/null @@ -1,131 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/arch.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/arch.h -@@ -26,18 +26,6 @@ - #define ZERO8 ((uint8_t)0) - #define ONE8 ((uint8_t)1) - --/* 0x... */ --#define PRINT_X64(a) printf("0x%"PRIx64,a); --#define PRINT_X32(a) printf("0x%"PRIx32,a); --#define PRINT_X16(a) printf("0x%"PRIx16,a); --#define PRINT_X8(a) printf("0x%"PRIx8,a); --/* ... 
*/ --#define PRINT_U64(a) printf("%"PRIx64,a); --#define PRINT_U32(a) printf("%"PRIx32,a); --#define PRINT_U16(a) printf("%"PRIx16,a); --#define PRINT_U8(a) printf("%"PRIx8,a); -- -- - /****************** Definition of an UINT ******************/ - - /* XXX For the moment, this parameter cannot be modified. XXX */ -@@ -81,9 +69,6 @@ - /** Print an UINT. */ - #define PRINT_UINT(a) CONCAT(PRINT_X,NB_BITS_UINT)(a); - -- -- -- - /** A reserved variable to do a for loop on a buffer of UINT. */ - #define RESERVED_VARIABLE reserved_variable - -@@ -103,6 +88,35 @@ - } \ - } - -+/** Load a UINT from unsigned char * **/ -+ -+#define LOAD_UINT(a, p) \ -+ (a) = (p)[7]; (a) <<= 8;\ -+ (a) |= (p)[6]; (a) <<= 8;\ -+ (a) |= (p)[5]; (a) <<= 8;\ -+ (a) |= (p)[4]; (a) <<= 8;\ -+ (a) |= (p)[3]; (a) <<= 8;\ -+ (a) |= (p)[2]; (a) <<= 8;\ -+ (a) |= (p)[1]; (a) <<= 8;\ -+ (a) |= (p)[0]; -+ -+#define LOAD_UINT_ARRAY(a, p, N) \ -+ FOR_LOOP(LOAD_UINT((a)[RESERVED_VARIABLE], &(p)[8*RESERVED_VARIABLE]), (N)) -+ -+/** Store a UINT to an unsigned char * **/ -+#define STORE_UINT(p, a) \ -+ (p)[0] = ((a) >> 0x00) & 0xff; \ -+ (p)[1] = ((a) >> 0x08) & 0xff; \ -+ (p)[2] = ((a) >> 0x10) & 0xff; \ -+ (p)[3] = ((a) >> 0x18) & 0xff; \ -+ (p)[4] = ((a) >> 0x20) & 0xff; \ -+ (p)[5] = ((a) >> 0x28) & 0xff; \ -+ (p)[6] = ((a) >> 0x30) & 0xff; \ -+ (p)[7] = ((a) >> 0x38) & 0xff; -+ -+#define STORE_UINT_ARRAY(a, p, N) \ -+ FOR_LOOP(STORE_UINT(&(p)[8*RESERVED_VARIABLE], (a)[RESERVED_VARIABLE]), (N)) -+ - - - /****************** C++ compatibility ******************/ -@@ -226,7 +240,7 @@ - /* Only when set to 1, the installation of gf2x is required */ - /* Be careful because this library can be in variable-time and so vulnerable - to the timing attacks. */ --#define ENABLED_GF2X 1 -+#define ENABLED_GF2X 0 - - - /********************* Allocation *********************/ -@@ -237,7 +251,7 @@ - /** Verify if the allocation by malloc or calloc succeeds. - * Exit in the failure case. */ - #define VERIFY_ALLOC(p) \ -- if(!p) \ -+ if(!(p)) \ - {\ - exit(ERROR_ALLOC);\ - } -@@ -245,7 +259,7 @@ - /** Verify if the allocation by malloc or calloc succeeds. - * Return ERROR_ALLOC in the failure case. */ - #define VERIFY_ALLOC_RET(p) \ -- if(!p) \ -+ if(!(p)) \ - {\ - return(ERROR_ALLOC);\ - } -@@ -272,7 +286,7 @@ - p=(type)_mm_malloc((nmemb)*(size),16); - #else - #define ALIGNED16_MALLOC(p,type,nmemb,size) \ -- if(posix_memalign((void**)(&p),16,(nmemb)*(size)))\ -+ if(posix_memalign((void**)(&(p)),16,(nmemb)*(size)))\ - {\ - exit(1);\ - } -@@ -280,7 +294,7 @@ - - #define ALIGNED16_CALLOC(p,type,nmemb,size) \ - ALIGNED16_MALLOC(p,type,nmemb,size);\ -- memset((void*)p,0,(nmemb)*(size)) -+ memset((void*)(p),0,(nmemb)*(size)) - - - /** Align the data on 32 bytes, useful for avx. 
*/ -@@ -291,7 +305,7 @@ - p=(type)_mm_malloc((nmemb)*(size),32); - #else - #define ALIGNED32_MALLOC(p,type,nmemb,size) \ -- if(posix_memalign((void**)(&p),32,(nmemb)*(size)))\ -+ if(posix_memalign((void**)(&(p)),32,(nmemb)*(size)))\ - {\ - exit(1);\ - } -@@ -299,7 +313,7 @@ - - #define ALIGNED32_CALLOC(p,type,nmemb,size) \ - ALIGNED32_MALLOC(p,type,nmemb,size);\ -- memset((void*)p,0,(nmemb)*(size)); -+ memset((void*)(p),0,(nmemb)*(size)); - - - #define NO_ALIGNED_MALLOC(p,type,nmemb,size) p=(type)malloc((nmemb)*(size)); - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_bit.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_bit.h deleted file mode 100644 index ad90575..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_bit.h +++ /dev/null @@ -1,322 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/bit.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/bit.h -@@ -8,18 +8,8 @@ - - /* Tools for the bits manipulation */ - -- --/* (2^k) - 1, k<64, and -1 for k=0 */ --#define mask64(k) ((k)?(ONE64<<(k))-ONE64:MONE64) -- --/* (2^k) - 1, k<32, and -1 for k=0 */ --#define mask32(k) ((k)?(ONE32<<(k))-ONE32:MONE32) -- --#define maskUINT(k) ((k)?(UINT_1<<(k))-UINT_1:UINT_M1) -- -- - /** The i-th bit of the UINT val. */ --#define ITHBIT(val,i) ((val>>i)&UINT_1) -+#define ITHBIT(val,i) (((val)>>(i))&UINT_1) - - - /** Compute the MSB position of one UINT. */ -@@ -27,14 +17,14 @@ - Output: res the MSB position of U. If U is zero, res=0 - */ - #define MSB_SP(res,U,j) \ -- res=0;\ -+ (res)=0;\ - /* Search the MSB position of one word */\ -- for(j=NB_BITS_UINT>>1;j!=0;j>>=1) \ -+ for((j)=NB_BITS_UINT>>1;(j)!=0;(j)>>=1) \ - {\ -- if((U)>>(res^j))\ -+ if((U)>>((res)^(j)))\ - {\ - /* To remember the choice of the high part */\ -- res^=j;\ -+ (res)^=(j);\ - }\ - } - -@@ -43,15 +33,15 @@ - Output: res the MSB position of U. 
If U is zero, res=0 - */ - #define MSB_MP(res,U,i,j,nb_word) \ -- i=nb_word-1;\ -+ (i)=(nb_word)-1;\ - /* Search the first word different from zero */\ -- while(i&&(!U[i])) \ -+ while((i)&&(!(U)[i])) \ - {\ -- --i;\ -+ --(i);\ - }\ - /* Search the MSB of one word */\ -- MSB_SP(res,U[i],j);\ -- res^=i<>=32U;\ -- n+=((uint64_t)0xFFFFFFFF);\ -- n>>=32U; -+ (n)|=(n) << 32U;\ -+ (n)>>=32U;\ -+ (n)+=((uint64_t)0xFFFFFFFF);\ -+ (n)>>=32U; - - - /* 5 logical operations */ - #define NORBITS64_SHORT(n) \ -- n|=n << 32U;\ -- n>>=32U;\ -- --n;\ -- n>>=63U; -+ (n)|=(n) << 32U;\ -+ (n)>>=32U;\ -+ --(n);\ -+ (n)>>=63U; - - - /* The third fastest method, based on the variable-precision SWAR algorithm */ -@@ -99,95 +89,95 @@ - - /* 12 logical operations */ - #define COUNTBITS64_SWAR(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -- n=(((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)=((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) >> 56U; - - - /* 13 logical operations */ - #define ORBITS64_SWAR(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ - /* We change ((n/(2^56))+63)/64 in (n+63*(2^56))/(2^62) */\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) + ((uint64_t)0x3F00000000000000)) >> 62U; - - - /* 13 logical operations */ - #define NORBITS64_SWAR(n) \ -- --n;\ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ --(n);\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101))) >> 62U; - - - /* 13 logical operations */ - #define NORBITS64_SWAR2(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ - /* We remark that ({0,1,...,63}+255)<<56 ={255<<56,0<<56,...,62<<56}*2^56*/\ - /* So, the 63-th bit is 1 iff Hamming_weight(n)=0 */\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) + ((uint64_t)0xFF00000000000000)) >> 63U; - - - /* Slow, 13 logical operations */ - #define XORBITS64_SWAR2(n) \ - COUNTBITS64_SWAR(n); \ -- n&=ONE64; -+ (n)&=ONE64; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS64_SWAR(n) \ - /* +*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+***+***+***+***+***+***+***+***+***+***+***+***+*** */\ -- n^=(n << 
2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000+000+000+000+000+000+000+000+000+000+000+000+000 */\ - /* Then, we sum the 16 bits and store them in the bits 63 to 67. */\ - /* So the 63-th bit in the bit of parity. */\ -- n=((n & ((uint64_t)0x8888888888888888)) *((uint64_t)0x1111111111111111))\ -+ (n)=(((n) & ((uint64_t)0x8888888888888888)) *((uint64_t)0x1111111111111111))\ - >> 63; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS32_SWAR(n) \ - /* +*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+***+***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000+000+000+000+000 */\ - /* Then, we sum the 8 bits and store them in the bits 31 to 34. */\ - /* So the 31-th bit in the bit of parity. */\ -- n=((n & ((uint32_t)0x88888888)) *((uint32_t)0x11111111)) >> 31; -+ (n)=(((n) & ((uint32_t)0x88888888)) *((uint32_t)0x11111111)) >> 31; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS16_SWAR(n) \ - /* +*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000 */\ - /* Then, we sum the 4 bits and store them in the bits 15 to 17. */\ - /* So the 15-th bit in the bit of parity. */\ -- n=((n & ((uint16_t)0x8888)) *((uint16_t)0x1111)) >> 15; -+ (n)=(((n) & ((uint16_t)0x8888)) *((uint16_t)0x1111)) >> 15; - - - /* A special algorithm with 5 logical operations */ - #define XORBITS8_SWAR(n) \ - /* +*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +0+0+0+0 */\ - /* Then, we sum the 4 bits and store them in the bits 7 to 9. */\ - /* So the 15-th bit in the bit of parity. */\ -- n=((n & ((uint8_t)0xAA)) *((uint8_t)0x55)) >> 7; -+ (n)=(((n) & ((uint8_t)0xAA)) *((uint8_t)0x55)) >> 7; - - - /* The slowest method, based on the "dichotomic xor/or" */ -@@ -196,55 +186,55 @@ - /* A generic method using the dichotomic principle */ - #define ORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n|=n>>RESERVED_VARIABLE)\ -- n&=UINT_1; -+ (n)|=(n)>>RESERVED_VARIABLE)\ -+ (n)&=UINT_1; - - - #define NORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n|=n>>RESERVED_VARIABLE)\ -- n=~n;\ -- n&=UINT_1; -+ (n)|=(n)>>RESERVED_VARIABLE)\ -+ (n)=~(n);\ -+ (n)&=UINT_1; - - - #define XORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n^=n>>RESERVED_VARIABLE)\ -- n&=UINT_1; -+ (n)^=(n)>>RESERVED_VARIABLE)\ -+ (n)&=UINT_1; - - - /* 13 logical operations */ - #define ORBITS64_DICHO(n) \ -- n|=n >> 32U;\ -- n|=n >> 16U;\ -- n|=n >> 8U;\ -- n|=n >> 4U;\ -- n|=n >> 2U;\ -- n|=n >> 1U;\ -- n&=ONE64; -+ (n)|=(n) >> 32U;\ -+ (n)|=(n) >> 16U;\ -+ (n)|=(n) >> 8U;\ -+ (n)|=(n) >> 4U;\ -+ (n)|=(n) >> 2U;\ -+ (n)|=(n) >> 1U;\ -+ (n)&=ONE64; - - - /* 14 logical operations */ - #define NORBITS64_DICHO(n) \ -- n|=n >> 32U;\ -- n|=n >> 16U;\ -- n|=n >> 8U;\ -- n|=n >> 4U;\ -- n|=n >> 2U;\ -- n|=n >> 1U;\ -- n=~n;\ -- n&=ONE64; -+ (n)|=(n) >> 32U;\ -+ (n)|=(n) >> 16U;\ -+ (n)|=(n) >> 8U;\ -+ (n)|=(n) >> 4U;\ -+ (n)|=(n) >> 2U;\ -+ (n)|=(n) >> 1U;\ -+ (n)=~(n);\ -+ (n)&=ONE64; - - - /* 13 logical operations */ - #define XORBITS64_DICHO(n) \ -- n^=n >> 32U;\ -- n^=n >> 16U;\ -- n^=n >> 8U;\ -- n^=n >> 4U;\ -- n^=n >> 2U;\ -- n^=n >> 1U;\ -- n&=ONE64; -+ (n)^=(n) >> 32U;\ -+ (n)^=(n) >> 16U;\ -+ (n)^=(n) >> 8U;\ -+ (n)^=(n) >> 4U;\ -+ (n)^=(n) >> 2U;\ -+ (n)^=(n) >> 1U;\ -+ (n)&=ONE64; - - - /* Choose the best method */ -@@ -254,17 
+244,10 @@ - #define XORBITS64 XORBITS64_DICHO - - --#if (NB_BITS_UINT==64U) -- #define COUNTBITS_UINT CONCAT(COUNTBITS,NB_BITS_UINT) -- #define ORBITS_UINT CONCAT( ORBITS,NB_BITS_UINT) -- #define NORBITS_UINT CONCAT( NORBITS,NB_BITS_UINT) -- #define XORBITS_UINT CONCAT( XORBITS,NB_BITS_UINT) --#else -- #define COUNTBITS_UINT COUNTBITS64_SWAR -- #define ORBITS_UINT(n) ORBITS(n,NB_BITS_UINT) -- #define NORBITS_UINT(n) NORBITS(n,NB_BITS_UINT) -- #define XORBITS_UINT(n) XORBITS(n,NB_BITS_UINT) --#endif -+#define COUNTBITS_UINT COUNTBITS64 -+#define ORBITS_UINT ORBITS64 -+#define NORBITS_UINT NORBITS64 -+#define XORBITS_UINT XORBITS64 - - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h deleted file mode 100644 index 0fb42b7..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/chooseRootHFE_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/chooseRootHFE_gf2nx.h -@@ -30,7 +30,7 @@ - #include "gf2nx.h" - - int PREFIX_NAME(chooseRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - #define chooseRootHFE_gf2nx PREFIX_NAME(chooseRootHFE_gf2nx) - #endif - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_conv_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_conv_gf2nx.h deleted file mode 100644 index 597a4d2..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_conv_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/conv_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/conv_gf2nx.h -@@ -10,7 +10,7 @@ - - - void PREFIX_NAME(convHFEpolynomialSparseToDense_gf2nx)(gf2nx F_dense, -- const complete_sparse_monic_gf2nx F); -+ complete_sparse_monic_gf2nx F); - #define convHFEpolynomialSparseToDense_gf2nx \ - PREFIX_NAME(convHFEpolynomialSparseToDense_gf2nx) - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_div_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_div_gf2nx.h deleted file mode 100644 index e72442e..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_div_gf2nx.h +++ /dev/null @@ -1,41 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/div_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/div_gf2nx.h -@@ -24,13 +24,13 @@ - - - unsigned int PREFIX_NAME(div_r_HFE_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(div_r_HFE_cstdeg_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(div_r_HFE_cst_gf2nx)(gf2nx poly, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - #define div_r_HFE_gf2nx PREFIX_NAME(div_r_HFE_gf2nx) - #define div_r_HFE_cstdeg_gf2nx PREFIX_NAME(div_r_HFE_cstdeg_gf2nx) -@@ -39,16 +39,16 @@ - - #if ENABLED_REMOVE_ODD_DEGREE - void PREFIX_NAME(divsqr_r_HFE_cstdeg_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(divsqr_r_HFE_cst_gf2nx)(gf2nx poly, -- 
const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - #define divsqr_r_HFE_cstdeg_gf2nx PREFIX_NAME(divsqr_r_HFE_cstdeg_gf2nx) - #define divsqr_r_HFE_cst_gf2nx PREFIX_NAME(divsqr_r_HFE_cst_gf2nx) - #else -- #define divsqr_r_HFE_cstdeg_gf2nx PREFIX_NAME(div_r_HFE_cstdeg_gf2nx) -- #define divsqr_r_HFE_cst_gf2nx PREFIX_NAME(div_r_HFE_cst_gf2nx) -+ #define divsqr_r_HFE_cstdeg_gf2nx div_r_HFE_cstdeg_gf2nx -+ #define divsqr_r_HFE_cst_gf2nx div_r_HFE_cst_gf2nx - #endif - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_dotProduct_gf2.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_dotProduct_gf2.h deleted file mode 100644 index 5fd5501..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_dotProduct_gf2.h +++ /dev/null @@ -1,77 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/dotProduct_gf2.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/dotProduct_gf2.h -@@ -10,50 +10,50 @@ - - /* Dot product of vector of bits */ - #define DOTPRODUCT(res,a,b,SIZE) \ -- res=(a)[0]&(b)[0];\ -+ (res)=(a)[0]&(b)[0];\ - FOR_LOOP_COMPLETE(1,RESERVED_VARIABLE<(SIZE),++RESERVED_VARIABLE,\ -- res^=(a)[RESERVED_VARIABLE]&(b)[RESERVED_VARIABLE])\ -+ (res)^=(a)[RESERVED_VARIABLE]&(b)[RESERVED_VARIABLE])\ - XORBITS_UINT(res); - - - /* Inlined version */ - #define DOTPRODUCT1(res,a,b) \ -- res=(a)[0]&(b)[0];\ -+ (res)=(a)[0]&(b)[0];\ - XORBITS_UINT(res); - - #define DOTPRODUCT2(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ - XORBITS_UINT(res); - - #define DOTPRODUCT3(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ - XORBITS_UINT(res); - - #define DOTPRODUCT4(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ - XORBITS_UINT(res); - - #define DOTPRODUCT5(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -- res^=(a)[4]&(b)[4];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ -+ (res)^=(a)[4]&(b)[4];\ - XORBITS_UINT(res); - - #define DOTPRODUCT6(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -- res^=(a)[4]&(b)[4];\ -- res^=(a)[5]&(b)[5];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ -+ (res)^=(a)[4]&(b)[4];\ -+ (res)^=(a)[5]&(b)[5];\ - XORBITS_UINT(res); - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h deleted file mode 100644 index f9cd4d3..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h +++ /dev/null @@ -1,22 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/findRoots_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/findRoots_gf2nx.h -@@ -19,14 +19,14 @@ - convHFEpolynomialSparseToDense_gf2nx(poly2,F);\ - /* Initialize to F-U */\ - add2_gf2n(poly2,U);\ -- l=gcd_gf2nx(&i,poly2,d2,poly,l); -+ (l)=gcd_gf2nx(&(i),poly2,d2,poly,l); - - --int PREFIX_NAME(findRootsHFE_gf2nx)(vec_gf2n* roots, -- const 
complete_sparse_monic_gf2nx F, -+int PREFIX_NAME(findRootsHFE_gf2nx)(vec_gf2n roots, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - int PREFIX_NAME(findUniqRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - #define findRootsHFE_gf2nx PREFIX_NAME(findRootsHFE_gf2nx) - #define findUniqRootHFE_gf2nx PREFIX_NAME(findUniqRootHFE_gf2nx) - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h deleted file mode 100644 index 7974101..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/frobeniusMap_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/frobeniusMap_gf2nx.h -@@ -9,7 +9,7 @@ - #include "gf2nx.h" - - --unsigned int PREFIX_NAME(frobeniusMap_HFE_gf2nx)(gf2nx Xqn, const -+unsigned int PREFIX_NAME(frobeniusMap_HFE_gf2nx)(gf2nx Xqn, - complete_sparse_monic_gf2nx F, cst_gf2n U); - #define frobeniusMap_HFE_gf2nx PREFIX_NAME(frobeniusMap_HFE_gf2nx) - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_gf2nx.h deleted file mode 100644 index 34dd608..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/gf2nx.h -@@ -119,7 +119,7 @@ - - /* A structure with a special list to find the exponents of the monomials */ - typedef struct { -- cst_sparse_monic_gf2nx poly; -+ UINT poly[NB_UINT_HFEPOLY]; - /* List of the successive differences of the exponents of the monomials of - poly multiplied by NB_WORD_GFqn */ - unsigned int L[NB_COEFS_HFEPOLY]; - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_hash.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_hash.h deleted file mode 100644 index 57635c2..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_hash.h +++ /dev/null @@ -1,346 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/hash.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/hash.h -@@ -1,311 +1,40 @@ - #ifndef _HASH_H - #define _HASH_H - --#include "arch.h" - #include "choice_crypto.h" --#include "parameters_HFE.h" --#include "predicate.h" --#include "init.h" -- -- --/******************************************************************/ --/****************** Choice of the hash functions ******************/ --/******************************************************************/ -- -- --/* Choice of the hash function */ --/* The user is allowed to switch between SHA2 and SHA3 */ --#if (defined(QUARTZ)||defined(QUARTZ_V1)) -- #define CHOICE_HASH_SHA1 --#elif 0 -- #define CHOICE_HASH_SHA2 --#else -- #define CHOICE_HASH_SHA3 --#endif -- -- --/******************************************************************/ --/******************** Enable the hash functions *******************/ --/******************************************************************/ -- -- --/* Use of third libraries */ --/* The user is allowed to switch between OpenSSL and XKCP */ --/* The user can define several macros, while several SHA3 are not defined -- (if several SHA3 are defined, XKCP has 
priority). */ --#ifdef CHOICE_HASH_SHA1 -- #define ENABLED_SHA1_OPENSSL --#endif --#if defined(CHOICE_HASH_SHA2) -- #define ENABLED_SHA2_OPENSSL --#endif --#if defined(CHOICE_HASH_SHA3) -- /* XKCP is constant-time and faster than OpenSSL */ -- #define ENABLED_SHA3_XKCP -- /* #define ENABLED_SHA3_OPENSSL */ --#endif -- -- --#define ENABLED_SHAKE_XKCP --/* #define ENABLED_TUPLEHASH_XKCP XXX Disabled XXX */ -- -- --/******************************************************************/ --/***************** Include for the hash functions *****************/ --/******************************************************************/ -- -- --/* We minimize the numbers of #include to decrease the dependencies with the -- third libraries. */ --#if (defined(ENABLED_SHA1_OPENSSL)||defined(ENABLED_SHA2_OPENSSL)) -- #include --#endif -- -- --#ifdef ENABLED_SHA2_OPENSSL -- #include "randombytes.h" -- -- #if ENABLED_OPENSSL_FIPS -- #include -- #include -- #endif --#endif -- -- --#ifdef ENABLED_SHA3_OPENSSL -- #include -- #include "prefix_name.h" -- int PREFIX_NAME(sha3_256)(unsigned char *output, const unsigned char *m, -- size_t len); -- int PREFIX_NAME(sha3_384)(unsigned char *output, const unsigned char *m, -- size_t len); -- int PREFIX_NAME(sha3_512)(unsigned char *output, const unsigned char *m, -- size_t len); -- #define sha3_256 PREFIX_NAME(sha3_256) -- #define sha3_384 PREFIX_NAME(sha3_384) -- #define sha3_512 PREFIX_NAME(sha3_512) --#endif -- -- --#if (defined(ENABLED_SHA3_XKCP)||defined(ENABLED_SHAKE_XKCP)) -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --#ifdef ENABLED_SHAKE_XKCP -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --#ifdef ENABLED_TUPLEHASH_XKCP -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --/******************************************************************/ --/**************** Macro to call the hash functions ****************/ --/******************************************************************/ -- -- --#define SHA1_OPENSSL(output,m,len) SHA1(m,len,output) --#define SHA256_OPENSSL(output,m,len) SHA256(m,len,output) --#define SHA384_OPENSSL(output,m,len) SHA384(m,len,output) --#define SHA512_OPENSSL(output,m,len) SHA512(m,len,output) -- --#define SHA256_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA256_OPENSSL(output,m,len); --#define SHA384_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA384_OPENSSL(output,m,len); --#define SHA512_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA512_OPENSSL(output,m,len); -- --/* Format: SHA3_*(output,m,len) */ --#if 0 -- #define SHA3_256_XKCP SHA3_256 -- #define SHA3_384_XKCP SHA3_384 -- #define SHA3_512_XKCP SHA3_512 --#else -- /* SHA3_* is inlined from SimpleFIPS202.c */ -- #define SHA3_256_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(1088, 512, m, len, 0x06, output, 32) -- #define SHA3_384_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(832, 768, m, len, 0x06, output, 48) -- #define SHA3_512_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(576, 1024, m, len, 0x06, output, 64) --#endif -- --/* Format: SHAKE*(output,outputByteLen,input,inputByteLen) */ --#if 0 -- #define SHAKE128_XKCP SHAKE128 -- #define SHAKE256_XKCP SHAKE256 --#else -- /* SHAKE* is 
inlined from SimpleFIPS202.c */ -- #define SHAKE128_XKCP(output,outputByteLen,m,len) \ -- KeccakWidth1600_Sponge(1344, 256, m, len, 0x1F, output, outputByteLen) -- #define SHAKE256_XKCP(output,outputByteLen,m,len) \ -- KeccakWidth1600_Sponge(1088, 512, m, len, 0x1F, output, outputByteLen) --#endif -- --/* To call with: -- Keccak_HashInstance hashInstance; -- Keccak_HashIUF_SHAKE*_XKCP(&hashInstance,data,databitlen); -- And after a call to Keccak_HashIUF_SHAKE*_XKCP, to use one or several times: -- Keccak_HashSqueeze(&hashInstance,output,outputbitlen); -- XXX Here, length in bits XXX --*/ --#define Keccak_HashIUF_SHAKE128_XKCP(hashInstance,data,databitlen) \ -- Keccak_HashInitialize_SHAKE128(hashInstance);\ -- Keccak_HashUpdate(hashInstance,data,databitlen);\ -- Keccak_HashFinal(hashInstance,0); --#define Keccak_HashIUF_SHAKE256_XKCP(hashInstance,data,databitlen) \ -- Keccak_HashInitialize_SHAKE256(hashInstance);\ -- Keccak_HashUpdate(hashInstance,data,databitlen);\ -- Keccak_HashFinal(hashInstance,0); -- --#define TUPLEHASH128_XKCP TupleHash128 --#define TUPLEHASH256_XKCP TupleHash256 -- -- --/************************************************************************/ --/* Macro to call the hash functions corresponding to the security level */ --/************************************************************************/ -+#include "fips202.h" - -+#define CHOICE_HASH_SHA3 - - /* Choice of the hash function */ - #if (K<=128) -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA256_OPENSSL_FIPS -- #else -- #define SHA2 SHA256_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_256_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_256 -- #endif --#elif (K<=192) -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA384_OPENSSL_FIPS -- #else -- #define SHA2 SHA384_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_384_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_384 -- #endif --#else -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA512_OPENSSL_FIPS -- #else -- #define SHA2 SHA512_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_512_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_512 -- #endif --#endif -- -- --/* Choice of SHAKE and TupleHash */ --#if (K<=128) -- #define SHAKE SHAKE128_XKCP -- #define Keccak_HashIUF_SHAKE Keccak_HashIUF_SHAKE128_XKCP -- #define TUPLEHASH TUPLEHASH128_XKCP --#else -- #define SHAKE SHAKE256_XKCP -- #define Keccak_HashIUF_SHAKE Keccak_HashIUF_SHAKE256_XKCP -- #define TUPLEHASH TUPLEHASH256_XKCP --#endif -- -- --/******************************************************************/ --/******** Macro to call the chosen hash function of MQsoft ********/ --/******************************************************************/ -- -- --#ifdef CHOICE_HASH_SHA1 -- #define HASH SHA1 --#elif defined(CHOICE_HASH_SHA2) -- #define HASH SHA2 --#else -- #define HASH SHA3 --#endif -- -- --/******************************************************************/ --/************************** Other tools ***************************/ --/******************************************************************/ -- -- --BEGIN_EXTERNC -- /* For KeccakWidth1600_Sponge */ -- #include --END_EXTERNC -- -- --#if (K<=80) -- #define SIZE_DIGEST 20 -- #define SIZE_DIGEST_UINT 3 --#elif (K<=128) -- #define SIZE_DIGEST 32 -- #define SIZE_DIGEST_UINT 4 -- #define SIZE_2_DIGEST 64 -- #define EQUALHASH_NOCST ISEQUAL4_NOCST -- #define COPYHASH COPY4 -+ #define HASH sha3_256 -+ #define SHAKE shake128 -+ #define SIZE_DIGEST 32 
-+ #define SIZE_DIGEST_UINT 4 -+ #define SIZE_2_DIGEST 64 -+ #define EQUALHASH_NOCST ISEQUAL4_NOCST -+ #define COPYHASH COPY4 - #elif (K<=192) -- #define SIZE_DIGEST 48 -- #define SIZE_DIGEST_UINT 6 -- #define SIZE_2_DIGEST 96 -- #define EQUALHASH_NOCST ISEQUAL6_NOCST -- #define COPYHASH COPY6 --#else -- #define SIZE_DIGEST 64 -- #define SIZE_DIGEST_UINT 8 -- #define SIZE_2_DIGEST 128 -- #define EQUALHASH_NOCST ISEQUAL8_NOCST -- #define COPYHASH COPY8 -+ #define HASH sha3_384 -+ #define SHAKE shake256 -+ #define SIZE_DIGEST 48 -+ #define SIZE_DIGEST_UINT 6 -+ #define SIZE_2_DIGEST 96 -+ #define EQUALHASH_NOCST ISEQUAL6_NOCST -+ #define COPYHASH COPY6 -+#else -+ #define HASH sha3_512 -+ #define SHAKE shake256 -+ #define SIZE_DIGEST 64 -+ #define SIZE_DIGEST_UINT 8 -+ #define SIZE_2_DIGEST 128 -+ #define EQUALHASH_NOCST ISEQUAL8_NOCST -+ #define COPYHASH COPY8 - #endif - -- - #define EQUALHASH(a,b) f_ISEQUAL(a,b,SIZE_DIGEST_UINT) - -- --/* XXX Bytes XXX */ - #define expandSeed(output,outputByteLen,seed,seedByteLen) \ - SHAKE(output,outputByteLen,seed,seedByteLen) - -@@ -313,6 +42,4 @@ - #define expandSeedIUF Keccak_HashIUF_SHAKE - #define expandSeedSqueeze Keccak_HashSqueeze - -- - #endif -- - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_init.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_init.h deleted file mode 100644 index 12a62bf..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_init.h +++ /dev/null @@ -1,32 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/init.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/init.h -@@ -116,23 +116,23 @@ - - #define SET1_2(c) \ - SET1_1(c);\ -- SET0_1(c+1); -+ SET0_1((c)+1); - - #define SET1_3(c) \ - SET1_1(c);\ -- SET0_2(c+1); -+ SET0_2((c)+1); - - #define SET1_4(c) \ - SET1_1(c);\ -- SET0_3(c+1); -+ SET0_3((c)+1); - - #define SET1_5(c) \ - SET1_1(c);\ -- SET0_4(c+1); -+ SET0_4((c)+1); - - #define SET1_6(c) \ - SET1_1(c);\ -- SET0_5(c+1); -+ SET0_5((c)+1); - - - #endif - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_macro.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_macro.h deleted file mode 100644 index 3a91bc2..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_macro.h +++ /dev/null @@ -1,33 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/macro.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/macro.h -@@ -7,29 +7,5 @@ - /** This macro permits to concat the names. */ - #define CONCAT(a,b) CONCAT2(a,b) - -- --/** Print a name as a string. */ --#define PRINTF_NAME(name) puts(#name); --#define PRINTF_NAME1(name) PRINTF_NAME(name) --#define PRINTF_NAME2(name) PRINTF_NAME1(name) -- -- --/** Compute Floor(a/b) with a and b positive integers, a can be zero. */ --#define DIV_FLOOR(a,b) ((a)/(b)) --#define DIV_CEIL1(a,b) (((a)/(b))+(((a)%(b))?1:0)) --/* Faster but overflow if (a+b-1) >= 2^x for x=size_of_the_type_in_bits */ --#define DIV_CEIL2(a,b) (((a)+(b)-1)/(b)) --/* Faster but incorrect only when a == 0 and b>1 */ --#define DIV_CEIL3(a,b) ((((a)-1)/(b))+1) --/** Compute Ceiling(a/b) with a and b positive integers, a can be zero. */ --#define DIV_CEIL DIV_CEIL2 -- -- --/** Return the minimum. */ --#define MINI(a,b) (((a)<(b))?(a):(b)) --/** Return the maximum. 
*/ --#define MAXI(a,b) (((a)>(b))?(a):(b)) -- -- - #endif - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_mul_gf2n.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_mul_gf2n.h deleted file mode 100644 index 326f857..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_mul_gf2n.h +++ /dev/null @@ -1,16 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/mul_gf2n.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/mul_gf2n.h -@@ -60,11 +60,7 @@ - - - /* Function mul in GF(2^x), then modular reduction */ --#define MUL_THEN_REM_GF2N void \ -- PREFIX_NAME(mul_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn], \ -- const uint64_t B[NB_WORD_GFqn]) --MUL_THEN_REM_GF2N; -+void PREFIX_NAME(mul_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn], const uint64_t B[NB_WORD_GFqn]); - #define mul_then_rem_gf2n PREFIX_NAME(mul_then_rem_gf2n) - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_parameters_HFE.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_parameters_HFE.h deleted file mode 100644 index e76f169..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_parameters_HFE.h +++ /dev/null @@ -1,13 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/parameters_HFE.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/parameters_HFE.h -@@ -11,9 +11,6 @@ - #define GFq 2U - #define Log2_q 1 - /* For HFE, the previous parameter is necessarily 2. */ -- -- /** This type stores an element of GF(q). */ -- typedef unsigned char gf2; - #endif - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_signHFE.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_signHFE.h deleted file mode 100644 index b2698c6..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_signHFE.h +++ /dev/null @@ -1,23 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/signHFE.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/signHFE.h -@@ -7,6 +7,7 @@ - #include "gf2nx.h" - #include "config_HFE.h" - #include "matrix_gf2.h" -+#include "sizes_HFE.h" - #include - - -@@ -30,7 +31,10 @@ - #endif - - #if ENABLED_SEED_SK -- UINT *sk_uncomp; -+ UINT sk_uncomp[NB_UINT_HFEVPOLY -+ +(LTRIANGULAR_NV_SIZE<<1) -+ +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t -+ +MATRIXnv_SIZE+MATRIXn_SIZE]; - #endif - } secret_key_HFE; - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_sqr_gf2n.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_sqr_gf2n.h deleted file mode 100644 index 4f3525a..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_sqr_gf2n.h +++ /dev/null @@ -1,20 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/sqr_gf2n.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/sqr_gf2n.h -@@ -36,14 +36,8 @@ - - - /* Function sqr in GF(2^x), then modular reduction */ --#define SQR_THEN_REM_GF2N void \ -- PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn]) --#define SQR_NOCST_THEN_REM_GF2N void \ -- PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn]) --SQR_THEN_REM_GF2N; --SQR_NOCST_THEN_REM_GF2N; -+void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]); -+void 
PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]); - #define sqr_then_rem_gf2n PREFIX_NAME(sqr_then_rem_gf2n) - #define sqr_nocst_then_rem_gf2n PREFIX_NAME(sqr_nocst_then_rem_gf2n) - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2m.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2m.h deleted file mode 100644 index 154b646..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2m.h +++ /dev/null @@ -1,42 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/tools_gf2m.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/tools_gf2m.h -@@ -25,12 +25,13 @@ - /* The number of word that an element of GF(2^m) needs */ - #if (HFEmr) - #define NB_WORD_GF2m_TMP (HFEmq+1) -+ /* Mask to truncate the last word */ -+ #define MASK_GF2m ((UINT_1<<(HFEmr))-UINT_1) - #else - #define NB_WORD_GF2m_TMP HFEmq -+ #define MASK_GF2m UINT_M1 - #endif - --/* Mask to truncate the last word */ --#define MASK_GF2m maskUINT(HFEmr) - - #define HFEmq8 (HFEm>>3) - #define HFEmr8 (HFEm&7U) -@@ -75,19 +76,18 @@ - - #define isEqual_gf2m(a,b) f_ISEQUAL(a,b,NB_WORD_GF2m) - -+#define set0_gf2m(c) SET0((unsigned char *)(c),8*NB_WORD_GF2m) -+#define xorLoadMask1_gf2m(res,a,b) XORLOADMASK1((unsigned char *)(res),(unsigned char *)(a),b,8*NB_WORD_GF2m) -+ - #if (NB_WORD_GF2m<7) - #define add_gf2m CONCAT(CONCAT_NB_WORD_GF2m_SUP(ADD),_GF2X) - #define add2_gf2m CONCAT(CONCAT_NB_WORD_GF2m_SUP(ADD),_2_GF2X) - #define copy_gf2m CONCAT_NB_WORD_GF2m_SUP(COPY) -- #define set0_gf2m CONCAT_NB_WORD_GF2m_SUP(SET0_) -- #define xorLoadMask1_gf2m CONCAT_NB_WORD_GF2m_SUP(XORLOADMASK1_) - #define dotProduct_gf2_m CONCAT_NB_WORD_GF2m_SUP(DOTPRODUCT) - #else - #define add_gf2m(a,b,c) ADD_GF2X(a,b,c,NB_WORD_GF2m); - #define add2_gf2m(a,b) ADD_2_GF2X(a,b,NB_WORD_GF2m); - #define copy_gf2m(c,a) COPY(c,a,NB_WORD_GF2m) -- #define set0_gf2m(c) SET0(c,NB_WORD_GF2m) -- #define xorLoadMask1_gf2m(res,a,b) XORLOADMASK1(res,a,b,NB_WORD_GF2m) - #define dotProduct_gf2_m(res,a,b) DOTPRODUCT(res,a,b,NB_WORD_GF2m) - #endif - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2n.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2n.h deleted file mode 100644 index bdd55b3..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2n.h +++ /dev/null @@ -1,52 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/tools_gf2n.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/tools_gf2n.h -@@ -52,13 +52,13 @@ - /* The number of word that an element of GF(2^n) needs */ - #if (HFEnr) - #define NB_WORD_GFqn_TMP (HFEnq+1) -+ /* Mask for arithmetic in GF(2^n) */ -+ #define MASK_GF2n ((UINT_1<<(HFEnr))-UINT_1) - #else - #define NB_WORD_GFqn_TMP HFEnq -+ #define MASK_GF2n UINT_M1 - #endif - --/* Mask for arithmetic in GF(2^n) */ --#define MASK_GF2n maskUINT(HFEnr) -- - #define HFEnr8 (HFEn&7) - #define MASK8_GF2n ((1U<>3) - #define HFEnvr8 (HFEnv&7) - #define MASK8_GF2nv ((1U<>3)+((HFEvr8)?1:0)) - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c deleted file mode 100644 index 4f2a7ee..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c +++ /dev/null @@ -1,33 +0,0 @@ ---- 
upstream/Reference_Implementation/sign/GeMSS128/src/changeVariablesMQS_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/changeVariablesMQS_gf2.c -@@ -26,16 +26,14 @@ - */ - int PREFIX_NAME(changeVariablesMQS_simd_gf2)(mqsnv_gf2n MQS, cst_GLnv_gf2 S) - { -- UINT tmp[NB_WORD_GFqn]; -- mqsnv_gf2n MQS2, MQS2_cp; -+ UINT tmp[NB_WORD_GFqn]={0}; -+ /* Tmp matrix (n+v)*(n+v) of quadratic terms to compute S*Q */ -+ UINT MQS2[HFEnv*HFEnv*NB_WORD_GFqn]={0}; -+ UINT *MQS2_cp; - cst_mqsnv_gf2n MQS_cpi,MQS_cpj; - cst_GLnv_gf2 S_cpi,S_cpj; - unsigned int i,j; - -- /* Tmp matrix (n+v)*(n+v) of quadratic terms to compute S*Q */ -- MQS2=(UINT*)malloc(HFEnv*HFEnv*NB_WORD_GFqn*sizeof(UINT)); -- VERIFY_ALLOC_RET(MQS2); -- - /* To avoid the constant of MQS */ - MQS+=NB_WORD_GFqn; - -@@ -129,8 +127,6 @@ - S_cpj+=NB_WORD_GF2nv; - } - -- -- free(MQS2); - return 0; - } - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c deleted file mode 100644 index 406710e..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c +++ /dev/null @@ -1,141 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/chooseRootHFE_gf2nx.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/chooseRootHFE_gf2nx.c -@@ -29,7 +29,7 @@ - * @remark A part of the implementation is not in constant-time. - */ - int PREFIX_NAME(chooseRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U) - { - #if (HFEDeg==1) -@@ -44,76 +44,74 @@ - unsigned int j,i,ind=0; - #endif - -- vec_gf2n roots; -+ UINT roots[HFEDeg * NB_WORD_GFqn] = {0}; - int l; - -- l=findRootsHFE_gf2nx(&roots,F,U); -+ l=findRootsHFE_gf2nx(roots,F,U); - -- if(!l) -+ if(l==0) - { - /* Zero root */ - return 0; -- } else -+ } -+ if(l==1) - { -- if(l==1) -- { -- /* One root */ -- copy_gf2n(root,roots); -- } else -- { -- /* Several roots */ -- #if QUARTZ_ROOT -- hash=(UINT*)malloc(l*SIZE_DIGEST_UINT*sizeof(UINT)); -- -- /* We hash each root */ -- for(i=0;i -+ hash[i*SIZE_DIGEST_UINT+j])) - { -- j=0; -- while((j -- hash[i*SIZE_DIGEST_UINT+j])) -- { -- ind=i; -- } -+ ind=i; - } -+ } - -- /* We choose the corresponding root */ -- copy_gf2n(root,roots+ind*NB_WORD_GFqn); -- -- free(hash); -- #else -+ /* We choose the corresponding root */ -+ copy_gf2n(root,roots+ind*NB_WORD_GFqn); - -- /* Sort the roots */ -- sort_gf2n(roots,l); -+ free(hash); -+ #else - -- #if FIRST_ROOT -- /* Choose the first root */ -- copy_gf2n(root,roots); -- #elif DETERMINIST_ROOT -- /* Choose a root with a determinist hash */ -- HASH((unsigned char*)hash, -- (unsigned char*)U,NB_BYTES_GFqn); -- copy_gf2n(root,roots+(hash[0]%l)*NB_WORD_GFqn); -- #endif -+ /* Sort the roots */ -+ sort_gf2n(roots,l); -+ -+ #if FIRST_ROOT -+ /* Choose the first root */ -+ copy_gf2n(root,roots); -+ #elif DETERMINIST_ROOT -+ /* Choose a root with a determinist hash */ -+ HASH((unsigned char*)hash, -+ (unsigned char*)U,NB_BYTES_GFqn); -+ copy_gf2n(root,roots+(hash[0]%l)*NB_WORD_GFqn); - #endif -- } -- free(roots); -- return l; -+ #endif - } -+ return l; - #endif - } - #endif - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQS_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQS_gf2.c deleted file mode 100644 index e321539..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQS_gf2.c +++ /dev/null @@ -1,40 +0,0 @@ ---- 
upstream/Reference_Implementation/sign/GeMSS128/src/convMQS_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/convMQS_gf2.c -@@ -108,8 +108,7 @@ - unsigned int j; - - #if HFEmr8 -- uint8_t *pk_U=(uint8_t*)malloc(HFEmr8*NB_BYTES_EQUATION -- *sizeof(uint8_t)); -+ uint8_t pk_U[HFEmr8*NB_BYTES_EQUATION]={0}; - - convMQS_one_to_last_mr8_equations_gf2(pk_U,pk); - for(j=0;j>(j*HFENr8c)))<=LOST_BITS;--j,++k) -+ for(j=HFEnv-1;j>=(int)LOST_BITS;--j,++k) - { - pk2[k>>3]^=((pk[nb_bits>>3]>>(nb_bits&7))&ONE8)<<(k&7); - nb_bits+=j; -@@ -135,10 +135,15 @@ - */ - UINT PREFIX_NAME(convMQ_last_uncompressL_gf2)(uint64_t* pk2, const uint8_t* pk) - { -- const uint64_t *pk64; - unsigned int iq,ir,k,nb_bits; -+ uint64_t t1, t2; -+ const uint8_t *pk64 = pk; -+ #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) -+ const uint8_t *pk_end; -+ uint64_t end; -+ unsigned int l; -+ #endif - -- pk64=(uint64_t*)pk; - - nb_bits=1; - /* For each row */ -@@ -150,29 +155,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>(nb_bits&63))^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63))^(t2<<(64-(nb_bits&63))); - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } -- pk64+=iq+1; -+ pk64+=8*(iq+1); - pk2+=iq+1; - nb_bits+=(iq+1)<<6; - } -@@ -205,29 +218,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>3)&7) -- uint8_t *pk_end; -- uint64_t end; -- unsigned int l; -- #endif -- - #if LAST_ROW_R -- ir=LAST_ROW_R; - if(nb_bits&63) - { - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) -@@ -257,15 +268,18 @@ - - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - - #if (NB_WHOLE_BLOCKS>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -274,12 +288,13 @@ - pk2[k]^=end<<(64-(nb_bits&63)); - pk2[k+1]=end>>(nb_bits&63); - #else -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - -- if(((nb_bits&63)+ir)>64) 
-+ if(((nb_bits&63)+LAST_ROW_R)>64) - { - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -290,14 +305,18 @@ - #else - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -- if(((nb_bits&63)+ir)>64) -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); -+ if(((nb_bits&63)+LAST_ROW_R)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - #endif - } else -@@ -305,11 +324,12 @@ - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) - for(k=0;k>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -318,7 +338,8 @@ - #else - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - #endif - } -@@ -328,13 +349,16 @@ - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) - for(k=0;k<(iq-1);++k) - { -- pk2[k]=(pk64[k]>>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -343,15 +367,18 @@ - #else - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - #endif - } else - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>(nb_bits&63))^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63))^(t2<<(64-(nb_bits&63))); - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } -- pk64+=iq+1; -+ pk64+=8*(iq+1); - pk2+=iq+1; - nb_bits+=(iq+1)<<6; - } -@@ -450,29 +486,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>ir)&1);\ -+ pivot=1+~((((*S_cpj)>>ir)&1));\ - LOOPK; - - -@@ -44,7 
+44,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=1;\ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)});\ - \ - /* Here, the pivot is 1 if S is invertible */\ -@@ -158,7 +163,7 @@ - algorithm. */\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)});\ - \ - /* Next row */\ -@@ -214,7 +219,7 @@ - det_i&=(*S_cpi)>>ir; - #endif - -- return det_i; -+ return (gf2) det_i; - } - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantnv_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantnv_gf2.c deleted file mode 100644 index 4a86c2a..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantnv_gf2.c +++ /dev/null @@ -1,66 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/determinantnv_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/determinantnv_gf2.c -@@ -26,7 +26,7 @@ - - #define ADDROW(LOOPK) \ - /* pivot */\ -- pivot=-(((*S_cpj)>>ir)&1);\ -+ pivot=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK; - - -@@ -44,7 +44,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=1;\ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)});\ - \ - /* Here, the pivot is 1 if S is invertible */\ -@@ -158,7 +163,7 @@ - algorithm. */\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)});\ - \ - /* Next row */\ -@@ -214,7 +219,7 @@ - det_i&=(*S_cpi)>>ir; - #endif - -- return det_i; -+ return (gf2) det_i; - } - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_div_gf2nx.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_div_gf2nx.c deleted file mode 100644 index 19f80aa..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_div_gf2nx.c +++ /dev/null @@ -1,62 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/div_gf2nx.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/div_gf2nx.c -@@ -120,8 +120,11 @@ - } - - leading_coef=A+da*NB_WORD_GFqn; -- i=(db<<1)-da; -- i=MAXI(0,(int)i); -+ i=0; -+ if(2*db > da) -+ { -+ i = 2*db - da; -+ } - res=A+(da-db+i)*NB_WORD_GFqn; - - for(;i>=1;\ - }\ - }\ -- for(;jr>=1;\ - } - -- -- - #if (LEN_UNROLLED_64==1) - #define LOOPJR_UNROLLED_64 LOOPJR_NOCST_64 - #else - - #define LOOPJR_UNROLLED_64(START,NB_IT) \ -- for(jr=START;jr<(NB_IT-LEN_UNROLLED_64+1);jr+=LEN_UNROLLED_64)\ -+ for(jr=(START);jr<((NB_IT)-LEN_UNROLLED_64+1);jr+=LEN_UNROLLED_64)\ - {\ - for(h=0;h>=1;\ - }\ - }\ -- for(;jr>=1;\ -@@ -172,7 +150,7 @@ - #endif - - /* Constant cst_pk */ -- COPY_64bits_variables(c,(const UINT*)pk); -+ LOAD_UINT_ARRAY(c, pk, NB_WORD_EQ) - pk+=NB_BYTES_EQ; - - /* for each row of the quadratic matrix of pk, excepted the last block */ -@@ -186,7 +164,7 @@ - /* for each column of the quadratic matrix of pk */ - - /* xj=xi=1 */ -- XOR_ELEM(c,(const UINT*)pk); -+ XOR_ELEM(c,pk); - pk+=NB_BYTES_EQ; - - xj=xi>>1; -@@ -222,7 +200,7 @@ - /* for each column of the quadratic matrix of pk */ - - /* xj=xi=1 */ -- XOR_ELEM(c,(const UINT*)pk); -+ XOR_ELEM(c,pk); - pk+=NB_BYTES_EQ; - - xj=xi>>1; - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c 
b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c deleted file mode 100644 index 6b145cd..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c +++ /dev/null @@ -1,45 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/evalMQSv_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/evalMQSv_gf2.c -@@ -109,18 +109,6 @@ - /**************************************************************************/ - - --#if NB_VARr -- #define REM_X \ -- xi=m[i];\ -- for(j=0;j>j)&UINT_1);\ -- } --#else -- #define REM_X --#endif -- -- - - - /* Input: -@@ -139,16 +127,18 @@ - unsigned int i,j,k; - - /* Compute one time all -((xi>>1)&UINT_1) */ -+ i=0; - k=0; -- for(i=0;i>j)&UINT_1); -+ x[k]=(1+~((xi>>j)&UINT_1)); -+ ++k; - } -+ ++i; - } -- REM_X; - - /* Constant cst_pk */ - COPY_64bits_variables(c,pk); - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c deleted file mode 100644 index 0d8ff39..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c +++ /dev/null @@ -1,74 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/findRootsSplit_gf2nx.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/findRootsSplit_gf2nx.c -@@ -41,7 +41,15 @@ - i=1; - /* (2^i) < deg does not require modular reduction by f */ - #if(HFEn<33) -- const unsigned int min=(deg<(1U< HFE_odd_degree) j=1; -+ #endif -+ /* Here a_vec = row i */ - a_vecj=alpha_vec+j*(HFEn-1)*NB_WORD_GFqn; - for(;j HFE_odd_degree) j=1; - #endif -+ /* Here a_vec = row i */ -+ a_vecj=alpha_vec+j*(HFEn-1)*NB_WORD_GFqn; -+ #if HFEDegJ -+ for(; j>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK;\ - LOOPKINV; - -@@ -49,7 +49,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=UINT_1;\ -- for(ir=0;ir>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask);\ - }\ - \ -@@ -133,7 +133,6 @@ - LOOPIR(HFEnr-1,SWAP_WORD(*S_cpj,*S_cpi),*S_cpj^=*S_cpi&mask); - - /* Step 2 */ -- bit_ir=UINT_1<<(HFEnr-1); - LOOPIR_DOWN_TO_UP(HFEnr); - #else - /* To begin to last row */ -@@ -178,16 +177,16 @@ - } - - #define LOOPIR_CST(NB_IT) \ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)\ - xorLoadMask1_gf2n(Sinv_cpi,Sinv_cpj,mask);\ - });\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)\ - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask);\ - });\ -@@ -314,7 +313,7 @@ - Sinv_cpj+=NB_WORD_GFqn; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -343,7 +342,7 @@ - Sinv_cpj+=NB_WORD_GFqn; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -381,7 +380,7 @@ - for(j=0;j>6])>>(j&63U))&1U); -+ mask=(1+~(((U[j>>6])>>(j&63U))&1U)); - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask); - - /* next row */ - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c deleted file mode 100644 index 3c998c6..0000000 --- 
a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c +++ /dev/null @@ -1,94 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/invMatrixnv_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/invMatrixnv_gf2.c -@@ -28,7 +28,7 @@ - - #define ADDROW(LOOPK,LOOPKINV) \ - /* pivot */\ -- mask=-(((*S_cpj)>>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK;\ - LOOPKINV; - -@@ -49,7 +49,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=UINT_1;\ -- for(ir=0;ir>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask);\ - }\ - \ -@@ -133,7 +133,6 @@ - LOOPIR(HFEnvr-1,SWAP_WORD(*S_cpj,*S_cpi),*S_cpj^=*S_cpi&mask); - - /* Step 2 */ -- bit_ir=UINT_1<<(HFEnvr-1); - LOOPIR_DOWN_TO_UP(HFEnvr); - #else - /* To begin to last row */ -@@ -178,16 +177,16 @@ - } - - #define LOOPIR_CST(NB_IT) \ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)\ - xorLoadMask1_gf2nv(Sinv_cpi,Sinv_cpj,mask);\ - });\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)\ - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask);\ - });\ -@@ -315,7 +314,7 @@ - Sinv_cpj+=NB_WORD_GF2nv; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -344,7 +343,7 @@ - Sinv_cpj+=NB_WORD_GF2nv; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -382,7 +381,7 @@ - for(j=0;j>6])>>(j&63U))&1U); -+ mask=(1+~(((U[j>>6])>>(j&63U))&1U)); - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask); - - /* next row */ - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c deleted file mode 100644 index 9058654..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c +++ /dev/null @@ -1,20 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/mixEquationsMQS_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/mixEquationsMQS_gf2.c -@@ -51,14 +51,13 @@ - - for(i=0;i>(i<<3); -+ pk[i]=(uint8_t)(pk_last[NB_WORD_GF2m-1]>>(i<<3)); - } - #endif - } - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mul_gf2n.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mul_gf2n.c deleted file mode 100644 index 3c33c47..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mul_gf2n.c +++ /dev/null @@ -1,213 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/mul_gf2n.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/mul_gf2n.c -@@ -28,7 +28,7 @@ - for(i=0;i>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A< -+#include - #include - - #include "sign_keypairHFE.h" -@@ -15,11 +14,7 @@ - * @param[out] sk The secret-key. - * @return Zero if the function runs correctly, non-zero else. 
- */ --#if SUPERCOP --int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) --#else --int PREFIX_NAME(crypto_sign_keypair)(unsigned char *pk, unsigned char *sk) --#endif -+int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) - { - return sign_keypairHFE(pk,(UINT*)sk); - } -@@ -34,18 +29,14 @@ - * @param[in] sk The secret-key. - * @return Zero if the function runs correctly, non-zero else. - */ --#if SUPERCOP - int crypto_sign( --#else --int PREFIX_NAME(crypto_sign)( --#endif -- unsigned char *sm, unsigned long long *smlen, -- const unsigned char *m, unsigned long long mlen, -- const unsigned char *sk) -+ uint8_t *sm, size_t *smlen, -+ const uint8_t *m, size_t mlen, -+ const uint8_t *sk) - { - *smlen=mlen+CRYPTO_BYTES; -- memcpy(sm+CRYPTO_BYTES,m,(size_t)mlen); -- return signHFE(sm,m,(size_t)mlen,(UINT*)sk); -+ memmove(sm+CRYPTO_BYTES,m,mlen); -+ return signHFE(sm,m,mlen,(UINT*)sk); - } - - -@@ -58,21 +49,35 @@ - * @param[in] pk The public-key. - * @return Zero if the function runs correctly, non-zero else. - */ --#if SUPERCOP - int crypto_sign_open( --#else --int PREFIX_NAME(crypto_sign_open)( --#endif -- unsigned char *m, unsigned long long *mlen, -- const unsigned char *sm, unsigned long long smlen, -- const unsigned char *pk) -+ uint8_t *m, size_t *mlen, -+ const uint8_t *sm, size_t smlen, -+ const uint8_t *pk) - { - int result; - *mlen=smlen-CRYPTO_BYTES; - result=sign_openHFE(sm+CRYPTO_BYTES,(size_t)(*mlen),sm,pk); - /* For compatibily with SUPERCOP, the memcpy is done only after sign_open */ -- memcpy(m,sm+CRYPTO_BYTES,(size_t)(*mlen)); -+ memmove(m,sm+CRYPTO_BYTES,(size_t)(*mlen)); - return result; - } - - -+int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk) -+{ -+ int result; -+ *siglen = CRYPTO_BYTES; -+ result = signHFE(sig,m,mlen,(UINT*)sk); -+ return result; -+} -+ -+int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) -+{ -+ int result; -+ if (siglen == CRYPTO_BYTES) { -+ result = sign_openHFE(m,mlen,sig,pk); -+ } else { -+ result = -1; -+ } -+ return result; -+} - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_signHFE.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_signHFE.c deleted file mode 100644 index 57c3fcd..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_signHFE.c +++ /dev/null @@ -1,220 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/signHFE.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/signHFE.c -@@ -83,8 +83,10 @@ - for(k1=1;k1sk_uncomp,UINT*,NB_UINT_HFEVPOLY -- +(LTRIANGULAR_NV_SIZE<<1) -- +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t -- +MATRIXnv_SIZE+MATRIXn_SIZE,sizeof(UINT)); - expandSeed((uint8_t*)(sk_HFE->sk_uncomp),(NB_UINT_HFEVPOLY - +(LTRIANGULAR_NV_SIZE<<1) - +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t)<<3, -@@ -313,7 +311,8 @@ - { - #if HFEv - cst_sparse_monic_gf2nx F_HFEv; -- UINT* F; -+ sparse_monic_gf2nx F; -+ sparse_monic_gf2nx F_cp; - unsigned int i; - #endif - -@@ -333,13 +332,10 @@ - #endif - - #if HFEv -+ F=sk_HFE->F_struct.poly; - F_HFEv=sk_HFE->F_HFEv; - -- ALIGNED_GFqn_MALLOC(F,UINT*,NB_UINT_HFEPOLY,sizeof(UINT)); -- VERIFY_ALLOC_RET(F); -- - #if (HFEDeg>1) -- UINT *F_cp; - unsigned int j; - - /* X^(2^0) */ -@@ -351,11 +347,11 @@ - { - /* Copy i quadratic terms */ - -+ j=0; - #if ENABLED_REMOVE_ODD_DEGREE -- for(j=(((1U<HFE_odd_degree) ++j; - #endif -+ for(;jHFE_odd_degree) ++j; - #endif -+ for(;jF_struct.poly=F; - #else - 
sk_HFE->F_struct.poly=sk_HFE->F_HFEv; - #endif -@@ -447,7 +442,7 @@ - #endif - - int nb_root; -- secret_key_HFE sk_HFE; -+ secret_key_HFE sk_HFE={0}; - - #if(HFEv) - UINT* F; -@@ -666,9 +661,6 @@ - if(nb_root<0) - { - /* Error from chooseRootHFE */ -- #if HFEv -- ALIGNED_GFqn_FREE(F); -- #endif - return nb_root; - } - -@@ -677,7 +669,7 @@ - /* Add the v bits to DR */ - #if HFEnr - DR[NB_WORD_GFqn-1]^=V[0]<>(64-HFEnr))^(V[i+1]<>(64-HFEnr); - #endif - #else -- for(i=0;i>(64-HFEnr))^(V[i+1]<>(64-HFEnr); - #endif - #else -- for(i=0;i1)) - uint64_t val; -@@ -578,10 +576,6 @@ - #endif - - -- #if (FORMAT_HYBRID_CPK8&&EVAL_HYBRID_CPK8_UNCOMP&&HFEmr8) -- free(pk_tmp); -- #endif -- - return ret; - } - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_sqr_gf2n.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_sqr_gf2n.c deleted file mode 100644 index 2b5fa23..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_sqr_gf2n.c +++ /dev/null @@ -1,223 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/sqr_gf2n.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/sqr_gf2n.c -@@ -78,44 +78,6 @@ - /***********************************************************************/ - /***********************************************************************/ - -- --/* When rem is a macro */ --#if (K3!=1) -- #define MUL_MOD_MACRO32(name_function,mul_function,rem_function) \ -- name_function\ -- {\ -- uint64_t res_mul,Q,R;\ -- mul_function;\ -- rem_function;\ -- } --#else -- #define MUL_MOD_MACRO32(name_function,mul_function,rem_function) \ -- name_function\ -- {\ -- uint64_t res_mul,Q;\ -- mul_function;\ -- rem_function;\ -- } --#endif -- --#define MUL_MOD_MACRO64(name_function,mul_function,rem_function,size) \ -- name_function\ -- {\ -- uint64_t res_mul[size],Q,R;\ -- mul_function;\ -- rem_function;\ -- } -- --/* HFEn == 64 */ --#define MUL_MOD_MACRO64_K64(name_function,mul_function,rem_function,size) \ -- name_function\ -- {\ -- uint64_t res_mul[size],R;\ -- mul_function;\ -- rem_function;\ -- } -- -- - #if HFEnr - #define INIT_Q(size2) uint64_t Q[size2]; - #else -@@ -123,81 +85,108 @@ - #define INIT_Q(size2) - #endif - --#if ((HFEn==312)||(HFEn==402)||(HFEn==544)) -- #define MUL_MOD_MACRO(name_function,mul_function,rem_function,size,size2) \ -- name_function\ -- {\ -- uint64_t res_mul[size];\ -- INIT_Q(size2);\ -- mul_function;\ -- rem_function;\ -+#if (REM_MACRO) -+ #if (NB_WORD_GFqn!=1) -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],R; -+ INIT_Q(NB_WORD_GFqn); -+ sqr_gf2x(res_mul,A); -+ REM_GF2N(res,res_mul,Q,R); - } --#else -- #define MUL_MOD_MACRO(name_function,mul_function,rem_function,size,size2) \ -- name_function\ -- {\ -- uint64_t res_mul[size],R;\ -- INIT_Q(size2);\ -- mul_function;\ -- rem_function;\ -+ -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],R; -+ INIT_Q(NB_WORD_GFqn); -+ sqr_nocst_gf2x(res_mul,A); -+ REM_GF2N(res,res_mul,Q,R); -+ } -+ #elif (HFEn<33) -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul,Q,R; -+ sqr_gf2x(&res_mul,A); -+ REM_GF2N(*res,res_mul,Q,R); - } --#endif - --/* When rem is a function */ --#define MUL_MOD_FUNCTION32(name_function,mul_function,rem_function) \ -- name_function\ -- {\ -- uint64_t res_mul;\ -- mul_function;\ -- rem_function;\ -- } -+ void 
PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul,Q,R; -+ sqr_nocst_gf2x(&res_mul,A); -+ REM_GF2N(*res,res_mul,Q,R); -+ } -+ #elif HFEnr -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],Q,R; -+ sqr_gf2x(res_mul,A); -+ REM_GF2N(*res,res_mul,Q,R); -+ } - --#define MUL_MOD_FUNCTION(name_function,mul_function,rem_function,size) \ -- name_function\ -- {\ -- uint64_t res_mul[size];\ -- mul_function;\ -- rem_function;\ -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],Q,R; -+ sqr_nocst_gf2x(res_mul,A); -+ REM_GF2N(*res,res_mul,Q,R); -+ } -+ #else -+/* HFEn == 64 */ -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],R; -+ sqr_gf2x(res_mul,A); -+ REM_GF2N(*res,res_mul,,R); - } - --#if (REM_MACRO) -- #if (NB_WORD_GFqn!=1) -- MUL_MOD_MACRO(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- REM_GF2N(res,res_mul,Q,R),NB_WORD_MUL,NB_WORD_GFqn); -- MUL_MOD_MACRO(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- REM_GF2N(res,res_mul,Q,R),NB_WORD_MUL,NB_WORD_GFqn); -- #elif (HFEn<33) -- MUL_MOD_MACRO32(SQR_THEN_REM_GF2N,sqr_gf2x(&res_mul,A), -- REM_GF2N(*res,res_mul,Q,R)); -- MUL_MOD_MACRO32(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(&res_mul,A), -- REM_GF2N(*res,res_mul,Q,R)); -- #elif HFEnr -- MUL_MOD_MACRO64(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- REM_GF2N(*res,res_mul,Q,R),NB_WORD_MUL); -- MUL_MOD_MACRO64(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- REM_GF2N(*res,res_mul,Q,R),NB_WORD_MUL); -- #else -- /* HFEn == 64 */ -- MUL_MOD_MACRO64_K64(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- REM_GF2N(*res,res_mul,,R),NB_WORD_MUL); -- MUL_MOD_MACRO64_K64(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- REM_GF2N(*res,res_mul,,R),NB_WORD_MUL); -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],R; -+ sqr_nocst_gf2x(res_mul,A); -+ REM_GF2N(*res,res_mul,,R); -+ } - #endif - #elif (NB_WORD_GFqn!=1) -- MUL_MOD_FUNCTION(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- remsqr_gf2n(res,res_mul),NB_WORD_MUL); -- MUL_MOD_FUNCTION(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- remsqr_gf2n(res,res_mul),NB_WORD_MUL); -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL]; -+ sqr_gf2x(res_mul,A); -+ remsqr_gf2n(res,res_mul); -+ } -+ -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL]; -+ sqr_nocst_gf2x(res_mul,A); -+ remsqr_gf2n(res,res_mul); -+ } - #elif (HFEn<33) -- MUL_MOD_FUNCTION32(SQR_THEN_REM_GF2N,sqr_gf2x(&res_mul,A), -- remsqr_gf2n(res,&res_mul)); -- MUL_MOD_FUNCTION32(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(&res_mul,A), -- remsqr_gf2n(res,&res_mul)); -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul; -+ sqr_gf2x(&res_mul,A); -+ remsqr_gf2n(res,&res_mul); -+ } -+ -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul; -+ sqr_nocst_gf2x(&res_mul,A); -+ remsqr_gf2n(res,&res_mul); -+ } - #else -- MUL_MOD_FUNCTION(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- 
remsqr_gf2n(res,res_mul),NB_WORD_MUL); -- MUL_MOD_FUNCTION(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- remsqr_gf2n(res,res_mul),NB_WORD_MUL); --#endif -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL]; -+ sqr_gf2x(res_mul,A); -+ remsqr_gf2n(res,res_mul); -+ } - -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL]; -+ sqr_nocst_gf2x(res_mul,A); -+ remsqr_gf2n(res,res_mul); -+ } -+#endif - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c deleted file mode 100644 index 34feb72..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c +++ /dev/null @@ -1,258 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/vecMatProduct_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/vecMatProduct_gf2.c -@@ -9,11 +9,11 @@ - - /* for a block of bits of vec */ - #define LOOPIR_M(NB_IT) \ -- for(ir=0;ir>6;iq>ir;\ -- LOOPIR_START(NB_BITS_UINT);\ -- ir=0;\ -- }\ -- /* the last block */\ -- REM_START(LOOPIR_START);\ -+void PREFIX_NAME(vecMatProductv_64)(vecn_gf2 res, cst_vecn_gf2 vec, cst_Mn_gf2 S) { -+ cst_Mn_gf2 S_cp; -+ UINT bit_ir, vec_ir; -+ unsigned int iq,ir; -+ -+ /* initialization of res to 0 */ -+ set0_gf2n(res); -+ -+ S_cp=S; -+ /* for each bit of vec excepted the last block */ -+ iq = 0; -+ ir = 0; -+ while(NB_BITS_UINT*iq + ir < HFEv) -+ { -+ bit_ir = vec[iq]; -+ for(ir=0;(NB_BITS_UINT*iq + ir < HFEv) && (ir>=1; -+ } -+ ++iq; -+ } -+} -+ -+void PREFIX_NAME(vecMatProductn_64)(vecn_gf2 res, cst_vecn_gf2 vec, cst_Mn_gf2 S) { -+ cst_Mn_gf2 S_cp; -+ UINT bit_ir, vec_ir; -+ unsigned int iq,ir; -+ -+ /* initialization of res to 0 */ -+ set0_gf2n(res); -+ -+ S_cp=S; -+ /* for each bit of vec excepted the last block */ -+ for(iq=0;iq>6;iq>ir; -+ LOOPIR_START_N(NB_BITS_UINT); -+ ir=0; -+ } -+ /* the last block */ -+ REM_START_NV(LOOPIR_START_N); - } - - --VECMATPROD_START(PREFIX_NAME(vecMatProductnvn_start_64),set0_gf2n, -- LOOPIR_START_N,REM_START_NV,HFEnvq) --/* --VECMATPROD_START(PREFIX_NAME(vecMatProductn_start_64),set0_gf2n, -- LOOPIR_START_N,REM_START_N,HFEnq) --*/ - - - diff --git a/gemss/update_patches.sh b/gemss/update_patches.sh deleted file mode 100755 index 76a4d36..0000000 --- a/gemss/update_patches.sh +++ /dev/null @@ -1,47 +0,0 @@ -PYTHON=/usr/bin/python3 - -BASE=`dirname $0` -BASE=`cd ${BASE} && pwd` -cd ${BASE} - -PATCHES=${BASE}/patches - -VERSION=$(cat ${BASE}/VERSION) -V1=upstream -V2=upstream-patched - -if [ ! -e "${V1}" ] -then - echo "${BASE}/${V1} not found" - exit -fi - -if [ ! -e "${V2}" ] -then - echo "${BASE}/${V2} not found" - exit -fi - -rm -rf ${PATCHES} -mkdir -p ${PATCHES}/tmp/ - -diff -ru --no-dereference ${V1} ${V2} > ${PATCHES}/tmp/p - -( cd ${PATCHES}/tmp/ - splitpatch ${PATCHES}/tmp/p - rm ${PATCHES}/tmp/p - for X in * - do - Y=$(echo ${X} \ - | head -n 1 ${X} \ - | tr '\t' ' ' \ - | cut -d ' ' -f 2 \ - | cut -d'/' -f 2- \ - | tr '/' '_') - mv ${X} ${Y} - sed -i '1,2 s/\t.*$//' ${Y} - sed -i '$ s/diff.*//' ${Y} - done -) -mv ${PATCHES}/tmp/* ${PATCHES} -rm -rf ${PATCHES}/tmp/