From 044f5c5345a78dd19ce678c99fd11552c69b1eef Mon Sep 17 00:00:00 2001
From: "John M. Schanck"
Date: Sat, 23 Jan 2021 15:40:05 -0500
Subject: [PATCH] Removing GeMSS

---
 gemss/VERSION | 1 -
 gemss/checkout.sh | 67 -
 gemss/meta/crypto_sign_gemss-128_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-192_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-256_META.yml | 8 -
 .../meta/crypto_sign_gemss-blue-128_META.yml | 8 -
 .../meta/crypto_sign_gemss-blue-192_META.yml | 8 -
 .../meta/crypto_sign_gemss-blue-256_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-red-128_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-red-192_META.yml | 8 -
 gemss/meta/crypto_sign_gemss-red-256_META.yml | 8 -
 gemss/package.sh | 661 ---
 ...entation_sign_GeMSS128_include_add_gf2nx.h | 30 -
 ...mplementation_sign_GeMSS128_include_arch.h | 192 -
 ...Implementation_sign_GeMSS128_include_bit.h | 383 --
 ...ign_GeMSS128_include_chooseRootHFE_gf2nx.h | 12 -
 ...ntation_sign_GeMSS128_include_conv_gf2nx.h | 12 -
 ...entation_sign_GeMSS128_include_div_gf2nx.h | 41 -
 ...ion_sign_GeMSS128_include_dotProduct_gf2.h | 77 -
 ...on_sign_GeMSS128_include_findRoots_gf2nx.h | 22 -
 ...sign_GeMSS128_include_frobeniusMap_gf2nx.h | 21 -
 ...plementation_sign_GeMSS128_include_gf2nx.h | 12 -
 ...mplementation_sign_GeMSS128_include_hash.h | 346 --
 ...mplementation_sign_GeMSS128_include_init.h | 32 -
 ...plementation_sign_GeMSS128_include_macro.h | 33 -
 ...mentation_sign_GeMSS128_include_mul_gf2n.h | 16 -
 ...mentation_sign_GeMSS128_include_mul_gf2x.h | 4798 -----------------
 ...ion_sign_GeMSS128_include_parameters_HFE.h | 13 -
 ...mentation_sign_GeMSS128_include_rem_gf2n.h | 202 -
 ...ementation_sign_GeMSS128_include_signHFE.h | 23 -
 ...ntation_sign_GeMSS128_include_simd_intel.h | 342 --
 ...mentation_sign_GeMSS128_include_sqr_gf2n.h | 20 -
 ...mentation_sign_GeMSS128_include_sqr_gf2x.h | 372 --
 ...ntation_sign_GeMSS128_include_tools_gf2m.h | 42 -
 ...ntation_sign_GeMSS128_include_tools_gf2n.h | 52 -
 ...tation_sign_GeMSS128_include_tools_gf2nv.h | 40 -
 ...ntation_sign_GeMSS128_include_tools_gf2v.h | 26 -
 ...sign_GeMSS128_src_changeVariablesMQS_gf2.c | 89 -
 ...on_sign_GeMSS128_src_chooseRootHFE_gf2nx.c | 141 -
 ...ementation_sign_GeMSS128_src_convMQS_gf2.c | 40 -
 ...lementation_sign_GeMSS128_src_convMQ_gf2.c | 405 --
 ...lementation_sign_GeMSS128_src_conv_gf2nx.c | 12 -
 ...ation_sign_GeMSS128_src_determinantn_gf2.c | 66 -
 ...tion_sign_GeMSS128_src_determinantnv_gf2.c | 66 -
 ...plementation_sign_GeMSS128_src_div_gf2nx.c | 62 -
 ...tion_sign_GeMSS128_src_evalMQSnocst8_gf2.c | 39 -
 ..._sign_GeMSS128_src_evalMQSnocst8_quo_gf2.c | 129 -
 ...mentation_sign_GeMSS128_src_evalMQSv_gf2.c | 68 -
 ...n_sign_GeMSS128_src_findRootsSplit_gf2nx.c | 74 -
 ...tation_sign_GeMSS128_src_findRoots_gf2nx.c | 142 -
 ...ion_sign_GeMSS128_src_frobeniusMap_gf2nx.c | 84 -
 ...ation_sign_GeMSS128_src_genSecretMQS_gf2.c | 415 --
 ...ation_sign_GeMSS128_src_initMatrixId_gf2.c | 140 -
 ...ntation_sign_GeMSS128_src_invMatrixn_gf2.c | 94 -
 ...tation_sign_GeMSS128_src_invMatrixnv_gf2.c | 94 -
 ...mplementation_sign_GeMSS128_src_inv_gf2n.c | 28 -
 ...on_sign_GeMSS128_src_mixEquationsMQS_gf2.c | 20 -
 ...mplementation_sign_GeMSS128_src_mul_gf2n.c | 213 -
 ...mplementation_sign_GeMSS128_src_mul_gf2x.c | 211 -
 ...plementation_sign_GeMSS128_src_predicate.c | 57 -
 ...ntation_sign_GeMSS128_src_randMatrix_gf2.c | 397 --
 ...mplementation_sign_GeMSS128_src_rem_gf2n.c | 9 -
 ...ed_Implementation_sign_GeMSS128_src_sign.c | 94 -
 ...Implementation_sign_GeMSS128_src_signHFE.c | 220 -
 ...tation_sign_GeMSS128_src_sign_keypairHFE.c | 199 -
...mentation_sign_GeMSS128_src_sign_openHFE.c | 41 - ...plementation_sign_GeMSS128_src_sort_gf2n.c | 37 - ...mplementation_sign_GeMSS128_src_sqr_gf2n.c | 234 - ...tion_sign_GeMSS128_src_vecMatProduct_gf2.c | 261 - ...entation_sign_GeMSS128_include_add_gf2nx.h | 30 - ...mplementation_sign_GeMSS128_include_arch.h | 131 - ...Implementation_sign_GeMSS128_include_bit.h | 322 -- ...ign_GeMSS128_include_chooseRootHFE_gf2nx.h | 12 - ...ntation_sign_GeMSS128_include_conv_gf2nx.h | 12 - ...entation_sign_GeMSS128_include_div_gf2nx.h | 41 - ...ion_sign_GeMSS128_include_dotProduct_gf2.h | 77 - ...on_sign_GeMSS128_include_findRoots_gf2nx.h | 22 - ...sign_GeMSS128_include_frobeniusMap_gf2nx.h | 12 - ...plementation_sign_GeMSS128_include_gf2nx.h | 12 - ...mplementation_sign_GeMSS128_include_hash.h | 346 -- ...mplementation_sign_GeMSS128_include_init.h | 32 - ...plementation_sign_GeMSS128_include_macro.h | 33 - ...mentation_sign_GeMSS128_include_mul_gf2n.h | 16 - ...ion_sign_GeMSS128_include_parameters_HFE.h | 13 - ...ementation_sign_GeMSS128_include_signHFE.h | 23 - ...mentation_sign_GeMSS128_include_sqr_gf2n.h | 20 - ...ntation_sign_GeMSS128_include_tools_gf2m.h | 42 - ...ntation_sign_GeMSS128_include_tools_gf2n.h | 52 - ...tation_sign_GeMSS128_include_tools_gf2nv.h | 40 - ...ntation_sign_GeMSS128_include_tools_gf2v.h | 26 - ...sign_GeMSS128_src_changeVariablesMQS_gf2.c | 33 - ...on_sign_GeMSS128_src_chooseRootHFE_gf2nx.c | 141 - ...ementation_sign_GeMSS128_src_convMQS_gf2.c | 40 - ...lementation_sign_GeMSS128_src_convMQ_gf2.c | 405 -- ...lementation_sign_GeMSS128_src_conv_gf2nx.c | 12 - ...ation_sign_GeMSS128_src_determinantn_gf2.c | 66 - ...tion_sign_GeMSS128_src_determinantnv_gf2.c | 66 - ...plementation_sign_GeMSS128_src_div_gf2nx.c | 62 - ...tion_sign_GeMSS128_src_evalMQSnocst8_gf2.c | 39 - ..._sign_GeMSS128_src_evalMQSnocst8_quo_gf2.c | 129 - ...mentation_sign_GeMSS128_src_evalMQSv_gf2.c | 45 - ...n_sign_GeMSS128_src_findRootsSplit_gf2nx.c | 74 - ...tation_sign_GeMSS128_src_findRoots_gf2nx.c | 142 - ...ion_sign_GeMSS128_src_frobeniusMap_gf2nx.c | 21 - ...ation_sign_GeMSS128_src_genSecretMQS_gf2.c | 322 -- ...ation_sign_GeMSS128_src_initMatrixId_gf2.c | 140 - ...ntation_sign_GeMSS128_src_invMatrixn_gf2.c | 94 - ...tation_sign_GeMSS128_src_invMatrixnv_gf2.c | 94 - ...on_sign_GeMSS128_src_mixEquationsMQS_gf2.c | 20 - ...mplementation_sign_GeMSS128_src_mul_gf2n.c | 213 - ...plementation_sign_GeMSS128_src_predicate.c | 57 - ...ntation_sign_GeMSS128_src_randMatrix_gf2.c | 397 -- ...ce_Implementation_sign_GeMSS128_src_sign.c | 94 - ...Implementation_sign_GeMSS128_src_signHFE.c | 220 - ...tation_sign_GeMSS128_src_sign_keypairHFE.c | 200 - ...mentation_sign_GeMSS128_src_sign_openHFE.c | 41 - ...mplementation_sign_GeMSS128_src_sqr_gf2n.c | 223 - ...tion_sign_GeMSS128_src_vecMatProduct_gf2.c | 258 - gemss/update_patches.sh | 47 - 119 files changed, 17150 deletions(-) delete mode 100644 gemss/VERSION delete mode 100755 gemss/checkout.sh delete mode 100644 gemss/meta/crypto_sign_gemss-128_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-192_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-256_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-blue-128_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-blue-192_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-blue-256_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-red-128_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-red-192_META.yml delete mode 100644 gemss/meta/crypto_sign_gemss-red-256_META.yml delete 
mode 100755 gemss/package.sh delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_add_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_arch.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_bit.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_conv_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_div_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_dotProduct_gf2.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_gf2nx.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_hash.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_init.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_macro.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2n.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2x.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_parameters_HFE.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_rem_gf2n.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_signHFE.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_simd_intel.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2n.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2x.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2m.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2n.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2nv.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2v.h delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQS_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQ_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_conv_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantn_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantnv_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_div_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSnocst8_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSnocst8_quo_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c delete mode 100644 
gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRoots_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_frobeniusMap_gf2nx.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_genSecretMQS_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixn_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_inv_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2x.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_predicate.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_randMatrix_gf2.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_rem_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sign.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_signHFE.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sign_keypairHFE.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sign_openHFE.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sort_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sqr_gf2n.c delete mode 100644 gemss/patches/Optimized_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_add_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_arch.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_bit.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_conv_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_div_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_dotProduct_gf2.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_gf2nx.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_hash.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_init.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_macro.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_mul_gf2n.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_parameters_HFE.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_signHFE.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_sqr_gf2n.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2m.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2n.h 
delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2nv.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2v.h delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQS_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQ_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_conv_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantn_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantnv_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_div_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSnocst8_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSnocst8_quo_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRoots_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_frobeniusMap_gf2nx.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_genSecretMQS_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixn_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_mul_gf2n.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_predicate.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_randMatrix_gf2.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_sign.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_signHFE.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_sign_keypairHFE.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_sign_openHFE.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_sqr_gf2n.c delete mode 100644 gemss/patches/Reference_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c delete mode 100755 gemss/update_patches.sh diff --git a/gemss/VERSION b/gemss/VERSION deleted file mode 100644 index 4078569..0000000 --- a/gemss/VERSION +++ /dev/null @@ -1 +0,0 @@ -GeMSS-Round2_V2.a diff --git a/gemss/checkout.sh b/gemss/checkout.sh deleted file mode 100755 index 890fb64..0000000 --- a/gemss/checkout.sh +++ /dev/null @@ -1,67 +0,0 @@ -PYTHON=/usr/bin/python3 - -BASE=`dirname $0` -BASE=`cd ${BASE} && pwd` - -VERSION=$(cat ${BASE}/VERSION) -V1=upstream -V2=upstream-patched - -ARCHIVE=${VERSION/.a/.zip} - -PATCHES=${BASE}/patches -SCRIPTS=${BASE}/scripts - -cd ${BASE} - -if [ -e "${V1}" ] -then - read -p "${V1} directory already exists. Delete it? 
" yn - if [ "${yn:-n}" != "y" ] - then - exit -1 - fi - rm -rf ${V1} -fi - -if [ -e "${V2}" ] -then - read -p "${V2} directory already exists. Delete it? " yn - if [ "${yn:-n}" != "y" ] - then - exit -1 - fi - rm -rf ${V2} -fi - -if [ ! -f ${BASE}/${ARCHIVE} ] -then - wget -P ${BASE} http://www-polsys.lip6.fr/~ryckeghem/packages/${ARCHIVE} -fi -unzip -qq -d ${BASE} ${BASE}/${ARCHIVE} - -mv ${VERSION} ${V1} -mkdir -p ${V2} - - -# De-duplicate files with symlinks. All patches are applied to -# Reference_Implementation/GeMSS-128 and Optimized_Implementation/GeMSS-128 - -( cd ${V1}/Reference_Implementation/sign/ -for X in */*/* -do - IFS=/ read PARAM SUBDIR FILE <<< $X - if [ ${PARAM} == 'GeMSS128' ]; then continue; fi - if [ ${FILE} == 'choice_crypto.h' ]; then continue; fi - ln -sf ../../GeMSS128/${SUBDIR}/${FILE} ${PARAM}/${SUBDIR}/${FILE} -done -) - -cp -rp ${V1}/* ${V2} - -( cd ${V2} -for X in ${PATCHES}/* -do - patch -p1 < ${X} -done -) diff --git a/gemss/meta/crypto_sign_gemss-128_META.yml b/gemss/meta/crypto_sign_gemss-128_META.yml deleted file mode 100644 index 23df5e3..0000000 --- a/gemss/meta/crypto_sign_gemss-128_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-128 -type: signature -claimed-nist-level: 1 -length-public-key: 352188 -length-secret-key: 16 -length-signature: 33 -nistkat-sha256: a60bb56ac878e85fd716ed5c8a003cb44034410509ae9140ddb4fa8c4ce248bb -testvectors-sha256: b207250643b2e76732f99aa91f06129f17cfa26567d127d432fb759179f93953 diff --git a/gemss/meta/crypto_sign_gemss-192_META.yml b/gemss/meta/crypto_sign_gemss-192_META.yml deleted file mode 100644 index 5aa8035..0000000 --- a/gemss/meta/crypto_sign_gemss-192_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-192 -type: signature -claimed-nist-level: 3 -length-public-key: 1237964 -length-secret-key: 24 -length-signature: 52 -nistkat-sha256: 47c4ad0a28de204c77d44c85e9e578689a3a7c490c9d3d3bdbc544cb7bc087b2 -testvectors-sha256: cb5ffcd708f16700e2a46355b0d6121096ff785ddc5143d9a603441428ee3049 diff --git a/gemss/meta/crypto_sign_gemss-256_META.yml b/gemss/meta/crypto_sign_gemss-256_META.yml deleted file mode 100644 index e423e1c..0000000 --- a/gemss/meta/crypto_sign_gemss-256_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-256 -type: signature -claimed-nist-level: 5 -length-public-key: 3040700 -length-secret-key: 32 -length-signature: 72 -nistkat-sha256: ac118f8b8e554be7b0a3df2e541a8dc6e42324a44d519ab69cb125068f64333b -testvectors-sha256: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa diff --git a/gemss/meta/crypto_sign_gemss-blue-128_META.yml b/gemss/meta/crypto_sign_gemss-blue-128_META.yml deleted file mode 100644 index 11f4a08..0000000 --- a/gemss/meta/crypto_sign_gemss-blue-128_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-blue-128 -type: signature -claimed-nist-level: 1 -length-public-key: 363609 -length-secret-key: 16 -length-signature: 34 -nistkat-sha256: 728f19ede55490cfa7d34de2e645a96a39dca2dda38d712dfb77fa5142c85ad8 -testvectors-sha256: 8e62484fd253f76fb0d90d0819fd7ba46ec17ca9d7afb6adedd514fe7d1456db diff --git a/gemss/meta/crypto_sign_gemss-blue-192_META.yml b/gemss/meta/crypto_sign_gemss-blue-192_META.yml deleted file mode 100644 index 272164c..0000000 --- a/gemss/meta/crypto_sign_gemss-blue-192_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-blue-192 -type: signature -claimed-nist-level: 3 -length-public-key: 1264117 -length-secret-key: 24 -length-signature: 53 -nistkat-sha256: 4832164471d1a7ea4d73c65c83d256b2ec402c4110838fc564cde2b3f05472d8 -testvectors-sha256: 
1e738fa80841ac0386ce47b275ec482d4c881093105ac9bd78b60cbbd9eba314 diff --git a/gemss/meta/crypto_sign_gemss-blue-256_META.yml b/gemss/meta/crypto_sign_gemss-blue-256_META.yml deleted file mode 100644 index 4f35f86..0000000 --- a/gemss/meta/crypto_sign_gemss-blue-256_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-blue-256 -type: signature -claimed-nist-level: 5 -length-public-key: 3087963 -length-secret-key: 32 -length-signature: 74 -nistkat-sha256: ae7777b16435db97b3b7d8e5bf86e321ba2c44b8841a7042ea43ea77b5a6fc87 -testvectors-sha256: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa diff --git a/gemss/meta/crypto_sign_gemss-red-128_META.yml b/gemss/meta/crypto_sign_gemss-red-128_META.yml deleted file mode 100644 index 72b90ef..0000000 --- a/gemss/meta/crypto_sign_gemss-red-128_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-red-128 -type: signature -claimed-nist-level: 1 -length-public-key: 375213 -length-secret-key: 16 -length-signature: 36 -nistkat-sha256: adfd43ea1924dbe539aed44da0e216587afecd5feeff0c82a67abb30671160ca -testvectors-sha256: 4244f2dde66719767fa3ff7cb1dc2fbf9de738edef9c4d474feba70ad6b07cce diff --git a/gemss/meta/crypto_sign_gemss-red-192_META.yml b/gemss/meta/crypto_sign_gemss-red-192_META.yml deleted file mode 100644 index 5e964ad..0000000 --- a/gemss/meta/crypto_sign_gemss-red-192_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-red-192 -type: signature -claimed-nist-level: 3 -length-public-key: 1290543 -length-secret-key: 24 -length-signature: 55 -nistkat-sha256: a3edfab4e1387318d720589d7173a5df5221b611d4aac70ac1f9c41e31da2223 -testvectors-sha256: b6b18406f3be5e845454ce73ab6d11221cf21b18ec098db16d6e227d3a7a6185 diff --git a/gemss/meta/crypto_sign_gemss-red-256_META.yml b/gemss/meta/crypto_sign_gemss-red-256_META.yml deleted file mode 100644 index 97c3408..0000000 --- a/gemss/meta/crypto_sign_gemss-red-256_META.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: gemss-red-256 -type: signature -claimed-nist-level: 5 -length-public-key: 3135591 -length-secret-key: 32 -length-signature: 75 -nistkat-sha256: 624ec7629d3a835fc86a2f4cc1cb2386666b19c0f0c3df9b06f7006edb10726b -testvectors-sha256: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa diff --git a/gemss/package.sh b/gemss/package.sh deleted file mode 100755 index ce975d9..0000000 --- a/gemss/package.sh +++ /dev/null @@ -1,661 +0,0 @@ -#!/bin/bash - -BASE=`dirname $0` -BASE=`cd ${BASE} && pwd` -cd ${BASE} - -VERSION=$(cat ${BASE}/VERSION) -ARCHIVE=${VERSION/.a/.zip} - -BUILD=${BASE}/build -BUILD_UPSTREAM=${BUILD}/upstream -BUILD_CRYPTO_SIGN=${BUILD}/crypto_sign -BUILD_TEST=${BUILD}/test -BUILD_DFILE=${BUILD}/dfile - -SCRIPTS=${BASE}/scripts - -# some useful sed arguments -GRAB=( -e '/\#define/!d' -e '/\#define _.*_H$/d' -e 's/\(define .*\)U$/\1/' ) -STRAIGHTEN_IF=( -e :a -e '/#if.*\\$/N; s/\\\n//; ta' ) -STRAIGHTEN_DEF=( -e :a -e '/#define.*\\$/N; s/\\\n//; ta' ) - -set -eu - -function task { - echo -e "[ ]" $1 -} - -function endtask { - echo -e "\e[1A[x]" -} - -function cleanup { - rm -rf ${BUILD_UPSTREAM} -} -trap cleanup EXIT - -if [ -e "${BUILD_CRYPTO_SIGN}" ] -then - read -p "${BUILD_CRYPTO_SIGN} directory already exists. Delete it? " yn - if [ "${yn:-n}" != "y" ] - then - exit -1 - fi - rm -rf ${BUILD_CRYPTO_SIGN} ${BUILD_TEST} -fi -mkdir -p ${BUILD_CRYPTO_SIGN} ${BUILD_TEST} ${BUILD_DFILE} - -if [ ! 
-f ${BASE}/${ARCHIVE} ] -then - wget -P ${BASE} http://www-polsys.lip6.fr/~ryckeghem/packages/${ARCHIVE} -fi - -task "Unpacking ${ARCHIVE}" -( unzip -qq -d ${BUILD} ${BASE}/${ARCHIVE} -mv ${BUILD}/${VERSION}* ${BUILD_UPSTREAM} -cd ${BUILD_UPSTREAM} -for X in {Ref*,Opt*}/sign/*/*/* -do - IFS=/ read IMPL SIGN PARAM SUB FILE <<< ${X} - if [ ${PARAM} == 'GeMSS128' ]; then continue; fi - if [ ${FILE} == 'choice_crypto.h' ]; then continue; fi - ln -sf ../../GeMSS128/${SUB}/${FILE} ${X} -done ) -endtask - -task 'Applying patches to upstream source code' -( cd ${BUILD_UPSTREAM} - -for X in ${BASE}/patches/* -do - patch -s -p1 < ${X} -done ) -endtask - -task 'Copying files' -( for COLOR in '' 'Blue' 'Red' -do - for SECURITY in 128 192 256 - do - INPARAM=${COLOR}GeMSS${SECURITY} - OUTPARAM=gemss-$(echo $COLOR | tr [:upper:] [:lower:])-${SECURITY} - OUTPARAM=${OUTPARAM/--/-} - mkdir -p ${BUILD_CRYPTO_SIGN}/${OUTPARAM}/{clean,avx2} - cp -Lp --no-preserve=mode ${BUILD_UPSTREAM}/Reference_Implementation/sign/${INPARAM}/*/*.{c,h} ${BUILD_CRYPTO_SIGN}/${OUTPARAM}/clean/ - cp -Lp --no-preserve=mode ${BUILD_UPSTREAM}/Optimized_Implementation/sign/${INPARAM}/*/*.{c,h} ${BUILD_CRYPTO_SIGN}/${OUTPARAM}/avx2/ - for F in PQCgenKAT_sign.c debug.h KAT_int.c hash.c KAT_int.h prefix_name.h\ - randombytes_FIPS.c randombytes_FIPS.h randombytes.h rem_gf2x.h rng.c rng.h - do - rm -f ${BUILD_CRYPTO_SIGN}/${OUTPARAM}/*/$F - done - sed -i -s '/include "KAT_int.h"/d' ${BUILD_CRYPTO_SIGN}/*/*/*.h - done -done ) -endtask - -task 'Removing ifdefs' -( -UNIFDEFOPTS="-B -k -m \ --D__x86_64__ -U__cplusplus \ --U__AVX2__ -U__AVX__ -U__PCLMUL__ -U__POPCNT__ -U__SSE2__ -U__SSE4_1__ -U__SSE__ -U__SSSE3__ \ --DENABLED_GF2X=0 \ --UKAT_INT -USUPERCOP -UTEST_LIBRARY \ --UBlueGeMSS -UDualModeMS -UFGeMSS -UGeMSS -UMQSOFT_REF -UQUARTZ -UQUARTZ_V1 -URedGeMSS \ --DENABLED_SEED_SK=1 -DGEN_MINUS_VINEGARS=1 -DITOH -DInnerMode -UUNIQUE_ROOT \ --DNB_BITS_UINT=64" - -# All of the compilation decisions are set by choice_crypto.h -sed -s -i "${STRAIGHTEN_IF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/choice_crypto.h -unifdef ${UNIFDEFOPTS} -DBlueGeMSS ${BUILD_CRYPTO_SIGN}/gemss-blue-{128,192,256}/*/choice_crypto.h || true -unifdef ${UNIFDEFOPTS} -DGeMSS ${BUILD_CRYPTO_SIGN}/gemss-{128,192,256}/*/choice_crypto.h || true -unifdef ${UNIFDEFOPTS} -DRedGeMSS ${BUILD_CRYPTO_SIGN}/gemss-red-{128,192,256}/*/choice_crypto.h || true - -# We'll build without libgf2x -sed -i 's/ENABLED_GF2X 1/ENABLED_GF2X 0/' ${BUILD_CRYPTO_SIGN}/*/*/arch.h - -for X in ${BUILD_CRYPTO_SIGN}/*/*/choice_crypto.h -do - ( cd $(dirname ${X}) - PARAM=$(echo ${X} | awk -F/ '{print $(NF-2)}' ) - IMPL=$(echo ${X} | awk -F/ '{print $(NF-1)}' ) - DFILE=${BUILD_DFILE}/${PARAM} - - cat choice_crypto.h | sed "${GRAB[@]}" > ${DFILE} - - if [ ${IMPL} == "avx2" ] - then - UNIFDEFOPTS="${UNIFDEFOPTS} -DENABLED_AVX2 -DENABLED_SSE -DENABLED_POPCNT -DENABLED_PCLMUL \ - -DENABLED_PCLMUL_AVX2 -DENABLED_PCLMUL_SSE2 -DENABLED_AVX -DENABLED_SSE2 \ - -DENABLED_SSE4_1 -DENABLED_SSSE3" - else - UNIFDEFOPTS="${UNIFDEFOPTS} -UENABLED_SSE -UENABLED_POPCNT -UENABLED_PCLMUL \ - -UENABLED_PCLMUL_AVX2 -UENABLED_PCLMUL_SSE2 -UENABLED_AVX -UENABLED_SSE2 \ - -UENABLED_SSE4_1 -UENABLED_SSSE3" - fi - - # Remove line breaks in multi-line #if statements. 
- sed -s -i "${STRAIGHTEN_IF[@]}" config_gf2n.h sign_keypairHFE.c rem_gf2n.c rem_gf2n.h - - unifdef ${UNIFDEFOPTS} -f ${DFILE} parameters_HFE.h || true - cat parameters_HFE.h | sed "${GRAB[@]}" >> ${DFILE} - - unifdef ${UNIFDEFOPTS} -f ${DFILE} arch.h || true - cat arch.h | sed "${STRAIGHTEN_DEF[@]}" | sed "${GRAB[@]}" >> ${DFILE} - - unifdef ${UNIFDEFOPTS} -f ${DFILE} chooseRootHFE_gf2nx.h || true - cat chooseRootHFE_gf2nx.h | sed "${GRAB[@]}" >> ${DFILE} - - FILE=config_gf2n.h - unifdef ${UNIFDEFOPTS} -f ${DFILE} ${FILE} || true - grep "#define K[123]" ${FILE} | sed "${GRAB[@]}" >> ${DFILE} - unifdef ${UNIFDEFOPTS} -UK2 -UK3 -f ${DFILE} ${FILE} || true - grep "#define __.*NOMIAL_GF2N__" ${FILE} | sed "${GRAB[@]}" >> ${DFILE} - unifdef ${UNIFDEFOPTS} -U__TRINOMIAL_GF2N__ -U__PENTANOMIAL_GF2N__ -f ${DFILE} ${FILE} || true - UNIFDEFOPTS="${UNIFDEFOPTS} -UK2 -UK3 -U__TRINOMIAL_GF2N__ -U__PENTANOMIAL_GF2N__" - - FILE=config_HFE.h - unifdef ${UNIFDEFOPTS} -f ${DFILE} ${FILE} || true - cat ${FILE} | sed "${GRAB[@]}" >> ${DFILE} - - FILE=frobeniusMap_gf2nx.h - unifdef ${UNIFDEFOPTS} -f ${DFILE} ${FILE} || true - grep "#define.*II" ${FILE} | sed "${GRAB[@]}" >> ${DFILE} - - K=$(grep 'define K [0-9]' ${DFILE} | awk '{print $(NF)}') - N=$(grep 'define HFEn [0-9]' ${DFILE} | awk '{print $(NF)}') - V=$(grep 'define HFEv [0-9]' ${DFILE} | awk '{print $(NF)}') - DELTA=$(grep 'define HFEDELTA [0-9]' ${DFILE} | awk '{print $(NF)}') - HFEDEG=$(grep 'define HFEDeg [0-9]' ${DFILE} | awk '{print $(NF)}') - HFEDEGI=$(grep 'define HFEDegI [0-9]' ${DFILE} | awk '{print $(NF)}') - HFEDEGJ=$(grep 'define HFEDegJ [0-9]' ${DFILE} | awk '{print $(NF)}') - NB_ITE=$(grep 'define NB_ITE [0-9]' ${DFILE} | awk '{print $(NF)}') - II=$(grep 'define II [0-9]' ${DFILE} | awk '{print $(NF)}') - - NR=$((${N}%64)) - NQ=$((${N}/64)) - NR8=$((${N}%8)) - NQ8=$((${N}/8)) - NB_WORD_MUL=$(((2*(${N}-1))/64+1)) - NB_WORD_MMUL=$(((2*(${N}-1))/64+1)) - - VR=$((${V}%64)) - VQ=$((${V}/64)) - VR8=$((${V}%8)) - VQ8=$((${V}/8)) - - M=$((${N}-${DELTA})) - MR=$((${M}%64)) - MQ=$((${M}/64)) - MR8=$((${M}%8)) - MQ8=$((${M}/8)) - - NV=$((${N}+${V})) - NVR=$((${NV}%64)) - NVQ=$((${NV}/64)) - NVR8=$((${NV}%8)) - NVQ8=$((${NV}/8)) - - NB_MONOMIAL_PK=$(((${NV}*(${NV}+1))/2+1)) - - HFENq=$((${NB_MONOMIAL_PK}/64)) - HFENq8=$((${NB_MONOMIAL_PK}/8)) - HFENr=$((${NB_MONOMIAL_PK}%64)) - HFENr8=$((${NB_MONOMIAL_PK}%8)) - HFENr8c=$(((8-(${NB_MONOMIAL_PK}%8))%8)) - - LOST_BITS=$(((${MR8}-1)*${HFENr8c})) - - NB_WORD_GFqn=${NQ} - [ ${NR} -ne 0 ] && ((NB_WORD_GFqn+=1)) - - NB_WORD_GFqv=${VQ} - [ ${VR} -ne 0 ] && ((NB_WORD_GFqv+=1)) - - NB_WORD_GF2nv=${NVQ} - [ ${NVR} -ne 0 ] && ((NB_WORD_GF2nv+=1)) - - NB_BYTES_GFqm=${MQ8} - [ ${MR8} -ne 0 ] && ((NB_BYTES_GFqm+=1)) - - NB_WORD_GF2m=${MQ} - [ ${MR} -ne 0 ] && ((NB_WORD_GF2m+=1)) - - NB_WORD_GF2nvm=$((${NB_WORD_GF2nv}-${NB_WORD_GF2m})) - [ ${MR} -ne 0 ] && ((NB_WORD_GF2nvm+=1)) - - [ $((${DELTA}+${V})) -lt $((8-${MR8})) ] && - VAL_BITS_M=$((${DELTA}+${V})) || - VAL_BITS_M=$((8-${MR8})) - - echo "#define HFENq ${HFENq}" >> ${DFILE} - echo "#define HFENq8 ${HFENq8}" >> ${DFILE} - echo "#define HFENr ${HFENr}" >> ${DFILE} - echo "#define HFENr8 ${HFENr8}" >> ${DFILE} - echo "#define HFENr8c ${HFENr8c}" >> ${DFILE} - echo "#define HFEm ${M}" >> ${DFILE} - echo "#define HFEmq ${MQ}" >> ${DFILE} - echo "#define HFEmq8 ${MQ8}" >> ${DFILE} - echo "#define HFEmr ${MR}" >> ${DFILE} - echo "#define HFEmr8 ${MR8}" >> ${DFILE} - echo "#define HFEn ${N}" >> ${DFILE} - echo "#define HFEnr ${NR}" >> ${DFILE} - echo "#define HFEnr8 
$((${N}%8))" >> ${DFILE} - echo "#define HFEnv ${NV}" >> ${DFILE} - echo "#define HFEnvr ${NVR}" >> ${DFILE} - echo "#define HFEnvr8 ${NVR8}" >> ${DFILE} - echo "#define HFEnvrm1 $(((${NV}-1)%64))" >> ${DFILE} - echo "#define HFEv ${V}" >> ${DFILE} - echo "#define HFEvr ${VR}" >> ${DFILE} - echo "#define HFEvr8 ${VR8}" >> ${DFILE} - echo "#define KI ${NR}" >> ${DFILE} - echo "#define LAST_ROW_Q $(((${NV}-${LOST_BITS})/64))" >> ${DFILE} - echo "#define LAST_ROW_R $(((${NV}-${LOST_BITS})%64))" >> ${DFILE} - echo "#define LOST_BITS ${LOST_BITS}" >> ${DFILE} - echo "#define NB_MONOMIAL_PK ${NB_MONOMIAL_PK}" >> ${DFILE} - echo "#define NB_WHOLE_BLOCKS $(((${NV}-(64-((${NB_MONOMIAL_PK}-${LOST_BITS}-${NVR})%64))%64)>>6))" >> ${DFILE} - echo "#define NB_WORD_GF2m ${NB_WORD_GF2m}" >> ${DFILE} - echo "#define NB_WORD_GF2m_TMP ${NB_WORD_GF2m}" >> ${DFILE} - echo "#define NB_WORD_GF2nv ${NB_WORD_GF2nv}" >> ${DFILE} - echo "#define NB_WORD_GF2nv_TMP ${NB_WORD_GF2nv}" >> ${DFILE} - echo "#define NB_WORD_GF2nvm ${NB_WORD_GF2nvm}" >> ${DFILE} - echo "#define NB_WORD_GFqn ${NB_WORD_GFqn}" >> ${DFILE} - echo "#define NB_WORD_GFqn_TMP ${NB_WORD_GFqn}" >> ${DFILE} - echo "#define NB_WORD_GFqv ${NB_WORD_GFqv}" >> ${DFILE} - echo "#define NB_WORD_GFqv_TMP ${NB_WORD_GFqv}" >> ${DFILE} - echo "#define NB_WORD_MMUL ${NB_WORD_MMUL}" >> ${DFILE} - echo "#define NB_WORD_MMUL_TMP ${NB_WORD_MMUL}" >> ${DFILE} - echo "#define NB_WORD_MUL ${NB_WORD_MUL}" >> ${DFILE} - echo "#define REM_MACRO 0" >> ${DFILE} - echo "#define SIZE_ALIGNED_GFqm 0" >> ${DFILE} - echo "#define SIZE_ALIGNED_GFqn 0" >> ${DFILE} - - FILE=gf2nx.h - sed -i "s/(HFEDeg&1U\?)/($((${HFEDEG}%2)))/" ${FILE} - unifdef ${UNIFDEFOPTS} -f ${DFILE} ${FILE} || true - grep 'define ENABLED_REMOVE_ODD_DEGREE' ${FILE} >> ${DFILE} - - # Done extracting defines. Do a full pass to simplify later work. - - unifdef ${UNIFDEFOPTS} -f ${DFILE} *.{c,h} || true - - # Clean up some arithmetic that unifdef cannot handle. 
- - FILE=chooseRootHFE_gf2nx.h - sed -i 's/FIRST_ROOT+DETERMINIST_ROOT+QUARTZ_ROOT/1/' ${FILE} - - FILE=bit.h - sed -i 's/if (NB_BITS_UINT==64U)/if (1)/' ${FILE} - - FILE=convMQ_gf2.c - VAL=$((((${NB_MONOMIAL_PK}-${LOST_BITS}+7)/8)%8)) - sed -i "s/((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7/${VAL}/" ${FILE} - - FILE=evalMQSv_gf2.c - NB_WORD_EQ=${NQ} - [ ${NR} -ne 0 ] && ((NB_WORD_EQ+=1)) - unifdef ${UNIFDEFOPTS} -DNB_VAR=${V} -DNB_VARq=${VQ} -DNB_VARr=${VR} -DNB_EQr=${NR}\ - -DNB_WORD_EQ_TMP=${NB_WORD_EQ} -DNB_WORD_EQ=${NB_WORD_EQ}\ - -DHYBRID_FUNCTIONS=0\ - -f ${DFILE} ${FILE} || true - - FILE=evalMQSnocst8_gf2.c - NB_BYTES_EQ=${MQ8} - [ ${MR} -ne 0 ] && ((NB_BYTES_EQ+=1)) - NB_WORD_EQ_NOCST8=$(((${NB_BYTES_EQ}+7)/8)) - LEN_UNROLLED_64=$(grep 'define LEN_UNROLLED_64' evalMQSnocst8_gf2.c | awk '{print $(NF)}') - sed -i "s/NB_EQ&63/${MR}/" ${FILE} - sed -i "s/LEN_UNROLLED_64<<1/$((${LEN_UNROLLED_64}*2))/" evalMQSnocst8_gf2.c - unifdef ${UNIFDEFOPTS} -DNB_VAR=${NV} -DNB_VARq=${NVQ} -DNB_VARr=${NVR} -DNB_EQr=${MR}\ - -DNB_BYTES_EQ=${NB_BYTES_EQ} -DNB_WORD_EQ=$((NB_WORD_EQ_NOCST8))\ - -DLEN_UNROLLED_64=${LEN_UNROLLED_64}\ - -f ${DFILE} ${FILE} || true - - FILE=evalMQSnocst8_quo_gf2.c - [ ${MQ8} == 0 ] && NB_EQ=${M} || NB_EQ=$((8*${MQ8})) - NB_EQR=$((${NB_EQ}%8)) - NB_BYTES_EQ=$((${NB_EQ}/8)) - [ ${NB_EQR} -ne 0 ] && ((NB_BYTES_EQ+=1)) - NB_WORD_EQ=$(((${NB_BYTES_EQ}+7)/8)) - sed -i "s/NB_EQ&63/$((${NB_EQ}%64))/" ${FILE} - LEN_UNROLLED_64=$(grep 'define LEN_UNROLLED_64' ${FILE} | awk '{print $(NF)}') - sed -i "s/LEN_UNROLLED_64<<1/$((${LEN_UNROLLED_64}*2))/" ${FILE} - unifdef ${UNIFDEFOPTS} -DNB_VAR=${NV} -DNB_VARq=${NVQ} -DNB_VARr=${NVR}\ - -DNB_EQq_orig=${MQ8} -DNB_EQr=$((${NB_EQ}%8)) \ - -DNB_BYTES_EQ=${NB_BYTES_EQ} -DNB_WORD_EQ=${NB_WORD_EQ} \ - -DLEN_UNROLLED_64=${LEN_UNROLLED_64} \ - -f ${DFILE} ${FILE} || true - - FILE=frobeniusMap_gf2nx.c - [ ${IMPL} == "avx2" ] && - sed -i "s/(HFEn-HFEDegI)%II/$(((${N} - ${HFEDEGI})%${II}))/" ${FILE} - - FILE=frobeniusMap_gf2nx.h - [ ${IMPL} == "avx2" ] && - sed -i "s/((HFEDeg%POW_II)?1:0)/$(((${HFEDEG}%(1<<${II}))?1:0))/" ${FILE} - - FILE=inv_gf2n.c - sed -i "s/HFEn&63/$((${N}%64))/" ${FILE} - - FILE=mixEquationsMQS_gf2.c - sed -i "s/NB_BYTES_GFqm&7/$((${NB_BYTES_GFqm}%8))/" ${FILE} - - FILE=parameters_HFE.h - sed -i "s/K<<1/$((2*${K}))/" ${FILE} - sed -i "s/(1U< -#include - -#define CRYPTO_ALGNAME \"${PARAM}\" - -#define CRYPTO_SECRETKEYBYTES ${SIZE_SK} -#define CRYPTO_PUBLICKEYBYTES ${SIZE_PK} -#define CRYPTO_BYTES ${SIZE_SIGN} - -int crypto_sign_keypair(uint8_t *pk, uint8_t *sk); -int crypto_sign(uint8_t *sm, size_t *smlen, const uint8_t *msg, size_t len, const uint8_t *sk); -int crypto_sign_open(uint8_t *m, size_t *mlen, const uint8_t *sm, size_t smlen, const uint8_t *pk); -int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk); -int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk); - -#endif -" > api.h -) -done ) -endtask - -task 'Sorting #includes' -( for PARAM in gemss-{,blue-,red-}{128,192,256} -do - for IMPL in clean avx2 - do - for F in ${BUILD_CRYPTO_SIGN}/${PARAM}/${IMPL}/*.h - do - START=$(grep -n -m 1 '^\s*#include' ${F} | cut -d: -f1) - if [ x${START} == x ]; then continue; fi - GUARD=$(head -n $((${START}-1)) ${F}) - INCL1=$(grep '^\s*#include \"' ${F} | sed 's/^\s*//' | LC_ALL=C sort -u) - INCL2=$(grep '^\s*#include <' ${F} | sed 's/^\s*//' | LC_ALL=C sort -u) - REST=$(tail -n+$((${START}+1)) ${F} | sed '/^\s*#include/d') - echo 
"${GUARD}\n${INCL1}\n${INCL2}\n${REST}" | sed 's/\\n/\n/g' > ${F} - done - for F in ${BUILD_CRYPTO_SIGN}/${PARAM}/${IMPL}/*.c - do - INCL1=$(grep '^\s*#include \"' ${F} | sed 's/^\s*//' | LC_ALL=C sort -u) - INCL2=$(grep '^\s*#include <' ${F} | sed 's/^\s*//' | LC_ALL=C sort -u) - REST=$(sed '/^\s*#include/d' ${F}) - echo "${INCL1}\n${INCL2}\n${REST}" | sed 's/\\n/\n/g' > ${F} - done - done -done ) -endtask - -#MANIFEST=${BUILD_TEST}/duplicate_consistency -#mkdir -p ${MANIFEST} -#task "Preparing for duplicate consistency" -#( cd ${MANIFEST} -#for P1 in gemss-{,blue-,red-}{128,192,256} -#do -# for OUT in clean avx2 -# do -# sha1sum ${BUILD_CRYPTO_SIGN}/${P1}/${OUT}/*.{h,c} > ${P1}_${OUT}.xxx -# done -#done -#) -#endtask -# -#( cd ${MANIFEST} -#for P1 in gemss-{,blue-,red-}{128,192,256} -#do -# for OUT in clean avx2 -# do -# task "${P1}/${OUT} duplicate consistency" -# echo "\ -#consistency_checks:" > ${P1}_${OUT}.yml -# for P2 in gemss-{,blue-,red-}{128,192,256} -# do -# for IN in clean avx2 -# do -# if ([ "${P1}" == "${P2}" ] && [ "${IN}" == "${OUT}" ]) || [ "${P1}" \> "${P2}" ]; then continue; fi -# echo "\ -#- source: -# scheme: ${P2} -# implementation: ${IN} -# files:" >> ${P1}_${OUT}.yml -# for HASH in $(cat ${P2}_${IN}.xxx | cut -d ' ' -f 1) -# do -# X=$(grep $HASH ${P1}_${OUT}.xxx | cut -d ' ' -f 3) -# if [ x${X} != 'x' ] -# then -# [ -e ${BUILD_CRYPTO_SIGN}/${P2}/${OUT}/$(basename $X) ] && \ -# echo "\ -# - $(basename $X)" >> ${P1}_${OUT}.yml -# fi -# done -# done -# done -# endtask -# done -#done -#) -#rm -rf ${MANIFEST}/*.xxx - -task 'Namespacing' -( - -# Fix definitions that need namespacing but are split over multiple lines -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/conv_gf2nx.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/convMQS_gf2.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/evalMQShybrid_gf2.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/*/evalMQnocst_gf2.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/avx2/frobeniusMap_gf2nx.h -sed -i -s "${STRAIGHTEN_DEF[@]}" ${BUILD_CRYPTO_SIGN}/*/avx2/genCanonicalBasis_gf2n.h - -# GeMSS has its own namespacing macro. We'll delete it and do it our way. -sed -i -s '/include "prefix_name.h"/d' ${BUILD_CRYPTO_SIGN}/*/*/*.h -sed -i -s 's/^\s*\(int\|void\|gf2\|UINT\|uint64_t\|unsigned int\)\s\+PREFIX_NAME(\([^)]*\))/\1 \2/' ${BUILD_CRYPTO_SIGN}/*/*/*.{h,c} - -# Insert hooks for namespacing. These will be removed later. 
-sed -i -s 's/^\s*\(int\|void\|gf2\|UINT\|uint64_t\|unsigned int\)\s\+\([^(]*\)(/#define \2 CRYPTO_NAMESPACE(\2)\n&/' ${BUILD_CRYPTO_SIGN}/*/*/*.h -sed -i -s '/#define.*PREFIX_NAME/d' ${BUILD_CRYPTO_SIGN}/*/*/*.h - -for PARAM in gemss-{,blue-,red-}{128,192,256} -do - for IMPL in clean avx2 - do - ( cd ${BUILD_CRYPTO_SIGN}/${PARAM}/${IMPL} - NAMESPACE=$(echo PQCLEAN_${PARAM//-/}_${IMPL} | tr [:lower:] [:upper:]) - for X in $(grep CRYPTO_NAMESPACE *.{c,h} | cut -f2 -d' ' | sort -u); do - sed -i -s "s/\([^a-zA-Z_]\)${X}\([^a-zA-Z\._]\|$\)/\1${NAMESPACE}_${X}\2/g" *.c *.h - done - sed -i -s '/CRYPTO_NAMESPACE/d' *.{c,h} - sed -i -s "s/CRYPTO_/${NAMESPACE}_CRYPTO_/" *.{c,h} - sed -i "s/API_H/${NAMESPACE}_API_H/" api.h - - sed -i -s "s/f_/${NAMESPACE}_/" tools_gf2n.h tools_gf2m.h hash.h - ) - done -done ) -endtask - -task 'Copying metadata' -( # Makefiles and other metadata -for PARAM in gemss-{,blue-,red-}{128,192,256} -do - ( cd ${BUILD_CRYPTO_SIGN}/${PARAM}/ - - #echo "Public Domain" > clean/LICENSE - #cp -Lp clean/LICENSE avx2/LICENSE - cp -Lp ${BASE}/meta/crypto_sign_${PARAM}_META.yml META.yml - echo "\ -principal-submitters: - - A. Casanova - - J.-C. Faugere - - G. Macario-Rat - - J. Patarin - - L. Perret - - J. Ryckeghem -implementations: - - name: clean - version: ${VERSION} - - name: avx2 - version: ${VERSION} - supported_platforms: - - architecture: x86_64 - operating_systems: - - Linux - - Darwin - required_flags: - - avx2" >> META.yml - - echo "\ -# This Makefile can be used with GNU Make or BSD Make - -LIB=lib${PARAM}_clean.a -HEADERS=$(basename -a clean/*.h | tr '\n' ' ') -OBJECTS=$(basename -a clean/*.c | sed 's/\.c/.o/' | tr '\n' ' ') - -CFLAGS=-O3 -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common \$(EXTRAFLAGS) - -all: \$(LIB) - -%.o: %.c \$(HEADERS) - \$(CC) \$(CFLAGS) -c -o \$@ $< - -\$(LIB): \$(OBJECTS) - \$(AR) -r \$@ \$(OBJECTS) - -clean: - \$(RM) \$(OBJECTS) - \$(RM) \$(LIB)" > clean/Makefile - -echo "\ -# This Makefile can be used with Microsoft Visual Studio's nmake using the command: -# nmake /f Makefile.Microsoft_nmake - -LIBRARY=lib${PARAM}_clean.lib -OBJECTS=$(basename -a clean/*.c | sed 's/\.c/.obj/' | tr '\n' ' ') - -CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX - -all: \$(LIBRARY) - -# Make sure objects are recompiled if headers change. -\$(OBJECTS): *.h - -\$(LIBRARY): \$(OBJECTS) - LIB.EXE /NOLOGO /WX /OUT:\$@ \$** - -clean: - -DEL \$(OBJECTS) - -DEL \$(LIBRARY)" > clean/Makefile.Microsoft_nmake - -echo "\ -# This Makefile can be used with GNU Make or BSD Make - -LIB=lib${PARAM}_avx2.a -HEADERS=$(basename -a avx2/*.h | tr '\n' ' ') -OBJECTS=$(basename -a avx2/*.c | sed 's/\.c/.o/' | tr '\n' ' ') - -CFLAGS=-O3 -mavx2 -mbmi -mpclmul -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common \$(EXTRAFLAGS) - -all: \$(LIB) - -%.o: %.s \$(HEADERS) - \$(AS) -o \$@ $< - -%.o: %.c \$(HEADERS) - \$(CC) \$(CFLAGS) -c -o \$@ $< - -\$(LIB): \$(OBJECTS) - \$(AR) -r \$@ \$(OBJECTS) - -clean: - \$(RM) \$(OBJECTS) - \$(RM) \$(LIB)" > avx2/Makefile - - ) -done ) -endtask - -task 'Styling' -astyle \ - --style=google \ - --indent=spaces \ - --indent-preproc-define \ - --indent-preproc-cond \ - --pad-oper \ - --pad-comma \ - --pad-header \ - --align-pointer=name \ - --add-braces \ - --convert-tabs \ - --mode=c \ - --suffix=none \ - ${BUILD_CRYPTO_SIGN}/*/*/*.{c,h} >/dev/null -endtask - -task "Removing 256-bit implementations. 
See https://github.com/PQClean/PQClean/pull/326#issuecomment-700374801" -rm -rf ${BUILD_CRYPTO_SIGN}/*256 -endtask - -# Package -task "Packaging pqclean-gemss-$(date +"%Y%m%d").tar.gz" -tar czf ${BASE}/pqclean-gemss-$(date +"%Y%m%d").tar.gz -C ${BUILD} crypto_sign test -endtask - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_add_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_add_gf2nx.h deleted file mode 100644 index 788dfc5..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_add_gf2nx.h +++ /dev/null @@ -1,30 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/add_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/add_gf2nx.h -@@ -14,7 +14,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define add2_gf2nx(res,A,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]^=(A)[i];\ - } -@@ -30,7 +30,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define copy_gf2nx(res,A,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]=(A)[i];\ - } -@@ -45,7 +45,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define set0_gf2nx(res,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]=0;\ - } - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_arch.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_arch.h deleted file mode 100644 index a6280bf..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_arch.h +++ /dev/null @@ -1,192 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/arch.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/arch.h -@@ -6,6 +6,8 @@ - #include - #include "macro.h" - -+#include -+ - - /****************** uintXX_t for compatibility ******************/ - -@@ -26,18 +28,6 @@ - #define ZERO8 ((uint8_t)0) - #define ONE8 ((uint8_t)1) - --/* 0x... */ --#define PRINT_X64(a) printf("0x%"PRIx64,a); --#define PRINT_X32(a) printf("0x%"PRIx32,a); --#define PRINT_X16(a) printf("0x%"PRIx16,a); --#define PRINT_X8(a) printf("0x%"PRIx8,a); --/* ... */ --#define PRINT_U64(a) printf("%"PRIx64,a); --#define PRINT_U32(a) printf("%"PRIx32,a); --#define PRINT_U16(a) printf("%"PRIx16,a); --#define PRINT_U8(a) printf("%"PRIx8,a); -- -- - /****************** Definition of an UINT ******************/ - - /* XXX For the moment, this parameter cannot be modified. XXX */ -@@ -81,9 +71,6 @@ - /** Print an UINT. */ - #define PRINT_UINT(a) CONCAT(PRINT_X,NB_BITS_UINT)(a); - -- -- -- - /** A reserved variable to do a for loop on a buffer of UINT. 
*/ - #define RESERVED_VARIABLE reserved_variable - -@@ -103,6 +90,35 @@ - } \ - } - -+/** Load a UINT from unsigned char * **/ -+ -+#define LOAD_UINT(a, p) \ -+ (a) = (p)[7]; (a) <<= 8;\ -+ (a) |= (p)[6]; (a) <<= 8;\ -+ (a) |= (p)[5]; (a) <<= 8;\ -+ (a) |= (p)[4]; (a) <<= 8;\ -+ (a) |= (p)[3]; (a) <<= 8;\ -+ (a) |= (p)[2]; (a) <<= 8;\ -+ (a) |= (p)[1]; (a) <<= 8;\ -+ (a) |= (p)[0]; -+ -+#define LOAD_UINT_ARRAY(a, p, N) \ -+ FOR_LOOP(LOAD_UINT((a)[RESERVED_VARIABLE], &(p)[8*RESERVED_VARIABLE]), (N)) -+ -+/** Store a UINT to an unsigned char * **/ -+#define STORE_UINT(p, a) \ -+ (p)[0] = ((a) >> 0x00) & 0xff; \ -+ (p)[1] = ((a) >> 0x08) & 0xff; \ -+ (p)[2] = ((a) >> 0x10) & 0xff; \ -+ (p)[3] = ((a) >> 0x18) & 0xff; \ -+ (p)[4] = ((a) >> 0x20) & 0xff; \ -+ (p)[5] = ((a) >> 0x28) & 0xff; \ -+ (p)[6] = ((a) >> 0x30) & 0xff; \ -+ (p)[7] = ((a) >> 0x38) & 0xff; -+ -+#define STORE_UINT_ARRAY(a, p, N) \ -+ FOR_LOOP(STORE_UINT(&(p)[8*RESERVED_VARIABLE], (a)[RESERVED_VARIABLE]), (N)) -+ - - - /****************** C++ compatibility ******************/ -@@ -160,60 +176,44 @@ - - - #ifdef __SSE__ -- #include -- /** To use sse. */ - #define ENABLED_SSE - #endif - - #ifdef __SSE2__ -- #include -- /** To use sse2. */ - #define ENABLED_SSE2 - #endif - - #ifdef __SSSE3__ -- #include -- /** To use ssse3. */ - #define ENABLED_SSSE3 - #endif - - #ifdef __SSE4_1__ -- #include -- /** To use sse4.1. */ - #define ENABLED_SSE4_1 - #endif - - #ifdef __AVX__ -- #include -- /** To use avx. */ - #define ENABLED_AVX - #endif - - #ifdef __AVX2__ -- /** To use avx2. */ - #define ENABLED_AVX2 - #endif - - #if (defined(__PCLMUL__) && defined(ENABLED_SSE)) -- #include -- /** To use multiplication in binary field with PCLMULQDQ and sse. */ - #define ENABLED_PCLMUL - #endif - - #if (defined(ENABLED_PCLMUL) && defined(ENABLED_SSE2)) -- #include - /** To use multiplication in binary field with PCLMULQDQ and sse2. */ - #define ENABLED_PCLMUL_SSE2 - #endif - - #if (defined(ENABLED_PCLMUL) && defined(ENABLED_AVX2)) -- #include - /** To use multiplication in binary field with PCLMULQDQ and avx2. */ - #define ENABLED_PCLMUL_AVX2 - #endif - - #ifdef __POPCNT__ -- #include - /** Improve the computation of the number of bits set to 1 in a 64-bit - * or 32-bit integer. */ - #define ENABLED_POPCNT -@@ -237,7 +237,7 @@ - /** Verify if the allocation by malloc or calloc succeeds. - * Exit in the failure case. */ - #define VERIFY_ALLOC(p) \ -- if(!p) \ -+ if(!(p)) \ - {\ - exit(ERROR_ALLOC);\ - } -@@ -245,7 +245,7 @@ - /** Verify if the allocation by malloc or calloc succeeds. - * Return ERROR_ALLOC in the failure case. */ - #define VERIFY_ALLOC_RET(p) \ -- if(!p) \ -+ if(!(p)) \ - {\ - return(ERROR_ALLOC);\ - } -@@ -272,7 +272,7 @@ - p=(type)_mm_malloc((nmemb)*(size),16); - #else - #define ALIGNED16_MALLOC(p,type,nmemb,size) \ -- if(posix_memalign((void**)(&p),16,(nmemb)*(size)))\ -+ if(posix_memalign((void**)(&(p)),16,(nmemb)*(size)))\ - {\ - exit(1);\ - } -@@ -280,7 +280,7 @@ - - #define ALIGNED16_CALLOC(p,type,nmemb,size) \ - ALIGNED16_MALLOC(p,type,nmemb,size);\ -- memset((void*)p,0,(nmemb)*(size)) -+ memset((void*)(p),0,(nmemb)*(size)) - - - /** Align the data on 32 bytes, useful for avx. 
*/ -@@ -291,7 +291,7 @@ - p=(type)_mm_malloc((nmemb)*(size),32); - #else - #define ALIGNED32_MALLOC(p,type,nmemb,size) \ -- if(posix_memalign((void**)(&p),32,(nmemb)*(size)))\ -+ if(posix_memalign((void**)(&(p)),32,(nmemb)*(size)))\ - {\ - exit(1);\ - } -@@ -299,7 +299,7 @@ - - #define ALIGNED32_CALLOC(p,type,nmemb,size) \ - ALIGNED32_MALLOC(p,type,nmemb,size);\ -- memset((void*)p,0,(nmemb)*(size)); -+ memset((void*)(p),0,(nmemb)*(size)); - - - #define NO_ALIGNED_MALLOC(p,type,nmemb,size) p=(type)malloc((nmemb)*(size)); - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_bit.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_bit.h deleted file mode 100644 index e36ad1b..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_bit.h +++ /dev/null @@ -1,383 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/bit.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/bit.h -@@ -8,18 +8,8 @@ - - /* Tools for the bits manipulation */ - -- --/* (2^k) - 1, k<64, and -1 for k=0 */ --#define mask64(k) ((k)?(ONE64<<(k))-ONE64:MONE64) -- --/* (2^k) - 1, k<32, and -1 for k=0 */ --#define mask32(k) ((k)?(ONE32<<(k))-ONE32:MONE32) -- --#define maskUINT(k) ((k)?(UINT_1<<(k))-UINT_1:UINT_M1) -- -- - /** The i-th bit of the UINT val. */ --#define ITHBIT(val,i) ((val>>i)&UINT_1) -+#define ITHBIT(val,i) (((val)>>(i))&UINT_1) - - - /** Compute the MSB position of one UINT. */ -@@ -27,14 +17,14 @@ - Output: res the MSB position of U. If U is zero, res=0 - */ - #define MSB_SP(res,U,j) \ -- res=0;\ -+ (res)=0;\ - /* Search the MSB position of one word */\ -- for(j=NB_BITS_UINT>>1;j!=0;j>>=1) \ -+ for((j)=NB_BITS_UINT>>1;(j)!=0;(j)>>=1) \ - {\ -- if((U)>>(res^j))\ -+ if((U)>>((res)^(j)))\ - {\ - /* To remember the choice of the high part */\ -- res^=j;\ -+ (res)^=(j);\ - }\ - } - -@@ -43,15 +33,15 @@ - Output: res the MSB position of U. 
If U is zero, res=0 - */ - #define MSB_MP(res,U,i,j,nb_word) \ -- i=nb_word-1;\ -+ (i)=(nb_word)-1;\ - /* Search the first word different from zero */\ -- while(i&&(!U[i])) \ -+ while((i)&&(!(U)[i])) \ - {\ -- --i;\ -+ --(i);\ - }\ - /* Search the MSB of one word */\ -- MSB_SP(res,U[i],j);\ -- res^=i<>32); -+ (n)=POPCNT_U32(n)+POPCNT_U32(n>>32); - #endif - - #define COUNTBITS32_POP(n) \ -- n=POPCNT_U32(n); -+ (n)=POPCNT_U32(n); - - #define ORBITS64_POP(n) \ - COUNTBITS64_POP(n); \ - /* The result is in {0,1,...,64} */\ -- n+=63;\ -+ (n)+=63;\ - /* Now, the result is in {63,64,...,127} */\ -- n>>=6;\ -+ (n)>>=6;\ - /* Now, the result is in {0,1,...,1} */ - - - #define NORBITS64_POP(n) \ -- --n;\ -+ --(n);\ - COUNTBITS64_POP(n); \ - /* If n=0, then Hamming_weight(n-1)==64, else Hamming_weight(n-1)<64 */\ - /* Now, the result is in {64,0,...,63} */\ -- n>>=6;\ -+ (n)>>=6;\ - /* Now, the result is in {1,0,...,0} */ - - - #define NORBITS64_POP2(n) \ - COUNTBITS64_POP(n); \ - /* The result is in {0,1,...,64} */\ -- --n;\ -+ --(n);\ - /* Now, the result is in {2^{64}-1,0,...,63} */\ -- n>>=63;\ -+ (n)>>=63;\ - /* Now, the result is in {1,0,...,0} */ - - - #define XORBITS64_POP(n) \ - COUNTBITS64_POP(n); \ -- n&=ONE64; -+ (n)&=ONE64; - #endif - - -@@ -120,18 +110,18 @@ - - /* 5 logical operations */ - #define ORBITS64_SHORT(n) \ -- n|=n << 32U;\ -- n>>=32U;\ -- n+=((uint64_t)0xFFFFFFFF);\ -- n>>=32U; -+ (n)|=(n) << 32U;\ -+ (n)>>=32U;\ -+ (n)+=((uint64_t)0xFFFFFFFF);\ -+ (n)>>=32U; - - - /* 5 logical operations */ - #define NORBITS64_SHORT(n) \ -- n|=n << 32U;\ -- n>>=32U;\ -- --n;\ -- n>>=63U; -+ (n)|=(n) << 32U;\ -+ (n)>>=32U;\ -+ --(n);\ -+ (n)>>=63U; - - - /* The third fastest method, based on the variable-precision SWAR algorithm */ -@@ -141,95 +131,95 @@ - - /* 12 logical operations */ - #define COUNTBITS64_SWAR(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -- n=(((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)=((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) >> 56U; - - - /* 13 logical operations */ - #define ORBITS64_SWAR(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ - /* We change ((n/(2^56))+63)/64 in (n+63*(2^56))/(2^62) */\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) + ((uint64_t)0x3F00000000000000)) >> 62U; - - - /* 13 logical operations */ - #define NORBITS64_SWAR(n) \ -- --n;\ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ --(n);\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101))) >> 62U; - - - /* 13 logical operations */ - #define 
NORBITS64_SWAR2(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ - /* We remark that ({0,1,...,63}+255)<<56 ={255<<56,0<<56,...,62<<56}*2^56*/\ - /* So, the 63-th bit is 1 iff Hamming_weight(n)=0 */\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) + ((uint64_t)0xFF00000000000000)) >> 63U; - - - /* Slow, 13 logical operations */ - #define XORBITS64_SWAR2(n) \ - COUNTBITS64_SWAR(n); \ -- n&=ONE64; -+ (n)&=ONE64; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS64_SWAR(n) \ - /* +*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+***+***+***+***+***+***+***+***+***+***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000+000+000+000+000+000+000+000+000+000+000+000+000 */\ - /* Then, we sum the 16 bits and store them in the bits 63 to 67. */\ - /* So the 63-th bit in the bit of parity. */\ -- n=((n & ((uint64_t)0x8888888888888888)) *((uint64_t)0x1111111111111111))\ -+ (n)=(((n) & ((uint64_t)0x8888888888888888)) *((uint64_t)0x1111111111111111))\ - >> 63; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS32_SWAR(n) \ - /* +*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+***+***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000+000+000+000+000 */\ - /* Then, we sum the 8 bits and store them in the bits 31 to 34. */\ - /* So the 31-th bit in the bit of parity. */\ -- n=((n & ((uint32_t)0x88888888)) *((uint32_t)0x11111111)) >> 31; -+ (n)=(((n) & ((uint32_t)0x88888888)) *((uint32_t)0x11111111)) >> 31; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS16_SWAR(n) \ - /* +*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000 */\ - /* Then, we sum the 4 bits and store them in the bits 15 to 17. */\ - /* So the 15-th bit in the bit of parity. */\ -- n=((n & ((uint16_t)0x8888)) *((uint16_t)0x1111)) >> 15; -+ (n)=(((n) & ((uint16_t)0x8888)) *((uint16_t)0x1111)) >> 15; - - - /* A special algorithm with 5 logical operations */ - #define XORBITS8_SWAR(n) \ - /* +*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +0+0+0+0 */\ - /* Then, we sum the 4 bits and store them in the bits 7 to 9. */\ - /* So the 15-th bit in the bit of parity. 
*/\ -- n=((n & ((uint8_t)0xAA)) *((uint8_t)0x55)) >> 7; -+ (n)=(((n) & ((uint8_t)0xAA)) *((uint8_t)0x55)) >> 7; - - - /* The slowest method, based on the "dichotomic xor/or" */ -@@ -238,55 +228,55 @@ - /* A generic method using the dichotomic principle */ - #define ORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n|=n>>RESERVED_VARIABLE)\ -- n&=UINT_1; -+ (n)|=(n)>>RESERVED_VARIABLE)\ -+ (n)&=UINT_1; - - - #define NORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n|=n>>RESERVED_VARIABLE)\ -- n=~n;\ -- n&=UINT_1; -+ (n)|=(n)>>RESERVED_VARIABLE)\ -+ (n)=~(n);\ -+ (n)&=UINT_1; - - - #define XORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n^=n>>RESERVED_VARIABLE)\ -- n&=UINT_1; -+ (n)^=(n)>>RESERVED_VARIABLE)\ -+ (n)&=UINT_1; - - - /* 13 logical operations */ - #define ORBITS64_DICHO(n) \ -- n|=n >> 32U;\ -- n|=n >> 16U;\ -- n|=n >> 8U;\ -- n|=n >> 4U;\ -- n|=n >> 2U;\ -- n|=n >> 1U;\ -- n&=ONE64; -+ (n)|=(n) >> 32U;\ -+ (n)|=(n) >> 16U;\ -+ (n)|=(n) >> 8U;\ -+ (n)|=(n) >> 4U;\ -+ (n)|=(n) >> 2U;\ -+ (n)|=(n) >> 1U;\ -+ (n)&=ONE64; - - - /* 14 logical operations */ - #define NORBITS64_DICHO(n) \ -- n|=n >> 32U;\ -- n|=n >> 16U;\ -- n|=n >> 8U;\ -- n|=n >> 4U;\ -- n|=n >> 2U;\ -- n|=n >> 1U;\ -- n=~n;\ -- n&=ONE64; -+ (n)|=(n) >> 32U;\ -+ (n)|=(n) >> 16U;\ -+ (n)|=(n) >> 8U;\ -+ (n)|=(n) >> 4U;\ -+ (n)|=(n) >> 2U;\ -+ (n)|=(n) >> 1U;\ -+ (n)=~(n);\ -+ (n)&=ONE64; - - - /* 13 logical operations */ - #define XORBITS64_DICHO(n) \ -- n^=n >> 32U;\ -- n^=n >> 16U;\ -- n^=n >> 8U;\ -- n^=n >> 4U;\ -- n^=n >> 2U;\ -- n^=n >> 1U;\ -- n&=ONE64; -+ (n)^=(n) >> 32U;\ -+ (n)^=(n) >> 16U;\ -+ (n)^=(n) >> 8U;\ -+ (n)^=(n) >> 4U;\ -+ (n)^=(n) >> 2U;\ -+ (n)^=(n) >> 1U;\ -+ (n)&=ONE64; - - - /* Choose the best method */ -@@ -311,22 +301,10 @@ - #endif - - --#if (NB_BITS_UINT==64U) -- #define COUNTBITS_UINT CONCAT(COUNTBITS,NB_BITS_UINT) -- #define ORBITS_UINT CONCAT( ORBITS,NB_BITS_UINT) -- #define NORBITS_UINT CONCAT( NORBITS,NB_BITS_UINT) -- #define XORBITS_UINT CONCAT( XORBITS,NB_BITS_UINT) --#elif defined(MQSOFT_REF) -- #define COUNTBITS_UINT COUNTBITS64_SWAR -- #define ORBITS_UINT(n) ORBITS(n,NB_BITS_UINT) -- #define NORBITS_UINT(n) NORBITS(n,NB_BITS_UINT) -- #define XORBITS_UINT(n) XORBITS(n,NB_BITS_UINT) --#else -- #define COUNTBITS_UINT COUNTBITS64_SWAR -- #define ORBITS_UINT(n) ORBITS(n,NB_BITS_UINT) -- #define NORBITS_UINT(n) NORBITS(n,NB_BITS_UINT) -- #define XORBITS_UINT(n) XORBITS(n,NB_BITS_UINT) --#endif -+#define COUNTBITS_UINT COUNTBITS64 -+#define ORBITS_UINT ORBITS64 -+#define NORBITS_UINT NORBITS64 -+#define XORBITS_UINT XORBITS64 - - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h deleted file mode 100644 index cdf0268..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/chooseRootHFE_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/chooseRootHFE_gf2nx.h -@@ -30,7 +30,7 @@ - #include "gf2nx.h" - - int PREFIX_NAME(chooseRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - #define chooseRootHFE_gf2nx PREFIX_NAME(chooseRootHFE_gf2nx) - #endif - diff --git 
a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_conv_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_conv_gf2nx.h deleted file mode 100644 index 76647dd..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_conv_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/conv_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/conv_gf2nx.h -@@ -10,7 +10,7 @@ - - - void PREFIX_NAME(convHFEpolynomialSparseToDense_gf2nx)(gf2nx F_dense, -- const complete_sparse_monic_gf2nx F); -+ complete_sparse_monic_gf2nx F); - #define convHFEpolynomialSparseToDense_gf2nx \ - PREFIX_NAME(convHFEpolynomialSparseToDense_gf2nx) - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_div_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_div_gf2nx.h deleted file mode 100644 index 2d7c011..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_div_gf2nx.h +++ /dev/null @@ -1,41 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/div_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/div_gf2nx.h -@@ -24,13 +24,13 @@ - - - unsigned int PREFIX_NAME(div_r_HFE_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(div_r_HFE_cstdeg_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(div_r_HFE_cst_gf2nx)(gf2nx poly, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - #define div_r_HFE_gf2nx PREFIX_NAME(div_r_HFE_gf2nx) - #define div_r_HFE_cstdeg_gf2nx PREFIX_NAME(div_r_HFE_cstdeg_gf2nx) -@@ -39,16 +39,16 @@ - - #if ENABLED_REMOVE_ODD_DEGREE - void PREFIX_NAME(divsqr_r_HFE_cstdeg_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(divsqr_r_HFE_cst_gf2nx)(gf2nx poly, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - #define divsqr_r_HFE_cstdeg_gf2nx PREFIX_NAME(divsqr_r_HFE_cstdeg_gf2nx) - #define divsqr_r_HFE_cst_gf2nx PREFIX_NAME(divsqr_r_HFE_cst_gf2nx) - #else -- #define divsqr_r_HFE_cstdeg_gf2nx PREFIX_NAME(div_r_HFE_cstdeg_gf2nx) -- #define divsqr_r_HFE_cst_gf2nx PREFIX_NAME(div_r_HFE_cst_gf2nx) -+ #define divsqr_r_HFE_cstdeg_gf2nx div_r_HFE_cstdeg_gf2nx -+ #define divsqr_r_HFE_cst_gf2nx div_r_HFE_cst_gf2nx - #endif - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_dotProduct_gf2.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_dotProduct_gf2.h deleted file mode 100644 index 566feaf..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_dotProduct_gf2.h +++ /dev/null @@ -1,77 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/dotProduct_gf2.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/dotProduct_gf2.h -@@ -10,50 +10,50 @@ - - /* Dot product of vector of bits */ - #define DOTPRODUCT(res,a,b,SIZE) \ -- res=(a)[0]&(b)[0];\ -+ (res)=(a)[0]&(b)[0];\ - FOR_LOOP_COMPLETE(1,RESERVED_VARIABLE<(SIZE),++RESERVED_VARIABLE,\ -- res^=(a)[RESERVED_VARIABLE]&(b)[RESERVED_VARIABLE])\ -+ (res)^=(a)[RESERVED_VARIABLE]&(b)[RESERVED_VARIABLE])\ - XORBITS_UINT(res); - - - /* Inlined version */ - #define DOTPRODUCT1(res,a,b) \ -- res=(a)[0]&(b)[0];\ 
-+ (res)=(a)[0]&(b)[0];\ - XORBITS_UINT(res); - - #define DOTPRODUCT2(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ - XORBITS_UINT(res); - - #define DOTPRODUCT3(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ - XORBITS_UINT(res); - - #define DOTPRODUCT4(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ - XORBITS_UINT(res); - - #define DOTPRODUCT5(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -- res^=(a)[4]&(b)[4];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ -+ (res)^=(a)[4]&(b)[4];\ - XORBITS_UINT(res); - - #define DOTPRODUCT6(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -- res^=(a)[4]&(b)[4];\ -- res^=(a)[5]&(b)[5];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ -+ (res)^=(a)[4]&(b)[4];\ -+ (res)^=(a)[5]&(b)[5];\ - XORBITS_UINT(res); - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h deleted file mode 100644 index 56bb54c..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h +++ /dev/null @@ -1,22 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/findRoots_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/findRoots_gf2nx.h -@@ -19,14 +19,14 @@ - convHFEpolynomialSparseToDense_gf2nx(poly2,F);\ - /* Initialize to F-U */\ - add2_gf2n(poly2,U);\ -- l=gcd_gf2nx(&i,poly2,d2,poly,l); -+ (l)=gcd_gf2nx(&(i),poly2,d2,poly,l); - - --int PREFIX_NAME(findRootsHFE_gf2nx)(vec_gf2n* roots, -- const complete_sparse_monic_gf2nx F, -+int PREFIX_NAME(findRootsHFE_gf2nx)(vec_gf2n roots, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - int PREFIX_NAME(findUniqRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - #define findRootsHFE_gf2nx PREFIX_NAME(findRootsHFE_gf2nx) - #define findUniqRootHFE_gf2nx PREFIX_NAME(findUniqRootHFE_gf2nx) - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h deleted file mode 100644 index 41e1286..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h +++ /dev/null @@ -1,21 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/frobeniusMap_gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/frobeniusMap_gf2nx.h -@@ -9,7 +9,7 @@ - #include "gf2nx.h" - - --unsigned int PREFIX_NAME(frobeniusMap_HFE_gf2nx)(gf2nx Xqn, const -+unsigned int PREFIX_NAME(frobeniusMap_HFE_gf2nx)(gf2nx Xqn, - complete_sparse_monic_gf2nx F, cst_gf2n U); - #define frobeniusMap_HFE_gf2nx PREFIX_NAME(frobeniusMap_HFE_gf2nx) - -@@ -87,7 +87,7 @@ - #define KX (HFEDeg-KP) - - --unsigned int PREFIX_NAME(frobeniusMap_multisqr_HFE_gf2nx)(gf2nx Xqn, const -+unsigned int PREFIX_NAME(frobeniusMap_multisqr_HFE_gf2nx)(gf2nx Xqn, - complete_sparse_monic_gf2nx F, cst_gf2n U); - #define 
frobeniusMap_multisqr_HFE_gf2nx \ - PREFIX_NAME(frobeniusMap_multisqr_HFE_gf2nx) - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_gf2nx.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_gf2nx.h deleted file mode 100644 index 03eec7c..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/gf2nx.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/gf2nx.h -@@ -119,7 +119,7 @@ - - /* A structure with a special list to find the exponents of the monomials */ - typedef struct { -- cst_sparse_monic_gf2nx poly; -+ UINT poly[NB_UINT_HFEPOLY]; - /* List of the successive differences of the exponents of the monomials of - poly multiplied by NB_WORD_GFqn */ - unsigned int L[NB_COEFS_HFEPOLY]; - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_hash.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_hash.h deleted file mode 100644 index e2e9b51..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_hash.h +++ /dev/null @@ -1,346 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/hash.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/hash.h -@@ -1,311 +1,40 @@ - #ifndef _HASH_H - #define _HASH_H - --#include "arch.h" - #include "choice_crypto.h" --#include "parameters_HFE.h" --#include "predicate.h" --#include "init.h" -- -- --/******************************************************************/ --/****************** Choice of the hash functions ******************/ --/******************************************************************/ -- -- --/* Choice of the hash function */ --/* The user is allowed to switch between SHA2 and SHA3 */ --#if (defined(QUARTZ)||defined(QUARTZ_V1)) -- #define CHOICE_HASH_SHA1 --#elif 0 -- #define CHOICE_HASH_SHA2 --#else -- #define CHOICE_HASH_SHA3 --#endif -- -- --/******************************************************************/ --/******************** Enable the hash functions *******************/ --/******************************************************************/ -- -- --/* Use of third libraries */ --/* The user is allowed to switch between OpenSSL and XKCP */ --/* The user can define several macros, while several SHA3 are not defined -- (if several SHA3 are defined, XKCP has priority). */ --#ifdef CHOICE_HASH_SHA1 -- #define ENABLED_SHA1_OPENSSL --#endif --#if defined(CHOICE_HASH_SHA2) -- #define ENABLED_SHA2_OPENSSL --#endif --#if defined(CHOICE_HASH_SHA3) -- /* XKCP is constant-time and faster than OpenSSL */ -- #define ENABLED_SHA3_XKCP -- /* #define ENABLED_SHA3_OPENSSL */ --#endif -- -- --#define ENABLED_SHAKE_XKCP --/* #define ENABLED_TUPLEHASH_XKCP XXX Disabled XXX */ -- -- --/******************************************************************/ --/***************** Include for the hash functions *****************/ --/******************************************************************/ -- -- --/* We minimize the numbers of #include to decrease the dependencies with the -- third libraries. 
*/ --#if (defined(ENABLED_SHA1_OPENSSL)||defined(ENABLED_SHA2_OPENSSL)) -- #include --#endif -- -- --#ifdef ENABLED_SHA2_OPENSSL -- #include "randombytes.h" -- -- #if ENABLED_OPENSSL_FIPS -- #include -- #include -- #endif --#endif -- -- --#ifdef ENABLED_SHA3_OPENSSL -- #include -- #include "prefix_name.h" -- int PREFIX_NAME(sha3_256)(unsigned char *output, const unsigned char *m, -- size_t len); -- int PREFIX_NAME(sha3_384)(unsigned char *output, const unsigned char *m, -- size_t len); -- int PREFIX_NAME(sha3_512)(unsigned char *output, const unsigned char *m, -- size_t len); -- #define sha3_256 PREFIX_NAME(sha3_256) -- #define sha3_384 PREFIX_NAME(sha3_384) -- #define sha3_512 PREFIX_NAME(sha3_512) --#endif -- -- --#if (defined(ENABLED_SHA3_XKCP)||defined(ENABLED_SHAKE_XKCP)) -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --#ifdef ENABLED_SHAKE_XKCP -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --#ifdef ENABLED_TUPLEHASH_XKCP -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --/******************************************************************/ --/**************** Macro to call the hash functions ****************/ --/******************************************************************/ -- -- --#define SHA1_OPENSSL(output,m,len) SHA1(m,len,output) --#define SHA256_OPENSSL(output,m,len) SHA256(m,len,output) --#define SHA384_OPENSSL(output,m,len) SHA384(m,len,output) --#define SHA512_OPENSSL(output,m,len) SHA512(m,len,output) -- --#define SHA256_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA256_OPENSSL(output,m,len); --#define SHA384_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA384_OPENSSL(output,m,len); --#define SHA512_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA512_OPENSSL(output,m,len); -- --/* Format: SHA3_*(output,m,len) */ --#if 0 -- #define SHA3_256_XKCP SHA3_256 -- #define SHA3_384_XKCP SHA3_384 -- #define SHA3_512_XKCP SHA3_512 --#else -- /* SHA3_* is inlined from SimpleFIPS202.c */ -- #define SHA3_256_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(1088, 512, m, len, 0x06, output, 32) -- #define SHA3_384_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(832, 768, m, len, 0x06, output, 48) -- #define SHA3_512_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(576, 1024, m, len, 0x06, output, 64) --#endif -- --/* Format: SHAKE*(output,outputByteLen,input,inputByteLen) */ --#if 0 -- #define SHAKE128_XKCP SHAKE128 -- #define SHAKE256_XKCP SHAKE256 --#else -- /* SHAKE* is inlined from SimpleFIPS202.c */ -- #define SHAKE128_XKCP(output,outputByteLen,m,len) \ -- KeccakWidth1600_Sponge(1344, 256, m, len, 0x1F, output, outputByteLen) -- #define SHAKE256_XKCP(output,outputByteLen,m,len) \ -- KeccakWidth1600_Sponge(1088, 512, m, len, 0x1F, output, outputByteLen) --#endif -- --/* To call with: -- Keccak_HashInstance hashInstance; -- Keccak_HashIUF_SHAKE*_XKCP(&hashInstance,data,databitlen); -- And after a call to Keccak_HashIUF_SHAKE*_XKCP, to use one or several times: -- Keccak_HashSqueeze(&hashInstance,output,outputbitlen); -- XXX Here, length in bits XXX --*/ --#define Keccak_HashIUF_SHAKE128_XKCP(hashInstance,data,databitlen) \ -- Keccak_HashInitialize_SHAKE128(hashInstance);\ -- 
Keccak_HashUpdate(hashInstance,data,databitlen);\ -- Keccak_HashFinal(hashInstance,0); --#define Keccak_HashIUF_SHAKE256_XKCP(hashInstance,data,databitlen) \ -- Keccak_HashInitialize_SHAKE256(hashInstance);\ -- Keccak_HashUpdate(hashInstance,data,databitlen);\ -- Keccak_HashFinal(hashInstance,0); -- --#define TUPLEHASH128_XKCP TupleHash128 --#define TUPLEHASH256_XKCP TupleHash256 -- -- --/************************************************************************/ --/* Macro to call the hash functions corresponding to the security level */ --/************************************************************************/ -+#include "fips202.h" - -+#define CHOICE_HASH_SHA3 - - /* Choice of the hash function */ - #if (K<=128) -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA256_OPENSSL_FIPS -- #else -- #define SHA2 SHA256_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_256_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_256 -- #endif --#elif (K<=192) -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA384_OPENSSL_FIPS -- #else -- #define SHA2 SHA384_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_384_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_384 -- #endif --#else -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA512_OPENSSL_FIPS -- #else -- #define SHA2 SHA512_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_512_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_512 -- #endif --#endif -- -- --/* Choice of SHAKE and TupleHash */ --#if (K<=128) -- #define SHAKE SHAKE128_XKCP -- #define Keccak_HashIUF_SHAKE Keccak_HashIUF_SHAKE128_XKCP -- #define TUPLEHASH TUPLEHASH128_XKCP --#else -- #define SHAKE SHAKE256_XKCP -- #define Keccak_HashIUF_SHAKE Keccak_HashIUF_SHAKE256_XKCP -- #define TUPLEHASH TUPLEHASH256_XKCP --#endif -- -- --/******************************************************************/ --/******** Macro to call the chosen hash function of MQsoft ********/ --/******************************************************************/ -- -- --#ifdef CHOICE_HASH_SHA1 -- #define HASH SHA1 --#elif defined(CHOICE_HASH_SHA2) -- #define HASH SHA2 --#else -- #define HASH SHA3 --#endif -- -- --/******************************************************************/ --/************************** Other tools ***************************/ --/******************************************************************/ -- -- --BEGIN_EXTERNC -- /* For KeccakWidth1600_Sponge */ -- #include --END_EXTERNC -- -- --#if (K<=80) -- #define SIZE_DIGEST 20 -- #define SIZE_DIGEST_UINT 3 --#elif (K<=128) -- #define SIZE_DIGEST 32 -- #define SIZE_DIGEST_UINT 4 -- #define SIZE_2_DIGEST 64 -- #define EQUALHASH_NOCST ISEQUAL4_NOCST -- #define COPYHASH COPY4 -+ #define HASH sha3_256 -+ #define SHAKE shake128 -+ #define SIZE_DIGEST 32 -+ #define SIZE_DIGEST_UINT 4 -+ #define SIZE_2_DIGEST 64 -+ #define EQUALHASH_NOCST ISEQUAL4_NOCST -+ #define COPYHASH COPY4 - #elif (K<=192) -- #define SIZE_DIGEST 48 -- #define SIZE_DIGEST_UINT 6 -- #define SIZE_2_DIGEST 96 -- #define EQUALHASH_NOCST ISEQUAL6_NOCST -- #define COPYHASH COPY6 --#else -- #define SIZE_DIGEST 64 -- #define SIZE_DIGEST_UINT 8 -- #define SIZE_2_DIGEST 128 -- #define EQUALHASH_NOCST ISEQUAL8_NOCST -- #define COPYHASH COPY8 -+ #define HASH sha3_384 -+ #define SHAKE shake256 -+ #define SIZE_DIGEST 48 -+ #define SIZE_DIGEST_UINT 6 -+ #define SIZE_2_DIGEST 96 -+ #define EQUALHASH_NOCST ISEQUAL6_NOCST -+ #define COPYHASH COPY6 -+#else -+ #define HASH sha3_512 -+ #define SHAKE shake256 -+ 
#define SIZE_DIGEST 64 -+ #define SIZE_DIGEST_UINT 8 -+ #define SIZE_2_DIGEST 128 -+ #define EQUALHASH_NOCST ISEQUAL8_NOCST -+ #define COPYHASH COPY8 - #endif - -- - #define EQUALHASH(a,b) f_ISEQUAL(a,b,SIZE_DIGEST_UINT) - -- --/* XXX Bytes XXX */ - #define expandSeed(output,outputByteLen,seed,seedByteLen) \ - SHAKE(output,outputByteLen,seed,seedByteLen) - -@@ -313,6 +42,4 @@ - #define expandSeedIUF Keccak_HashIUF_SHAKE - #define expandSeedSqueeze Keccak_HashSqueeze - -- - #endif -- - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_init.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_init.h deleted file mode 100644 index 25b6547..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_init.h +++ /dev/null @@ -1,32 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/init.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/init.h -@@ -116,23 +116,23 @@ - - #define SET1_2(c) \ - SET1_1(c);\ -- SET0_1(c+1); -+ SET0_1((c)+1); - - #define SET1_3(c) \ - SET1_1(c);\ -- SET0_2(c+1); -+ SET0_2((c)+1); - - #define SET1_4(c) \ - SET1_1(c);\ -- SET0_3(c+1); -+ SET0_3((c)+1); - - #define SET1_5(c) \ - SET1_1(c);\ -- SET0_4(c+1); -+ SET0_4((c)+1); - - #define SET1_6(c) \ - SET1_1(c);\ -- SET0_5(c+1); -+ SET0_5((c)+1); - - - #endif - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_macro.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_macro.h deleted file mode 100644 index 213fc47..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_macro.h +++ /dev/null @@ -1,33 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/macro.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/macro.h -@@ -7,29 +7,5 @@ - /** This macro permits to concat the names. */ - #define CONCAT(a,b) CONCAT2(a,b) - -- --/** Print a name as a string. */ --#define PRINTF_NAME(name) puts(#name); --#define PRINTF_NAME1(name) PRINTF_NAME(name) --#define PRINTF_NAME2(name) PRINTF_NAME1(name) -- -- --/** Compute Floor(a/b) with a and b positive integers, a can be zero. */ --#define DIV_FLOOR(a,b) ((a)/(b)) --#define DIV_CEIL1(a,b) (((a)/(b))+(((a)%(b))?1:0)) --/* Faster but overflow if (a+b-1) >= 2^x for x=size_of_the_type_in_bits */ --#define DIV_CEIL2(a,b) (((a)+(b)-1)/(b)) --/* Faster but incorrect only when a == 0 and b>1 */ --#define DIV_CEIL3(a,b) ((((a)-1)/(b))+1) --/** Compute Ceiling(a/b) with a and b positive integers, a can be zero. */ --#define DIV_CEIL DIV_CEIL2 -- -- --/** Return the minimum. */ --#define MINI(a,b) (((a)<(b))?(a):(b)) --/** Return the maximum. 
*/ --#define MAXI(a,b) (((a)>(b))?(a):(b)) -- -- - #endif - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2n.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2n.h deleted file mode 100644 index d580554..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2n.h +++ /dev/null @@ -1,16 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/mul_gf2n.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/mul_gf2n.h -@@ -79,11 +79,7 @@ - - - /* Function mul in GF(2^x), then modular reduction */ --#define MUL_THEN_REM_GF2N void \ -- PREFIX_NAME(mul_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn], \ -- const uint64_t B[NB_WORD_GFqn]) --MUL_THEN_REM_GF2N; -+void PREFIX_NAME(mul_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn], const uint64_t B[NB_WORD_GFqn]); - #define mul_then_rem_gf2n PREFIX_NAME(mul_then_rem_gf2n) - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2x.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2x.h deleted file mode 100644 index d2d6c57..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_mul_gf2x.h +++ /dev/null @@ -1,4798 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/mul_gf2x.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/mul_gf2x.h -@@ -76,9 +76,9 @@ - /* Classical, 189 = 31×6+3 instructions */ - #define MUL32_NO_UNROLLED_NO_SIMD_GF2X(C,A,B,i) \ - (C)=(-((B)&ONE64))&(A);\ -- for(i=1;i<32;++i)\ -+ for((i)=1;(i)<32;++(i))\ - {\ -- (C)^=((-(((B)>>i)&ONE64))&(A))<>(i))&ONE64))&(A))<<(i);\ - } - - -@@ -123,9 +123,9 @@ - (C)=(-((B)&ONE64))&(A);\ - /* Optimization: the '&1' is removed */\ - (C)^=((-((B)>>63))&(A))<<63;\ -- for(i=1;i<63;++i)\ -+ for((i)=1;(i)<63;++(i))\ - {\ -- (C)^=((-(((B)>>i)&ONE64))&(A))<>(i))&ONE64))&(A))<<(i);\ - } - - -@@ -203,14 +203,14 @@ - #define MUL64_NO_UNROLLED_NO_SIMD_GF2X(C,A,B,i,tmp) \ - (C)[0]=(-((B)&ONE64))&(A);\ - /* Optimization: the '&1' is removed */\ -- tmp=((-((B)>>63))&(A));\ -- (C)[0]^=tmp<<63;\ -- (C)[1]=tmp>>1;\ -- for(i=1;i<63;++i)\ -+ (tmp)=((-((B)>>63))&(A));\ -+ (C)[0]^=(tmp)<<63;\ -+ (C)[1]=(tmp)>>1;\ -+ for((i)=1;(i)<63;++(i))\ - {\ -- tmp=((-(((B)>>i)&ONE64))&(A));\ -- (C)[0]^=tmp<>(64-i);\ -+ (tmp)=((-(((B)>>(i))&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<(i);\ -+ (C)[1]^=(tmp)>>(64-(i));\ - } - - -@@ -218,197 +218,197 @@ - #define MUL64_NO_SIMD_GF2X(C,A,B,tmp) \ - (C)[0]=(-((B)&ONE64))&(A);\ - /* Optimization: the '&1' is removed */\ -- tmp=((-((B)>>63))&(A));\ -- (C)[0]^=tmp<<63;\ -- (C)[1]=tmp>>1;\ --\ -- tmp=((-(((B)>>1)&ONE64))&(A));\ -- (C)[0]^=tmp<<1;\ -- (C)[1]^=tmp>>63;\ -- tmp=((-(((B)>>2)&ONE64))&(A));\ -- (C)[0]^=tmp<<2;\ -- (C)[1]^=tmp>>62;\ -- tmp=((-(((B)>>3)&ONE64))&(A));\ -- (C)[0]^=tmp<<3;\ -- (C)[1]^=tmp>>61;\ -- tmp=((-(((B)>>4)&ONE64))&(A));\ -- (C)[0]^=tmp<<4;\ -- (C)[1]^=tmp>>60;\ -- tmp=((-(((B)>>5)&ONE64))&(A));\ -- (C)[0]^=tmp<<5;\ -- (C)[1]^=tmp>>59;\ -- tmp=((-(((B)>>6)&ONE64))&(A));\ -- (C)[0]^=tmp<<6;\ -- (C)[1]^=tmp>>58;\ -- tmp=((-(((B)>>7)&ONE64))&(A));\ -- (C)[0]^=tmp<<7;\ -- (C)[1]^=tmp>>57;\ -- tmp=((-(((B)>>8)&ONE64))&(A));\ -- (C)[0]^=tmp<<8;\ -- (C)[1]^=tmp>>56;\ -- tmp=((-(((B)>>9)&ONE64))&(A));\ -- (C)[0]^=tmp<<9;\ -- (C)[1]^=tmp>>55;\ -- tmp=((-(((B)>>10)&ONE64))&(A));\ -- (C)[0]^=tmp<<10;\ -- (C)[1]^=tmp>>54;\ -- tmp=((-(((B)>>11)&ONE64))&(A));\ -- (C)[0]^=tmp<<11;\ -- (C)[1]^=tmp>>53;\ -- 
tmp=((-(((B)>>12)&ONE64))&(A));\ -- (C)[0]^=tmp<<12;\ -- (C)[1]^=tmp>>52;\ -- tmp=((-(((B)>>13)&ONE64))&(A));\ -- (C)[0]^=tmp<<13;\ -- (C)[1]^=tmp>>51;\ -- tmp=((-(((B)>>14)&ONE64))&(A));\ -- (C)[0]^=tmp<<14;\ -- (C)[1]^=tmp>>50;\ -- tmp=((-(((B)>>15)&ONE64))&(A));\ -- (C)[0]^=tmp<<15;\ -- (C)[1]^=tmp>>49;\ -- tmp=((-(((B)>>16)&ONE64))&(A));\ -- (C)[0]^=tmp<<16;\ -- (C)[1]^=tmp>>48;\ -- tmp=((-(((B)>>17)&ONE64))&(A));\ -- (C)[0]^=tmp<<17;\ -- (C)[1]^=tmp>>47;\ -- tmp=((-(((B)>>18)&ONE64))&(A));\ -- (C)[0]^=tmp<<18;\ -- (C)[1]^=tmp>>46;\ -- tmp=((-(((B)>>19)&ONE64))&(A));\ -- (C)[0]^=tmp<<19;\ -- (C)[1]^=tmp>>45;\ -- tmp=((-(((B)>>20)&ONE64))&(A));\ -- (C)[0]^=tmp<<20;\ -- (C)[1]^=tmp>>44;\ -- tmp=((-(((B)>>21)&ONE64))&(A));\ -- (C)[0]^=tmp<<21;\ -- (C)[1]^=tmp>>43;\ -- tmp=((-(((B)>>22)&ONE64))&(A));\ -- (C)[0]^=tmp<<22;\ -- (C)[1]^=tmp>>42;\ -- tmp=((-(((B)>>23)&ONE64))&(A));\ -- (C)[0]^=tmp<<23;\ -- (C)[1]^=tmp>>41;\ -- tmp=((-(((B)>>24)&ONE64))&(A));\ -- (C)[0]^=tmp<<24;\ -- (C)[1]^=tmp>>40;\ -- tmp=((-(((B)>>25)&ONE64))&(A));\ -- (C)[0]^=tmp<<25;\ -- (C)[1]^=tmp>>39;\ -- tmp=((-(((B)>>26)&ONE64))&(A));\ -- (C)[0]^=tmp<<26;\ -- (C)[1]^=tmp>>38;\ -- tmp=((-(((B)>>27)&ONE64))&(A));\ -- (C)[0]^=tmp<<27;\ -- (C)[1]^=tmp>>37;\ -- tmp=((-(((B)>>28)&ONE64))&(A));\ -- (C)[0]^=tmp<<28;\ -- (C)[1]^=tmp>>36;\ -- tmp=((-(((B)>>29)&ONE64))&(A));\ -- (C)[0]^=tmp<<29;\ -- (C)[1]^=tmp>>35;\ -- tmp=((-(((B)>>30)&ONE64))&(A));\ -- (C)[0]^=tmp<<30;\ -- (C)[1]^=tmp>>34;\ -- tmp=((-(((B)>>31)&ONE64))&(A));\ -- (C)[0]^=tmp<<31;\ -- (C)[1]^=tmp>>33;\ --\ -- tmp=((-(((B)>>32)&ONE64))&(A));\ -- (C)[0]^=tmp<<32;\ -- (C)[1]^=tmp>>32;\ -- tmp=((-(((B)>>33)&ONE64))&(A));\ -- (C)[0]^=tmp<<33;\ -- (C)[1]^=tmp>>31;\ -- tmp=((-(((B)>>34)&ONE64))&(A));\ -- (C)[0]^=tmp<<34;\ -- (C)[1]^=tmp>>30;\ -- tmp=((-(((B)>>35)&ONE64))&(A));\ -- (C)[0]^=tmp<<35;\ -- (C)[1]^=tmp>>29;\ -- tmp=((-(((B)>>36)&ONE64))&(A));\ -- (C)[0]^=tmp<<36;\ -- (C)[1]^=tmp>>28;\ -- tmp=((-(((B)>>37)&ONE64))&(A));\ -- (C)[0]^=tmp<<37;\ -- (C)[1]^=tmp>>27;\ -- tmp=((-(((B)>>38)&ONE64))&(A));\ -- (C)[0]^=tmp<<38;\ -- (C)[1]^=tmp>>26;\ -- tmp=((-(((B)>>39)&ONE64))&(A));\ -- (C)[0]^=tmp<<39;\ -- (C)[1]^=tmp>>25;\ -- tmp=((-(((B)>>40)&ONE64))&(A));\ -- (C)[0]^=tmp<<40;\ -- (C)[1]^=tmp>>24;\ -- tmp=((-(((B)>>41)&ONE64))&(A));\ -- (C)[0]^=tmp<<41;\ -- (C)[1]^=tmp>>23;\ -- tmp=((-(((B)>>42)&ONE64))&(A));\ -- (C)[0]^=tmp<<42;\ -- (C)[1]^=tmp>>22;\ -- tmp=((-(((B)>>43)&ONE64))&(A));\ -- (C)[0]^=tmp<<43;\ -- (C)[1]^=tmp>>21;\ -- tmp=((-(((B)>>44)&ONE64))&(A));\ -- (C)[0]^=tmp<<44;\ -- (C)[1]^=tmp>>20;\ -- tmp=((-(((B)>>45)&ONE64))&(A));\ -- (C)[0]^=tmp<<45;\ -- (C)[1]^=tmp>>19;\ -- tmp=((-(((B)>>46)&ONE64))&(A));\ -- (C)[0]^=tmp<<46;\ -- (C)[1]^=tmp>>18;\ -- tmp=((-(((B)>>47)&ONE64))&(A));\ -- (C)[0]^=tmp<<47;\ -- (C)[1]^=tmp>>17;\ -- tmp=((-(((B)>>48)&ONE64))&(A));\ -- (C)[0]^=tmp<<48;\ -- (C)[1]^=tmp>>16;\ -- tmp=((-(((B)>>49)&ONE64))&(A));\ -- (C)[0]^=tmp<<49;\ -- (C)[1]^=tmp>>15;\ -- tmp=((-(((B)>>50)&ONE64))&(A));\ -- (C)[0]^=tmp<<50;\ -- (C)[1]^=tmp>>14;\ -- tmp=((-(((B)>>51)&ONE64))&(A));\ -- (C)[0]^=tmp<<51;\ -- (C)[1]^=tmp>>13;\ -- tmp=((-(((B)>>52)&ONE64))&(A));\ -- (C)[0]^=tmp<<52;\ -- (C)[1]^=tmp>>12;\ -- tmp=((-(((B)>>53)&ONE64))&(A));\ -- (C)[0]^=tmp<<53;\ -- (C)[1]^=tmp>>11;\ -- tmp=((-(((B)>>54)&ONE64))&(A));\ -- (C)[0]^=tmp<<54;\ -- (C)[1]^=tmp>>10;\ -- tmp=((-(((B)>>55)&ONE64))&(A));\ -- (C)[0]^=tmp<<55;\ -- (C)[1]^=tmp>>9;\ -- tmp=((-(((B)>>56)&ONE64))&(A));\ -- (C)[0]^=tmp<<56;\ -- (C)[1]^=tmp>>8;\ -- tmp=((-(((B)>>57)&ONE64))&(A));\ -- 
(C)[0]^=tmp<<57;\ -- (C)[1]^=tmp>>7;\ -- tmp=((-(((B)>>58)&ONE64))&(A));\ -- (C)[0]^=tmp<<58;\ -- (C)[1]^=tmp>>6;\ -- tmp=((-(((B)>>59)&ONE64))&(A));\ -- (C)[0]^=tmp<<59;\ -- (C)[1]^=tmp>>5;\ -- tmp=((-(((B)>>60)&ONE64))&(A));\ -- (C)[0]^=tmp<<60;\ -- (C)[1]^=tmp>>4;\ -- tmp=((-(((B)>>61)&ONE64))&(A));\ -- (C)[0]^=tmp<<61;\ -- (C)[1]^=tmp>>3;\ -- tmp=((-(((B)>>62)&ONE64))&(A));\ -- (C)[0]^=tmp<<62;\ -- (C)[1]^=tmp>>2; -+ (tmp)=((-((B)>>63))&(A));\ -+ (C)[0]^=(tmp)<<63;\ -+ (C)[1]=(tmp)>>1;\ -+\ -+ (tmp)=((-(((B)>>1)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<1;\ -+ (C)[1]^=(tmp)>>63;\ -+ (tmp)=((-(((B)>>2)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<2;\ -+ (C)[1]^=(tmp)>>62;\ -+ (tmp)=((-(((B)>>3)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<3;\ -+ (C)[1]^=(tmp)>>61;\ -+ (tmp)=((-(((B)>>4)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<4;\ -+ (C)[1]^=(tmp)>>60;\ -+ (tmp)=((-(((B)>>5)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<5;\ -+ (C)[1]^=(tmp)>>59;\ -+ (tmp)=((-(((B)>>6)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<6;\ -+ (C)[1]^=(tmp)>>58;\ -+ (tmp)=((-(((B)>>7)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<7;\ -+ (C)[1]^=(tmp)>>57;\ -+ (tmp)=((-(((B)>>8)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<8;\ -+ (C)[1]^=(tmp)>>56;\ -+ (tmp)=((-(((B)>>9)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<9;\ -+ (C)[1]^=(tmp)>>55;\ -+ (tmp)=((-(((B)>>10)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<10;\ -+ (C)[1]^=(tmp)>>54;\ -+ (tmp)=((-(((B)>>11)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<11;\ -+ (C)[1]^=(tmp)>>53;\ -+ (tmp)=((-(((B)>>12)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<12;\ -+ (C)[1]^=(tmp)>>52;\ -+ (tmp)=((-(((B)>>13)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<13;\ -+ (C)[1]^=(tmp)>>51;\ -+ (tmp)=((-(((B)>>14)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<14;\ -+ (C)[1]^=(tmp)>>50;\ -+ (tmp)=((-(((B)>>15)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<15;\ -+ (C)[1]^=(tmp)>>49;\ -+ (tmp)=((-(((B)>>16)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<16;\ -+ (C)[1]^=(tmp)>>48;\ -+ (tmp)=((-(((B)>>17)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<17;\ -+ (C)[1]^=(tmp)>>47;\ -+ (tmp)=((-(((B)>>18)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<18;\ -+ (C)[1]^=(tmp)>>46;\ -+ (tmp)=((-(((B)>>19)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<19;\ -+ (C)[1]^=(tmp)>>45;\ -+ (tmp)=((-(((B)>>20)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<20;\ -+ (C)[1]^=(tmp)>>44;\ -+ (tmp)=((-(((B)>>21)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<21;\ -+ (C)[1]^=(tmp)>>43;\ -+ (tmp)=((-(((B)>>22)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<22;\ -+ (C)[1]^=(tmp)>>42;\ -+ (tmp)=((-(((B)>>23)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<23;\ -+ (C)[1]^=(tmp)>>41;\ -+ (tmp)=((-(((B)>>24)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<24;\ -+ (C)[1]^=(tmp)>>40;\ -+ (tmp)=((-(((B)>>25)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<25;\ -+ (C)[1]^=(tmp)>>39;\ -+ (tmp)=((-(((B)>>26)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<26;\ -+ (C)[1]^=(tmp)>>38;\ -+ (tmp)=((-(((B)>>27)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<27;\ -+ (C)[1]^=(tmp)>>37;\ -+ (tmp)=((-(((B)>>28)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<28;\ -+ (C)[1]^=(tmp)>>36;\ -+ (tmp)=((-(((B)>>29)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<29;\ -+ (C)[1]^=(tmp)>>35;\ -+ (tmp)=((-(((B)>>30)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<30;\ -+ (C)[1]^=(tmp)>>34;\ -+ (tmp)=((-(((B)>>31)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<31;\ -+ (C)[1]^=(tmp)>>33;\ -+\ -+ (tmp)=((-(((B)>>32)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<32;\ -+ (C)[1]^=(tmp)>>32;\ -+ (tmp)=((-(((B)>>33)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<33;\ -+ (C)[1]^=(tmp)>>31;\ -+ (tmp)=((-(((B)>>34)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<34;\ -+ (C)[1]^=(tmp)>>30;\ -+ (tmp)=((-(((B)>>35)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<35;\ -+ (C)[1]^=(tmp)>>29;\ -+ (tmp)=((-(((B)>>36)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<36;\ -+ (C)[1]^=(tmp)>>28;\ -+ (tmp)=((-(((B)>>37)&ONE64))&(A));\ -+ 
(C)[0]^=(tmp)<<37;\ -+ (C)[1]^=(tmp)>>27;\ -+ (tmp)=((-(((B)>>38)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<38;\ -+ (C)[1]^=(tmp)>>26;\ -+ (tmp)=((-(((B)>>39)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<39;\ -+ (C)[1]^=(tmp)>>25;\ -+ (tmp)=((-(((B)>>40)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<40;\ -+ (C)[1]^=(tmp)>>24;\ -+ (tmp)=((-(((B)>>41)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<41;\ -+ (C)[1]^=(tmp)>>23;\ -+ (tmp)=((-(((B)>>42)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<42;\ -+ (C)[1]^=(tmp)>>22;\ -+ (tmp)=((-(((B)>>43)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<43;\ -+ (C)[1]^=(tmp)>>21;\ -+ (tmp)=((-(((B)>>44)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<44;\ -+ (C)[1]^=(tmp)>>20;\ -+ (tmp)=((-(((B)>>45)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<45;\ -+ (C)[1]^=(tmp)>>19;\ -+ (tmp)=((-(((B)>>46)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<46;\ -+ (C)[1]^=(tmp)>>18;\ -+ (tmp)=((-(((B)>>47)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<47;\ -+ (C)[1]^=(tmp)>>17;\ -+ (tmp)=((-(((B)>>48)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<48;\ -+ (C)[1]^=(tmp)>>16;\ -+ (tmp)=((-(((B)>>49)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<49;\ -+ (C)[1]^=(tmp)>>15;\ -+ (tmp)=((-(((B)>>50)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<50;\ -+ (C)[1]^=(tmp)>>14;\ -+ (tmp)=((-(((B)>>51)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<51;\ -+ (C)[1]^=(tmp)>>13;\ -+ (tmp)=((-(((B)>>52)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<52;\ -+ (C)[1]^=(tmp)>>12;\ -+ (tmp)=((-(((B)>>53)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<53;\ -+ (C)[1]^=(tmp)>>11;\ -+ (tmp)=((-(((B)>>54)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<54;\ -+ (C)[1]^=(tmp)>>10;\ -+ (tmp)=((-(((B)>>55)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<55;\ -+ (C)[1]^=(tmp)>>9;\ -+ (tmp)=((-(((B)>>56)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<56;\ -+ (C)[1]^=(tmp)>>8;\ -+ (tmp)=((-(((B)>>57)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<57;\ -+ (C)[1]^=(tmp)>>7;\ -+ (tmp)=((-(((B)>>58)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<58;\ -+ (C)[1]^=(tmp)>>6;\ -+ (tmp)=((-(((B)>>59)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<59;\ -+ (C)[1]^=(tmp)>>5;\ -+ (tmp)=((-(((B)>>60)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<60;\ -+ (C)[1]^=(tmp)>>4;\ -+ (tmp)=((-(((B)>>61)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<61;\ -+ (C)[1]^=(tmp)>>3;\ -+ (tmp)=((-(((B)>>62)&ONE64))&(A));\ -+ (C)[0]^=(tmp)<<62;\ -+ (C)[1]^=(tmp)>>2; - - - /* Karatsuba, 1205 = 505*2+189+6 instructions */ -@@ -422,8 +422,8 @@ - /* C[0] = C0 - C[1] = C0^C1^C2 - C[2] = C1^C2 */\ -- AA=(A)[0]^(A)[1];\ -- BB=(B)[0]^(B)[1];\ -+ (AA)=(A)[0]^(A)[1];\ -+ (BB)=(B)[0]^(B)[1];\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[1]^=RESERVED_BUF2[0];\ - (C)[2]^=RESERVED_BUF2[1];} -@@ -442,8 +442,8 @@ - C[1] = C0^C1^C2 - C[2] = C1^C2^C3 - C[3] = C3 */\ -- AA=(A)[0]^(A)[1];\ -- BB=(B)[0]^(B)[1];\ -+ (AA)=(A)[0]^(A)[1];\ -+ (BB)=(B)[0]^(B)[1];\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[1]^=RESERVED_BUF2[0];\ - (C)[2]^=RESERVED_BUF2[1];} -@@ -475,20 +475,20 @@ - C[2] = (C0^C1^C2)^(C3^C4) - C[3] = (C1^C2)^(C3^C4) - C[4] = C3^C4 */\ -- AA=(A)[0]^(A)[1];\ -- BB=(B)[0]^(B)[1];\ -+ (AA)=(A)[0]^(A)[1];\ -+ (BB)=(B)[0]^(B)[1];\ - /* (A0+A1)*(B0+B1) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[1]^=RESERVED_BUF2[0];\ - (C)[2]^=RESERVED_BUF2[1];\ -- AA=(A)[1]^(A)[2];\ -- BB=(B)[1]^(B)[2];\ -+ (AA)=(A)[1]^(A)[2];\ -+ (BB)=(B)[1]^(B)[2];\ - /* (A1+A2)*(B1+B2) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[3]^=RESERVED_BUF2[0];\ - (C)[4]^=RESERVED_BUF2[1];\ -- AA=(A)[0]^(A)[2];\ -- BB=(B)[0]^(B)[2];\ -+ (AA)=(A)[0]^(A)[2];\ -+ (BB)=(B)[0]^(B)[2];\ - /* (A0+A2)*(B0+B2) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[2]^=RESERVED_BUF2[0];\ -@@ -524,20 +524,20 @@ - C[3] = (C1^C2)^(C3^C4^C5) - C[4] = (C3^C4)^C5 - C[5] = C5 */\ 
-- AA=(A)[0]^(A)[1];\ -- BB=(B)[0]^(B)[1];\ -+ (AA)=(A)[0]^(A)[1];\ -+ (BB)=(B)[0]^(B)[1];\ - /* (A0+A1)*(B0+B1) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[1]^=RESERVED_BUF2[0];\ - (C)[2]^=RESERVED_BUF2[1];\ -- AA=(A)[1]^(A)[2];\ -- BB=(B)[1]^(B)[2];\ -+ (AA)=(A)[1]^(A)[2];\ -+ (BB)=(B)[1]^(B)[2];\ - /* (A1+A2)*(B1+B2) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[3]^=RESERVED_BUF2[0];\ - (C)[4]^=RESERVED_BUF2[1];\ -- AA=(A)[0]^(A)[2];\ -- BB=(B)[0]^(B)[2];\ -+ (AA)=(A)[0]^(A)[2];\ -+ (BB)=(B)[0]^(B)[2];\ - /* (A0+A2)*(B0+B2) */\ - MUL64_NO_SIMD_GF2X(RESERVED_BUF2,AA,BB,tmp);\ - (C)[2]^=RESERVED_BUF2[0];\ -@@ -998,15 +998,15 @@ - /* ~ 2*8 = 16 instructions */ - /* Classical, 31 = 9+10+12 instructions */ - #define PMUL16_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one16) \ -- RA=PSET1_16(A0);\ -- RB=PSET_16((B0)>>14,(B0)>>10,(B0)>>6,(B0)>>2,(B0)>>12,(B0)>>8,(B0)>>4,B0);\ -+ (RA)=PSET1_16(A0);\ -+ (RB)=PSET_16((B0)>>14,(B0)>>10,(B0)>>6,(B0)>>2,(B0)>>12,(B0)>>8,(B0)>>4,B0);\ - \ -- CL=PAND_(RA,PMASK16_ONE(PAND_(RB,one16),one16));\ -- M=PAND_(RA,PMASK16_ONE(PAND_(PSRLI_16(RB,1),one16),one16));\ -+ (CL)=PAND_(RA,PMASK16_ONE(PAND_(RB,one16),one16));\ -+ (M)=PAND_(RA,PMASK16_ONE(PAND_(PSRLI_16(RB,1),one16),one16));\ - PXOR1_2(CL,PSLLI_16(M,1));\ -- CH=PSRLI_16(M,15);\ -+ (CH)=PSRLI_16(M,15);\ - \ -- C1=PXOR_(PUNPACKLO_16(CL,CH),PSLLI_32(PUNPACKHI_16(CL,CH),2));\ -+ (C1)=PXOR_(PUNPACKLO_16(CL,CH),PSLLI_32(PUNPACKHI_16(CL,CH),2));\ - PXOR1_2(C1,PSRLI_128(C1,7));\ - PXOR1_2(C1,PSLLI_32(PSRLI_128(C1,4),4));\ - PAND1_2(C1,PSET_32(0,0,0,MONE32)); -@@ -1015,37 +1015,37 @@ - /* ~ 4*6 = 24 instructions */ - /* Classical, 33 = 5+21+7 instructions */ - #define PMUL16_WS_CLAS0_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one32) \ -- RA=PSET1_32(A0);\ -- RB=PSET_32((B0)>>12,(B0)>>4,(B0)>>8,B0);\ -+ (RA)=PSET1_32(A0);\ -+ (RB)=PSET_32((B0)>>12,(B0)>>4,(B0)>>8,B0);\ - \ -- CL=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32));\ -+ (CL)=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,1));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,2));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,3));\ - \ -- CH=PXOR_(CL,PSRLI_128(PSLLI_32(CL,4),8));\ -- C1=PXOR_(CH,PSRLI_128(CH,3));\ -+ (CH)=PXOR_(CL,PSRLI_128(PSLLI_32(CL,4),8));\ -+ (C1)=PXOR_(CH,PSRLI_128(CH,3));\ - PAND1_2(C1,PSET_32(0,0,0,MONE32)); - - - /* ~ 8*8 = 64 instructions */ - /* Classical, 72 = 5+59+8 instructions */ - #define PMUL32_NO_UNROLLED_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one32,i) \ -- RA=PSET1_32(A0);\ -- RB=PSET_32((B0)>>24,(B0)>>8,(B0)>>16,B0);\ -+ (RA)=PSET1_32(A0);\ -+ (RB)=PSET_32((B0)>>24,(B0)>>8,(B0)>>16,B0);\ - \ -- CL=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -- CH=PSETZERO();\ -- for(i=1;i<8;++i)\ -+ (CL)=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -+ (CH)=PSETZERO();\ -+ for((i)=1;(i)<8;++(i))\ - {\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,i),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,i),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,i));\ -- PXOR1_2(CH,PSRLI_32(M,32-i));\ -+ PXOR1_2(CH,PSRLI_32(M,32-(i)));\ - }\ -- C1=PXOR_(PUNPACKLO_32(CL,CH),PSLLI_128(PUNPACKHI_32(CL,CH),1));\ -+ (C1)=PXOR_(PUNPACKLO_32(CL,CH),PSLLI_128(PUNPACKHI_32(CL,CH),1));\ - 
PXOR1_2(C1,PSRLI_128(C1,6));\ - PAND1_2(C1,PSET_64(0,MONE64)); - -@@ -1054,34 +1054,34 @@ - /* Classical, 71 = 5+58+8 instructions */ - /* The fastest method */ - #define PMUL32_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one32) \ -- RA=PSET1_32(A0);\ -- RB=PSET_32((B0)>>24,(B0)>>8,(B0)>>16,B0);\ -+ (RA)=PSET1_32(A0);\ -+ (RB)=PSET_32((B0)>>24,(B0)>>8,(B0)>>16,B0);\ - \ -- CL=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32));\ -+ (CL)=PAND_(RA,PMASK32_ONE(PAND_(RB,one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,1));\ -- CH=PSRLI_32(M,31);\ -+ (CH)=PSRLI_32(M,31);\ - \ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,2));\ - PXOR1_2(CH,PSRLI_32(M,30));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,3));\ - PXOR1_2(CH,PSRLI_32(M,29));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,4),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,4),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,4));\ - PXOR1_2(CH,PSRLI_32(M,28));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,5),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,5),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,5));\ - PXOR1_2(CH,PSRLI_32(M,27));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,6),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,6),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,6));\ - PXOR1_2(CH,PSRLI_32(M,26));\ -- M=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,7),one32),one32));\ -+ (M)=PAND_(RA,PMASK32_ONE(PAND_(PSRLI_32(RB,7),one32),one32));\ - PXOR1_2(CL,PSLLI_32(M,7));\ - PXOR1_2(CH,PSRLI_32(M,25));\ - \ -- C1=PXOR_(PUNPACKLO_32(CL,CH),PSLLI_128(PUNPACKHI_32(CL,CH),1));\ -+ (C1)=PXOR_(PUNPACKLO_32(CL,CH),PSLLI_128(PUNPACKHI_32(CL,CH),1));\ - PXOR1_2(C1,PSRLI_128(C1,6));\ - PAND1_2(C1,PSET_64(0,MONE64)); - -@@ -1091,11 +1091,11 @@ - #define PMUL32_WS_KAR_CLAS16_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM,one16) \ - PMUL16_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one16);\ - PMUL16_WS_CLAS_GF2X(M,(A0)>>16,(B0)>>16,RA,RB,CL,CH,M,one16);\ -- CM=PXOR_(C1,M);\ -+ (CM)=PXOR_(C1,M);\ - PXOR1_2(C1,PSLLI_128(M,4));\ - PMUL16_WS_CLAS_GF2X(M,(A0)^((A0)>>16),(B0)^((B0)>>16),RA,RB,CL,CH,M,one16);\ - \ -- M=PXOR_(CM,M);\ -+ (M)=PXOR_(CM,M);\ - PXOR1_2(C1,PSLLI_128(M,2)); - - -@@ -1104,165 +1104,165 @@ - #define PMUL32_WS_KAR_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM,one32) \ - PMUL16_WS_CLAS0_GF2X(C1,(A0)&MONE16,B0,RA,RB,CL,CH,M,one32);\ - PMUL16_WS_CLAS0_GF2X(M,((A0)>>16)&MONE16,(B0)>>16,RA,RB,CL,CH,M,one32);\ -- CM=PXOR_(C1,M);\ -+ (CM)=PXOR_(C1,M);\ - PXOR1_2(C1,PSLLI_128(M,4));\ - PMUL16_WS_CLAS0_GF2X(M,((A0)^((A0)>>16))&MONE16,(B0)^((B0)>>16),\ - RA,RB,CL,CH,M,one32);\ - \ -- M=PXOR_(CM,M);\ -+ (M)=PXOR_(CM,M);\ - PXOR1_2(C1,PSLLI_128(M,2)); - - - /* ~ 32*8 = 256 instructions */ - /* Classical, 258 = 3+251+4 instructions */ - #define PMUL64_NO_UNROLLED_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one64,i) \ -- RA=PSET1_64(A0);\ -- RB=PSET_64((B0)>>32,B0);\ -+ (RA)=PSET1_64(A0);\ -+ (RB)=PSET_64((B0)>>32,B0);\ - \ -- CL=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -- CH=PSETZERO();\ -- for(i=1;i<32;++i)\ -+ (CL)=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -+ (CH)=PSETZERO();\ -+ for((i)=1;(i)<32;++(i))\ - {\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64));\ -+ 
(M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,i));\ -- PXOR1_2(CH,PSRLI_64(M,64-i));\ -+ PXOR1_2(CH,PSRLI_64(M,64-(i)));\ - }\ -- C1=PXOR_(PUNPACKLO_64(CL,CH),PSLLI_128(PUNPACKHI_64(CL,CH),4)); -+ (C1)=PXOR_(PUNPACKLO_64(CL,CH),PSLLI_128(PUNPACKHI_64(CL,CH),4)); - - - /* ~ 32*8 = 256 instructions */ - /* Classical, 257 = 3+250+4 instructions */ - #define PMUL64_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one64) \ -- RA=PSET1_64(A0);\ -- RB=PSET_64((B0)>>32,B0);\ -+ (RA)=PSET1_64(A0);\ -+ (RB)=PSET_64((B0)>>32,B0);\ - \ -- CL=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64));\ -+ (CL)=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,1));\ -- CH=PSRLI_64(M,63);\ -+ (CH)=PSRLI_64(M,63);\ - \ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,2));\ - PXOR1_2(CH,PSRLI_64(M,62));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,3));\ - PXOR1_2(CH,PSRLI_64(M,61));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,4));\ - PXOR1_2(CH,PSRLI_64(M,60));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,5));\ - PXOR1_2(CH,PSRLI_64(M,59));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,6));\ - PXOR1_2(CH,PSRLI_64(M,58));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,7));\ - PXOR1_2(CH,PSRLI_64(M,57));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,8));\ - PXOR1_2(CH,PSRLI_64(M,56));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,9));\ - PXOR1_2(CH,PSRLI_64(M,55));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,10));\ - PXOR1_2(CH,PSRLI_64(M,54));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,11));\ - PXOR1_2(CH,PSRLI_64(M,53));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,12));\ - PXOR1_2(CH,PSRLI_64(M,52));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,13));\ - PXOR1_2(CH,PSRLI_64(M,51));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,14));\ - PXOR1_2(CH,PSRLI_64(M,50));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,15));\ - 
PXOR1_2(CH,PSRLI_64(M,49));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,16));\ - PXOR1_2(CH,PSRLI_64(M,48));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,17));\ - PXOR1_2(CH,PSRLI_64(M,47));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,18));\ - PXOR1_2(CH,PSRLI_64(M,46));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,19));\ - PXOR1_2(CH,PSRLI_64(M,45));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,20));\ - PXOR1_2(CH,PSRLI_64(M,44));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,21));\ - PXOR1_2(CH,PSRLI_64(M,43));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,22));\ - PXOR1_2(CH,PSRLI_64(M,42));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,23));\ - PXOR1_2(CH,PSRLI_64(M,41));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,24));\ - PXOR1_2(CH,PSRLI_64(M,40));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,25));\ - PXOR1_2(CH,PSRLI_64(M,39));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,26));\ - PXOR1_2(CH,PSRLI_64(M,38));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,27));\ - PXOR1_2(CH,PSRLI_64(M,37));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,28));\ - PXOR1_2(CH,PSRLI_64(M,36));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,29));\ - PXOR1_2(CH,PSRLI_64(M,35));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,30));\ - PXOR1_2(CH,PSRLI_64(M,34));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,31));\ - PXOR1_2(CH,PSRLI_64(M,33));\ - \ -- C1=PXOR_(PUNPACKLO_64(CL,CH),PSLLI_128(PUNPACKHI_64(CL,CH),4)); -+ (C1)=PXOR_(PUNPACKLO_64(CL,CH),PSLLI_128(PUNPACKHI_64(CL,CH),4)); - - - /* ~ 16*13 = 208 instructions */ - /* Classical, 220 = 7+199+14 instructions */ - #define PMUL64_NO_UNROLLED_WS_CLAS2_GF2X(C1,A0,B0,RA,RAM,RB,CL,CH,CLM,CHM,M,\ - one32,i) \ -- RA=PSET_32((A0)>>32,(A0)>>32,A0,A0);\ -- RAM=PSET_32(A0,A0,(A0)>>32,(A0)>>32);\ -- 
RB=PSET_32((B0)>>48,(B0)>>32,(B0)>>16,B0);\ --\ -- C1=PMASK32_ONE(PAND_(RB,one32),one32);\ -- CL=PAND_(RA,C1);\ -- CH=PSETZERO();\ -- CLM=PAND_(RAM,C1);\ -- CHM=PSETZERO();\ -- for(i=1;i<16;++i)\ -+ (RA)=PSET_32((A0)>>32,(A0)>>32,A0,A0);\ -+ (RAM)=PSET_32(A0,A0,(A0)>>32,(A0)>>32);\ -+ (RB)=PSET_32((B0)>>48,(B0)>>32,(B0)>>16,B0);\ -+\ -+ (C1)=PMASK32_ONE(PAND_(RB,one32),one32);\ -+ (CL)=PAND_(RA,C1);\ -+ (CH)=PSETZERO();\ -+ (CLM)=PAND_(RAM,C1);\ -+ (CHM)=PSETZERO();\ -+ for((i)=1;(i)<16;++(i))\ - {\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,i),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,i),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,i));\ -- PXOR1_2(CH,PSRLI_32(M,32-i));\ -- M=PAND_(RAM,C1);\ -+ PXOR1_2(CH,PSRLI_32(M,32-(i)));\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,i));\ -- PXOR1_2(CHM,PSRLI_32(M,32-i));\ -+ PXOR1_2(CHM,PSRLI_32(M,32-(i)));\ - }\ - \ -- RAM=PXOR_(PUNPACKLO_32(CLM,CHM),PUNPACKHI_32(CLM,CHM));\ -- CLM=PUNPACKLO_32(CL,CH);\ -- CHM=PUNPACKHI_32(CL,CH);\ -- C1=PXOR_(PUNPACKLO_64(CLM,CHM),PSLLI_128(PUNPACKHI_64(CLM,CHM),2));\ -+ (RAM)=PXOR_(PUNPACKLO_32(CLM,CHM),PUNPACKHI_32(CLM,CHM));\ -+ (CLM)=PUNPACKLO_32(CL,CH);\ -+ (CHM)=PUNPACKHI_32(CL,CH);\ -+ (C1)=PXOR_(PUNPACKLO_64(CLM,CHM),PSLLI_128(PUNPACKHI_64(CLM,CHM),2));\ - PXOR1_2(C1,\ - PSLLI_128(PAND_(PXOR_(RAM,PSRLI_128(RAM,6)),PSET_64(0,MONE64)),4)); - -@@ -1271,124 +1271,124 @@ - /* Classical, 219 = 7+198+14 instructions */ - /* The fastest method */ - #define PMUL64_WS_CLAS2_GF2X(C1,A0,B0,RA,RAM,RB,CL,CH,CLM,CHM,M,one32) \ -- RA=PSET_32((A0)>>32,(A0)>>32,A0,A0);\ -- RAM=PSET_32(A0,A0,(A0)>>32,(A0)>>32);\ -- RB=PSET_32((B0)>>48,(B0)>>32,(B0)>>16,B0);\ --\ -- C1=PMASK32_ONE(PAND_(RB,one32),one32);\ -- CL=PAND_(RA,C1);\ -- CLM=PAND_(RAM,C1);\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (RA)=PSET_32((A0)>>32,(A0)>>32,A0,A0);\ -+ (RAM)=PSET_32(A0,A0,(A0)>>32,(A0)>>32);\ -+ (RB)=PSET_32((B0)>>48,(B0)>>32,(B0)>>16,B0);\ -+\ -+ (C1)=PMASK32_ONE(PAND_(RB,one32),one32);\ -+ (CL)=PAND_(RA,C1);\ -+ (CLM)=PAND_(RAM,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,1),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,1));\ -- CH=PSRLI_32(M,31);\ -- M=PAND_(RAM,C1);\ -+ (CH)=PSRLI_32(M,31);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,1));\ -- CHM=PSRLI_32(M,31);\ -+ (CHM)=PSRLI_32(M,31);\ - \ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,2),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,2));\ - PXOR1_2(CH,PSRLI_32(M,30));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,2));\ - PXOR1_2(CHM,PSRLI_32(M,30));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,3),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,3));\ - PXOR1_2(CH,PSRLI_32(M,29));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,3));\ - PXOR1_2(CHM,PSRLI_32(M,29));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,4),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,4),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,4));\ - PXOR1_2(CH,PSRLI_32(M,28));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,4));\ - PXOR1_2(CHM,PSRLI_32(M,28));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,5),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,5),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,5));\ - 
PXOR1_2(CH,PSRLI_32(M,27));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,5));\ - PXOR1_2(CHM,PSRLI_32(M,27));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,6),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,6),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,6));\ - PXOR1_2(CH,PSRLI_32(M,26));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,6));\ - PXOR1_2(CHM,PSRLI_32(M,26));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,7),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,7),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,7));\ - PXOR1_2(CH,PSRLI_32(M,25));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,7));\ - PXOR1_2(CHM,PSRLI_32(M,25));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,8),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,8),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,8));\ - PXOR1_2(CH,PSRLI_32(M,24));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,8));\ - PXOR1_2(CHM,PSRLI_32(M,24));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,9),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,9),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,9));\ - PXOR1_2(CH,PSRLI_32(M,23));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,9));\ - PXOR1_2(CHM,PSRLI_32(M,23));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,10),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,10),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,10));\ - PXOR1_2(CH,PSRLI_32(M,22));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,10));\ - PXOR1_2(CHM,PSRLI_32(M,22));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,11),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,11),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,11));\ - PXOR1_2(CH,PSRLI_32(M,21));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,11));\ - PXOR1_2(CHM,PSRLI_32(M,21));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,12),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,12),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,12));\ - PXOR1_2(CH,PSRLI_32(M,20));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,12));\ - PXOR1_2(CHM,PSRLI_32(M,20));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,13),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,13),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,13));\ - PXOR1_2(CH,PSRLI_32(M,19));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,13));\ - PXOR1_2(CHM,PSRLI_32(M,19));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,14),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,14),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,14));\ - PXOR1_2(CH,PSRLI_32(M,18));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,14));\ - PXOR1_2(CHM,PSRLI_32(M,18));\ -- C1=PMASK32_ONE(PAND_(PSRLI_32(RB,15),one32),one32);\ -- M=PAND_(RA,C1);\ -+ (C1)=PMASK32_ONE(PAND_(PSRLI_32(RB,15),one32),one32);\ -+ (M)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_32(M,15));\ - PXOR1_2(CH,PSRLI_32(M,17));\ -- M=PAND_(RAM,C1);\ -+ (M)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_32(M,15));\ - PXOR1_2(CHM,PSRLI_32(M,17));\ - \ -- RAM=PXOR_(PUNPACKLO_32(CLM,CHM),PUNPACKHI_32(CLM,CHM));\ -- CLM=PUNPACKLO_32(CL,CH);\ -- CHM=PUNPACKHI_32(CL,CH);\ -- 
C1=PXOR_(PUNPACKLO_64(CLM,CHM),PSLLI_128(PUNPACKHI_64(CLM,CHM),2));\ -+ (RAM)=PXOR_(PUNPACKLO_32(CLM,CHM),PUNPACKHI_32(CLM,CHM));\ -+ (CLM)=PUNPACKLO_32(CL,CH);\ -+ (CHM)=PUNPACKHI_32(CL,CH);\ -+ (C1)=PXOR_(PUNPACKLO_64(CLM,CHM),PSLLI_128(PUNPACKHI_64(CLM,CHM),2));\ - PXOR1_2(C1,\ - PSLLI_128(PAND_(PXOR_(RAM,PSRLI_128(RAM,6)),PSET_64(0,MONE64)),4)); - -@@ -1397,12 +1397,12 @@ - #define PMUL64_WS_KAR_KAR32_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM,CM1,CM2,one32) \ - PMUL32_WS_KAR_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM1,one32);\ - PMUL32_WS_KAR_GF2X(M,(A0)>>32,(B0)>>32,RA,RB,CL,CH,CM1,CM2,one32);\ -- CM=PXOR_(C1,M);\ -+ (CM)=PXOR_(C1,M);\ - PXOR1_2(C1,PSLLI_128(M,8));\ - PMUL32_WS_KAR_GF2X(M,(A0)^((A0)>>32),(B0)^((B0)>>32),\ - RA,RB,CL,CH,CM1,CM2,one32);\ - \ -- M=PXOR_(CM,M);\ -+ (M)=PXOR_(CM,M);\ - PXOR1_2(C1,PSLLI_128(M,4)); - - -@@ -1411,11 +1411,11 @@ - #define PMUL64_WS_KAR_GF2X(C1,A0,B0,RA,RB,CL,CH,M,CM,one32) \ - PMUL32_WS_CLAS_GF2X(C1,A0,B0,RA,RB,CL,CH,M,one32);\ - PMUL32_WS_CLAS_GF2X(M,(A0)>>32,(B0)>>32,RA,RB,CL,CH,M,one32);\ -- CM=PXOR_(C1,M);\ -+ (CM)=PXOR_(C1,M);\ - PXOR1_2(C1,PSLLI_128(M,8));\ - PMUL32_WS_CLAS_GF2X(M,(A0)^((A0)>>32),(B0)^((B0)>>32),RA,RB,CL,CH,M,one32);\ - \ -- M=PXOR_(CM,M);\ -+ (M)=PXOR_(CM,M);\ - PXOR1_2(C1,PSLLI_128(M,4)); - - -@@ -1424,223 +1424,223 @@ - /* Classical, 511 = 2+507+2 instructions */ - #define PMUL64x2_NO_UNROLLED_WS_CLAS_GF2X(C1,C2,A0,A1,B0,B1,RA,RB,CL,CH,M,\ - one64,i) \ -- RA=PSET_64(A1,A0);\ -- RB=PSET_64(B1,B0);\ -+ (RA)=PSET_64(A1,A0);\ -+ (RB)=PSET_64(B1,B0);\ - \ -- CL=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -- CH=PSETZERO();\ -- for(i=1;i<64;++i)\ -+ (CL)=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -+ (CH)=PSETZERO();\ -+ for((i)=1;(i)<64;++(i))\ - {\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,i));\ -- PXOR1_2(CH,PSRLI_64(M,64-i));\ -+ PXOR1_2(CH,PSRLI_64(M,64-(i)));\ - }\ -- C1=PUNPACKLO_64(CL,CH);\ -- C2=PUNPACKHI_64(CL,CH); -+ (C1)=PUNPACKLO_64(CL,CH);\ -+ (C2)=PUNPACKHI_64(CL,CH); - - - /* ~ 64*8 = 512 instructions */ - /* Classical, 509 = 2+505+2 instructions */ - #define PMUL64x2_WS_CLAS_GF2X(C1,C2,A0,A1,B0,B1,RA,RB,M,one64) \ -- RA=PSET_64(A1,A0);\ -- RB=PSET_64(B1,B0);\ -+ (RA)=PSET_64(A1,A0);\ -+ (RB)=PSET_64(B1,B0);\ - \ -- CL=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64));\ -+ (CL)=PAND_(RA,PMASK64_ONE(PAND_(RB,one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,1));\ -- CH=PSRLI_64(M,63);\ -+ (CH)=PSRLI_64(M,63);\ - \ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,2));\ - PXOR1_2(CH,PSRLI_64(M,62));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,3));\ - PXOR1_2(CH,PSRLI_64(M,61));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,4));\ - PXOR1_2(CH,PSRLI_64(M,60));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,5));\ - PXOR1_2(CH,PSRLI_64(M,59));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64));\ - 
PXOR1_2(CL,PSLLI_64(M,6));\ - PXOR1_2(CH,PSRLI_64(M,58));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,7));\ - PXOR1_2(CH,PSRLI_64(M,57));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,8));\ - PXOR1_2(CH,PSRLI_64(M,56));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,9));\ - PXOR1_2(CH,PSRLI_64(M,55));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,10));\ - PXOR1_2(CH,PSRLI_64(M,54));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,11));\ - PXOR1_2(CH,PSRLI_64(M,53));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,12));\ - PXOR1_2(CH,PSRLI_64(M,52));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,13));\ - PXOR1_2(CH,PSRLI_64(M,51));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,14));\ - PXOR1_2(CH,PSRLI_64(M,50));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,15));\ - PXOR1_2(CH,PSRLI_64(M,49));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,16));\ - PXOR1_2(CH,PSRLI_64(M,48));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,17));\ - PXOR1_2(CH,PSRLI_64(M,47));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,18));\ - PXOR1_2(CH,PSRLI_64(M,46));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,19));\ - PXOR1_2(CH,PSRLI_64(M,45));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,20));\ - PXOR1_2(CH,PSRLI_64(M,44));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,21));\ - PXOR1_2(CH,PSRLI_64(M,43));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,22));\ - PXOR1_2(CH,PSRLI_64(M,42));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,23));\ - PXOR1_2(CH,PSRLI_64(M,41));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,24));\ - PXOR1_2(CH,PSRLI_64(M,40));\ -- 
M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,25));\ - PXOR1_2(CH,PSRLI_64(M,39));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,26));\ - PXOR1_2(CH,PSRLI_64(M,38));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,27));\ - PXOR1_2(CH,PSRLI_64(M,37));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,28));\ - PXOR1_2(CH,PSRLI_64(M,36));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,29));\ - PXOR1_2(CH,PSRLI_64(M,35));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,30));\ - PXOR1_2(CH,PSRLI_64(M,34));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,31));\ - PXOR1_2(CH,PSRLI_64(M,33));\ - \ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,32),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,32),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,32));\ - PXOR1_2(CH,PSRLI_64(M,32));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,33),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,33),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,33));\ - PXOR1_2(CH,PSRLI_64(M,31));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,34),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,34),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,34));\ - PXOR1_2(CH,PSRLI_64(M,30));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,35),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,35),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,35));\ - PXOR1_2(CH,PSRLI_64(M,29));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,36),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,36),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,36));\ - PXOR1_2(CH,PSRLI_64(M,28));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,37),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,37),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,37));\ - PXOR1_2(CH,PSRLI_64(M,27));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,38),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,38),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,38));\ - PXOR1_2(CH,PSRLI_64(M,26));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,39),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,39),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,39));\ - PXOR1_2(CH,PSRLI_64(M,25));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,40),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,40),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,40));\ - PXOR1_2(CH,PSRLI_64(M,24));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,41),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,41),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,41));\ - PXOR1_2(CH,PSRLI_64(M,23));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,42),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,42),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,42));\ - PXOR1_2(CH,PSRLI_64(M,22));\ -- 
M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,43),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,43),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,43));\ - PXOR1_2(CH,PSRLI_64(M,21));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,44),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,44),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,44));\ - PXOR1_2(CH,PSRLI_64(M,20));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,45),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,45),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,45));\ - PXOR1_2(CH,PSRLI_64(M,19));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,46),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,46),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,46));\ - PXOR1_2(CH,PSRLI_64(M,18));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,47),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,47),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,47));\ - PXOR1_2(CH,PSRLI_64(M,17));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,48),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,48),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,48));\ - PXOR1_2(CH,PSRLI_64(M,16));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,49),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,49),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,49));\ - PXOR1_2(CH,PSRLI_64(M,15));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,50),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,50),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,50));\ - PXOR1_2(CH,PSRLI_64(M,14));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,51),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,51),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,51));\ - PXOR1_2(CH,PSRLI_64(M,13));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,52),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,52),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,52));\ - PXOR1_2(CH,PSRLI_64(M,12));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,53),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,53),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,53));\ - PXOR1_2(CH,PSRLI_64(M,11));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,54),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,54),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,54));\ - PXOR1_2(CH,PSRLI_64(M,10));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,55),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,55),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,55));\ - PXOR1_2(CH,PSRLI_64(M,9));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,56),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,56),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,56));\ - PXOR1_2(CH,PSRLI_64(M,8));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,57),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,57),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,57));\ - PXOR1_2(CH,PSRLI_64(M,7));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,58),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,58),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,58));\ - PXOR1_2(CH,PSRLI_64(M,6));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,59),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,59),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,59));\ - PXOR1_2(CH,PSRLI_64(M,5));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,60),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,60),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,60));\ - PXOR1_2(CH,PSRLI_64(M,4));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,61),one64),one64));\ -+ 
(M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,61),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,61));\ - PXOR1_2(CH,PSRLI_64(M,3));\ -- M=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,62),one64),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PAND_(PSRLI_64(RB,62),one64),one64));\ - PXOR1_2(CL,PSLLI_64(M,62));\ - PXOR1_2(CH,PSRLI_64(M,2));\ - /* Optimization: the '&1' is removed */\ -- M=PAND_(RA,PMASK64_ONE(PSRLI_64(RB,63),one64));\ -+ (M)=PAND_(RA,PMASK64_ONE(PSRLI_64(RB,63),one64));\ - PXOR1_2(CL,PSLLI_64(M,63));\ - PXOR1_2(CH,PSRLI_64(M,1));\ - \ -- C1=PUNPACKLO_64(CL,CH);\ -- C2=PUNPACKHI_64(CL,CH);\ -+ (C1)=PUNPACKLO_64(CL,CH);\ -+ (C2)=PUNPACKHI_64(CL,CH);\ - - - /* Karatsuba, 525 = 223*2+71*1+8 instructions */ -@@ -1660,29 +1660,29 @@ - /* The fastest classical method */ - #define PMUL128_NO_UNROLLED_WS_CLAS_GF2X(C1,C2,A,B,RA,RAM,RB,CL,CH,CLM,CHM,\ - one64,i) \ -- RA=PSET_64((A)[1],(A)[0]);\ -- RAM=PSET_64((A)[0],(A)[1]);\ -- RB=PSET_64((B)[1],(B)[0]);\ --\ -- C1=PMASK64_ONE(PAND_(RB,one64),one64);\ -- CL=PAND_(RA,C1);\ -- CH=PSETZERO();\ -- CLM=PAND_(RAM,C1);\ -- CHM=PSETZERO();\ -- for(i=1;i<64;++i)\ -+ (RA)=PSET_64((A)[1],(A)[0]);\ -+ (RAM)=PSET_64((A)[0],(A)[1]);\ -+ (RB)=PSET_64((B)[1],(B)[0]);\ -+\ -+ (C1)=PMASK64_ONE(PAND_(RB,one64),one64);\ -+ (CL)=PAND_(RA,C1);\ -+ (CH)=PSETZERO();\ -+ (CLM)=PAND_(RAM,C1);\ -+ (CHM)=PSETZERO();\ -+ for((i)=1;(i)<64;++(i))\ - {\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,i),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,i));\ -- PXOR1_2(CH,PSRLI_64(C2,64-i));\ -- C2=PAND_(RAM,C1);\ -+ PXOR1_2(CH,PSRLI_64(C2,64-(i)));\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,i));\ -- PXOR1_2(CHM,PSRLI_64(C2,64-i));\ -+ PXOR1_2(CHM,PSRLI_64(C2,64-(i)));\ - }\ -- C1=PUNPACKLO_64(CL,CH);\ -- C2=PUNPACKHI_64(CL,CH);\ -+ (C1)=PUNPACKLO_64(CL,CH);\ -+ (C2)=PUNPACKHI_64(CL,CH);\ - \ -- RAM=PXOR_(PUNPACKLO_64(CLM,CHM),PUNPACKHI_64(CLM,CHM));\ -+ (RAM)=PXOR_(PUNPACKLO_64(CLM,CHM),PUNPACKHI_64(CLM,CHM));\ - PXOR1_2(C1,PLSHIFT64(RAM));\ - PXOR1_2(C2,PRSHIFT64(RAM)); - -@@ -1690,462 +1690,462 @@ - /* ~ 64*13 = 832 instructions */ - /* Classical, 832 = 3+820+9 instructions */ - #define PMUL128_WS_CLAS_GF2X(C1,C2,A,B,RA,RAM,RB,CL,CH,CLM,CHM,one64) \ -- RA=PSET_64((A)[1],(A)[0]);\ -- RAM=PSET_64((A)[0],(A)[1]);\ -- RB=PSET_64((B)[1],(B)[0]);\ --\ -- C1=PMASK64_ONE(PAND_(RB,one64),one64);\ -- CL=PAND_(RA,C1);\ -- CLM=PAND_(RAM,C1);\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (RA)=PSET_64((A)[1],(A)[0]);\ -+ (RAM)=PSET_64((A)[0],(A)[1]);\ -+ (RB)=PSET_64((B)[1],(B)[0]);\ -+\ -+ (C1)=PMASK64_ONE(PAND_(RB,one64),one64);\ -+ (CL)=PAND_(RA,C1);\ -+ (CLM)=PAND_(RAM,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,1),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,1));\ -- CH=PSRLI_64(C2,63);\ -- C2=PAND_(RAM,C1);\ -+ (CH)=PSRLI_64(C2,63);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,1));\ -- CHM=PSRLI_64(C2,63);\ -+ (CHM)=PSRLI_64(C2,63);\ - \ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,2),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,2));\ - PXOR1_2(CH,PSRLI_64(C2,62));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,2));\ - PXOR1_2(CHM,PSRLI_64(C2,62));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,3),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,3));\ - 
PXOR1_2(CH,PSRLI_64(C2,61));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,3));\ - PXOR1_2(CHM,PSRLI_64(C2,61));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,4),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,4));\ - PXOR1_2(CH,PSRLI_64(C2,60));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,4));\ - PXOR1_2(CHM,PSRLI_64(C2,60));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,5),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,5));\ - PXOR1_2(CH,PSRLI_64(C2,59));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,5));\ - PXOR1_2(CHM,PSRLI_64(C2,59));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,6),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,6));\ - PXOR1_2(CH,PSRLI_64(C2,58));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,6));\ - PXOR1_2(CHM,PSRLI_64(C2,58));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,7),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,7));\ - PXOR1_2(CH,PSRLI_64(C2,57));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,7));\ - PXOR1_2(CHM,PSRLI_64(C2,57));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,8),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,8));\ - PXOR1_2(CH,PSRLI_64(C2,56));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,8));\ - PXOR1_2(CHM,PSRLI_64(C2,56));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,9),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,9));\ - PXOR1_2(CH,PSRLI_64(C2,55));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,9));\ - PXOR1_2(CHM,PSRLI_64(C2,55));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,10),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,10));\ - PXOR1_2(CH,PSRLI_64(C2,54));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,10));\ - PXOR1_2(CHM,PSRLI_64(C2,54));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,11),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,11));\ - PXOR1_2(CH,PSRLI_64(C2,53));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,11));\ - PXOR1_2(CHM,PSRLI_64(C2,53));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,12),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,12));\ - PXOR1_2(CH,PSRLI_64(C2,52));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,12));\ - PXOR1_2(CHM,PSRLI_64(C2,52));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,13),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,13));\ - PXOR1_2(CH,PSRLI_64(C2,51));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,13));\ - PXOR1_2(CHM,PSRLI_64(C2,51));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ 
(C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,14),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,14));\ - PXOR1_2(CH,PSRLI_64(C2,50));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,14));\ - PXOR1_2(CHM,PSRLI_64(C2,50));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,15),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,15));\ - PXOR1_2(CH,PSRLI_64(C2,49));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,15));\ - PXOR1_2(CHM,PSRLI_64(C2,49));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,16),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,16));\ - PXOR1_2(CH,PSRLI_64(C2,48));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,16));\ - PXOR1_2(CHM,PSRLI_64(C2,48));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,17),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,17));\ - PXOR1_2(CH,PSRLI_64(C2,47));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,17));\ - PXOR1_2(CHM,PSRLI_64(C2,47));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,18),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,18));\ - PXOR1_2(CH,PSRLI_64(C2,46));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,18));\ - PXOR1_2(CHM,PSRLI_64(C2,46));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,19),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,19));\ - PXOR1_2(CH,PSRLI_64(C2,45));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,19));\ - PXOR1_2(CHM,PSRLI_64(C2,45));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,20),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,20));\ - PXOR1_2(CH,PSRLI_64(C2,44));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,20));\ - PXOR1_2(CHM,PSRLI_64(C2,44));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,21),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,21));\ - PXOR1_2(CH,PSRLI_64(C2,43));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,21));\ - PXOR1_2(CHM,PSRLI_64(C2,43));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,22),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,22));\ - PXOR1_2(CH,PSRLI_64(C2,42));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,22));\ - PXOR1_2(CHM,PSRLI_64(C2,42));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,23),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,23));\ - PXOR1_2(CH,PSRLI_64(C2,41));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,23));\ - PXOR1_2(CHM,PSRLI_64(C2,41));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,24),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,24));\ - PXOR1_2(CH,PSRLI_64(C2,40));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - 
PXOR1_2(CLM,PSLLI_64(C2,24));\ - PXOR1_2(CHM,PSRLI_64(C2,40));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,25),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,25));\ - PXOR1_2(CH,PSRLI_64(C2,39));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,25));\ - PXOR1_2(CHM,PSRLI_64(C2,39));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,26),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,26));\ - PXOR1_2(CH,PSRLI_64(C2,38));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,26));\ - PXOR1_2(CHM,PSRLI_64(C2,38));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,27),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,27));\ - PXOR1_2(CH,PSRLI_64(C2,37));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,27));\ - PXOR1_2(CHM,PSRLI_64(C2,37));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,28),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,28));\ - PXOR1_2(CH,PSRLI_64(C2,36));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,28));\ - PXOR1_2(CHM,PSRLI_64(C2,36));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,29),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,29));\ - PXOR1_2(CH,PSRLI_64(C2,35));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,29));\ - PXOR1_2(CHM,PSRLI_64(C2,35));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,30),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,30));\ - PXOR1_2(CH,PSRLI_64(C2,34));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,30));\ - PXOR1_2(CHM,PSRLI_64(C2,34));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,31),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,31));\ - PXOR1_2(CH,PSRLI_64(C2,33));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,31));\ - PXOR1_2(CHM,PSRLI_64(C2,33));\ - \ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,32),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,32),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,32));\ - PXOR1_2(CH,PSRLI_64(C2,32));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,32));\ - PXOR1_2(CHM,PSRLI_64(C2,32));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,33),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,33),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,33));\ - PXOR1_2(CH,PSRLI_64(C2,31));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,33));\ - PXOR1_2(CHM,PSRLI_64(C2,31));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,34),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,34),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,34));\ - PXOR1_2(CH,PSRLI_64(C2,30));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,34));\ - PXOR1_2(CHM,PSRLI_64(C2,30));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,35),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,35),one64),one64);\ -+ 
(C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,35));\ - PXOR1_2(CH,PSRLI_64(C2,29));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,35));\ - PXOR1_2(CHM,PSRLI_64(C2,29));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,36),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,36),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,36));\ - PXOR1_2(CH,PSRLI_64(C2,28));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,36));\ - PXOR1_2(CHM,PSRLI_64(C2,28));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,37),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,37),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,37));\ - PXOR1_2(CH,PSRLI_64(C2,27));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,37));\ - PXOR1_2(CHM,PSRLI_64(C2,27));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,38),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,38),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,38));\ - PXOR1_2(CH,PSRLI_64(C2,26));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,38));\ - PXOR1_2(CHM,PSRLI_64(C2,26));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,39),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,39),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,39));\ - PXOR1_2(CH,PSRLI_64(C2,25));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,39));\ - PXOR1_2(CHM,PSRLI_64(C2,25));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,40),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,40),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,40));\ - PXOR1_2(CH,PSRLI_64(C2,24));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,40));\ - PXOR1_2(CHM,PSRLI_64(C2,24));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,41),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,41),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,41));\ - PXOR1_2(CH,PSRLI_64(C2,23));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,41));\ - PXOR1_2(CHM,PSRLI_64(C2,23));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,42),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,42),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,42));\ - PXOR1_2(CH,PSRLI_64(C2,22));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,42));\ - PXOR1_2(CHM,PSRLI_64(C2,22));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,43),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,43),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,43));\ - PXOR1_2(CH,PSRLI_64(C2,21));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,43));\ - PXOR1_2(CHM,PSRLI_64(C2,21));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,44),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,44),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,44));\ - PXOR1_2(CH,PSRLI_64(C2,20));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,44));\ - PXOR1_2(CHM,PSRLI_64(C2,20));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,45),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,45),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,45));\ - PXOR1_2(CH,PSRLI_64(C2,19));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,45));\ - PXOR1_2(CHM,PSRLI_64(C2,19));\ -- 
C1=PMASK64_ONE(PAND_(PSRLI_64(RB,46),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,46),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,46));\ - PXOR1_2(CH,PSRLI_64(C2,18));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,46));\ - PXOR1_2(CHM,PSRLI_64(C2,18));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,47),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,47),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,47));\ - PXOR1_2(CH,PSRLI_64(C2,17));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,47));\ - PXOR1_2(CHM,PSRLI_64(C2,17));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,48),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,48),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,48));\ - PXOR1_2(CH,PSRLI_64(C2,16));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,48));\ - PXOR1_2(CHM,PSRLI_64(C2,16));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,49),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,49),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,49));\ - PXOR1_2(CH,PSRLI_64(C2,15));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,49));\ - PXOR1_2(CHM,PSRLI_64(C2,15));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,50),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,50),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,50));\ - PXOR1_2(CH,PSRLI_64(C2,14));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,50));\ - PXOR1_2(CHM,PSRLI_64(C2,14));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,51),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,51),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,51));\ - PXOR1_2(CH,PSRLI_64(C2,13));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,51));\ - PXOR1_2(CHM,PSRLI_64(C2,13));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,52),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,52),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,52));\ - PXOR1_2(CH,PSRLI_64(C2,12));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,52));\ - PXOR1_2(CHM,PSRLI_64(C2,12));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,53),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,53),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,53));\ - PXOR1_2(CH,PSRLI_64(C2,11));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,53));\ - PXOR1_2(CHM,PSRLI_64(C2,11));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,54),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,54),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,54));\ - PXOR1_2(CH,PSRLI_64(C2,10));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,54));\ - PXOR1_2(CHM,PSRLI_64(C2,10));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,55),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,55),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,55));\ - PXOR1_2(CH,PSRLI_64(C2,9));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,55));\ - PXOR1_2(CHM,PSRLI_64(C2,9));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,56),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,56),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,56));\ - 
PXOR1_2(CH,PSRLI_64(C2,8));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,56));\ - PXOR1_2(CHM,PSRLI_64(C2,8));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,57),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,57),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,57));\ - PXOR1_2(CH,PSRLI_64(C2,7));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,57));\ - PXOR1_2(CHM,PSRLI_64(C2,7));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,58),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,58),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,58));\ - PXOR1_2(CH,PSRLI_64(C2,6));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,58));\ - PXOR1_2(CHM,PSRLI_64(C2,6));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,59),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,59),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,59));\ - PXOR1_2(CH,PSRLI_64(C2,5));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,59));\ - PXOR1_2(CHM,PSRLI_64(C2,5));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,60),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,60),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,60));\ - PXOR1_2(CH,PSRLI_64(C2,4));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,60));\ - PXOR1_2(CHM,PSRLI_64(C2,4));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,61),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,61),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,61));\ - PXOR1_2(CH,PSRLI_64(C2,3));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,61));\ - PXOR1_2(CHM,PSRLI_64(C2,3));\ -- C1=PMASK64_ONE(PAND_(PSRLI_64(RB,62),one64),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PAND_(PSRLI_64(RB,62),one64),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,62));\ - PXOR1_2(CH,PSRLI_64(C2,2));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,62));\ - PXOR1_2(CHM,PSRLI_64(C2,2));\ - /* Optimization: the '&1' is removed */\ -- C1=PMASK64_ONE(PSRLI_64(RB,63),one64);\ -- C2=PAND_(RA,C1);\ -+ (C1)=PMASK64_ONE(PSRLI_64(RB,63),one64);\ -+ (C2)=PAND_(RA,C1);\ - PXOR1_2(CL,PSLLI_64(C2,63));\ - PXOR1_2(CH,PSRLI_64(C2,1));\ -- C2=PAND_(RAM,C1);\ -+ (C2)=PAND_(RAM,C1);\ - PXOR1_2(CLM,PSLLI_64(C2,63));\ - PXOR1_2(CHM,PSRLI_64(C2,1));\ - \ -- C1=PUNPACKLO_64(CL,CH);\ -- C2=PUNPACKHI_64(CL,CH);\ -+ (C1)=PUNPACKLO_64(CL,CH);\ -+ (C2)=PUNPACKHI_64(CL,CH);\ - \ -- RAM=PXOR_(PUNPACKLO_64(CLM,CHM),PUNPACKHI_64(CLM,CHM));\ -+ (RAM)=PXOR_(PUNPACKLO_64(CLM,CHM),PUNPACKHI_64(CLM,CHM));\ - PXOR1_2(C1,PLSHIFT64(RAM));\ - PXOR1_2(C2,PRSHIFT64(RAM)); - -@@ -2197,9 +2197,9 @@ - /* A2*B2 */\ - PMUL32_WS_CLAS_GF2X(z3,(A)[2],(B)[2],RA,RB,CL,CH,M,one32);\ - \ -- res1=PXOR_(z1,z2);\ -- res2=PXOR_(z3,z2);\ -- z2=PXOR_(res1,z3);\ -+ (res1)=PXOR_(z1,z2);\ -+ (res2)=PXOR_(z3,z2);\ -+ (z2)=PXOR_(res1,z3);\ - /* C[0] = C0 - C[1] = C1^(C0^C2) - C[2] = C2^(C1^C3)^C0^C4 -@@ -2233,9 +2233,9 @@ - /* A2*B2 */\ - PMUL64_WS_KAR_GF2X(z3,(A)[2],(B)[2],RA,RB,CL,CH,M,CM,one32);\ - \ -- res1=PXOR_(z1,z2);\ -- res2=PXOR_(z3,z2);\ -- z2=PXOR_(res1,z3);\ -+ (res1)=PXOR_(z1,z2);\ -+ (res2)=PXOR_(z3,z2);\ -+ (z2)=PXOR_(res1,z3);\ - /* C[0] = C0 - C[1] = C1^(C0^C2) - C[2] = C2^(C1^C3)^C0^C4 -@@ -2287,7 +2287,7 @@ - PMUL128_WS_KAR_GF2X(C3,C4,(A)+2,(B)+2,RA,RB,CL,CH,M,MM,CM,one32);\ - \ - PXOR1_2(C3,C2);\ -- C2=PXOR_(C3,C1);\ -+ 
(C2)=PXOR_(C3,C1);\ - PXOR1_2(C3,C4);\ - \ - RESERVED_BUF4[0]=(A)[0]^(A)[2];\ -@@ -2309,7 +2309,7 @@ - PMUL160_WS_KAR6_GF2X(C3,C4,C5,(A)+2,(B)+2,RA,RB,CL,CH,M,MM,CM,\ - res1,res2,one32)\ - PXOR1_2(C3,C2);\ -- C2=PXOR_(C3,C1);\ -+ (C2)=PXOR_(C3,C1);\ - PXOR1_2(C3,C4);\ - PXOR1_2(C4,C5);\ - \ -@@ -2335,7 +2335,7 @@ - PMUL192_WS_KAR6_GF2X(C3,C4,C5,(A)+2,(B)+2,RA,RB,CL,CH,M,MM,CM,\ - res1,res2,one32)\ - PXOR1_2(C3,C2);\ -- C2=PXOR_(C3,C1);\ -+ (C2)=PXOR_(C3,C1);\ - PXOR1_2(C3,C4);\ - PXOR1_2(C4,C5);\ - \ -@@ -2418,11 +2418,11 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32)\ - PMUL160_WS_KAR6_GF2X(C5,C6,C7,(A)+4,(B)+4,RA,RB,CL,CH,M,M2,CM,\ - CM1,CM2,one32)\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ - \ - RESERVED_BUF8[0]=(A)[0]^(A)[4];\ - RESERVED_BUF8[1]=(A)[1]^(A)[5];\ -@@ -2481,11 +2481,11 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32)\ - PMUL192_WS_KAR6_GF2X(C5,C6,C7,(A)+4,(B)+4,RA,RB,CL,CH,M,M2,CM,\ - CM1,CM2,one32)\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ - \ - RESERVED_BUF8[0]=(A)[0]^(A)[4];\ - RESERVED_BUF8[1]=(A)[1]^(A)[5];\ -@@ -2511,12 +2511,12 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32);\ - PMUL224_WS_KAR_GF2X(C5,C6,C7,C8,(A)+4,(B)+4,\ - RA,RB,CL,CH,M,MM,CM,M3,M4,one32);\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -- C6=PXOR_(C6,C8);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ -+ (C6)=PXOR_(C6,C8);\ - \ - RESERVED_BUF8[0]=(A)[0]^(A)[4];\ - RESERVED_BUF8[1]=(A)[1]^(A)[5];\ -@@ -2542,12 +2542,12 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32);\ - PMUL256_WS_KAR_GF2X(C5,C6,C7,C8,(A)+4,(B)+4,\ - RA,RB,CL,CH,M,MM,CM,M3,M4,one32);\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -- C6=PXOR_(C6,C8);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ -+ (C6)=PXOR_(C6,C8);\ - \ - RESERVED_BUF8[0]=(A)[0]^(A)[4];\ - RESERVED_BUF8[1]=(A)[1]^(A)[5];\ -@@ -2573,13 +2573,13 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32)\ - PMUL288_WS_KAR_GF2X(C5,C6,C7,C8,C9,(A)+4,(B)+4,RA,RB,CL,CH,M,MM,CM,\ - M1,M2,M3,res1,res2,one32)\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -- C6=PXOR_(C6,C8);\ -- C7=PXOR_(C7,C9);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ -+ (C6)=PXOR_(C6,C8);\ -+ (C7)=PXOR_(C7,C9);\ - \ - RESERVED_BUF10[0]=(A)[0]^(A)[4];\ - RESERVED_BUF10[1]=(A)[1]^(A)[5];\ -@@ -2608,13 +2608,13 @@ - PMUL256_WS_KAR_GF2X(C1,C2,C3,C4,A,B,RA,RB,CL,CH,M,MM,CM,M1,M2,one32)\ - PMUL320_WS_KAR_GF2X(C5,C6,C7,C8,C9,(A)+4,(B)+4,RA,RB,CL,CH,M,MM,CM,\ - M1,M2,M3,res1,res2,one32)\ -- C5=PXOR_(C3,C5);\ -- C6=PXOR_(C4,C6);\ -- C3=PXOR_(C5,C1);\ -- C4=PXOR_(C6,C2);\ -- C5=PXOR_(C5,C7);\ -- C6=PXOR_(C6,C8);\ -- C7=PXOR_(C7,C9);\ -+ (C5)=PXOR_(C3,C5);\ -+ (C6)=PXOR_(C4,C6);\ -+ (C3)=PXOR_(C5,C1);\ -+ (C4)=PXOR_(C6,C2);\ -+ (C5)=PXOR_(C5,C7);\ -+ 
(C6)=PXOR_(C6,C8);\ -+ (C7)=PXOR_(C7,C9);\ - \ - RESERVED_BUF10[0]=(A)[0]^(A)[4];\ - RESERVED_BUF10[1]=(A)[1]^(A)[5];\ -@@ -2729,62 +2729,62 @@ - /* Classical: 4 mul64, 5 other instructions */ - #define PCLMUL128_WS_CLAS_GF2X(z1,z2,x,y,sum,res_low,res_high) \ - /* X^0 */\ -- res_low=PCLMUL(x,y,0);\ -+ (res_low)=PCLMUL(x,y,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x, y, 1);\ -- z2=PCLMUL(x, y, 0x10);\ -- res_high=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x, y, 1);\ -+ (z2)=PCLMUL(x, y, 0x10);\ -+ (res_high)=PXOR_(z1,z2);\ - \ - /* mid2_low: x^64 ... x^127 */\ -- sum=PLSHIFT64(res_high);\ -+ (sum)=PLSHIFT64(res_high);\ - /* mid2_low + L */\ -- z1=PXOR_(res_low,sum);\ -+ (z1)=PXOR_(res_low,sum);\ - \ - /* X^128 */\ -- res_low=PCLMUL(x,y,0x11);\ -+ (res_low)=PCLMUL(x,y,0x11);\ - \ - /* mid2_high: x^128 ... x^191 */\ -- sum=PRSHIFT64(res_high);\ -+ (sum)=PRSHIFT64(res_high);\ - /* mid2_high + H */\ -- z2=PXOR_(res_low,sum); -+ (z2)=PXOR_(res_low,sum); - - - /* Classical: 4 mul64, 7 other instructions */ - #define PCLMUL128_ADD_CLAS_GF2X(z3,z4,z1,z2,x,y,sum,res_low,res_high) \ - /* X^0 */\ -- res_low=PCLMUL(x,y,0);\ -+ (res_low)=PCLMUL(x,y,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x, y, 1);\ -- z2=PCLMUL(x, y, 0x10);\ -- res_high=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x, y, 1);\ -+ (z2)=PCLMUL(x, y, 0x10);\ -+ (res_high)=PXOR_(z1,z2);\ - \ - /* mid2_low: x^64 ... x^127 */\ -- sum=PLSHIFT64(res_high);\ -+ (sum)=PLSHIFT64(res_high);\ - /* mid2_low + L */\ -- z3^=PXOR_(res_low,sum);\ -+ (z3)^=PXOR_(res_low,sum);\ - \ - /* X^128 */\ -- res_low=PCLMUL(x,y,0x11);\ -+ (res_low)=PCLMUL(x,y,0x11);\ - \ - /* mid2_high: x^128 ... x^191 */\ -- sum=PRSHIFT64(res_high);\ -+ (sum)=PRSHIFT64(res_high);\ - /* mid2_high + H */\ -- z4^=PXOR_(res_low,sum); -+ (z4)^=PXOR_(res_low,sum); - - - /* Karatsuba: 3 mul64, 10 other instructions */ - #define PCLMUL128_WS_KAR_GF2X(z1,z2,x,y,sum,res_low,res_high) \ - /* X^0 */\ -- z1=PCLMUL(x,y,0);\ -+ (z1)=PCLMUL(x,y,0);\ - /* X^128 */\ -- z2=PCLMUL(x,y,0x11);\ -+ (z2)=PCLMUL(x,y,0x11);\ - \ -- res_low=PXOR_(x,PRSHIFT64(x));\ -- res_high=PXOR_(y,PRSHIFT64(y));\ -+ (res_low)=PXOR_(x,PRSHIFT64(x));\ -+ (res_high)=PXOR_(y,PRSHIFT64(y));\ - \ -- sum=PCLMUL(res_low,res_high,0);\ -+ (sum)=PCLMUL(res_low,res_high,0);\ - PXOR1_2(sum,z1);\ - PXOR1_2(sum,z2);\ - \ -@@ -2795,14 +2795,14 @@ - /* Karatsuba: 3 mul64, 12 other instructions */ - #define PCLMUL128_ADD_KAR_GF2X(z3,z4,z1,z2,x,y,sum,res_low,res_high) \ - /* X^0 */\ -- z1=PCLMUL(x,y,0);\ -+ (z1)=PCLMUL(x,y,0);\ - /* X^128 */\ -- z2=PCLMUL(x,y,0x11);\ -+ (z2)=PCLMUL(x,y,0x11);\ - \ -- res_low=PXOR_(x,PRSHIFT64(x));\ -- res_high=PXOR_(y,PRSHIFT64(y));\ -+ (res_low)=PXOR_(x,PRSHIFT64(x));\ -+ (res_high)=PXOR_(y,PRSHIFT64(y));\ - \ -- sum=PCLMUL(res_low,res_high,0);\ -+ (sum)=PCLMUL(res_low,res_high,0);\ - PXOR1_2(sum,z1);\ - PXOR1_2(sum,z2);\ - \ -@@ -2815,50 +2815,50 @@ - /* Classical: 9 mul64, 1 PMIDDLE, 9 other instructions */ - #define PCLMUL192_WS_CLAS_GF2X(z3,z1,z2,x1,x2,y1,y2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ 
(sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PCLMUL(x1, y2, 1);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PCLMUL(x1, y2, 1);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- res1=PCLMUL(x2,y2,0);\ -- res2=PRSHIFT64(sum);\ -- z2=PXOR_(res1,res2); -+ (res1)=PCLMUL(x2,y2,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,res2); - - - /* Karatsuba: 6 mul64, 1 PMIDDLE, 19 other instructions */ - #define PCLMUL192_WS_KAR_GF2X(z1,z2,z3,x1,x2,y1,y2,sum,res1,res2) \ - {__m128i u31;\ - /* A0*B0 */\ -- z1=PCLMUL(x1,y1,0);\ -+ (z1)=PCLMUL(x1,y1,0);\ - /* A1*B1 */\ -- z2=PCLMUL(x1,y1,0x11);\ -+ (z2)=PCLMUL(x1,y1,0x11);\ - /* A2*B2 */\ -- z3=PCLMUL(x2,y2,0);\ -+ (z3)=PCLMUL(x2,y2,0);\ - \ -- res1=PXOR_(z1,z2);\ -- res2=PXOR_(z3,z2);\ -- z2=PXOR_(res1,z3);\ -+ (res1)=PXOR_(z1,z2);\ -+ (res2)=PXOR_(z3,z2);\ -+ (z2)=PXOR_(res1,z3);\ - /* C[0] = C0 - C[1] = C1^(C0^C2) - C[2] = C2^(C1^C3)^C0^C4 -@@ -2866,9 +2866,9 @@ - C[4] = C4^(C5^C3) - C[5] = C5 */\ - /* (A2 A2) */\ -- u31=PSHUFFLE_32_1010(x2);\ -+ (u31)=PSHUFFLE_32_1010(x2);\ - /* (B2 B2) */\ -- sum=PSHUFFLE_32_1010(y2);\ -+ (sum)=PSHUFFLE_32_1010(y2);\ - /* (A2 A2) ^ (A0 A1) */\ - PXOR1_2(u31,x1);\ - /* (B2 B2) ^ (B0 B1) */\ -@@ -2890,13 +2890,13 @@ - #define PCLMUL192_WS_KAR256_GF2X(z1,z2,z3,x1,x2,y1,y2,sum,res1,res2) \ - {__m128i x,y,u31,u32;\ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ -- z3=PCLMUL(x2,y2,0);\ -+ (z3)=PCLMUL(x2,y2,0);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z3,z2);\ -- z2=PXOR_(z3,z1);\ -+ (z2)=PXOR_(z3,z1);\ - \ - PCLMUL128_ADD_GF2X(z2,z3,u31,u32,x,y,sum,res1,res2);} - -@@ -2904,108 +2904,108 @@ - /* Classical: 16 mul64, 2 PMIDDLE, 15 other instructions */ - #define PCLMUL256_WS_CLAS_GF2X(z3,z4,z1,z2,x1,x2,y1,y2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(y1, x2, 0x11);\ -- z2=PCLMUL(y2, x2, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0x11);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x11);\ -+ (z2)=PCLMUL(y2, x2, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^320 */\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PCLMUL(x2, y2, 
0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- res1=PCLMUL(x2,y2,0x11);\ -- sum=PRSHIFT64(res2);\ -- z2=PXOR_(res1,sum); -+ (res1)=PCLMUL(x2,y2,0x11);\ -+ (sum)=PRSHIFT64(res2);\ -+ (z2)=PXOR_(res1,sum); - - - /* Classical: 16 mul64, 2 PMIDDLE, 19 other instructions */ - /* xor the res to z3,z4,z5,z6 */ - #define PCLMUL256_ADD_CLAS_GF2X(z3,z4,z5,z6,z1,z2,x1,x2,y1,y2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3^=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)^=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)^=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(y1, x2, 0x11);\ -- z2=PCLMUL(y2, x2, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0x11);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x11);\ -+ (z2)=PCLMUL(y2, x2, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^320 */\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)^=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- res1=PCLMUL(x2,y2,0x11);\ -- sum=PRSHIFT64(res2);\ -- z6^=PXOR_(res1,sum); -+ (res1)=PCLMUL(x2,y2,0x11);\ -+ (sum)=PRSHIFT64(res2);\ -+ (z6)^=PXOR_(res1,sum); - - - /* Karatsuba: 3 mul128, 5 other instructions */ -@@ -3014,11 +3014,11 @@ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL128_WS_GF2X(z3,z4,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z3,z2);\ -- z2=PXOR_(z3,z1);\ -+ (z2)=PXOR_(z3,z1);\ - PXOR1_2(z3,z4);\ - \ - PCLMUL128_ADD_GF2X(z2,z3,u41,u42,x,y,sum,res1,res2);} -@@ -3030,8 +3030,8 @@ - PCLMUL128_WS_GF2X(u41,u42,x1,y1,sum,res1,res2);\ - PCLMUL128_WS_GF2X(z5,z6,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z1,u41);\ - PXOR1_2(z4,z6);\ -@@ -3049,11 +3049,11 @@ - PCLMUL128_WS_KAR_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL128_WS_KAR_GF2X(z3,z4,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z3,z2);\ -- z2=PXOR_(z3,z1);\ -+ (z2)=PXOR_(z3,z1);\ - PXOR1_2(z3,z4);\ - \ - PCLMUL128_ADD_KAR_GF2X(z2,z3,u41,u42,x,y,sum,res1,res2);} -@@ -3065,8 +3065,8 @@ - PCLMUL128_WS_KAR_GF2X(u41,u42,x1,y1,sum,res1,res2);\ - 
PCLMUL128_WS_KAR_GF2X(z5,z6,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(z1,u41);\ - PXOR1_2(z4,z6);\ -@@ -3081,77 +3081,77 @@ - /* Classical: 25 mul64, 3 PMIDDLE, 23 other instructions */ - #define PCLMUL320_WS_CLAS_GF2X(z3,z4,z5,z1,z2,x1,x2,x3,y1,y2,y3,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y3, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- z1=PCLMUL(x2, y3, 0);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 1);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 1);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^512 */\ -- res1=PCLMUL(x3,y3,0);\ -- res2=PRSHIFT64(sum);\ -- z2=PXOR_(res1,res2); -+ (res1)=PCLMUL(x3,y3,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,res2); - - - /* Classical: 25 mul64, 3 PMIDDLE, 28 other instructions */ -@@ -3159,77 +3159,77 @@ - #define PCLMUL320_ADD_CLAS_GF2X(z3,z4,z5,z6,z7,z1,z2,x1,x2,x3,y1,y2,y3,\ - sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- 
res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3^=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)^=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)^=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y3, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)^=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- z1=PCLMUL(x2, y3, 0);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 1);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 1);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z6^=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z6)^=PXOR_(res1,z2);\ - \ - /* X^512 */\ -- res1=PCLMUL(x3,y3,0);\ -- res2=PRSHIFT64(sum);\ -- z7^=PXOR_(res1,res2); -+ (res1)=PCLMUL(x3,y3,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z7)^=PXOR_(res1,res2); - - - /* Karatsuba: 2 mul192, 1 mul128, 9 other instructions */ -@@ -3238,11 +3238,11 @@ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL192_WS_GF2X(z3,z4,z5,x2,x3,y2,y3,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x2);\ -- y1m=PXOR_(y1,y2);\ -+ (x1m)=PXOR_(x1,x2);\ -+ (y1m)=PXOR_(y1,y2);\ - \ - PXOR1_2(z3,z2);\ -- z2=PXOR_(z3,z1);\ -+ (z2)=PXOR_(z3,z1);\ - PXOR1_2(z3,z4);\ - PXOR1_2(z4,z5);\ - \ -@@ -3261,11 +3261,11 @@ - PCLMUL128_WS_GF2X(z6,z7,x1,y1,sum,res1,res2);\ - PCLMUL192_WS_GF2X(R1,R2,R3,x2,x3,y2,y3,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x2);\ -- y1m=PXOR_(y1,y2);\ -+ (x1m)=PXOR_(x1,x2);\ -+ (y1m)=PXOR_(y1,y2);\ - \ - PXOR1_2(R1,z7);\ -- z7=PXOR_(R1,z6);\ -+ (z7)=PXOR_(R1,z6);\ - PXOR1_2(R1,R2);\ - PXOR1_2(R2,R3);\ - \ -@@ -3286,104 +3286,104 @@ - #define 
PCLMUL384_WS_CLAS_GF2X(z3,z4,z5,z6,z1,z2,x1,x2,x3,y1,y2,y3,\ - sum,res1,res2)\ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 0x10);\ -- z2=PCLMUL(x1, y3, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y2, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 1);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y3, 0x10);\ -+ (z2)=PCLMUL(x1, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 1);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)=PXOR_(res1,z2);\ - \ - /* X^384 */\ -- z1=PCLMUL(x1, y3, 0x11);\ -- z2=PCLMUL(x2, y3, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x11);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x3, y2, 0);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x3, y1, 0x11);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x1, y3, 0x11);\ -+ (z2)=PCLMUL(x2, y3, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x11);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x3, y2, 0);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x11);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 0x10);\ -- z2=PCLMUL(x2, y3, 1);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x3, y2, 1);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 0x10);\ -+ (z2)=PCLMUL(x2, y3, 1);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x3, y2, 1);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z6=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z6)=PXOR_(res1,z2);\ - \ - /* X^512 */\ -- z1=PCLMUL(x2, y3, 0x11);\ -- z2=PCLMUL(x3, y3, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0x11);\ -- 
res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y3, 0x11);\ -+ (z2)=PCLMUL(x3, y3, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^576 */\ -- z1=PCLMUL(x3, y3, 0x10);\ -- z2=PCLMUL(x3, y3, 1);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 0x10);\ -+ (z2)=PCLMUL(x3, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^640 */\ -- res1=PCLMUL(x3,y3,0x11);\ -- sum=PRSHIFT64(res2);\ -- z2=PXOR_(res1,sum); -+ (res1)=PCLMUL(x3,y3,0x11);\ -+ (sum)=PRSHIFT64(res2);\ -+ (z2)=PXOR_(res1,sum); - - - /* Karatsuba: 3 mul192, 4 PMIDDLE, 18 other instructions */ -@@ -3392,10 +3392,10 @@ - {__m128i x1m,x2m,y1m,y2m,R1,R2,R3;\ - PCLMUL192_WS_GF2X(z1,z2,z3,x1,x2,y1,y2,sum,res1,res2);\ - \ -- x1m=PMIDDLE(x2,x3);\ -- x2m=PRSHIFT64(x3);\ -- y1m=PMIDDLE(y2,y3);\ -- y2m=PRSHIFT64(y3);\ -+ (x1m)=PMIDDLE(x2,x3);\ -+ (x2m)=PRSHIFT64(x3);\ -+ (y1m)=PMIDDLE(y2,y3);\ -+ (y2m)=PRSHIFT64(y3);\ - \ - PCLMUL192_WS_GF2X(z4,z5,z6,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ -@@ -3419,135 +3419,135 @@ - #define PCLMUL448_WS_CLAS_GF2X(z3,z4,z5,z6,z7,z1,z2,x1,x2,x3,x4,y1,y2,y3,y4,\ - sum,res1,res2)\ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z3=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z3)=PXOR_(res1,sum);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z4=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z4)=PXOR_(res1,z2);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 0x10);\ -- z2=PCLMUL(x1, y3, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y2, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 1);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y3, 0x10);\ -+ (z2)=PCLMUL(x1, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 1);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z5=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z5)=PXOR_(res1,z2);\ - \ 
- /* X^384 */\ -- z1=PCLMUL(x1, y4, 0);\ -- z2=PCLMUL(x1, y3, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y3, 0);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x3, y1, 0x11);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x4, y1, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x1, y4, 0);\ -+ (z2)=PCLMUL(x1, y3, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x3, y1, 0x11);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x4, y1, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x1, y4, 1);\ -- z2=PCLMUL(x2, y3, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x2, y3, 1);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x3, y2, 1);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x4, y1, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y4, 1);\ -+ (z2)=PCLMUL(x2, y3, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x2, y3, 1);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x3, y2, 1);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x4, y1, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z6=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z6)=PXOR_(res1,z2);\ - \ - /* X^512 */\ -- z1=PCLMUL(x2, y4, 0);\ -- z2=PCLMUL(x2, y3, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y3, 0);\ -- res1=PXOR_(res2,z1);\ -- z2=PCLMUL(x3, y2, 0x11);\ -- res2=PXOR_(res1,z2);\ -- z1=PCLMUL(x4, y2, 0);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y4, 0);\ -+ (z2)=PCLMUL(x2, y3, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z2)=PCLMUL(x3, y2, 0x11);\ -+ (res2)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x4, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^576 */\ -- z1=PCLMUL(x2, y4, 1);\ -- z2=PCLMUL(x3, y3, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y3, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x4, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y4, 1);\ -+ (z2)=PCLMUL(x3, y3, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x4, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(sum,res2);\ -- z7=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z7)=PXOR_(res1,z2);\ - \ - /* X^640 */\ -- z1=PCLMUL(x3, y4, 0);\ -- z2=PCLMUL(x3, y3, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x4, y3, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x3, y4, 0);\ -+ (z2)=PCLMUL(x3, y3, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x4, y3, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^704 */\ -- z1=PCLMUL(x3, y4, 1);\ -- z2=PCLMUL(x4, y3, 0x10);\ -- sum=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y4, 1);\ -+ (z2)=PCLMUL(x4, y3, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ - \ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ - \ - /* X^768 */\ -- res1=PCLMUL(x4,y4,0);\ -- res2=PRSHIFT64(sum);\ -- z2=PXOR_(res1,res2); -+ (res1)=PCLMUL(x4,y4,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,res2); - - - /* Karatsuba: 2 mul256, 1 mul192, 9 other instructions */ -@@ -3557,16 +3557,16 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL192_WS_GF2X(z5,z6,z7,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- 
z5=PXOR_(z5,z7);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ - \ - PCLMUL256_ADD_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);} - -@@ -3578,17 +3578,17 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL256_WS_GF2X(z5,z6,z7,z8,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ - \ - PCLMUL256_ADD_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);} - -@@ -3600,17 +3600,17 @@ - PCLMUL256_WS_KAR2_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL256_WS_KAR2_GF2X(z5,z6,z7,z8,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ - \ - PCLMUL256_ADD_KAR2_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);} - -@@ -3622,18 +3622,18 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL320_WS_GF2X(z5,z6,z7,z8,z9,x3,x4,x5,y3,y4,y5,sum,res1,res2);\ - \ -- x11m=PXOR_(x1,x3);\ -- x22m=PXOR_(x2,x4);\ -- y11m=PXOR_(y1,y3);\ -- y22m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -- z7=PXOR_(z7,z9);\ -+ (x11m)=PXOR_(x1,x3);\ -+ (x22m)=PXOR_(x2,x4);\ -+ (y11m)=PXOR_(y1,y3);\ -+ (y22m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ -+ (z7)=PXOR_(z7,z9);\ - \ - PCLMUL320_ADD_GF2X(z3,z4,z5,z6,z7,t1,t2,x11m,x22m,x5,y11m,y22m,y5,\ - sum,res1,res2);} -@@ -3683,54 +3683,54 @@ - - /* 1 mul64 */ - #define PCLMUL64_GF2X(C,x,y,z,pos) \ -- z=PCLMUL(x, y, pos);\ -+ (z)=PCLMUL(x, y, pos);\ - PSTORE128(C,z); - - - /* Classical: 4 mul64, 5 other instructions */ - #define PCLMUL128_CLAS_FINAL(FINAL_STORE,C,x,y,z1,z2,sum,res_low,res_high) \ - /* X^0 */\ -- res_low=PCLMUL(x,y,0);\ -+ (res_low)=PCLMUL(x,y,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x, y, 1);\ -- z2=PCLMUL(x, y, 0x10);\ -- res_high=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x, y, 1);\ -+ (z2)=PCLMUL(x, y, 0x10);\ -+ (res_high)=PXOR_(z1,z2);\ - \ - /* mid2_low: x^64 ... x^127 */\ -- sum=PLSHIFT64(res_high);\ -+ (sum)=PLSHIFT64(res_high);\ - /* mid2_low + L */\ -- z1=PXOR_(res_low,sum);\ -+ (z1)=PXOR_(res_low,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- res_low=PCLMUL(x,y,0x11);\ -+ (res_low)=PCLMUL(x,y,0x11);\ - \ - /* mid2_high: x^128 ... 
x^191 */\ -- sum=PRSHIFT64(res_high);\ -+ (sum)=PRSHIFT64(res_high);\ - /* mid2_high + H */\ -- z2=PXOR_(res_low,sum);\ -+ (z2)=PXOR_(res_low,sum);\ - FINAL_STORE; - - - #define PCLMUL96_CLAS_GF2X(C,x,y,z1,z2,sum,res_low,res_high) \ -- PCLMUL128_CLAS_FINAL(PSTOREL(C+2,z2),C,x,y,z1,z2,sum,res_low,res_high) -+ PCLMUL128_CLAS_FINAL(PSTOREL((C)+2,z2),C,x,y,z1,z2,sum,res_low,res_high) - #define PCLMUL128_CLAS_GF2X(C,x,y,z1,z2,sum,res_low,res_high) \ -- PCLMUL128_CLAS_FINAL(PSTORE128(C+2,z2),C,x,y,z1,z2,sum,res_low,res_high) -+ PCLMUL128_CLAS_FINAL(PSTORE128((C)+2,z2),C,x,y,z1,z2,sum,res_low,res_high) - - - /* Karatsuba: 3 mul64, 10 other instructions */ - #define PCLMUL128_KAR_FINAL(FINAL_STORE,C,x,y,z1,z2,sum,res_low,res_high) \ - /* X^0 */\ -- z1=PCLMUL(x,y,0);\ -+ (z1)=PCLMUL(x,y,0);\ - /* X^128 */\ -- z2=PCLMUL(x,y,0x11);\ -+ (z2)=PCLMUL(x,y,0x11);\ - \ -- res_low=PXOR_(x,PRSHIFT64(x));\ -- res_high=PXOR_(y,PRSHIFT64(y));\ -+ (res_low)=PXOR_(x,PRSHIFT64(x));\ -+ (res_high)=PXOR_(y,PRSHIFT64(y));\ - \ - /* X^64 */\ -- sum=PCLMUL(res_low,res_high,0);\ -+ (sum)=PCLMUL(res_low,res_high,0);\ - PXOR1_2(sum,z1);\ - PXOR1_2(sum,z2);\ - \ -@@ -3742,52 +3742,52 @@ - - - #define PCLMUL96_KAR_GF2X(C,x,y,z1,z2,sum,res_low,res_high) \ -- PCLMUL128_KAR_FINAL(PSTOREL(C+2,z2),C,x,y,z1,z2,sum,res_low,res_high) -+ PCLMUL128_KAR_FINAL(PSTOREL((C)+2,z2),C,x,y,z1,z2,sum,res_low,res_high) - #define PCLMUL128_KAR_GF2X(C,x,y,z1,z2,sum,res_low,res_high) \ -- PCLMUL128_KAR_FINAL(PSTORE128(C+2,z2),C,x,y,z1,z2,sum,res_low,res_high) -+ PCLMUL128_KAR_FINAL(PSTORE128((C)+2,z2),C,x,y,z1,z2,sum,res_low,res_high) - - - /* Classical: 9 mul64, 1 PMIDDLE, 9 other instructions */ - #define PCLMUL192_CLAS_FINAL(FINAL_STORE,C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PCLMUL(x1, y2, 1);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PCLMUL(x1, y2, 1);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- res1=PCLMUL(x2,y2,0);\ -- z1=PRSHIFT64(sum);\ -- z2=PXOR_(res1,z1);\ -+ (res1)=PCLMUL(x2,y2,0);\ -+ (z1)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,z1);\ - FINAL_STORE; - - - #define PCLMUL160_CLAS_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL192_CLAS_FINAL(PSTOREL(C+4,z2),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) -+ PCLMUL192_CLAS_FINAL(PSTOREL((C)+4,z2),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) - #define PCLMUL192_CLAS_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL192_CLAS_FINAL(PSTORE128(C+4,z2),C,x1,x2,y1,y2,\ -+ PCLMUL192_CLAS_FINAL(PSTORE128((C)+4,z2),C,x1,x2,y1,y2,\ - z1,z2,sum,res1,res2) - - -@@ -3795,15 +3795,15 @@ - #define PCLMUL192_KAR_FINAL(FINAL_STORE,C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ - {__m128i u31,u333;\ - /* A0*B0 */\ -- z1=PCLMUL(x1,y1,0);\ -+ (z1)=PCLMUL(x1,y1,0);\ - /* 
A1*B1 */\ -- z2=PCLMUL(x1,y1,0x11);\ -+ (z2)=PCLMUL(x1,y1,0x11);\ - /* A2*B2 */\ -- u333=PCLMUL(x2,y2,0);\ -+ (u333)=PCLMUL(x2,y2,0);\ - \ -- res1=PXOR_(z1,z2);\ -- res2=PXOR_(u333,z2);\ -- z2=PXOR_(res1,u333);\ -+ (res1)=PXOR_(z1,z2);\ -+ (res2)=PXOR_(u333,z2);\ -+ (z2)=PXOR_(res1,u333);\ - /* C[0] = C0 - C[1] = C1^(C0^C2) - C[2] = C2^(C1^C3)^C0^C4 -@@ -3811,9 +3811,9 @@ - C[4] = C4^(C5^C3) - C[5] = C5 */\ - /* (A2 A2) */\ -- u31=PSHUFFLE_32_1010(x2);\ -+ (u31)=PSHUFFLE_32_1010(x2);\ - /* (B2 B2) */\ -- sum=PSHUFFLE_32_1010(y2);\ -+ (sum)=PSHUFFLE_32_1010(y2);\ - /* (A2 A2) ^ (A0 A1) */\ - PXOR1_2(u31,x1);\ - /* (B2 B2) ^ (B0 B1) */\ -@@ -3832,77 +3832,77 @@ - \ - PSTORE128(C,z1);\ - FINAL_STORE;\ -- PSTORE128(C+2,z2);} -+ PSTORE128((C)+2,z2);} - - - #define PCLMUL160_KAR_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL192_KAR_FINAL(PSTOREL(C+4,u333),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) -+ PCLMUL192_KAR_FINAL(PSTOREL((C)+4,u333),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) - #define PCLMUL192_KAR_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL192_KAR_FINAL(PSTORE128(C+4,u333),C,x1,x2,y1,y2,\ -+ PCLMUL192_KAR_FINAL(PSTORE128((C)+4,u333),C,x1,x2,y1,y2,\ - z1,z2,sum,res1,res2) - - - /* Classical: 16 mul64, 2 PMIDDLE, 15 other instructions */ - #define PCLMUL256_CLAS_FINAL(FINAL_STORE,C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- z1=PCLMUL(y1, x2, 0x11);\ -- z2=PCLMUL(y2, x2, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0x11);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x11);\ -+ (z2)=PCLMUL(y2, x2, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^320 */\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+4,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+4,z1);\ - \ - /* X^384 */\ -- res1=PCLMUL(x2,y2,0x11);\ -- z1=PRSHIFT64(res2);\ -- z2=PXOR_(res1,z1);\ -+ (res1)=PCLMUL(x2,y2,0x11);\ -+ (z1)=PRSHIFT64(res2);\ -+ (z2)=PXOR_(res1,z1);\ - FINAL_STORE; - - - #define PCLMUL224_CLAS_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL256_CLAS_FINAL(PSTOREL(C+6,z2),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) -+ 
PCLMUL256_CLAS_FINAL(PSTOREL((C)+6,z2),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) - #define PCLMUL256_CLAS_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL256_CLAS_FINAL(PSTORE128(C+6,z2),C,x1,x2,y1,y2,\ -+ PCLMUL256_CLAS_FINAL(PSTORE128((C)+6,z2),C,x1,x2,y1,y2,\ - z1,z2,sum,res1,res2) - - -@@ -3912,25 +3912,25 @@ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL128_WS_GF2X(u43,u44,x2,y2,sum,res1,res2);\ - \ -- x=PXOR_(x1,x2);\ -- y=PXOR_(y1,y2);\ -+ (x)=PXOR_(x1,x2);\ -+ (y)=PXOR_(y1,y2);\ - \ - PXOR1_2(u43,z2);\ -- z2=PXOR_(u43,z1);\ -+ (z2)=PXOR_(u43,z1);\ - PXOR1_2(u43,u44);\ - \ - PCLMUL128_ADD_GF2X(z2,u43,u41,u42,x,y,sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,u43);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,u43);\ - FINAL_STORE;} - - - #define PCLMUL224_KAR_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL256_KAR_FINAL(PSTOREL(C+6,u44),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) -+ PCLMUL256_KAR_FINAL(PSTOREL((C)+6,u44),C,x1,x2,y1,y2,z1,z2,sum,res1,res2) - #define PCLMUL256_KAR_GF2X(C,x1,x2,y1,y2,z1,z2,sum,res1,res2) \ -- PCLMUL256_KAR_FINAL(PSTORE128(C+6,u44),C,x1,x2,y1,y2,z1,z2,\ -+ PCLMUL256_KAR_FINAL(PSTORE128((C)+6,u44),C,x1,x2,y1,y2,z1,z2,\ - sum,res1,res2) - - -@@ -3938,89 +3938,89 @@ - #define PCLMUL320_CLAS_FINAL(FINAL_STORE,C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 1);\ -- z2=PCLMUL(x2, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+4,z1);\ -+ (z1)=PCLMUL(x1, y3, 1);\ -+ (z2)=PCLMUL(x2, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ 
(z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+4,z1);\ - \ - /* X^384 */\ -- z1=PCLMUL(x2, y3, 0);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 1);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+6,z1);\ -+ (z1)=PCLMUL(x2, y3, 1);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+6,z1);\ - \ - /* X^512 */\ -- res1=PCLMUL(x3,y3,0);\ -- z1=PRSHIFT64(sum);\ -- z2=PXOR_(res1,z1);\ -+ (res1)=PCLMUL(x3,y3,0);\ -+ (z1)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,z1);\ - FINAL_STORE; - - - #define PCLMUL288_CLAS_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL320_CLAS_FINAL(PSTOREL(C+8,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL320_CLAS_FINAL(PSTOREL((C)+8,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - #define PCLMUL320_CLAS_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL320_CLAS_FINAL(PSTORE128(C+8,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL320_CLAS_FINAL(PSTORE128((C)+8,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - - -@@ -4031,11 +4031,11 @@ - PCLMUL128_WS_GF2X(z1,z2,x1,y1,sum,res1,res2);\ - PCLMUL192_WS_GF2X(u53,u54,u55,x2,x3,y2,y3,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x2);\ -- y1m=PXOR_(y1,y2);\ -+ (x1m)=PXOR_(x1,x2);\ -+ (y1m)=PXOR_(y1,y2);\ - \ - PXOR1_2(u53,z2);\ -- z2=PXOR_(u53,z1);\ -+ (z2)=PXOR_(u53,z1);\ - PXOR1_2(u53,u54);\ - PXOR1_2(u54,u55);\ - \ -@@ -4046,17 +4046,17 @@ - PXOR1_2(u54,R3);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,u53);\ -- PSTORE128(C+6,u54);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,u53);\ -+ PSTORE128((C)+6,u54);\ - FINAL_STORE;} - - - #define PCLMUL288_KAR_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL320_KAR_FINAL(PSTOREL(C+8,u55),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL320_KAR_FINAL(PSTOREL((C)+8,u55),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - #define PCLMUL320_KAR_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL320_KAR_FINAL(PSTORE128(C+8,u55),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL320_KAR_FINAL(PSTORE128((C)+8,u55),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - - -@@ -4064,117 +4064,117 @@ - #define PCLMUL384_CLAS_FINAL(FINAL_STORE,C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ 
(z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 0x10);\ -- z2=PCLMUL(x1, y3, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y2, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 1);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+4,z1);\ -+ (z1)=PCLMUL(x1, y3, 0x10);\ -+ (z2)=PCLMUL(x1, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+4,z1);\ - \ - /* X^384 */\ -- z1=PCLMUL(x1, y3, 0x11);\ -- z2=PCLMUL(x2, y3, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x11);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x3, y2, 0);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x3, y1, 0x11);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x1, y3, 0x11);\ -+ (z2)=PCLMUL(x2, y3, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x11);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x3, y2, 0);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x11);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x2, y3, 0x10);\ -- z2=PCLMUL(x2, y3, 1);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x3, y2, 1);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+6,z1);\ -+ (z1)=PCLMUL(x2, y3, 0x10);\ -+ (z2)=PCLMUL(x2, y3, 1);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x3, y2, 1);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+6,z1);\ - \ - /* X^512 */\ -- z1=PCLMUL(x2, y3, 0x11);\ -- z2=PCLMUL(x3, y3, 0);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y2, 0x11);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y3, 0x11);\ -+ (z2)=PCLMUL(x3, y3, 0);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0x11);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^576 */\ -- z1=PCLMUL(x3, y3, 0x10);\ -- z2=PCLMUL(x3, y3, 1);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+8,z1);\ -+ (z1)=PCLMUL(x3, y3, 0x10);\ -+ (z2)=PCLMUL(x3, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+8,z1);\ - \ - /* X^640 */\ -- res1=PCLMUL(x3,y3,0x11);\ -- z1=PRSHIFT64(res2);\ -- z2=PXOR_(res1,z1);\ -+ (res1)=PCLMUL(x3,y3,0x11);\ -+ (z1)=PRSHIFT64(res2);\ -+ (z2)=PXOR_(res1,z1);\ - FINAL_STORE; - - - #define PCLMUL352_CLAS_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL384_CLAS_FINAL(PSTOREL(C+10,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL384_CLAS_FINAL(PSTOREL((C)+10,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - #define 
PCLMUL384_CLAS_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL384_CLAS_FINAL(PSTORE128(C+10,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL384_CLAS_FINAL(PSTORE128((C)+10,z2),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - - -@@ -4184,10 +4184,10 @@ - {__m128i x1m,x2m,y1m,y2m,R1,R2,R3,u63,u64,u65,u66;\ - PCLMUL192_WS_GF2X(z1,z2,u63,x1,x2,y1,y2,sum,res1,res2);\ - \ -- x1m=PMIDDLE(x2,x3);\ -- x2m=PRSHIFT64(x3);\ -- y1m=PMIDDLE(y2,y3);\ -- y2m=PRSHIFT64(y3);\ -+ (x1m)=PMIDDLE(x2,x3);\ -+ (x2m)=PRSHIFT64(x3);\ -+ (y1m)=PMIDDLE(y2,y3);\ -+ (y2m)=PRSHIFT64(y3);\ - \ - PCLMUL192_WS_GF2X(u64,u65,u66,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ -@@ -4207,18 +4207,18 @@ - PXOR1_2(u64,PMIDDLE(R2,R3));\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,u63);\ -- PSTORE128(C+6,u64);\ -- PSTORE128(C+8,u65);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,u63);\ -+ PSTORE128((C)+6,u64);\ -+ PSTORE128((C)+8,u65);\ - FINAL_STORE;} - - - #define PCLMUL352_KAR_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL384_KAR_FINAL(PSTOREL(C+10,u66),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL384_KAR_FINAL(PSTOREL((C)+10,u66),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - #define PCLMUL384_KAR_GF2X(C,x1,x2,x3,y1,y2,y3,z1,z2,sum,res1,res2) \ -- PCLMUL384_KAR_FINAL(PSTORE128(C+10,u66),C,x1,x2,x3,y1,y2,y3,z1,z2,\ -+ PCLMUL384_KAR_FINAL(PSTORE128((C)+10,u66),C,x1,x2,x3,y1,y2,y3,z1,z2,\ - sum,res1,res2) - - -@@ -4226,150 +4226,150 @@ - #define PCLMUL448_CLAS_FINAL(FINAL_STORE,C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,\ - sum,res1,res2) \ - /* X^0 */\ -- res1=PCLMUL(x1,y1,0);\ -+ (res1)=PCLMUL(x1,y1,0);\ - \ - /* X^64 */\ -- z1=PCLMUL(x1, y1, 1);\ -- z2=PCLMUL(x1, y1, 0x10);\ -- res2=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y1, 1);\ -+ (z2)=PCLMUL(x1, y1, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ - \ -- sum=PLSHIFT64(res2);\ -- z1=PXOR_(res1,sum);\ -+ (sum)=PLSHIFT64(res2);\ -+ (z1)=PXOR_(res1,sum);\ - PSTORE128(C,z1);\ - \ - /* X^128 */\ -- z1=PCLMUL(x1, y1, 0x11);\ -- z2=PCLMUL(x2, y1, 0);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x1, y2, 0);\ -- res1=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(x1, y1, 0x11);\ -+ (z2)=PCLMUL(x2, y1, 0);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x1, y2, 0);\ -+ (res1)=PXOR_(sum,z1);\ - \ - /* X^192 */\ -- z1=PCLMUL(x1, y2, 1);\ -- z2=PCLMUL(x1, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(y1, x2, 1);\ -- z2=PXOR_(sum,z1);\ -- z1=PCLMUL(y1, x2, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+2,z1);\ -+ (z1)=PCLMUL(x1, y2, 1);\ -+ (z2)=PCLMUL(x1, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(y1, x2, 1);\ -+ (z2)=PXOR_(sum,z1);\ -+ (z1)=PCLMUL(y1, x2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+2,z1);\ - \ - /* X^256 */\ -- z1=PCLMUL(x1, y3, 0);\ -- z2=PCLMUL(x1, y2, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0);\ -- res1=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y1, 0x11);\ -- res2=PXOR_(z1,res1);\ -- z2=PCLMUL(x3, y1, 0);\ -- res1=PXOR_(res2,z2);\ -+ (z1)=PCLMUL(x1, y3, 0);\ -+ (z2)=PCLMUL(x1, y2, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y1, 0x11);\ -+ (res2)=PXOR_(z1,res1);\ -+ (z2)=PCLMUL(x3, y1, 0);\ -+ (res1)=PXOR_(res2,z2);\ - \ - /* X^320 */\ -- z1=PCLMUL(x1, y3, 0x10);\ -- z2=PCLMUL(x1, y3, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y2, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x2, y2, 1);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y1, 0x10);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x3, y1, 1);\ -- res2=PXOR_(z1,z2);\ --\ -- 
z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+4,z1);\ -+ (z1)=PCLMUL(x1, y3, 0x10);\ -+ (z2)=PCLMUL(x1, y3, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y2, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y2, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y1, 0x10);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x3, y1, 1);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+4,z1);\ - \ - /* X^384 */\ -- z1=PCLMUL(x1, y4, 0);\ -- z2=PCLMUL(x1, y3, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x2, y3, 0);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x2, y2, 0x11);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x3, y2, 0);\ -- res1=PXOR_(z1,sum);\ -- z2=PCLMUL(x3, y1, 0x11);\ -- sum=PXOR_(res1,z2);\ -- z1=PCLMUL(x4, y1, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x1, y4, 0);\ -+ (z2)=PCLMUL(x1, y3, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x2, y3, 0);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x2, y2, 0x11);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x3, y2, 0);\ -+ (res1)=PXOR_(z1,sum);\ -+ (z2)=PCLMUL(x3, y1, 0x11);\ -+ (sum)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x4, y1, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^448 */\ -- z1=PCLMUL(x1, y4, 1);\ -- z2=PCLMUL(x2, y3, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x2, y3, 1);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x3, y2, 0x10);\ -- sum=PXOR_(z1,z2);\ -- z2=PCLMUL(x3, y2, 1);\ -- z1=PXOR_(sum,z2);\ -- z2=PCLMUL(x4, y1, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+6,z1);\ -+ (z1)=PCLMUL(x1, y4, 1);\ -+ (z2)=PCLMUL(x2, y3, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x2, y3, 1);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x3, y2, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z2)=PCLMUL(x3, y2, 1);\ -+ (z1)=PXOR_(sum,z2);\ -+ (z2)=PCLMUL(x4, y1, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+6,z1);\ - \ - /* X^512 */\ -- z1=PCLMUL(x2, y4, 0);\ -- z2=PCLMUL(x2, y3, 0x11);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y3, 0);\ -- res1=PXOR_(res2,z1);\ -- z2=PCLMUL(x3, y2, 0x11);\ -- res2=PXOR_(res1,z2);\ -- z1=PCLMUL(x4, y2, 0);\ -- res1=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x2, y4, 0);\ -+ (z2)=PCLMUL(x2, y3, 0x11);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 0);\ -+ (res1)=PXOR_(res2,z1);\ -+ (z2)=PCLMUL(x3, y2, 0x11);\ -+ (res2)=PXOR_(res1,z2);\ -+ (z1)=PCLMUL(x4, y2, 0);\ -+ (res1)=PXOR_(res2,z1);\ - \ - /* X^576 */\ -- z1=PCLMUL(x2, y4, 1);\ -- z2=PCLMUL(x3, y3, 0x10);\ -- res2=PXOR_(z1,z2);\ -- z1=PCLMUL(x3, y3, 1);\ -- z2=PXOR_(res2,z1);\ -- z1=PCLMUL(x4, y2, 0x10);\ -- res2=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(sum,res2);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+8,z1);\ -+ (z1)=PCLMUL(x2, y4, 1);\ -+ (z2)=PCLMUL(x3, y3, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x3, y3, 1);\ -+ (z2)=PXOR_(res2,z1);\ -+ (z1)=PCLMUL(x4, y2, 0x10);\ -+ (res2)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(sum,res2);\ -+ (z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+8,z1);\ - \ - /* X^640 */\ -- z1=PCLMUL(x3, y4, 0);\ -- z2=PCLMUL(x3, y3, 0x11);\ -- sum=PXOR_(z1,z2);\ -- z1=PCLMUL(x4, y3, 0);\ -- res1=PXOR_(z1,sum);\ -+ (z1)=PCLMUL(x3, y4, 0);\ -+ (z2)=PCLMUL(x3, y3, 0x11);\ -+ (sum)=PXOR_(z1,z2);\ -+ (z1)=PCLMUL(x4, y3, 0);\ -+ (res1)=PXOR_(z1,sum);\ - \ - /* X^704 */\ -- z1=PCLMUL(x3, y4, 1);\ -- z2=PCLMUL(x4, y3, 0x10);\ -- sum=PXOR_(z1,z2);\ --\ -- z2=PMIDDLE(res2,sum);\ -- z1=PXOR_(res1,z2);\ -- PSTORE128(C+10,z1);\ -+ (z1)=PCLMUL(x3, y4, 1);\ -+ (z2)=PCLMUL(x4, y3, 0x10);\ -+ (sum)=PXOR_(z1,z2);\ -+\ -+ (z2)=PMIDDLE(res2,sum);\ -+ 
(z1)=PXOR_(res1,z2);\ -+ PSTORE128((C)+10,z1);\ - \ - /* X^768 */\ -- res1=PCLMUL(x4,y4,0);\ -- res2=PRSHIFT64(sum);\ -- z2=PXOR_(res1,res2);\ -+ (res1)=PCLMUL(x4,y4,0);\ -+ (res2)=PRSHIFT64(sum);\ -+ (z2)=PXOR_(res1,res2);\ - FINAL_STORE; - - - #define PCLMUL416_CLAS_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL448_CLAS_FINAL(PSTOREL(C+12,z2),C,x1,x2,x3,x4,\ -+ PCLMUL448_CLAS_FINAL(PSTOREL((C)+12,z2),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - #define PCLMUL448_CLAS_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL448_CLAS_FINAL(PSTORE128(C+12,z2),C,x1,x2,x3,x4,\ -+ PCLMUL448_CLAS_FINAL(PSTORE128((C)+12,z2),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - -@@ -4380,34 +4380,34 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL192_WS_GF2X(z5,z6,z7,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ - \ - PCLMUL256_ADD_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,z3);\ -- PSTORE128(C+6,z4);\ -- PSTORE128(C+8,z5);\ -- PSTORE128(C+10,z6);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,z3);\ -+ PSTORE128((C)+6,z4);\ -+ PSTORE128((C)+8,z5);\ -+ PSTORE128((C)+10,z6);\ - FINAL_STORE;} - - - #define PCLMUL416_KAR_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL448_KAR_FINAL(PSTOREL(C+12,z7),C,x1,x2,x3,x4,\ -+ PCLMUL448_KAR_FINAL(PSTOREL((C)+12,z7),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - #define PCLMUL448_KAR_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL448_KAR_FINAL(PSTORE128(C+12,z7),C,x1,x2,x3,x4,\ -+ PCLMUL448_KAR_FINAL(PSTORE128((C)+12,z7),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - -@@ -4418,36 +4418,36 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL256_WS_GF2X(z5,z6,z7,z8,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ - \ - PCLMUL256_ADD_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,z3);\ -- PSTORE128(C+6,z4);\ -- PSTORE128(C+8,z5);\ -- PSTORE128(C+10,z6);\ -- PSTORE128(C+12,z7);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,z3);\ -+ PSTORE128((C)+6,z4);\ -+ PSTORE128((C)+8,z5);\ -+ PSTORE128((C)+10,z6);\ -+ PSTORE128((C)+12,z7);\ - FINAL_STORE;} - - - #define PCLMUL480_KAR_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL512_KAR_FINAL(PSTOREL(C+14,z8),C,x1,x2,x3,x4,\ -+ PCLMUL512_KAR_FINAL(PSTOREL((C)+14,z8),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - #define PCLMUL512_KAR_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL512_KAR_FINAL(PSTORE128(C+14,z8),C,x1,x2,x3,x4,\ -+ PCLMUL512_KAR_FINAL(PSTORE128((C)+14,z8),C,x1,x2,x3,x4,\ - 
y1,y2,y3,y4,z1,z2,sum,res1,res2) - - -@@ -4458,36 +4458,36 @@ - PCLMUL256_WS_KAR2_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL256_WS_KAR2_GF2X(z5,z6,z7,z8,x3,x4,y3,y4,sum,res1,res2);\ - \ -- x1m=PXOR_(x1,x3);\ -- x2m=PXOR_(x2,x4);\ -- y1m=PXOR_(y1,y3);\ -- y2m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -+ (x1m)=PXOR_(x1,x3);\ -+ (x2m)=PXOR_(x2,x4);\ -+ (y1m)=PXOR_(y1,y3);\ -+ (y2m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ - \ - PCLMUL256_ADD_KAR2_GF2X(z3,z4,z5,z6,t1,t2,x1m,x2m,y1m,y2m,sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,z3);\ -- PSTORE128(C+6,z4);\ -- PSTORE128(C+8,z5);\ -- PSTORE128(C+10,z6);\ -- PSTORE128(C+12,z7);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,z3);\ -+ PSTORE128((C)+6,z4);\ -+ PSTORE128((C)+8,z5);\ -+ PSTORE128((C)+10,z6);\ -+ PSTORE128((C)+12,z7);\ - FINAL_STORE;} - - - #define PCLMUL480_KAR2_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL512_KAR2_FINAL(PSTOREL(C+14,z8),C,x1,x2,x3,x4,\ -+ PCLMUL512_KAR2_FINAL(PSTOREL((C)+14,z8),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - #define PCLMUL512_KAR2_GF2X(C,x1,x2,x3,x4,y1,y2,y3,y4,z1,z2,sum,res1,res2) \ -- PCLMUL512_KAR2_FINAL(PSTORE128(C+14,z8),C,x1,x2,x3,x4,\ -+ PCLMUL512_KAR2_FINAL(PSTORE128((C)+14,z8),C,x1,x2,x3,x4,\ - y1,y2,y3,y4,z1,z2,sum,res1,res2) - - -@@ -4498,39 +4498,39 @@ - PCLMUL256_WS_GF2X(z1,z2,z3,z4,x1,x2,y1,y2,sum,res1,res2);\ - PCLMUL320_WS_GF2X(z5,z6,z7,z8,z9,x3,x4,x5,y3,y4,y5,sum,res1,res2);\ - \ -- x11m=PXOR_(x1,x3);\ -- x22m=PXOR_(x2,x4);\ -- y11m=PXOR_(y1,y3);\ -- y22m=PXOR_(y2,y4);\ --\ -- z5=PXOR_(z3,z5);\ -- z6=PXOR_(z4,z6);\ -- z3=PXOR_(z5,z1);\ -- z4=PXOR_(z6,z2);\ -- z5=PXOR_(z5,z7);\ -- z6=PXOR_(z6,z8);\ -- z7=PXOR_(z7,z9);\ -+ (x11m)=PXOR_(x1,x3);\ -+ (x22m)=PXOR_(x2,x4);\ -+ (y11m)=PXOR_(y1,y3);\ -+ (y22m)=PXOR_(y2,y4);\ -+\ -+ (z5)=PXOR_(z3,z5);\ -+ (z6)=PXOR_(z4,z6);\ -+ (z3)=PXOR_(z5,z1);\ -+ (z4)=PXOR_(z6,z2);\ -+ (z5)=PXOR_(z5,z7);\ -+ (z6)=PXOR_(z6,z8);\ -+ (z7)=PXOR_(z7,z9);\ - \ - PCLMUL320_ADD_GF2X(z3,z4,z5,z6,z7,t1,t2,x11m,x22m,x5,y11m,y22m,y5,\ - sum,res1,res2);\ - \ - PSTORE128(C,z1);\ -- PSTORE128(C+2,z2);\ -- PSTORE128(C+4,z3);\ -- PSTORE128(C+6,z4);\ -- PSTORE128(C+8,z5);\ -- PSTORE128(C+10,z6);\ -- PSTORE128(C+12,z7);\ -- PSTORE128(C+14,z8);\ -+ PSTORE128((C)+2,z2);\ -+ PSTORE128((C)+4,z3);\ -+ PSTORE128((C)+6,z4);\ -+ PSTORE128((C)+8,z5);\ -+ PSTORE128((C)+10,z6);\ -+ PSTORE128((C)+12,z7);\ -+ PSTORE128((C)+14,z8);\ - FINAL_STORE;} - - - #define PCLMUL544_KAR_GF2X(C,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,z1,z2,sum,res1,res2)\ -- PCLMUL576_KAR_FINAL(PSTOREL(C+16,z9),C,x1,x2,x3,x4,x5,\ -+ PCLMUL576_KAR_FINAL(PSTOREL((C)+16,z9),C,x1,x2,x3,x4,x5,\ - y1,y2,y3,y4,y5,z1,z2,sum,res1,res2) - - #define PCLMUL576_KAR_GF2X(C,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,z1,z2,sum,res1,res2)\ -- PCLMUL576_KAR_FINAL(PSTORE128(C+16,z9),C,x1,x2,x3,x4,x5,\ -+ PCLMUL576_KAR_FINAL(PSTORE128((C)+16,z9),C,x1,x2,x3,x4,x5,\ - y1,y2,y3,y4,y5,z1,z2,sum,res1,res2) - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_parameters_HFE.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_parameters_HFE.h deleted file mode 100644 index 4db7d2f..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_parameters_HFE.h +++ /dev/null @@ -1,13 +0,0 @@ ---- 
upstream/Optimized_Implementation/sign/GeMSS128/include/parameters_HFE.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/parameters_HFE.h -@@ -11,9 +11,6 @@ - #define GFq 2U - #define Log2_q 1 - /* For HFE, the previous parameter is necessarily 2. */ -- -- /** This type stores an element of GF(q). */ -- typedef unsigned char gf2; - #endif - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_rem_gf2n.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_rem_gf2n.h deleted file mode 100644 index 92284d6..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_rem_gf2n.h +++ /dev/null @@ -1,202 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/rem_gf2n.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/rem_gf2n.h -@@ -7,7 +7,6 @@ - #include "gf2x.h" - #include "tools_gf2n.h" - #include "macro.h" --#include "rem_gf2x.h" - #include "rem5_gf2n.h" - - -@@ -36,112 +35,84 @@ - - - /* Automatic choice of REM_GF2N */ -- --#if(NB_WORD_GFqn==1) -- #ifdef __TRINOMIAL_GF2N__ -- /* Example: REM64_TRINOMIAL_GF2X(P,Pol,HFEn,K3mod64,Q,R,MASK_GF2n) */ -- #if(K3==1) -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ --_TRINOMIAL_K31_GF2X(P,Pol,HFEn,K3mod64,Q,R,MASK_GF2n)) -- #else -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ --_TRINOMIAL_GF2X(P,Pol,HFEn,K3mod64,Q,R,MASK_GF2n)) -- #endif -- #endif -- -- #ifdef __PENTANOMIAL_GF2N__ -- #if (HFEn!=64) -- /* Example: REM64_PENTANOMIAL_GF2X(P,Pol,HFEn,K1,K2,K3mod64, -- Q,R,MASK_GF2n) */ -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ --_PENTANOMIAL_GF2X(P,Pol,HFEn,K1,K2,K3mod64,Q,R,MASK_GF2n)) -- #else -- /* HFEn == 64 */ -- #define REM_GF2N(P,Pol,Q,R) \ -- REM64_PENTANOMIAL_K64_GF2X(P,Pol,64,K1,K2,K3mod64,R) -- #endif -- #endif -- --#elif(NB_WORD_GFqn==2) -- #if (HFEn<97) -- #ifdef __TRINOMIAL_GF2N__ -- #define REM_GF2N(P,Pol,Q,R) REM96_TRINOMIAL_GF2X(P,Pol,\ --K3mod64,KI,Q,R,MASK_GF2n) -- #endif -- -- #ifdef __PENTANOMIAL_GF2N__ -- #define REM_GF2N(P,Pol,Q,R) REM96_PENTANOMIAL_GF2X(P,Pol,\ --K1,K2,K3mod64,KI,Q,R,MASK_GF2n) -- #endif -- #else -- #ifdef __TRINOMIAL_GF2N__ -- #define REM_GF2N(P,Pol,Q,R) REM128_TRINOMIAL_GF2X(P,Pol,\ --K3mod64,KI,KI64,Q,R,MASK_GF2n) -- #endif -- -- #ifdef __PENTANOMIAL_GF2N__ -- #if (HFEnr) -- #define REM_GF2N(P,Pol,Q,R) REM128_PENTANOMIAL_GF2X(P,Pol,\ --K1,K2,K3mod64,KI,KI64,Q,R,MASK_GF2n) -- #else -- /* HFEn == 128 */ -- #define REM_GF2N(P,Pol,Q,R) \ -- REM128_PENTANOMIAL_K128_GF2X(P,Pol,K1,K2,K3mod64,R) -- #endif -- #endif -- #endif -- --#else -- #ifdef __TRINOMIAL_GF2N__ -- #if ((HFEn>256)&&(HFEn<289)&&(K3>32)&&(K3<64)) -- #define REM_GF2N(P,Pol,Q,R) REM288_SPECIALIZED_TRINOMIAL_GF2X(P,Pol\ --,K3,KI,KI64,K364,Q,R,MASK_GF2n) -- #elif (HFEn==313) -- #define REM_GF2N(P,Pol,Q,R) REM320_SPECIALIZED_TRINOMIAL_GF2X(\ --P,Pol,K3mod64,KI,KI64,K364,Q,R,MASK_GF2n) -- #elif (HFEn==354) -- #define REM_GF2N(P,Pol,Q,R) REM384_SPECIALIZED_TRINOMIAL_GF2X(\ --P,Pol,K3mod64,KI,KI64,K364,Q,R,MASK_GF2n) -- #elif (HFEn==358) -- #define REM_GF2N(P,Pol,Q,R) REM384_SPECIALIZED358_TRINOMIAL_GF2X(\ --P,Pol,K3mod64,KI,KI64,K364,Q,R,MASK_GF2n) -- #elif (HFEn==402) -- #define REM_GF2N(P,Pol,Q,R) REM402_SPECIALIZED_TRINOMIAL_GF2X(\ --P,Pol,K3mod64,KI,KI64,K364,Q,R,MASK_GF2n) -- #else -- /* Example: REM192_TRINOMIAL_GF2X(P,Pol,K3mod64,KI,KI64,K364mod64, -- Q,R,MASK_GF2n) */ -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ 
--_TRINOMIAL_GF2X(P,Pol,K3mod64,KI,KI64,K364mod64,Q,R,MASK_GF2n)) -- #endif -- #endif -- -- #ifdef __PENTANOMIAL_GF2N__ -- #if ((HFEn==312)&&(K3==128)) -- #define REM_GF2N(P,Pol,Q,R) REM312_PENTANOMIAL_K3_IS_128_GF2X(\ --P,Pol,K1,K2,,KI,KI64,K164,K264,,Q,R,MASK_GF2n) -- #elif ((HFEn==448)&&(K3==64)) -- #define REM_GF2N(P,Pol,Q,R) REM448_PENTANOMIAL_K448_K3_IS_64_GF2X(\ --P,Pol,K1,K2,,K164,K264,,R) -- #elif ((HFEn==544)&&(K3==128)) -- #define REM_GF2N(P,Pol,Q,R) REM544_PENTANOMIAL_K3_IS_128_GF2X(\ --P,Pol,K1,K2,,KI,KI64,K164,K264,,Q,R,MASK_GF2n) -- #elif (HFEnr) -- /* Example: REM192_PENTANOMIAL_GF2X(P,Pol,K1,K2,K3mod64,KI,KI64, -- K164,K264,K364mod64,Q,R,MASK_GF2n) */ -- #define REM_GF2N(P,Pol,Q,R) CONCAT(CONCAT_NB_BITS_MMUL_SUP(REM),\ --_PENTANOMIAL_GF2X(P,Pol,K1,K2,K3mod64,KI,KI64,K164,K264,K364mod64,Q,R,\ --MASK_GF2n)) -- #else -- /* HFEn == NB_WORD_GFqn*64 */ -- /* Example: REM192_PENTANOMIAL_K192_GF2X(P,Pol,K1,K2,K3mod64,\ -- K164,K264,K364mod64,R) */ -- #define REM_GF2N_TMP CONCAT(CONCAT_NB_BITS_MMUL_SUP(CONCAT(\ --CONCAT_NB_BITS_MMUL_SUP(REM),_PENTANOMIAL_K)),_GF2X) -- #define REM_GF2N(P,Pol,Q,R) \ -- REM_GF2N_TMP(P,Pol,K1,K2,K3mod64,K164,K264,K364mod64,R) -- #endif -- #endif -+#if (HFEn==174||HFEn==175||HFEn==177) -+/* Assumes KI >= K3, which it is for {Blue,Red,}GeMSS128 */ -+#define REM_GF2N(P,Pol,Q,R) \ -+ (Q)[0]=((Pol)[2]>>(KI))^((Pol)[3]<<(KI64));\ -+ (Q)[1]=((Pol)[3]>>(KI))^((Pol)[4]<<(KI64));\ -+ (Q)[2]=((Pol)[4]>>(KI))^((Pol)[5]<<(KI64));\ -+ XOR3(P,Pol,Q);\ -+ (P)[0]^=(Q)[0]<<(K3);\ -+ (P)[1]^=((Q)[0]>>(K364))^((Q)[1]<<(K3));\ -+ (P)[2]^=((Q)[1]>>(K364))^((Q)[2]<<(K3));\ -+ (R)=(Q)[2]>>((KI)-(K3));\ -+ (P)[0]^=(R);\ -+ (P)[0]^=(R)<<(K3);\ -+ (P)[2]&=(MASK_GF2n); -+ -+#elif (HFEn==265||HFEn==266) -+/* Assumes KI < K3, which it is for {Blue,Red,}GeMSS192 */ -+#define REM_GF2N(P,Pol,Q,R)\ -+ (Q)[0]=((Pol)[4]>>(KI))^((Pol)[5]<<(KI64));\ -+ (Q)[1]=((Pol)[5]>>(KI))^((Pol)[6]<<(KI64));\ -+ (Q)[2]=((Pol)[6]>>(KI))^((Pol)[7]<<(KI64));\ -+ (Q)[3]=((Pol)[7]>>(KI))^((Pol)[8]<<(KI64));\ -+ (Q)[4]=((Pol)[8]>>(KI));\ -+ XOR5(P,Pol,Q);\ -+ (P)[0]^=(Q)[0]<<(K3);\ -+ (P)[1]^=((Q)[0]>>(K364))^((Q)[1]<<(K3));\ -+ (P)[2]^=((Q)[1]>>(K364))^((Q)[2]<<(K3));\ -+ (P)[3]^=((Q)[2]>>(K364))^((Q)[3]<<(K3));\ -+ (P)[4]^=((Q)[3]>>(K364))^((Q)[4]<<(K3));\ -+ /* 64-((K364)+(KI)) == ((K3)-(KI)) */\ -+ (R)=((Q)[3]>>((K364)+(KI)))^((Q)[4]<<((K3)-(KI)));\ -+ (P)[0]^=(R);\ -+ (P)[0]^=(R)<<(K3);\ -+ /* This row is the unique difference with REM288_TRINOMIAL_GF2X */\ -+ (P)[1]^=(R)>>(K364);\ -+ (P)[4]&=(MASK_GF2n); -+ -+#elif (HFEn==354) -+#define REM_GF2N(P,Pol,Q,R) \ -+ {uint64_t R2;\ -+ (Q)[0]=((Pol)[5]>>(KI))^((Pol)[6]<<(KI64));\ -+ (Q)[1]=((Pol)[6]>>(KI))^((Pol)[7]<<(KI64));\ -+ (Q)[2]=((Pol)[7]>>(KI))^((Pol)[8]<<(KI64));\ -+ (Q)[3]=((Pol)[8]>>(KI))^((Pol)[9]<<(KI64));\ -+ (Q)[4]=((Pol)[9]>>(KI))^((Pol)[10]<<(KI64));\ -+ (Q)[5]=((Pol)[10]>>(KI))^((Pol)[11]<<(KI64));\ -+ XOR6(P,Pol,Q);\ -+ /* 64-((K364)+(KI)) == ((K3mod64)-(KI)) */\ -+ (R)=((Q)[3]>>((K364)+(KI)))^((Q)[4]<<((K3mod64)-(KI)));\ -+ (P)[0]^=(R);\ -+ (R2)=((Q)[4]>>((K364)+(KI)))^((Q)[5]<<((K3mod64)-(KI)));\ -+ (P)[1]^=(R2);\ -+ (P)[1]^=((R)^(Q)[0])<<(K3mod64);\ -+ (P)[2]^=(((R)^(Q)[0])>>(K364))^((R2^(Q)[1])<<(K3mod64));\ -+ (P)[3]^=((R2^(Q)[1])>>(K364))^((Q)[2]<<(K3mod64));\ -+ (P)[4]^=((Q)[2]>>(K364))^((Q)[3]<<(K3mod64));\ -+ (P)[5]^=(Q)[3]>>(K364);\ -+ (P)[5]&=(MASK_GF2n);} -+ -+#elif (HFEn==358) -+#define REM_GF2N(P,Pol,Q,R) \ -+ (Q)[0]=((Pol)[5]>>(KI))^((Pol)[6]<<(KI64));\ -+ (Q)[1]=((Pol)[6]>>(KI))^((Pol)[7]<<(KI64));\ -+ 
(Q)[2]=((Pol)[7]>>(KI))^((Pol)[8]<<(KI64));\ -+ (Q)[3]=((Pol)[8]>>(KI))^((Pol)[9]<<(KI64));\ -+ (Q)[4]=((Pol)[9]>>(KI))^((Pol)[10]<<(KI64));\ -+ (Q)[5]=((Pol)[10]>>(KI))^((Pol)[11]<<(KI64));\ -+ /* 64-((k364)+(KI)) == ((K3mod64)-(KI)) */\ -+ (R)=((Q)[4]>>((K364)+(KI)))^((Q)[5]<<((K3mod64)-(KI)));\ -+ (Q)[0]^=(R);\ -+ XOR6(P,Pol,Q);\ -+ (P)[0]^=(Q)[0]<<(K3mod64);\ -+ (P)[1]^=((Q)[0]>>(K364))^((Q)[1]<<(K3mod64));\ -+ (P)[2]^=((Q)[1]>>(K364))^((Q)[2]<<(K3mod64));\ -+ (P)[3]^=((Q)[2]>>(K364))^((Q)[3]<<(K3mod64));\ -+ (P)[4]^=((Q)[3]>>(K364))^((Q)[4]<<(K3mod64));\ -+ (P)[5]^=((Q)[4]>>(K364));\ -+ (P)[5]&=(MASK_GF2n); - #endif - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_signHFE.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_signHFE.h deleted file mode 100644 index 5117bb5..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_signHFE.h +++ /dev/null @@ -1,23 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/signHFE.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/signHFE.h -@@ -7,6 +7,7 @@ - #include "gf2nx.h" - #include "config_HFE.h" - #include "matrix_gf2.h" -+#include "sizes_HFE.h" - #include - - -@@ -30,7 +31,10 @@ - #endif - - #if ENABLED_SEED_SK -- UINT *sk_uncomp; -+ UINT sk_uncomp[NB_UINT_HFEVPOLY -+ +(LTRIANGULAR_NV_SIZE<<1) -+ +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t -+ +MATRIXnv_SIZE+MATRIXn_SIZE]; - #endif - } secret_key_HFE; - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_simd_intel.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_simd_intel.h deleted file mode 100644 index 0343356..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_simd_intel.h +++ /dev/null @@ -1,342 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/simd_intel.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/simd_intel.h -@@ -16,7 +16,7 @@ - #define PSHUFFLE_PS(A,B,i) \ - ((__m128i)_mm_shuffle_ps((__m128)(A),(__m128)(B),i)) - -- #define PMOVE_PS(A,B) ((__m128i)_mm_move_ss((__m128)A,(__m128)B)) -+ #define PMOVE_PS(A,B) ((__m128i)_mm_move_ss((__m128)(A),(__m128)(B))) - #endif - - -@@ -200,67 +200,67 @@ - - #define PINIT192(x1,x2,A) \ - PINIT128(x1,A);\ -- PINIT64(x2,A+2); -+ PINIT64(x2,(A)+2); - - #define PINIT256(x1,x2,A) \ - PINIT128(x1,A);\ -- PINIT128(x2,A+2); -+ PINIT128(x2,(A)+2); - - #define PINIT320(x1,x2,x3,A) \ - PINIT256(x1,x2,A);\ -- PINIT64(x3,A+4); -+ PINIT64(x3,(A)+4); - - #define PINIT384(x1,x2,x3,A) \ - PINIT256(x1,x2,A);\ -- PINIT128(x3,A+4); -+ PINIT128(x3,(A)+4); - - #define PINIT448(x1,x2,x3,x4,A) \ - PINIT256(x1,x2,A);\ -- PINIT192(x3,x4,A+4); -+ PINIT192(x3,x4,(A)+4); - - #define PINIT512(x1,x2,x3,x4,A) \ - PINIT256(x1,x2,A);\ -- PINIT256(x3,x4,A+4); -+ PINIT256(x3,x4,(A)+4); - - #define PINIT576(x1,x2,x3,x4,x5,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT64(x5,A+8); -+ PINIT64(x5,(A)+8); - - #define PINIT640(x1,x2,x3,x4,x5,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT128(x5,A+8); -+ PINIT128(x5,(A)+8); - - #define PINIT704(x1,x2,x3,x4,x5,x6,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT192(x5,x6,A+8); -+ PINIT192(x5,x6,(A)+8); - - #define PINIT768(x1,x2,x3,x4,x5,x6,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT256(x5,x6,A+8); -+ PINIT256(x5,x6,(A)+8); - - #define PINIT832(x1,x2,x3,x4,x5,x6,x7,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT320(x5,x6,x7,A+8); -+ PINIT320(x5,x6,x7,(A)+8); - - #define PINIT896(x1,x2,x3,x4,x5,x6,x7,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT384(x5,x6,x7,A+8); -+ 
PINIT384(x5,x6,x7,(A)+8); - - #define PINIT960(x1,x2,x3,x4,x5,x6,x7,x8,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT448(x5,x6,x7,x8,A+8); -+ PINIT448(x5,x6,x7,x8,(A)+8); - - #define PINIT1024(x1,x2,x3,x4,x5,x6,x7,x8,A) \ - PINIT512(x1,x2,x3,x4,A);\ -- PINIT512(x5,x6,x7,x8,A+8); -+ PINIT512(x5,x6,x7,x8,(A)+8); - - #define PINIT1088(x1,x2,x3,x4,x5,x6,x7,x8,x9,A) \ - PINIT1024(x1,x2,x3,x4,x5,x6,x7,x8,A);\ -- PINIT64(x9,A+16); -+ PINIT64(x9,(A)+16); - - #define PINIT1152(x1,x2,x3,x4,x5,x6,x7,x8,x9,A) \ - PINIT1024(x1,x2,x3,x4,x5,x6,x7,x8,A);\ -- PINIT128(x9,A+16); -+ PINIT128(x9,(A)+16); - - - /* Store */ -@@ -268,67 +268,67 @@ - - #define PSTORE192(A,x1,x2) \ - PSTORE128(A,x1);\ -- PSTOREL(A+2,x2); -+ PSTOREL((A)+2,x2); - - #define PSTORE256(A,x1,x2) \ - PSTORE128(A,x1);\ -- PSTORE128(A+2,x2); -+ PSTORE128((A)+2,x2); - - #define PSTORE320(A,x1,x2,x3) \ - PSTORE256(A,x1,x2);\ -- PSTOREL(A+4,x3); -+ PSTOREL((A)+4,x3); - - #define PSTORE384(A,x1,x2,x3) \ - PSTORE256(A,x1,x2);\ -- PSTORE128(A+4,x3); -+ PSTORE128((A)+4,x3); - - #define PSTORE448(A,x1,x2,x3,x4) \ - PSTORE256(A,x1,x2);\ -- PSTORE192(A+4,x3,x4); -+ PSTORE192((A)+4,x3,x4); - - #define PSTORE512(A,x1,x2,x3,x4) \ - PSTORE256(A,x1,x2);\ -- PSTORE256(A+4,x3,x4); -+ PSTORE256((A)+4,x3,x4); - - #define PSTORE576(A,x1,x2,x3,x4,x5) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTOREL(A+8,x5); -+ PSTOREL((A)+8,x5); - - #define PSTORE640(A,x1,x2,x3,x4,x5) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE128(A+8,x5); -+ PSTORE128((A)+8,x5); - - #define PSTORE704(A,x1,x2,x3,x4,x5,x6) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE192(A+8,x5,x6); -+ PSTORE192((A)+8,x5,x6); - - #define PSTORE768(A,x1,x2,x3,x4,x5,x6) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE256(A+8,x5,x6); -+ PSTORE256((A)+8,x5,x6); - - #define PSTORE832(A,x1,x2,x3,x4,x5,x6,x7) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE320(A+8,x5,x6,x7); -+ PSTORE320((A)+8,x5,x6,x7); - - #define PSTORE896(A,x1,x2,x3,x4,x5,x6,x7) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE384(A+8,x5,x6,x7); -+ PSTORE384((A)+8,x5,x6,x7); - - #define PSTORE960(A,x1,x2,x3,x4,x5,x6,x7,x8) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE448(A+8,x5,x6,x7,x8); -+ PSTORE448((A)+8,x5,x6,x7,x8); - - #define PSTORE1024(A,x1,x2,x3,x4,x5,x6,x7,x8) \ - PSTORE512(A,x1,x2,x3,x4);\ -- PSTORE512(A+8,x5,x6,x7,x8); -+ PSTORE512((A)+8,x5,x6,x7,x8); - - #define PSTORE1088(A,x1,x2,x3,x4,x5,x6,x7,x8,x9) \ - PSTORE1024(A,x1,x2,x3,x4,x5,x6,x7,x8);\ -- PSTOREL(A+16,x9); -+ PSTOREL((A)+16,x9); - - #define PSTORE1152(A,x1,x2,x3,x4,x5,x6,x7,x8,x9) \ - PSTORE1024(A,x1,x2,x3,x4,x5,x6,x7,x8);\ -- PSTORE128(A+16,x9); -+ PSTORE128((A)+16,x9); - - - /* Bitwise operator */ -@@ -566,65 +566,65 @@ - x=PCVT_256(PLOADU(A)); - - #define VPINIT192(x,A) \ -- x=VPINSERT_128(PCVT_256(PLOADU(A)),PLOADL(A+2)); -+ x=VPINSERT_128(PCVT_256(PLOADU(A)),PLOADL((A)+2)); - - #define VPINIT256(x,A) x=VPLOADU(A); - - #define VPINIT320(x1,x2,A) \ - VPINIT256(x1,A);\ -- VPINIT64(x2,A+4); -+ VPINIT64(x2,(A)+4); - - #define VPINIT384(x1,x2,A) \ - VPINIT256(x1,A);\ -- VPINIT128(x2,A+4); -+ VPINIT128(x2,(A)+4); - - #define VPINIT448(x1,x2,A) \ - VPINIT256(x1,A);\ -- VPINIT192(x2,A+4); -+ VPINIT192(x2,(A)+4); - - #define VPINIT512(x1,x2,A) \ - VPINIT256(x1,A);\ -- VPINIT256(x2,A+4); -+ VPINIT256(x2,(A)+4); - - #define VPINIT576(x1,x2,x3,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT64(x3,A+8); -+ VPINIT64(x3,(A)+8); - - #define VPINIT640(x1,x2,x3,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT128(x3,A+8); -+ VPINIT128(x3,(A)+8); - - #define VPINIT704(x1,x2,x3,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT192(x3,A+8); -+ VPINIT192(x3,(A)+8); - - #define 
VPINIT768(x1,x2,x3,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT256(x3,A+8); -+ VPINIT256(x3,(A)+8); - - #define VPINIT832(x1,x2,x3,x4,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT320(x3,x4,A+8); -+ VPINIT320(x3,x4,(A)+8); - - #define VPINIT896(x1,x2,x3,x4,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT384(x3,x4,A+8); -+ VPINIT384(x3,x4,(A)+8); - - #define VPINIT960(x1,x2,x3,x4,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT448(x3,x4,A+8); -+ VPINIT448(x3,x4,(A)+8); - - #define VPINIT1024(x1,x2,x3,x4,A) \ - VPINIT512(x1,x2,A);\ -- VPINIT512(x3,x4,A+8); -+ VPINIT512(x3,x4,(A)+8); - - #define VPINIT1088(x1,x2,x3,x4,x5,A) \ - VPINIT1024(x1,x2,x3,x4,A);\ -- VPINIT64(x5,A+16); -+ VPINIT64(x5,(A)+16); - - #define VPINIT1152(x1,x2,x3,x4,x5,A) \ - VPINIT1024(x1,x2,x3,x4,A);\ -- VPINIT128(x5,A+16); -+ VPINIT128(x5,(A)+16); - - - /* Store */ -@@ -636,65 +636,65 @@ - - #define VPSTORE192(A,x) \ - PSTOREU(A,VPCVT_128(x));\ -- PSTOREL(A+2,VPEXTRACT128(x)); -+ PSTOREL((A)+2,VPEXTRACT128(x)); - - #define VPSTORE256 VPSTOREU - - #define VPSTORE320(A,x1,x2) \ - VPSTORE256(A,x1);\ -- VPSTORE64(A+4,x2); -+ VPSTORE64((A)+4,x2); - - #define VPSTORE384(A,x1,x2) \ - VPSTORE256(A,x1);\ -- VPSTORE128(A+4,x2); -+ VPSTORE128((A)+4,x2); - - #define VPSTORE448(A,x1,x2) \ - VPSTORE256(A,x1);\ -- VPSTORE192(A+4,x2); -+ VPSTORE192((A)+4,x2); - - #define VPSTORE512(A,x1,x2) \ - VPSTORE256(A,x1);\ -- VPSTORE256(A+4,x2); -+ VPSTORE256((A)+4,x2); - - #define VPSTORE576(A,x1,x2,x3) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE64(A+8,x3); -+ VPSTORE64((A)+8,x3); - - #define VPSTORE640(A,x1,x2,x3) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE128(A+8,x3); -+ VPSTORE128((A)+8,x3); - - #define VPSTORE704(A,x1,x2,x3) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE192(A+8,x3); -+ VPSTORE192((A)+8,x3); - - #define VPSTORE768(A,x1,x2,x3) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE256(A+8,x3); -+ VPSTORE256((A)+8,x3); - - #define VPSTORE832(A,x1,x2,x3,x4) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE320(A+8,x3,x4); -+ VPSTORE320((A)+8,x3,x4); - - #define VPSTORE896(A,x1,x2,x3,x4) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE384(A+8,x3,x4); -+ VPSTORE384((A)+8,x3,x4); - - #define VPSTORE960(A,x1,x2,x3,x4) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE448(A+8,x3,x4); -+ VPSTORE448((A)+8,x3,x4); - - #define VPSTORE1024(A,x1,x2,x3,x4) \ - VPSTORE512(A,x1,x2);\ -- VPSTORE512(A+8,x3,x4); -+ VPSTORE512((A)+8,x3,x4); - - #define VPSTORE1088(A,x1,x2,x3,x4,x5) \ - VPSTORE1024(A,x1,x2,x3,x4);\ -- VPSTORE64(A+16,x5); -+ VPSTORE64((A)+16,x5); - - #define VPSTORE1152(A,x1,x2,x3,x4,x5) \ - VPSTORE1024(A,x1,x2,x3,x4);\ -- VPSTORE128(A+16,x5); -+ VPSTORE128((A)+16,x5); - #endif - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2n.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2n.h deleted file mode 100644 index 1292bde..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2n.h +++ /dev/null @@ -1,20 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/sqr_gf2n.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/sqr_gf2n.h -@@ -92,14 +92,8 @@ - - - /* Function sqr in GF(2^x), then modular reduction */ --#define SQR_THEN_REM_GF2N void \ -- PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn]) --#define SQR_NOCST_THEN_REM_GF2N void \ -- PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn]) --SQR_THEN_REM_GF2N; --SQR_NOCST_THEN_REM_GF2N; -+void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]); -+void 
PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]); - #define sqr_then_rem_gf2n PREFIX_NAME(sqr_then_rem_gf2n) - #define sqr_nocst_then_rem_gf2n PREFIX_NAME(sqr_nocst_then_rem_gf2n) - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2x.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2x.h deleted file mode 100644 index ff71b18..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_sqr_gf2x.h +++ /dev/null @@ -1,372 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/sqr_gf2x.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/sqr_gf2x.h -@@ -66,7 +66,7 @@ - SQR64_NO_SIMD_GF2X(C,(A)[0]); - - #define SQR128_NO_SIMD_GF2X(C,A) \ -- SQR64_NO_SIMD_GF2X(C+2,(A)[1]);\ -+ SQR64_NO_SIMD_GF2X((C)+2,(A)[1]);\ - SQR64_NO_SIMD_GF2X(C,(A)[0]); - - #define SQR160_NO_SIMD_GF2X(C,A) \ -@@ -74,15 +74,15 @@ - SQR128_NO_SIMD_GF2X(C,A); - - #define SQR192_NO_SIMD_GF2X(C,A) \ -- SQR64_NO_SIMD_GF2X(C+4,(A)[2]);\ -+ SQR64_NO_SIMD_GF2X((C)+4,(A)[2]);\ - SQR128_NO_SIMD_GF2X(C,A); - - #define SQR224_NO_SIMD_GF2X(C,A) \ -- SQR96_NO_SIMD_GF2X(C+4,A+2);\ -+ SQR96_NO_SIMD_GF2X((C)+4,(A)+2);\ - SQR128_NO_SIMD_GF2X(C,A); - - #define SQR256_NO_SIMD_GF2X(C,A) \ -- SQR128_NO_SIMD_GF2X(C+4,A+2);\ -+ SQR128_NO_SIMD_GF2X((C)+4,(A)+2);\ - SQR128_NO_SIMD_GF2X(C,A); - - #define SQR288_NO_SIMD_GF2X(C,A) \ -@@ -90,31 +90,31 @@ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR320_NO_SIMD_GF2X(C,A) \ -- SQR64_NO_SIMD_GF2X(C+8,(A)[4]);\ -+ SQR64_NO_SIMD_GF2X((C)+8,(A)[4]);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR352_NO_SIMD_GF2X(C,A) \ -- SQR96_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR96_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR384_NO_SIMD_GF2X(C,A) \ -- SQR128_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR128_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR416_NO_SIMD_GF2X(C,A) \ -- SQR160_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR160_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR448_NO_SIMD_GF2X(C,A) \ -- SQR192_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR192_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR480_NO_SIMD_GF2X(C,A) \ -- SQR224_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR224_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR512_NO_SIMD_GF2X(C,A) \ -- SQR256_NO_SIMD_GF2X(C+8,A+4);\ -+ SQR256_NO_SIMD_GF2X((C)+8,(A)+4);\ - SQR256_NO_SIMD_GF2X(C,A); - - #define SQR544_NO_SIMD_GF2X(C,A) \ -@@ -122,7 +122,7 @@ - SQR512_NO_SIMD_GF2X(C,A); - - #define SQR576_NO_SIMD_GF2X(C,A) \ -- SQR64_NO_SIMD_GF2X(C+16,(A)[8]);\ -+ SQR64_NO_SIMD_GF2X((C)+16,(A)[8]);\ - SQR512_NO_SIMD_GF2X(C,A); - - -@@ -177,25 +177,25 @@ - - /* 11 instructions */ - #define PSQR64_LO_GF2X(C1,A1,RA,R1) \ -- R1=PUNPACKLO_8(A1,RA);\ -- C1=PAND_(R1,PSET1_8(0xF));\ -- R1=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -- C1=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); -+ (R1)=PUNPACKLO_8(A1,RA);\ -+ (C1)=PAND_(R1,PSET1_8(0xF));\ -+ (R1)=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -+ (C1)=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); - - /* 11 instructions */ - #define PSQR64_HI_GF2X(C1,A1,RA,R1) \ -- R1=PUNPACKHI_8(A1,RA);\ -- C1=PAND_(R1,PSET1_8(0xF));\ -- R1=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -- C1=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); -+ (R1)=PUNPACKHI_8(A1,RA);\ -+ (C1)=PAND_(R1,PSET1_8(0xF));\ -+ (R1)=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -+ (C1)=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); - - /* 12 instructions */ 
- #define PSQR64_GF2X(C1,A1,RA,R1) \ - RA=PSRLI_16(A1,4);\ -- R1=PUNPACKLO_8(A1,RA);\ -- C1=PAND_(R1,PSET1_8(0xF));\ -- R1=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -- C1=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); -+ (R1)=PUNPACKLO_8(A1,RA);\ -+ (C1)=PAND_(R1,PSET1_8(0xF));\ -+ (R1)=PAND_(PXOR_(C1,PSLLI_16(C1,2)),PSET1_8(0x33));\ -+ (C1)=PAND_(PXOR_(R1,PSLLI_16(R1,1)),PSET1_8(0x55)); - - #define PSQR128_GF2X(C1,C2,A1,RA,R1) \ - RA=PSRLI_16(A1,4);\ -@@ -282,53 +282,53 @@ - - - #define PSQR_INIT_SHUFFLE_GF2X(M,T) \ -- M=PSET1_8(0x0F);\ -- T=PSET_64((uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100); -+ (M)=PSET1_8(0x0F);\ -+ (T)=PSET_64((uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100); - - /* 6 instructions */ - #define PSQR64_SHUFFLE_V1_GF2X(E0,A128,M,T) \ - {__m128i C0,C1,D0,D1;\ -- C0=PAND_(A128,M);\ -- C1=PAND_(PSRLI_16(A128,4),M);\ -+ (C0)=PAND_(A128,M);\ -+ (C1)=PAND_(PSRLI_16(A128,4),M);\ - \ -- D0=PSHUFFLE_8(T,C0);\ -- D1=PSHUFFLE_8(T,C1);\ -+ (D0)=PSHUFFLE_8(T,C0);\ -+ (D1)=PSHUFFLE_8(T,C1);\ - \ -- E0=PUNPACKLO_8(D0,D1);} -+ (E0)=PUNPACKLO_8(D0,D1);} - - /* 4 instructions, faster than PSQR64_SHUFFLE_V1_GF2X */ - #define PSQR64_SHUFFLE_GF2X(E0,A128,M,T) \ - {__m128i C0,D0;\ -- E0=PSRLI_16(A128,4);\ -- C0=PUNPACKLO_8(A128,E0);\ -- D0=PAND_(C0,M);\ -- E0=PSHUFFLE_8(T,D0);} -+ (E0)=PSRLI_16(A128,4);\ -+ (C0)=PUNPACKLO_8(A128,E0);\ -+ (D0)=PAND_(C0,M);\ -+ (E0)=PSHUFFLE_8(T,D0);} - - /* 7 instructions */ - #define PSQR128_SHUFFLE_V1_GF2X(E0,E1,A128,M,T) \ - {__m128i C0,C1,D0,D1;\ -- E0=PSRLI_16(A128,4);\ -+ (E0)=PSRLI_16(A128,4);\ - \ -- C0=PUNPACKLO_8(A128,E0);\ -- C1=PUNPACKHI_8(A128,E0);\ -+ (C0)=PUNPACKLO_8(A128,E0);\ -+ (C1)=PUNPACKHI_8(A128,E0);\ - \ -- D0=PAND_(C0,M);\ -- D1=PAND_(C1,M);\ -+ (D0)=PAND_(C0,M);\ -+ (D1)=PAND_(C1,M);\ - \ -- E0=PSHUFFLE_8(T,D0);\ -- E1=PSHUFFLE_8(T,D1);} -+ (E0)=PSHUFFLE_8(T,D0);\ -+ (E1)=PSHUFFLE_8(T,D1);} - - /* 7 instructions, faster than PSQR128_SHUFFLE_V1_GF2X */ - #define PSQR128_SHUFFLE_GF2X(E0,E1,A128,M,T) \ - {__m128i C0,C1,D0,D1;\ -- C0=PAND_(A128,M);\ -- C1=PAND_(PSRLI_16(A128,4),M);\ -+ (C0)=PAND_(A128,M);\ -+ (C1)=PAND_(PSRLI_16(A128,4),M);\ - \ -- D0=PSHUFFLE_8(T,C0);\ -- D1=PSHUFFLE_8(T,C1);\ -+ (D0)=PSHUFFLE_8(T,C0);\ -+ (D1)=PSHUFFLE_8(T,C1);\ - \ -- E0=PUNPACKLO_8(D0,D1);\ -- E1=PUNPACKHI_8(D0,D1);} -+ (E0)=PUNPACKLO_8(D0,D1);\ -+ (E1)=PUNPACKHI_8(D0,D1);} - - /* General macros */ - #define PSQR192_SHUFFLE_GF2X(E1,E2,E3,A1,A2,M,T) \ -@@ -403,58 +403,58 @@ - - - #define VPSQR_INIT_SHUFFLE_GF2X(M,T) \ -- M=VPSET1_8(0x0F);\ -- T=VPSET_64((uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100,\ -+ (M)=VPSET1_8(0x0F);\ -+ (T)=VPSET_64((uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100,\ - (uint64_t)0x5554515045444140,(uint64_t)0x1514111005040100); - - /* 4 instructions */ - #define VPSQR64_SHUFFLE_GF2X(E0,A256,M,T) \ - {__m256i B1,C0,D0;\ -- B1=VPSRLI_16(A256,4);\ -- C0=VPUNPACKLO_8(A256,B1);\ -- D0=VPAND_(C0,M);\ -- E0=VPSHUFFLE_8(T,D0);} -+ (B1)=VPSRLI_16(A256,4);\ -+ (C0)=VPUNPACKLO_8(A256,B1);\ -+ (D0)=VPAND_(C0,M);\ -+ (E0)=VPSHUFFLE_8(T,D0);} - - /* 5 instructions */ - #define VPSQR128_SHUFFLE_GF2X(E0,A256,M,T) \ - {__m256i B0,B1,C0,D0;\ -- B0=VPPERMUTE4x64(A256,0xD8);\ -- B1=VPSRLI_16(B0,4);\ -- C0=VPUNPACKLO_8(B0,B1);\ -- D0=VPAND_(C0,M);\ -- E0=VPSHUFFLE_8(T,D0);} -+ (B0)=VPPERMUTE4x64(A256,0xD8);\ -+ (B1)=VPSRLI_16(B0,4);\ -+ (C0)=VPUNPACKLO_8(B0,B1);\ -+ (D0)=VPAND_(C0,M);\ -+ (E0)=VPSHUFFLE_8(T,D0);} - - /* unpack after */ - /* 9 instructions */ - #define 
VPSQR256_SHUFFLE_V1_GF2X(E0,E1,A256,M,T) \ - {__m256i B0,B1,C0,C1,D0,D1;\ -- B0=VPAND_(A256,M);\ -- B1=VPAND_(VPSRLI_16(A256,4),M);\ -+ (B0)=VPAND_(A256,M);\ -+ (B1)=VPAND_(VPSRLI_16(A256,4),M);\ - \ -- C0=VPSHUFFLE_8(T,B0);\ -- C1=VPSHUFFLE_8(T,B1);\ -+ (C0)=VPSHUFFLE_8(T,B0);\ -+ (C1)=VPSHUFFLE_8(T,B1);\ - \ -- D0=VPUNPACKLO_8(C0,C1);\ -- D1=VPUNPACKHI_8(C0,C1);\ -+ (D0)=VPUNPACKLO_8(C0,C1);\ -+ (D1)=VPUNPACKHI_8(C0,C1);\ - \ -- E0=VPPERMUTE2x128(D0,D1,0x20);\ -- E1=VPPERMUTE2x128(D0,D1,0x31);} -+ (E0)=VPPERMUTE2x128(D0,D1,0x20);\ -+ (E1)=VPPERMUTE2x128(D0,D1,0x31);} - - /* unpack before */ - /* 8 instructions, faster than VPSQR256_SHUFFLE_V1_GF2X */ - #define VPSQR256_SHUFFLE_GF2X(E0,E1,A256,M,T) \ - {__m256i B0,B1,C0,C1,D0,D1;\ -- B0=VPPERMUTE4x64(A256,0xD8);\ -- B1=VPSRLI_16(B0,4);\ -+ (B0)=VPPERMUTE4x64(A256,0xD8);\ -+ (B1)=VPSRLI_16(B0,4);\ - \ -- C0=VPUNPACKLO_8(B0,B1);\ -- C1=VPUNPACKHI_8(B0,B1);\ -+ (C0)=VPUNPACKLO_8(B0,B1);\ -+ (C1)=VPUNPACKHI_8(B0,B1);\ - \ -- D0=VPAND_(C0,M);\ -- D1=VPAND_(C1,M);\ -+ (D0)=VPAND_(C0,M);\ -+ (D1)=VPAND_(C1,M);\ - \ -- E0=VPSHUFFLE_8(T,D0);\ -- E1=VPSHUFFLE_8(T,D1);} -+ (E0)=VPSHUFFLE_8(T,D0);\ -+ (E1)=VPSHUFFLE_8(T,D1);} - - #define VPSQR192_SHUFFLE_GF2X VPSQR256_SHUFFLE_GF2X - -@@ -615,74 +615,74 @@ - /* 2 pclmul */ - #define SQR96_PCLMUL_GF2X(C,x,z) \ - SQR64_PCLMUL_GF2X(C,x,z,0);\ -- SQR64LOW_TAB_PCLMUL_GF2X(C+2,x,17); -+ SQR64LOW_TAB_PCLMUL_GF2X((C)+2,x,17); - - #define SQR128_PCLMUL_GF2X(C,x,z) \ - SQR64_PCLMUL_GF2X(C,x,z,0);\ -- SQR64_PCLMUL_GF2X(C+2,x,z,17); -+ SQR64_PCLMUL_GF2X((C)+2,x,z,17); - - /* 3 pclmul */ - #define SQR160_PCLMUL_GF2X(C,x1,x2,z) \ - SQR128_PCLMUL_GF2X(C,x1,z);\ -- SQR64LOW_TAB_PCLMUL_GF2X(C+4,x2,0); -+ SQR64LOW_TAB_PCLMUL_GF2X((C)+4,x2,0); - - #define SQR192_PCLMUL_GF2X(C,x1,x2,z) \ - SQR128_PCLMUL_GF2X(C,x1,z);\ -- SQR64_PCLMUL_GF2X(C+4,x2,z,0); -+ SQR64_PCLMUL_GF2X((C)+4,x2,z,0); - - /* 4 pclmul */ - #define SQR224_PCLMUL_GF2X(C,x1,x2,z) \ - SQR128_PCLMUL_GF2X(C,x1,z);\ -- SQR96_PCLMUL_GF2X(C+4,x2,z); -+ SQR96_PCLMUL_GF2X((C)+4,x2,z); - - #define SQR256_PCLMUL_GF2X(C,x1,x2,z) \ - SQR128_PCLMUL_GF2X(C,x1,z);\ -- SQR128_PCLMUL_GF2X(C+4,x2,z); -+ SQR128_PCLMUL_GF2X((C)+4,x2,z); - - /* 5 pclmul */ - #define SQR288_PCLMUL_GF2X(C,x1,x2,x3,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR64LOW_TAB_PCLMUL_GF2X(C+8,x3,0); -+ SQR64LOW_TAB_PCLMUL_GF2X((C)+8,x3,0); - - #define SQR320_PCLMUL_GF2X(C,x1,x2,x3,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR64_PCLMUL_GF2X(C+8,x3,z,0); -+ SQR64_PCLMUL_GF2X((C)+8,x3,z,0); - - /* 6 pclmul */ - #define SQR352_PCLMUL_GF2X(C,x1,x2,x3,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR96_PCLMUL_GF2X(C+8,x3,z); -+ SQR96_PCLMUL_GF2X((C)+8,x3,z); - - #define SQR384_PCLMUL_GF2X(C,x1,x2,x3,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR128_PCLMUL_GF2X(C+8,x3,z); -+ SQR128_PCLMUL_GF2X((C)+8,x3,z); - - /* 7 pclmul */ - #define SQR416_PCLMUL_GF2X(C,x1,x2,x3,x4,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR160_PCLMUL_GF2X(C+8,x3,x4,z); -+ SQR160_PCLMUL_GF2X((C)+8,x3,x4,z); - - #define SQR448_PCLMUL_GF2X(C,x1,x2,x3,x4,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR192_PCLMUL_GF2X(C+8,x3,x4,z); -+ SQR192_PCLMUL_GF2X((C)+8,x3,x4,z); - - /* 8 pclmul */ - #define SQR480_PCLMUL_GF2X(C,x1,x2,x3,x4,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR224_PCLMUL_GF2X(C+8,x3,x4,z); -+ SQR224_PCLMUL_GF2X((C)+8,x3,x4,z); - - #define SQR512_PCLMUL_GF2X(C,x1,x2,x3,x4,z) \ - SQR256_PCLMUL_GF2X(C,x1,x2,z);\ -- SQR256_PCLMUL_GF2X(C+8,x3,x4,z); -+ SQR256_PCLMUL_GF2X((C)+8,x3,x4,z); - - /* 9 pclmul */ - #define 
SQR544_PCLMUL_GF2X(C,x1,x2,x3,x4,x5,z) \ - SQR512_PCLMUL_GF2X(C,x1,x2,x3,x4,z);\ -- SQR64LOW_TAB_PCLMUL_GF2X(C+16,x5,0); -+ SQR64LOW_TAB_PCLMUL_GF2X((C)+16,x5,0); - - #define SQR576_PCLMUL_GF2X(C,x1,x2,x3,x4,x5,z) \ - SQR512_PCLMUL_GF2X(C,x1,x2,x3,x4,z);\ -- SQR64_PCLMUL_GF2X(C+16,x5,z,0); -+ SQR64_PCLMUL_GF2X((C)+16,x5,z,0); - - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2m.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2m.h deleted file mode 100644 index 9a9fd27..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2m.h +++ /dev/null @@ -1,42 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/tools_gf2m.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/tools_gf2m.h -@@ -25,12 +25,13 @@ - /* The number of word that an element of GF(2^m) needs */ - #if (HFEmr) - #define NB_WORD_GF2m_TMP (HFEmq+1) -+ /* Mask to truncate the last word */ -+ #define MASK_GF2m ((UINT_1<<(HFEmr))-UINT_1) - #else - #define NB_WORD_GF2m_TMP HFEmq -+ #define MASK_GF2m UINT_M1 - #endif - --/* Mask to truncate the last word */ --#define MASK_GF2m maskUINT(HFEmr) - - #define HFEmq8 (HFEm>>3) - #define HFEmr8 (HFEm&7U) -@@ -75,19 +76,18 @@ - - #define isEqual_gf2m(a,b) f_ISEQUAL(a,b,NB_WORD_GF2m) - -+#define set0_gf2m(c) SET0((unsigned char *)(c),8*NB_WORD_GF2m) -+#define xorLoadMask1_gf2m(res,a,b) XORLOADMASK1((unsigned char *)(res),(unsigned char *)(a),b,8*NB_WORD_GF2m) -+ - #if (NB_WORD_GF2m<7) - #define add_gf2m CONCAT(CONCAT_NB_WORD_GF2m_SUP(ADD),_GF2X) - #define add2_gf2m CONCAT(CONCAT_NB_WORD_GF2m_SUP(ADD),_2_GF2X) - #define copy_gf2m CONCAT_NB_WORD_GF2m_SUP(COPY) -- #define set0_gf2m CONCAT_NB_WORD_GF2m_SUP(SET0_) -- #define xorLoadMask1_gf2m CONCAT_NB_WORD_GF2m_SUP(XORLOADMASK1_) - #define dotProduct_gf2_m CONCAT_NB_WORD_GF2m_SUP(DOTPRODUCT) - #else - #define add_gf2m(a,b,c) ADD_GF2X(a,b,c,NB_WORD_GF2m); - #define add2_gf2m(a,b) ADD_2_GF2X(a,b,NB_WORD_GF2m); - #define copy_gf2m(c,a) COPY(c,a,NB_WORD_GF2m) -- #define set0_gf2m(c) SET0(c,NB_WORD_GF2m) -- #define xorLoadMask1_gf2m(res,a,b) XORLOADMASK1(res,a,b,NB_WORD_GF2m) - #define dotProduct_gf2_m(res,a,b) DOTPRODUCT(res,a,b,NB_WORD_GF2m) - #endif - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2n.h b/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2n.h deleted file mode 100644 index 2103631..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_include_tools_gf2n.h +++ /dev/null @@ -1,52 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/include/tools_gf2n.h -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/include/tools_gf2n.h -@@ -52,13 +52,13 @@ - /* The number of word that an element of GF(2^n) needs */ - #if (HFEnr) - #define NB_WORD_GFqn_TMP (HFEnq+1) -+ /* Mask for arithmetic in GF(2^n) */ -+ #define MASK_GF2n ((UINT_1<<(HFEnr))-UINT_1) - #else - #define NB_WORD_GFqn_TMP HFEnq -+ #define MASK_GF2n UINT_M1 - #endif - --/* Mask for arithmetic in GF(2^n) */ --#define MASK_GF2n maskUINT(HFEnr) -- - #define HFEnr8 (HFEn&7) - #define MASK8_GF2n ((1U<>3) - #define HFEnvr8 (HFEnv&7) - #define MASK8_GF2nv ((1U<>3)+((HFEvr8)?1:0)) - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c deleted file mode 100644 index b74cbfa..0000000 --- 
a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c +++ /dev/null @@ -1,89 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/changeVariablesMQS_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/changeVariablesMQS_gf2.c -@@ -26,16 +26,14 @@ - */ - int PREFIX_NAME(changeVariablesMQS_simd_gf2)(mqsnv_gf2n MQS, cst_GLnv_gf2 S) - { -- UINT tmp[NB_WORD_GFqn]; -- mqsnv_gf2n MQS2, MQS2_cp; -+ UINT tmp[NB_WORD_GFqn]={0}; -+ /* Tmp matrix (n+v)*(n+v) of quadratic terms to compute S*Q */ -+ UINT MQS2[HFEnv*HFEnv*NB_WORD_GFqn]={0}; -+ UINT *MQS2_cp; - cst_mqsnv_gf2n MQS_cpi,MQS_cpj; - cst_GLnv_gf2 S_cpi,S_cpj; - unsigned int i,j; - -- /* Tmp matrix (n+v)*(n+v) of quadratic terms to compute S*Q */ -- MQS2=(UINT*)malloc(HFEnv*HFEnv*NB_WORD_GFqn*sizeof(UINT)); -- VERIFY_ALLOC_RET(MQS2); -- - /* To avoid the constant of MQS */ - MQS+=NB_WORD_GFqn; - -@@ -129,8 +127,6 @@ - S_cpj+=NB_WORD_GF2nv; - } - -- -- free(MQS2); - return 0; - } - -@@ -142,7 +138,7 @@ - - /* Compute a dot product with one word of S */ - #define LOOPKR(START,NB_IT) \ -- for(kr=START;kr -+ hash[i*SIZE_DIGEST_UINT+j])) - { -- j=0; -- while((j -- hash[i*SIZE_DIGEST_UINT+j])) -- { -- ind=i; -- } -+ ind=i; - } -+ } - -- /* We choose the corresponding root */ -- copy_gf2n(root,roots+ind*NB_WORD_GFqn); -- -- free(hash); -- #else -+ /* We choose the corresponding root */ -+ copy_gf2n(root,roots+ind*NB_WORD_GFqn); - -- /* Sort the roots */ -- sort_gf2n(roots,l); -+ free(hash); -+ #else - -- #if FIRST_ROOT -- /* Choose the first root */ -- copy_gf2n(root,roots); -- #elif DETERMINIST_ROOT -- /* Choose a root with a determinist hash */ -- HASH((unsigned char*)hash, -- (unsigned char*)U,NB_BYTES_GFqn); -- copy_gf2n(root,roots+(hash[0]%l)*NB_WORD_GFqn); -- #endif -+ /* Sort the roots */ -+ sort_gf2n(roots,l); -+ -+ #if FIRST_ROOT -+ /* Choose the first root */ -+ copy_gf2n(root,roots); -+ #elif DETERMINIST_ROOT -+ /* Choose a root with a determinist hash */ -+ HASH((unsigned char*)hash, -+ (unsigned char*)U,NB_BYTES_GFqn); -+ copy_gf2n(root,roots+(hash[0]%l)*NB_WORD_GFqn); - #endif -- } -- free(roots); -- return l; -+ #endif - } -+ return l; - #endif - } - #endif - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQS_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQS_gf2.c deleted file mode 100644 index 18970d1..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_convMQS_gf2.c +++ /dev/null @@ -1,40 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/convMQS_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/convMQS_gf2.c -@@ -108,8 +108,7 @@ - unsigned int j; - - #if HFEmr8 -- uint8_t *pk_U=(uint8_t*)malloc(HFEmr8*NB_BYTES_EQUATION -- *sizeof(uint8_t)); -+ uint8_t pk_U[HFEmr8*NB_BYTES_EQUATION]={0}; - - convMQS_one_to_last_mr8_equations_gf2(pk_U,pk); - for(j=0;j>(j*HFENr8c)))<=LOST_BITS;--j,++k) -+ for(j=HFEnv-1;j>=(int)LOST_BITS;--j,++k) - { - pk2[k>>3]^=((pk[nb_bits>>3]>>(nb_bits&7))&ONE8)<<(k&7); - nb_bits+=j; -@@ -135,10 +135,15 @@ - */ - UINT PREFIX_NAME(convMQ_last_uncompressL_gf2)(uint64_t* pk2, const uint8_t* pk) - { -- const uint64_t *pk64; - unsigned int iq,ir,k,nb_bits; -+ uint64_t t1, t2; -+ const uint8_t *pk64 = pk; -+ #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) -+ const uint8_t *pk_end; -+ uint64_t end; -+ unsigned int l; -+ #endif - -- pk64=(uint64_t*)pk; - - nb_bits=1; - /* For each row */ -@@ -150,29 +155,34 @@ - { - for(k=0;k>(nb_bits&63)) -- 
^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>(nb_bits&63))^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63))^(t2<<(64-(nb_bits&63))); - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } -- pk64+=iq+1; -+ pk64+=8*(iq+1); - pk2+=iq+1; - nb_bits+=(iq+1)<<6; - } -@@ -205,29 +218,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>3)&7) -- uint8_t *pk_end; -- uint64_t end; -- unsigned int l; -- #endif -- - #if LAST_ROW_R -- ir=LAST_ROW_R; - if(nb_bits&63) - { - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) -@@ -257,15 +268,18 @@ - - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - - #if (NB_WHOLE_BLOCKS>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -274,12 +288,13 @@ - pk2[k]^=end<<(64-(nb_bits&63)); - pk2[k+1]=end>>(nb_bits&63); - #else -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - -- if(((nb_bits&63)+ir)>64) -+ if(((nb_bits&63)+LAST_ROW_R)>64) - { - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -290,14 +305,18 @@ - #else - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -- if(((nb_bits&63)+ir)>64) -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); -+ if(((nb_bits&63)+LAST_ROW_R)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - #endif - } else -@@ -305,11 +324,12 @@ - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) - for(k=0;k>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -318,7 +338,8 @@ - #else - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - #endif - } -@@ -328,13 +349,16 
@@ - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) - for(k=0;k<(iq-1);++k) - { -- pk2[k]=(pk64[k]>>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -343,15 +367,18 @@ - #else - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - #endif - } else - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>(nb_bits&63))^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63))^(t2<<(64-(nb_bits&63))); - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } -- pk64+=iq+1; -+ pk64+=8*(iq+1); - pk2+=iq+1; - nb_bits+=(iq+1)<<6; - } -@@ -450,29 +486,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>ir)&1);\ -+ pivot=1+~((((*S_cpj)>>ir)&1));\ - LOOPK; - - -@@ -44,7 +44,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=1;\ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)});\ - \ - /* Here, the pivot is 1 if S is invertible */\ -@@ -158,7 +163,7 @@ - algorithm. 
*/\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)});\ - \ - /* Next row */\ -@@ -214,7 +219,7 @@ - det_i&=(*S_cpi)>>ir; - #endif - -- return det_i; -+ return (gf2) det_i; - } - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantnv_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantnv_gf2.c deleted file mode 100644 index 00c7462..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_determinantnv_gf2.c +++ /dev/null @@ -1,66 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/determinantnv_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/determinantnv_gf2.c -@@ -26,7 +26,7 @@ - - #define ADDROW(LOOPK) \ - /* pivot */\ -- pivot=-(((*S_cpj)>>ir)&1);\ -+ pivot=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK; - - -@@ -44,7 +44,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=1;\ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)});\ - \ - /* Here, the pivot is 1 if S is invertible */\ -@@ -158,7 +163,7 @@ - algorithm. */\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)});\ - \ - /* Next row */\ -@@ -214,7 +219,7 @@ - det_i&=(*S_cpi)>>ir; - #endif - -- return det_i; -+ return (gf2) det_i; - } - - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_div_gf2nx.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_div_gf2nx.c deleted file mode 100644 index e52f791..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_div_gf2nx.c +++ /dev/null @@ -1,62 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/div_gf2nx.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/div_gf2nx.c -@@ -120,8 +120,11 @@ - } - - leading_coef=A+da*NB_WORD_GFqn; -- i=(db<<1)-da; -- i=MAXI(0,(int)i); -+ i=0; -+ if(2*db > da) -+ { -+ i = 2*db - da; -+ } - res=A+(da-db+i)*NB_WORD_GFqn; - - for(;i>=1;\ - }\ - }\ -- for(;jr>=1;\ - } - -- -- - #if (LEN_UNROLLED_64==1) - #define LOOPJR_UNROLLED_64 LOOPJR_NOCST_64 - #else - - #define LOOPJR_UNROLLED_64(START,NB_IT) \ -- for(jr=START;jr<(NB_IT-LEN_UNROLLED_64+1);jr+=LEN_UNROLLED_64)\ -+ for(jr=(START);jr<((NB_IT)-LEN_UNROLLED_64+1);jr+=LEN_UNROLLED_64)\ - {\ - for(h=0;h>=1;\ - }\ - }\ -- for(;jr>=1;\ -@@ -172,7 +150,7 @@ - #endif - - /* Constant cst_pk */ -- COPY_64bits_variables(c,(const UINT*)pk); -+ LOAD_UINT_ARRAY(c, pk, NB_WORD_EQ) - pk+=NB_BYTES_EQ; - - /* for each row of the quadratic matrix of pk, excepted the last block */ -@@ -186,7 +164,7 @@ - /* for each column of the quadratic matrix of pk */ - - /* xj=xi=1 */ -- XOR_ELEM(c,(const UINT*)pk); -+ XOR_ELEM(c,pk); - pk+=NB_BYTES_EQ; - - xj=xi>>1; -@@ -222,7 +200,7 @@ - /* for each column of the quadratic matrix of pk */ - - /* xj=xi=1 */ -- XOR_ELEM(c,(const UINT*)pk); -+ XOR_ELEM(c,pk); - pk+=NB_BYTES_EQ; - - xj=xi>>1; - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c deleted file mode 100644 index d70638a..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c +++ /dev/null @@ -1,68 +0,0 @@ ---- 
upstream/Optimized_Implementation/sign/GeMSS128/src/evalMQSv_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/evalMQSv_gf2.c -@@ -109,18 +109,6 @@ - /**************************************************************************/ - - --#if NB_VARr -- #define REM_X \ -- xi=m[i];\ -- for(j=0;j>j)&UINT_1);\ -- } --#else -- #define REM_X --#endif -- -- - - - /* Input: -@@ -139,16 +127,18 @@ - unsigned int i,j,k; - - /* Compute one time all -((xi>>1)&UINT_1) */ -+ i=0; - k=0; -- for(i=0;i>j)&UINT_1); -+ x[k]=(1+~((xi>>j)&UINT_1)); -+ ++k; - } -+ ++i; - } -- REM_X; - - /* Constant cst_pk */ - COPY_64bits_variables(c,pk); -@@ -187,16 +177,18 @@ - unsigned int i,j,k; - - /* Compute one time all -((xi>>1)&UINT_1) */ -+ i=0; - k=0; -- for(i=0;i>j)&UINT_1); -+ x[k]=(1+~((xi>>j)&UINT_1)); -+ ++k; - } -+ ++i; - } -- REM_X; - - /* Constant cst_pk */ - COPY_64bits_variables(c,pk); - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c deleted file mode 100644 index ef785f0..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c +++ /dev/null @@ -1,74 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/findRootsSplit_gf2nx.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/findRootsSplit_gf2nx.c -@@ -41,7 +41,15 @@ - i=1; - /* (2^i) < deg does not require modular reduction by f */ - #if(HFEn<33) -- const unsigned int min=(deg<(1U< HFE_odd_degree) j=1; -+ #endif -+ /* Here a_vec = row i */ - a_vecj=alpha_vec+j*(HFEn-1)*NB_WORD_GFqn; - for(;j HFE_odd_degree) j=1; - #endif -+ /* Here a_vec = row i */ -+ a_vecj=alpha_vec+j*(HFEn-1)*NB_WORD_GFqn; -+ #if HFEDegJ -+ for(; j=HFEDegI)) -- for(j=0;j=HFEDegI)) -+ for(j=0;j2) - /* Vector with linear terms of F */ -- UINT* F_lin; -- -- F_lin=(UINT*)calloc((HFEDegI+1)*(HFEv+1)*NB_WORD_GFqn,sizeof(UINT)); -- VERIFY_ALLOC_RET(F_lin); -+ UINT F_lin[(HFEDegI+1)*(HFEv+1)*NB_WORD_GFqn]={0}; - - F_cp=F+MQv_GFqn_SIZE; - -@@ -828,13 +799,10 @@ - #if PRECOMPUTED_CBASIS - static cst_vec_gf2n alpha_vec=cbasis_v; - #else -- vec_gf2n alpha_vec; -- - /* Matrix in GF(2^n) with HFEn-1 rows and (HFEDegI+1) columns */ - /* calloc is useful when it initialises a multiple precision element - to 1 */ -- alpha_vec=(UINT*)calloc(SIZE_ROW*(HFEn-1)*NB_WORD_GFqn,sizeof(UINT)); -- VERIFY_ALLOC_RET(alpha_vec); -+ UINT alpha_vec[SIZE_ROW*(HFEn-1)*NB_WORD_GFqn]={0}; - - genCanonicalBasisVertical_gf2n(alpha_vec); - #endif -@@ -848,10 +816,7 @@ - - /* Precompute an other table */ - #if(HFEDeg>2) -- UINT* buf; -- buf=(UINT*)calloc(HFEDegI*HFEn*NB_WORD_GFqn,sizeof(UINT)); -- VERIFY_ALLOC_RET(buf); -- -+ UINT buf[HFEDegI*HFEn*NB_WORD_GFqn]={0}; - special_buffer(buf,F,alpha_vec); - #endif - -@@ -1045,12 +1010,6 @@ - /* k becomes k+1 */ - a_vec_k+=SIZE_ROW*NB_WORD_GFqn; - } -- free(buf); -- free(F_lin); -- #endif -- -- #if (!PRECOMPUTED_CBASIS) -- free(alpha_vec); - #endif - - /* MQS with v vinegar variables */ - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c deleted file mode 100644 index c5078f8..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_initMatrixId_gf2.c +++ /dev/null @@ -1,140 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/initMatrixId_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/initMatrixId_gf2.c -@@ -1,59 
+1,93 @@ - #include "initMatrixId_gf2.h" - -- --#define INITMATID_GF2(NAME,MATRIX_SIZE,nq,nr,NB_WORD_n) \ --void NAME(Mn_gf2 S)\ --{\ -- UINT bit_ir;\ -- unsigned int iq,ir;\ --\ -- /* Initialize to 0 */\ -- for(iq=0;iq>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK;\ - LOOPKINV; - -@@ -49,7 +49,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=UINT_1;\ -- for(ir=0;ir>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask);\ - }\ - \ -@@ -133,7 +133,6 @@ - LOOPIR(HFEnr-1,SWAP_WORD(*S_cpj,*S_cpi),*S_cpj^=*S_cpi&mask); - - /* Step 2 */ -- bit_ir=UINT_1<<(HFEnr-1); - LOOPIR_DOWN_TO_UP(HFEnr); - #else - /* To begin to last row */ -@@ -178,16 +177,16 @@ - } - - #define LOOPIR_CST(NB_IT) \ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)\ - xorLoadMask1_gf2n(Sinv_cpi,Sinv_cpj,mask);\ - });\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)\ - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask);\ - });\ -@@ -314,7 +313,7 @@ - Sinv_cpj+=NB_WORD_GFqn; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -343,7 +342,7 @@ - Sinv_cpj+=NB_WORD_GFqn; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -381,7 +380,7 @@ - for(j=0;j>6])>>(j&63U))&1U); -+ mask=(1+~(((U[j>>6])>>(j&63U))&1U)); - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask); - - /* next row */ - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c deleted file mode 100644 index 5fe3bf2..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c +++ /dev/null @@ -1,94 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/invMatrixnv_gf2.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/invMatrixnv_gf2.c -@@ -28,7 +28,7 @@ - - #define ADDROW(LOOPK,LOOPKINV) \ - /* pivot */\ -- mask=-(((*S_cpj)>>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK;\ - LOOPKINV; - -@@ -49,7 +49,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=UINT_1;\ -- for(ir=0;ir>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask);\ - }\ - \ -@@ -133,7 +133,6 @@ - LOOPIR(HFEnvr-1,SWAP_WORD(*S_cpj,*S_cpi),*S_cpj^=*S_cpi&mask); - - /* Step 2 */ -- bit_ir=UINT_1<<(HFEnvr-1); - LOOPIR_DOWN_TO_UP(HFEnvr); - #else - /* To begin to last row */ -@@ -178,16 +177,16 @@ - } - - #define LOOPIR_CST(NB_IT) \ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)\ - xorLoadMask1_gf2nv(Sinv_cpi,Sinv_cpj,mask);\ - });\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)\ - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask);\ - });\ -@@ -315,7 +314,7 @@ - Sinv_cpj+=NB_WORD_GF2nv; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -344,7 +343,7 @@ - Sinv_cpj+=NB_WORD_GF2nv; - L_cpj+=(j>>6)+1; 
- -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -382,7 +381,7 @@ - for(j=0;j>6])>>(j&63U))&1U); -+ mask=(1+~(((U[j>>6])>>(j&63U))&1U)); - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask); - - /* next row */ - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_inv_gf2n.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_inv_gf2n.c deleted file mode 100644 index bac490f..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_inv_gf2n.c +++ /dev/null @@ -1,28 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/inv_gf2n.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/inv_gf2n.c -@@ -3,7 +3,6 @@ - #include "add_gf2n.h" - #include "mul_gf2n.h" - #include "sqr_gf2n.h" --#include "rem_gf2x.h" - #include "rem_gf2n.h" - #include "ITMIA_addchains.h" - -@@ -222,7 +221,7 @@ - /* res=res * res^(2^((HFEn-1)>>i)) */\ - /* res^(2^((HFEn-1)>>i)) */\ - PINIT_GF2N_(r_128_copy,r2_128_copy,r3_128_copy,r4_128_copy,r5_128_copy,r);\ -- for(j=0;j>(i<<3); -+ pk[i]=(uint8_t)(pk_last[NB_WORD_GF2m-1]>>(i<<3)); - } - #endif - } - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2n.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2n.c deleted file mode 100644 index be95968..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_mul_gf2n.c +++ /dev/null @@ -1,213 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/mul_gf2n.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/mul_gf2n.c -@@ -28,7 +28,7 @@ - for(i=0;i>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A< -+#include - #include - - #include "sign_keypairHFE.h" -@@ -15,11 +14,7 @@ - * @param[out] sk The secret-key. - * @return Zero if the function runs correctly, non-zero else. - */ --#if SUPERCOP --int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) --#else --int PREFIX_NAME(crypto_sign_keypair)(unsigned char *pk, unsigned char *sk) --#endif -+int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) - { - return sign_keypairHFE(pk,(UINT*)sk); - } -@@ -34,18 +29,14 @@ - * @param[in] sk The secret-key. - * @return Zero if the function runs correctly, non-zero else. - */ --#if SUPERCOP - int crypto_sign( --#else --int PREFIX_NAME(crypto_sign)( --#endif -- unsigned char *sm, unsigned long long *smlen, -- const unsigned char *m, unsigned long long mlen, -- const unsigned char *sk) -+ uint8_t *sm, size_t *smlen, -+ const uint8_t *m, size_t mlen, -+ const uint8_t *sk) - { - *smlen=mlen+CRYPTO_BYTES; -- memcpy(sm+CRYPTO_BYTES,m,(size_t)mlen); -- return signHFE(sm,m,(size_t)mlen,(UINT*)sk); -+ memmove(sm+CRYPTO_BYTES,m,mlen); -+ return signHFE(sm,m,mlen,(UINT*)sk); - } - - -@@ -58,21 +49,35 @@ - * @param[in] pk The public-key. - * @return Zero if the function runs correctly, non-zero else. 
- */ --#if SUPERCOP - int crypto_sign_open( --#else --int PREFIX_NAME(crypto_sign_open)( --#endif -- unsigned char *m, unsigned long long *mlen, -- const unsigned char *sm, unsigned long long smlen, -- const unsigned char *pk) -+ uint8_t *m, size_t *mlen, -+ const uint8_t *sm, size_t smlen, -+ const uint8_t *pk) - { - int result; - *mlen=smlen-CRYPTO_BYTES; - result=sign_openHFE(sm+CRYPTO_BYTES,(size_t)(*mlen),sm,pk); - /* For compatibily with SUPERCOP, the memcpy is done only after sign_open */ -- memcpy(m,sm+CRYPTO_BYTES,(size_t)(*mlen)); -+ memmove(m,sm+CRYPTO_BYTES,(size_t)(*mlen)); - return result; - } - - -+int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk) -+{ -+ int result; -+ *siglen = CRYPTO_BYTES; -+ result = signHFE(sig,m,mlen,(UINT*)sk); -+ return result; -+} -+ -+int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) -+{ -+ int result; -+ if (siglen == CRYPTO_BYTES) { -+ result = sign_openHFE(m,mlen,sig,pk); -+ } else { -+ result = -1; -+ } -+ return result; -+} - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_signHFE.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_signHFE.c deleted file mode 100644 index 6b3611c..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_signHFE.c +++ /dev/null @@ -1,220 +0,0 @@ ---- upstream/Optimized_Implementation/sign/GeMSS128/src/signHFE.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/signHFE.c -@@ -83,8 +83,10 @@ - for(k1=1;k1sk_uncomp,UINT*,NB_UINT_HFEVPOLY -- +(LTRIANGULAR_NV_SIZE<<1) -- +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t -- +MATRIXnv_SIZE+MATRIXn_SIZE,sizeof(UINT)); - expandSeed((uint8_t*)(sk_HFE->sk_uncomp),(NB_UINT_HFEVPOLY - +(LTRIANGULAR_NV_SIZE<<1) - +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t)<<3, -@@ -313,7 +311,8 @@ - { - #if HFEv - cst_sparse_monic_gf2nx F_HFEv; -- UINT* F; -+ sparse_monic_gf2nx F; -+ sparse_monic_gf2nx F_cp; - unsigned int i; - #endif - -@@ -333,13 +332,10 @@ - #endif - - #if HFEv -+ F=sk_HFE->F_struct.poly; - F_HFEv=sk_HFE->F_HFEv; - -- ALIGNED_GFqn_MALLOC(F,UINT*,NB_UINT_HFEPOLY,sizeof(UINT)); -- VERIFY_ALLOC_RET(F); -- - #if (HFEDeg>1) -- UINT *F_cp; - unsigned int j; - - /* X^(2^0) */ -@@ -351,11 +347,11 @@ - { - /* Copy i quadratic terms */ - -+ j=0; - #if ENABLED_REMOVE_ODD_DEGREE -- for(j=(((1U<HFE_odd_degree) ++j; - #endif -+ for(;jHFE_odd_degree) ++j; - #endif -+ for(;jF_struct.poly=F; - #else - sk_HFE->F_struct.poly=sk_HFE->F_HFEv; - #endif -@@ -447,7 +442,7 @@ - #endif - - int nb_root; -- secret_key_HFE sk_HFE; -+ secret_key_HFE sk_HFE={0}; - - #if(HFEv) - UINT* F; -@@ -666,9 +661,6 @@ - if(nb_root<0) - { - /* Error from chooseRootHFE */ -- #if HFEv -- ALIGNED_GFqn_FREE(F); -- #endif - return nb_root; - } - -@@ -677,7 +669,7 @@ - /* Add the v bits to DR */ - #if HFEnr - DR[NB_WORD_GFqn-1]^=V[0]<>(64-HFEnr))^(V[i+1]<>(64-HFEnr); - #endif - #else -- for(i=0;i>(64-HFEnr))^(V[i+1]<>(64-HFEnr); - #endif - #else -- for(i=0;i1)) - uint64_t val; -@@ -578,10 +576,6 @@ - #endif - - -- #if (FORMAT_HYBRID_CPK8&&EVAL_HYBRID_CPK8_UNCOMP&&HFEmr8) -- free(pk_tmp); -- #endif -- - return ret; - } - - diff --git a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sort_gf2n.c b/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sort_gf2n.c deleted file mode 100644 index 7337b7c..0000000 --- a/gemss/patches/Optimized_Implementation_sign_GeMSS128_src_sort_gf2n.c +++ /dev/null @@ -1,37 +0,0 @@ ---- 
upstream/Optimized_Implementation/sign/GeMSS128/src/sort_gf2n.c -+++ upstream-patched/Optimized_Implementation/sign/GeMSS128/src/sort_gf2n.c -@@ -111,7 +111,7 @@ - - /* The power of 2 before l, which is 1<>=1;\ -@@ -22,11 +22,11 @@ - - /* for a block of bits of vec */ - #define LOOPIR_N(NB_IT) \ -- for(ir=0;ir>6;iq>ir;\ -- LOOPIR_START(NB_BITS_UINT);\ -- ir=0;\ -- }\ -- /* the last block */\ -- REM_START(LOOPIR_START);\ -+void PREFIX_NAME(vecMatProductv_64)(vecn_gf2 res, cst_vecn_gf2 vec, cst_Mn_gf2 S) { -+ cst_Mn_gf2 S_cp; -+ UINT bit_ir, vec_ir; -+ unsigned int iq,ir; -+ -+ /* initialization of res to 0 */ -+ set0_gf2n(res); -+ -+ S_cp=S; -+ /* for each bit of vec excepted the last block */ -+ iq = 0; -+ ir = 0; -+ while(NB_BITS_UINT*iq + ir < HFEv) -+ { -+ bit_ir = vec[iq]; -+ for(ir=0;(NB_BITS_UINT*iq + ir < HFEv) && (ir>=1; -+ } -+ ++iq; -+ } -+} -+ -+void PREFIX_NAME(vecMatProductn_64)(vecn_gf2 res, cst_vecn_gf2 vec, cst_Mn_gf2 S) { -+ cst_Mn_gf2 S_cp; -+ UINT bit_ir, vec_ir; -+ unsigned int iq,ir; -+ -+ /* initialization of res to 0 */ -+ set0_gf2n(res); -+ -+ S_cp=S; -+ /* for each bit of vec excepted the last block */ -+ for(iq=0;iq>6;iq>ir; -+ LOOPIR_START_N(NB_BITS_UINT); -+ ir=0; -+ } -+ /* the last block */ -+ REM_START_NV(LOOPIR_START_N); - } - - --VECMATPROD_START(PREFIX_NAME(vecMatProductnvn_start_64),set0_gf2n, -- LOOPIR_START_N,REM_START_NV,HFEnvq) --/* --VECMATPROD_START(PREFIX_NAME(vecMatProductn_start_64),set0_gf2n, -- LOOPIR_START_N,REM_START_N,HFEnq) --*/ - - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_add_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_add_gf2nx.h deleted file mode 100644 index 64621a5..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_add_gf2nx.h +++ /dev/null @@ -1,30 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/add_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/add_gf2nx.h -@@ -14,7 +14,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define add2_gf2nx(res,A,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]^=(A)[i];\ - } -@@ -30,7 +30,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define copy_gf2nx(res,A,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]=(A)[i];\ - } -@@ -45,7 +45,7 @@ - * @remark Constant-time implementation when len is not secret. - */ - #define set0_gf2nx(res,len,i) \ -- for(i=0;i<((len)*NB_WORD_GFqn);++i)\ -+ for((i)=0;(i)<((len)*NB_WORD_GFqn);++(i))\ - {\ - (res)[i]=0;\ - } - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_arch.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_arch.h deleted file mode 100644 index 915c6ad..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_arch.h +++ /dev/null @@ -1,131 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/arch.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/arch.h -@@ -26,18 +26,6 @@ - #define ZERO8 ((uint8_t)0) - #define ONE8 ((uint8_t)1) - --/* 0x... */ --#define PRINT_X64(a) printf("0x%"PRIx64,a); --#define PRINT_X32(a) printf("0x%"PRIx32,a); --#define PRINT_X16(a) printf("0x%"PRIx16,a); --#define PRINT_X8(a) printf("0x%"PRIx8,a); --/* ... 
*/ --#define PRINT_U64(a) printf("%"PRIx64,a); --#define PRINT_U32(a) printf("%"PRIx32,a); --#define PRINT_U16(a) printf("%"PRIx16,a); --#define PRINT_U8(a) printf("%"PRIx8,a); -- -- - /****************** Definition of an UINT ******************/ - - /* XXX For the moment, this parameter cannot be modified. XXX */ -@@ -81,9 +69,6 @@ - /** Print an UINT. */ - #define PRINT_UINT(a) CONCAT(PRINT_X,NB_BITS_UINT)(a); - -- -- -- - /** A reserved variable to do a for loop on a buffer of UINT. */ - #define RESERVED_VARIABLE reserved_variable - -@@ -103,6 +88,35 @@ - } \ - } - -+/** Load a UINT from unsigned char * **/ -+ -+#define LOAD_UINT(a, p) \ -+ (a) = (p)[7]; (a) <<= 8;\ -+ (a) |= (p)[6]; (a) <<= 8;\ -+ (a) |= (p)[5]; (a) <<= 8;\ -+ (a) |= (p)[4]; (a) <<= 8;\ -+ (a) |= (p)[3]; (a) <<= 8;\ -+ (a) |= (p)[2]; (a) <<= 8;\ -+ (a) |= (p)[1]; (a) <<= 8;\ -+ (a) |= (p)[0]; -+ -+#define LOAD_UINT_ARRAY(a, p, N) \ -+ FOR_LOOP(LOAD_UINT((a)[RESERVED_VARIABLE], &(p)[8*RESERVED_VARIABLE]), (N)) -+ -+/** Store a UINT to an unsigned char * **/ -+#define STORE_UINT(p, a) \ -+ (p)[0] = ((a) >> 0x00) & 0xff; \ -+ (p)[1] = ((a) >> 0x08) & 0xff; \ -+ (p)[2] = ((a) >> 0x10) & 0xff; \ -+ (p)[3] = ((a) >> 0x18) & 0xff; \ -+ (p)[4] = ((a) >> 0x20) & 0xff; \ -+ (p)[5] = ((a) >> 0x28) & 0xff; \ -+ (p)[6] = ((a) >> 0x30) & 0xff; \ -+ (p)[7] = ((a) >> 0x38) & 0xff; -+ -+#define STORE_UINT_ARRAY(a, p, N) \ -+ FOR_LOOP(STORE_UINT(&(p)[8*RESERVED_VARIABLE], (a)[RESERVED_VARIABLE]), (N)) -+ - - - /****************** C++ compatibility ******************/ -@@ -226,7 +240,7 @@ - /* Only when set to 1, the installation of gf2x is required */ - /* Be careful because this library can be in variable-time and so vulnerable - to the timing attacks. */ --#define ENABLED_GF2X 1 -+#define ENABLED_GF2X 0 - - - /********************* Allocation *********************/ -@@ -237,7 +251,7 @@ - /** Verify if the allocation by malloc or calloc succeeds. - * Exit in the failure case. */ - #define VERIFY_ALLOC(p) \ -- if(!p) \ -+ if(!(p)) \ - {\ - exit(ERROR_ALLOC);\ - } -@@ -245,7 +259,7 @@ - /** Verify if the allocation by malloc or calloc succeeds. - * Return ERROR_ALLOC in the failure case. */ - #define VERIFY_ALLOC_RET(p) \ -- if(!p) \ -+ if(!(p)) \ - {\ - return(ERROR_ALLOC);\ - } -@@ -272,7 +286,7 @@ - p=(type)_mm_malloc((nmemb)*(size),16); - #else - #define ALIGNED16_MALLOC(p,type,nmemb,size) \ -- if(posix_memalign((void**)(&p),16,(nmemb)*(size)))\ -+ if(posix_memalign((void**)(&(p)),16,(nmemb)*(size)))\ - {\ - exit(1);\ - } -@@ -280,7 +294,7 @@ - - #define ALIGNED16_CALLOC(p,type,nmemb,size) \ - ALIGNED16_MALLOC(p,type,nmemb,size);\ -- memset((void*)p,0,(nmemb)*(size)) -+ memset((void*)(p),0,(nmemb)*(size)) - - - /** Align the data on 32 bytes, useful for avx. 
*/ -@@ -291,7 +305,7 @@ - p=(type)_mm_malloc((nmemb)*(size),32); - #else - #define ALIGNED32_MALLOC(p,type,nmemb,size) \ -- if(posix_memalign((void**)(&p),32,(nmemb)*(size)))\ -+ if(posix_memalign((void**)(&(p)),32,(nmemb)*(size)))\ - {\ - exit(1);\ - } -@@ -299,7 +313,7 @@ - - #define ALIGNED32_CALLOC(p,type,nmemb,size) \ - ALIGNED32_MALLOC(p,type,nmemb,size);\ -- memset((void*)p,0,(nmemb)*(size)); -+ memset((void*)(p),0,(nmemb)*(size)); - - - #define NO_ALIGNED_MALLOC(p,type,nmemb,size) p=(type)malloc((nmemb)*(size)); - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_bit.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_bit.h deleted file mode 100644 index ad90575..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_bit.h +++ /dev/null @@ -1,322 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/bit.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/bit.h -@@ -8,18 +8,8 @@ - - /* Tools for the bits manipulation */ - -- --/* (2^k) - 1, k<64, and -1 for k=0 */ --#define mask64(k) ((k)?(ONE64<<(k))-ONE64:MONE64) -- --/* (2^k) - 1, k<32, and -1 for k=0 */ --#define mask32(k) ((k)?(ONE32<<(k))-ONE32:MONE32) -- --#define maskUINT(k) ((k)?(UINT_1<<(k))-UINT_1:UINT_M1) -- -- - /** The i-th bit of the UINT val. */ --#define ITHBIT(val,i) ((val>>i)&UINT_1) -+#define ITHBIT(val,i) (((val)>>(i))&UINT_1) - - - /** Compute the MSB position of one UINT. */ -@@ -27,14 +17,14 @@ - Output: res the MSB position of U. If U is zero, res=0 - */ - #define MSB_SP(res,U,j) \ -- res=0;\ -+ (res)=0;\ - /* Search the MSB position of one word */\ -- for(j=NB_BITS_UINT>>1;j!=0;j>>=1) \ -+ for((j)=NB_BITS_UINT>>1;(j)!=0;(j)>>=1) \ - {\ -- if((U)>>(res^j))\ -+ if((U)>>((res)^(j)))\ - {\ - /* To remember the choice of the high part */\ -- res^=j;\ -+ (res)^=(j);\ - }\ - } - -@@ -43,15 +33,15 @@ - Output: res the MSB position of U. 
If U is zero, res=0 - */ - #define MSB_MP(res,U,i,j,nb_word) \ -- i=nb_word-1;\ -+ (i)=(nb_word)-1;\ - /* Search the first word different from zero */\ -- while(i&&(!U[i])) \ -+ while((i)&&(!(U)[i])) \ - {\ -- --i;\ -+ --(i);\ - }\ - /* Search the MSB of one word */\ -- MSB_SP(res,U[i],j);\ -- res^=i<>=32U;\ -- n+=((uint64_t)0xFFFFFFFF);\ -- n>>=32U; -+ (n)|=(n) << 32U;\ -+ (n)>>=32U;\ -+ (n)+=((uint64_t)0xFFFFFFFF);\ -+ (n)>>=32U; - - - /* 5 logical operations */ - #define NORBITS64_SHORT(n) \ -- n|=n << 32U;\ -- n>>=32U;\ -- --n;\ -- n>>=63U; -+ (n)|=(n) << 32U;\ -+ (n)>>=32U;\ -+ --(n);\ -+ (n)>>=63U; - - - /* The third fastest method, based on the variable-precision SWAR algorithm */ -@@ -99,95 +89,95 @@ - - /* 12 logical operations */ - #define COUNTBITS64_SWAR(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -- n=(((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)=((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) >> 56U; - - - /* 13 logical operations */ - #define ORBITS64_SWAR(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ - /* We change ((n/(2^56))+63)/64 in (n+63*(2^56))/(2^62) */\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) + ((uint64_t)0x3F00000000000000)) >> 62U; - - - /* 13 logical operations */ - #define NORBITS64_SWAR(n) \ -- --n;\ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ --(n);\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101))) >> 62U; - - - /* 13 logical operations */ - #define NORBITS64_SWAR2(n) \ -- n-=(n >> 1U) & ((uint64_t)0x5555555555555555);\ -- n=(n & ((uint64_t)0x3333333333333333)) \ -- + ((n >> 2U) & ((uint64_t)0x3333333333333333));\ -+ (n)-=((n) >> 1U) & ((uint64_t)0x5555555555555555);\ -+ (n)=((n) & ((uint64_t)0x3333333333333333)) \ -+ + (((n) >> 2U) & ((uint64_t)0x3333333333333333));\ - /* We remark that ({0,1,...,63}+255)<<56 ={255<<56,0<<56,...,62<<56}*2^56*/\ - /* So, the 63-th bit is 1 iff Hamming_weight(n)=0 */\ -- n=((((n + (n >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ -+ (n)=(((((n) + ((n) >> 4U)) & ((uint64_t)0xF0F0F0F0F0F0F0F)) \ - * ((uint64_t)0x101010101010101)) + ((uint64_t)0xFF00000000000000)) >> 63U; - - - /* Slow, 13 logical operations */ - #define XORBITS64_SWAR2(n) \ - COUNTBITS64_SWAR(n); \ -- n&=ONE64; -+ (n)&=ONE64; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS64_SWAR(n) \ - /* +*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+***+***+***+***+***+***+***+***+***+***+***+***+*** */\ -- n^=(n << 
2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000+000+000+000+000+000+000+000+000+000+000+000+000 */\ - /* Then, we sum the 16 bits and store them in the bits 63 to 67. */\ - /* So the 63-th bit in the bit of parity. */\ -- n=((n & ((uint64_t)0x8888888888888888)) *((uint64_t)0x1111111111111111))\ -+ (n)=(((n) & ((uint64_t)0x8888888888888888)) *((uint64_t)0x1111111111111111))\ - >> 63; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS32_SWAR(n) \ - /* +*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+***+***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000+000+000+000+000 */\ - /* Then, we sum the 8 bits and store them in the bits 31 to 34. */\ - /* So the 31-th bit in the bit of parity. */\ -- n=((n & ((uint32_t)0x88888888)) *((uint32_t)0x11111111)) >> 31; -+ (n)=(((n) & ((uint32_t)0x88888888)) *((uint32_t)0x11111111)) >> 31; - - - /* A special algorithm with 7 logical operations */ - #define XORBITS16_SWAR(n) \ - /* +*+*+*+*+*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +***+***+***+*** */\ -- n^=(n << 2);\ -+ (n)^=((n) << 2);\ - /* +000+000+000+000 */\ - /* Then, we sum the 4 bits and store them in the bits 15 to 17. */\ - /* So the 15-th bit in the bit of parity. */\ -- n=((n & ((uint16_t)0x8888)) *((uint16_t)0x1111)) >> 15; -+ (n)=(((n) & ((uint16_t)0x8888)) *((uint16_t)0x1111)) >> 15; - - - /* A special algorithm with 5 logical operations */ - #define XORBITS8_SWAR(n) \ - /* +*+*+*+* */\ -- n^=(n << 1);\ -+ (n)^=((n) << 1);\ - /* +0+0+0+0 */\ - /* Then, we sum the 4 bits and store them in the bits 7 to 9. */\ - /* So the 15-th bit in the bit of parity. */\ -- n=((n & ((uint8_t)0xAA)) *((uint8_t)0x55)) >> 7; -+ (n)=(((n) & ((uint8_t)0xAA)) *((uint8_t)0x55)) >> 7; - - - /* The slowest method, based on the "dichotomic xor/or" */ -@@ -196,55 +186,55 @@ - /* A generic method using the dichotomic principle */ - #define ORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n|=n>>RESERVED_VARIABLE)\ -- n&=UINT_1; -+ (n)|=(n)>>RESERVED_VARIABLE)\ -+ (n)&=UINT_1; - - - #define NORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n|=n>>RESERVED_VARIABLE)\ -- n=~n;\ -- n&=UINT_1; -+ (n)|=(n)>>RESERVED_VARIABLE)\ -+ (n)=~(n);\ -+ (n)&=UINT_1; - - - #define XORBITS(n,SIZE) \ - FOR_LOOP_COMPLETE(SIZE,RESERVED_VARIABLE>0,RESERVED_VARIABLE>>1U,\ -- n^=n>>RESERVED_VARIABLE)\ -- n&=UINT_1; -+ (n)^=(n)>>RESERVED_VARIABLE)\ -+ (n)&=UINT_1; - - - /* 13 logical operations */ - #define ORBITS64_DICHO(n) \ -- n|=n >> 32U;\ -- n|=n >> 16U;\ -- n|=n >> 8U;\ -- n|=n >> 4U;\ -- n|=n >> 2U;\ -- n|=n >> 1U;\ -- n&=ONE64; -+ (n)|=(n) >> 32U;\ -+ (n)|=(n) >> 16U;\ -+ (n)|=(n) >> 8U;\ -+ (n)|=(n) >> 4U;\ -+ (n)|=(n) >> 2U;\ -+ (n)|=(n) >> 1U;\ -+ (n)&=ONE64; - - - /* 14 logical operations */ - #define NORBITS64_DICHO(n) \ -- n|=n >> 32U;\ -- n|=n >> 16U;\ -- n|=n >> 8U;\ -- n|=n >> 4U;\ -- n|=n >> 2U;\ -- n|=n >> 1U;\ -- n=~n;\ -- n&=ONE64; -+ (n)|=(n) >> 32U;\ -+ (n)|=(n) >> 16U;\ -+ (n)|=(n) >> 8U;\ -+ (n)|=(n) >> 4U;\ -+ (n)|=(n) >> 2U;\ -+ (n)|=(n) >> 1U;\ -+ (n)=~(n);\ -+ (n)&=ONE64; - - - /* 13 logical operations */ - #define XORBITS64_DICHO(n) \ -- n^=n >> 32U;\ -- n^=n >> 16U;\ -- n^=n >> 8U;\ -- n^=n >> 4U;\ -- n^=n >> 2U;\ -- n^=n >> 1U;\ -- n&=ONE64; -+ (n)^=(n) >> 32U;\ -+ (n)^=(n) >> 16U;\ -+ (n)^=(n) >> 8U;\ -+ (n)^=(n) >> 4U;\ -+ (n)^=(n) >> 2U;\ -+ (n)^=(n) >> 1U;\ -+ (n)&=ONE64; - - - /* Choose the best method */ -@@ -254,17 
+244,10 @@ - #define XORBITS64 XORBITS64_DICHO - - --#if (NB_BITS_UINT==64U) -- #define COUNTBITS_UINT CONCAT(COUNTBITS,NB_BITS_UINT) -- #define ORBITS_UINT CONCAT( ORBITS,NB_BITS_UINT) -- #define NORBITS_UINT CONCAT( NORBITS,NB_BITS_UINT) -- #define XORBITS_UINT CONCAT( XORBITS,NB_BITS_UINT) --#else -- #define COUNTBITS_UINT COUNTBITS64_SWAR -- #define ORBITS_UINT(n) ORBITS(n,NB_BITS_UINT) -- #define NORBITS_UINT(n) NORBITS(n,NB_BITS_UINT) -- #define XORBITS_UINT(n) XORBITS(n,NB_BITS_UINT) --#endif -+#define COUNTBITS_UINT COUNTBITS64 -+#define ORBITS_UINT ORBITS64 -+#define NORBITS_UINT NORBITS64 -+#define XORBITS_UINT XORBITS64 - - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h deleted file mode 100644 index 0fb42b7..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_chooseRootHFE_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/chooseRootHFE_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/chooseRootHFE_gf2nx.h -@@ -30,7 +30,7 @@ - #include "gf2nx.h" - - int PREFIX_NAME(chooseRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - #define chooseRootHFE_gf2nx PREFIX_NAME(chooseRootHFE_gf2nx) - #endif - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_conv_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_conv_gf2nx.h deleted file mode 100644 index 597a4d2..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_conv_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/conv_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/conv_gf2nx.h -@@ -10,7 +10,7 @@ - - - void PREFIX_NAME(convHFEpolynomialSparseToDense_gf2nx)(gf2nx F_dense, -- const complete_sparse_monic_gf2nx F); -+ complete_sparse_monic_gf2nx F); - #define convHFEpolynomialSparseToDense_gf2nx \ - PREFIX_NAME(convHFEpolynomialSparseToDense_gf2nx) - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_div_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_div_gf2nx.h deleted file mode 100644 index e72442e..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_div_gf2nx.h +++ /dev/null @@ -1,41 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/div_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/div_gf2nx.h -@@ -24,13 +24,13 @@ - - - unsigned int PREFIX_NAME(div_r_HFE_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(div_r_HFE_cstdeg_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(div_r_HFE_cst_gf2nx)(gf2nx poly, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - #define div_r_HFE_gf2nx PREFIX_NAME(div_r_HFE_gf2nx) - #define div_r_HFE_cstdeg_gf2nx PREFIX_NAME(div_r_HFE_cstdeg_gf2nx) -@@ -39,16 +39,16 @@ - - #if ENABLED_REMOVE_ODD_DEGREE - void PREFIX_NAME(divsqr_r_HFE_cstdeg_gf2nx)(gf2nx poly, unsigned int dp, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - void PREFIX_NAME(divsqr_r_HFE_cst_gf2nx)(gf2nx poly, -- 
const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n cst); - #define divsqr_r_HFE_cstdeg_gf2nx PREFIX_NAME(divsqr_r_HFE_cstdeg_gf2nx) - #define divsqr_r_HFE_cst_gf2nx PREFIX_NAME(divsqr_r_HFE_cst_gf2nx) - #else -- #define divsqr_r_HFE_cstdeg_gf2nx PREFIX_NAME(div_r_HFE_cstdeg_gf2nx) -- #define divsqr_r_HFE_cst_gf2nx PREFIX_NAME(div_r_HFE_cst_gf2nx) -+ #define divsqr_r_HFE_cstdeg_gf2nx div_r_HFE_cstdeg_gf2nx -+ #define divsqr_r_HFE_cst_gf2nx div_r_HFE_cst_gf2nx - #endif - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_dotProduct_gf2.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_dotProduct_gf2.h deleted file mode 100644 index 5fd5501..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_dotProduct_gf2.h +++ /dev/null @@ -1,77 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/dotProduct_gf2.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/dotProduct_gf2.h -@@ -10,50 +10,50 @@ - - /* Dot product of vector of bits */ - #define DOTPRODUCT(res,a,b,SIZE) \ -- res=(a)[0]&(b)[0];\ -+ (res)=(a)[0]&(b)[0];\ - FOR_LOOP_COMPLETE(1,RESERVED_VARIABLE<(SIZE),++RESERVED_VARIABLE,\ -- res^=(a)[RESERVED_VARIABLE]&(b)[RESERVED_VARIABLE])\ -+ (res)^=(a)[RESERVED_VARIABLE]&(b)[RESERVED_VARIABLE])\ - XORBITS_UINT(res); - - - /* Inlined version */ - #define DOTPRODUCT1(res,a,b) \ -- res=(a)[0]&(b)[0];\ -+ (res)=(a)[0]&(b)[0];\ - XORBITS_UINT(res); - - #define DOTPRODUCT2(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ - XORBITS_UINT(res); - - #define DOTPRODUCT3(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ - XORBITS_UINT(res); - - #define DOTPRODUCT4(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ - XORBITS_UINT(res); - - #define DOTPRODUCT5(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -- res^=(a)[4]&(b)[4];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ -+ (res)^=(a)[4]&(b)[4];\ - XORBITS_UINT(res); - - #define DOTPRODUCT6(res,a,b) \ -- res=(a)[0]&(b)[0];\ -- res^=(a)[1]&(b)[1];\ -- res^=(a)[2]&(b)[2];\ -- res^=(a)[3]&(b)[3];\ -- res^=(a)[4]&(b)[4];\ -- res^=(a)[5]&(b)[5];\ -+ (res)=(a)[0]&(b)[0];\ -+ (res)^=(a)[1]&(b)[1];\ -+ (res)^=(a)[2]&(b)[2];\ -+ (res)^=(a)[3]&(b)[3];\ -+ (res)^=(a)[4]&(b)[4];\ -+ (res)^=(a)[5]&(b)[5];\ - XORBITS_UINT(res); - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h deleted file mode 100644 index f9cd4d3..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_findRoots_gf2nx.h +++ /dev/null @@ -1,22 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/findRoots_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/findRoots_gf2nx.h -@@ -19,14 +19,14 @@ - convHFEpolynomialSparseToDense_gf2nx(poly2,F);\ - /* Initialize to F-U */\ - add2_gf2n(poly2,U);\ -- l=gcd_gf2nx(&i,poly2,d2,poly,l); -+ (l)=gcd_gf2nx(&(i),poly2,d2,poly,l); - - --int PREFIX_NAME(findRootsHFE_gf2nx)(vec_gf2n* roots, -- const 
complete_sparse_monic_gf2nx F, -+int PREFIX_NAME(findRootsHFE_gf2nx)(vec_gf2n roots, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - int PREFIX_NAME(findUniqRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U); - #define findRootsHFE_gf2nx PREFIX_NAME(findRootsHFE_gf2nx) - #define findUniqRootHFE_gf2nx PREFIX_NAME(findUniqRootHFE_gf2nx) - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h deleted file mode 100644 index 7974101..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_frobeniusMap_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/frobeniusMap_gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/frobeniusMap_gf2nx.h -@@ -9,7 +9,7 @@ - #include "gf2nx.h" - - --unsigned int PREFIX_NAME(frobeniusMap_HFE_gf2nx)(gf2nx Xqn, const -+unsigned int PREFIX_NAME(frobeniusMap_HFE_gf2nx)(gf2nx Xqn, - complete_sparse_monic_gf2nx F, cst_gf2n U); - #define frobeniusMap_HFE_gf2nx PREFIX_NAME(frobeniusMap_HFE_gf2nx) - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_gf2nx.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_gf2nx.h deleted file mode 100644 index 34dd608..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_gf2nx.h +++ /dev/null @@ -1,12 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/gf2nx.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/gf2nx.h -@@ -119,7 +119,7 @@ - - /* A structure with a special list to find the exponents of the monomials */ - typedef struct { -- cst_sparse_monic_gf2nx poly; -+ UINT poly[NB_UINT_HFEPOLY]; - /* List of the successive differences of the exponents of the monomials of - poly multiplied by NB_WORD_GFqn */ - unsigned int L[NB_COEFS_HFEPOLY]; - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_hash.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_hash.h deleted file mode 100644 index 57635c2..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_hash.h +++ /dev/null @@ -1,346 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/hash.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/hash.h -@@ -1,311 +1,40 @@ - #ifndef _HASH_H - #define _HASH_H - --#include "arch.h" - #include "choice_crypto.h" --#include "parameters_HFE.h" --#include "predicate.h" --#include "init.h" -- -- --/******************************************************************/ --/****************** Choice of the hash functions ******************/ --/******************************************************************/ -- -- --/* Choice of the hash function */ --/* The user is allowed to switch between SHA2 and SHA3 */ --#if (defined(QUARTZ)||defined(QUARTZ_V1)) -- #define CHOICE_HASH_SHA1 --#elif 0 -- #define CHOICE_HASH_SHA2 --#else -- #define CHOICE_HASH_SHA3 --#endif -- -- --/******************************************************************/ --/******************** Enable the hash functions *******************/ --/******************************************************************/ -- -- --/* Use of third libraries */ --/* The user is allowed to switch between OpenSSL and XKCP */ --/* The user can define several macros, while several SHA3 are not defined -- (if several SHA3 are defined, XKCP has 
priority). */ --#ifdef CHOICE_HASH_SHA1 -- #define ENABLED_SHA1_OPENSSL --#endif --#if defined(CHOICE_HASH_SHA2) -- #define ENABLED_SHA2_OPENSSL --#endif --#if defined(CHOICE_HASH_SHA3) -- /* XKCP is constant-time and faster than OpenSSL */ -- #define ENABLED_SHA3_XKCP -- /* #define ENABLED_SHA3_OPENSSL */ --#endif -- -- --#define ENABLED_SHAKE_XKCP --/* #define ENABLED_TUPLEHASH_XKCP XXX Disabled XXX */ -- -- --/******************************************************************/ --/***************** Include for the hash functions *****************/ --/******************************************************************/ -- -- --/* We minimize the numbers of #include to decrease the dependencies with the -- third libraries. */ --#if (defined(ENABLED_SHA1_OPENSSL)||defined(ENABLED_SHA2_OPENSSL)) -- #include --#endif -- -- --#ifdef ENABLED_SHA2_OPENSSL -- #include "randombytes.h" -- -- #if ENABLED_OPENSSL_FIPS -- #include -- #include -- #endif --#endif -- -- --#ifdef ENABLED_SHA3_OPENSSL -- #include -- #include "prefix_name.h" -- int PREFIX_NAME(sha3_256)(unsigned char *output, const unsigned char *m, -- size_t len); -- int PREFIX_NAME(sha3_384)(unsigned char *output, const unsigned char *m, -- size_t len); -- int PREFIX_NAME(sha3_512)(unsigned char *output, const unsigned char *m, -- size_t len); -- #define sha3_256 PREFIX_NAME(sha3_256) -- #define sha3_384 PREFIX_NAME(sha3_384) -- #define sha3_512 PREFIX_NAME(sha3_512) --#endif -- -- --#if (defined(ENABLED_SHA3_XKCP)||defined(ENABLED_SHAKE_XKCP)) -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --#ifdef ENABLED_SHAKE_XKCP -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --#ifdef ENABLED_TUPLEHASH_XKCP -- BEGIN_EXTERNC -- #include -- END_EXTERNC --#endif -- -- --/******************************************************************/ --/**************** Macro to call the hash functions ****************/ --/******************************************************************/ -- -- --#define SHA1_OPENSSL(output,m,len) SHA1(m,len,output) --#define SHA256_OPENSSL(output,m,len) SHA256(m,len,output) --#define SHA384_OPENSSL(output,m,len) SHA384(m,len,output) --#define SHA512_OPENSSL(output,m,len) SHA512(m,len,output) -- --#define SHA256_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA256_OPENSSL(output,m,len); --#define SHA384_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA384_OPENSSL(output,m,len); --#define SHA512_OPENSSL_FIPS(output,m,len) \ -- if(FIPS_mode()) \ -- {\ -- /* Set to off the FIPS mode */\ -- if(FIPS_mode_set(0)!=1)\ -- {\ -- exit(ERR_get_error());\ -- }\ -- }\ -- SHA512_OPENSSL(output,m,len); -- --/* Format: SHA3_*(output,m,len) */ --#if 0 -- #define SHA3_256_XKCP SHA3_256 -- #define SHA3_384_XKCP SHA3_384 -- #define SHA3_512_XKCP SHA3_512 --#else -- /* SHA3_* is inlined from SimpleFIPS202.c */ -- #define SHA3_256_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(1088, 512, m, len, 0x06, output, 32) -- #define SHA3_384_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(832, 768, m, len, 0x06, output, 48) -- #define SHA3_512_XKCP(output,m,len) \ -- KeccakWidth1600_Sponge(576, 1024, m, len, 0x06, output, 64) --#endif -- --/* Format: SHAKE*(output,outputByteLen,input,inputByteLen) */ --#if 0 -- #define SHAKE128_XKCP SHAKE128 -- #define SHAKE256_XKCP SHAKE256 --#else -- /* SHAKE* is 
inlined from SimpleFIPS202.c */ -- #define SHAKE128_XKCP(output,outputByteLen,m,len) \ -- KeccakWidth1600_Sponge(1344, 256, m, len, 0x1F, output, outputByteLen) -- #define SHAKE256_XKCP(output,outputByteLen,m,len) \ -- KeccakWidth1600_Sponge(1088, 512, m, len, 0x1F, output, outputByteLen) --#endif -- --/* To call with: -- Keccak_HashInstance hashInstance; -- Keccak_HashIUF_SHAKE*_XKCP(&hashInstance,data,databitlen); -- And after a call to Keccak_HashIUF_SHAKE*_XKCP, to use one or several times: -- Keccak_HashSqueeze(&hashInstance,output,outputbitlen); -- XXX Here, length in bits XXX --*/ --#define Keccak_HashIUF_SHAKE128_XKCP(hashInstance,data,databitlen) \ -- Keccak_HashInitialize_SHAKE128(hashInstance);\ -- Keccak_HashUpdate(hashInstance,data,databitlen);\ -- Keccak_HashFinal(hashInstance,0); --#define Keccak_HashIUF_SHAKE256_XKCP(hashInstance,data,databitlen) \ -- Keccak_HashInitialize_SHAKE256(hashInstance);\ -- Keccak_HashUpdate(hashInstance,data,databitlen);\ -- Keccak_HashFinal(hashInstance,0); -- --#define TUPLEHASH128_XKCP TupleHash128 --#define TUPLEHASH256_XKCP TupleHash256 -- -- --/************************************************************************/ --/* Macro to call the hash functions corresponding to the security level */ --/************************************************************************/ -+#include "fips202.h" - -+#define CHOICE_HASH_SHA3 - - /* Choice of the hash function */ - #if (K<=128) -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA256_OPENSSL_FIPS -- #else -- #define SHA2 SHA256_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_256_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_256 -- #endif --#elif (K<=192) -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA384_OPENSSL_FIPS -- #else -- #define SHA2 SHA384_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_384_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_384 -- #endif --#else -- #if ENABLED_OPENSSL_FIPS -- #define SHA2 SHA512_OPENSSL_FIPS -- #else -- #define SHA2 SHA512_OPENSSL -- #endif -- -- #ifdef ENABLED_SHA3_XKCP -- #define SHA3 SHA3_512_XKCP -- #elif defined(ENABLED_SHA3_OPENSSL) -- #define SHA3 sha3_512 -- #endif --#endif -- -- --/* Choice of SHAKE and TupleHash */ --#if (K<=128) -- #define SHAKE SHAKE128_XKCP -- #define Keccak_HashIUF_SHAKE Keccak_HashIUF_SHAKE128_XKCP -- #define TUPLEHASH TUPLEHASH128_XKCP --#else -- #define SHAKE SHAKE256_XKCP -- #define Keccak_HashIUF_SHAKE Keccak_HashIUF_SHAKE256_XKCP -- #define TUPLEHASH TUPLEHASH256_XKCP --#endif -- -- --/******************************************************************/ --/******** Macro to call the chosen hash function of MQsoft ********/ --/******************************************************************/ -- -- --#ifdef CHOICE_HASH_SHA1 -- #define HASH SHA1 --#elif defined(CHOICE_HASH_SHA2) -- #define HASH SHA2 --#else -- #define HASH SHA3 --#endif -- -- --/******************************************************************/ --/************************** Other tools ***************************/ --/******************************************************************/ -- -- --BEGIN_EXTERNC -- /* For KeccakWidth1600_Sponge */ -- #include --END_EXTERNC -- -- --#if (K<=80) -- #define SIZE_DIGEST 20 -- #define SIZE_DIGEST_UINT 3 --#elif (K<=128) -- #define SIZE_DIGEST 32 -- #define SIZE_DIGEST_UINT 4 -- #define SIZE_2_DIGEST 64 -- #define EQUALHASH_NOCST ISEQUAL4_NOCST -- #define COPYHASH COPY4 -+ #define HASH sha3_256 -+ #define SHAKE shake128 -+ #define SIZE_DIGEST 32 
-+ #define SIZE_DIGEST_UINT 4 -+ #define SIZE_2_DIGEST 64 -+ #define EQUALHASH_NOCST ISEQUAL4_NOCST -+ #define COPYHASH COPY4 - #elif (K<=192) -- #define SIZE_DIGEST 48 -- #define SIZE_DIGEST_UINT 6 -- #define SIZE_2_DIGEST 96 -- #define EQUALHASH_NOCST ISEQUAL6_NOCST -- #define COPYHASH COPY6 --#else -- #define SIZE_DIGEST 64 -- #define SIZE_DIGEST_UINT 8 -- #define SIZE_2_DIGEST 128 -- #define EQUALHASH_NOCST ISEQUAL8_NOCST -- #define COPYHASH COPY8 -+ #define HASH sha3_384 -+ #define SHAKE shake256 -+ #define SIZE_DIGEST 48 -+ #define SIZE_DIGEST_UINT 6 -+ #define SIZE_2_DIGEST 96 -+ #define EQUALHASH_NOCST ISEQUAL6_NOCST -+ #define COPYHASH COPY6 -+#else -+ #define HASH sha3_512 -+ #define SHAKE shake256 -+ #define SIZE_DIGEST 64 -+ #define SIZE_DIGEST_UINT 8 -+ #define SIZE_2_DIGEST 128 -+ #define EQUALHASH_NOCST ISEQUAL8_NOCST -+ #define COPYHASH COPY8 - #endif - -- - #define EQUALHASH(a,b) f_ISEQUAL(a,b,SIZE_DIGEST_UINT) - -- --/* XXX Bytes XXX */ - #define expandSeed(output,outputByteLen,seed,seedByteLen) \ - SHAKE(output,outputByteLen,seed,seedByteLen) - -@@ -313,6 +42,4 @@ - #define expandSeedIUF Keccak_HashIUF_SHAKE - #define expandSeedSqueeze Keccak_HashSqueeze - -- - #endif -- - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_init.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_init.h deleted file mode 100644 index 12a62bf..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_init.h +++ /dev/null @@ -1,32 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/init.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/init.h -@@ -116,23 +116,23 @@ - - #define SET1_2(c) \ - SET1_1(c);\ -- SET0_1(c+1); -+ SET0_1((c)+1); - - #define SET1_3(c) \ - SET1_1(c);\ -- SET0_2(c+1); -+ SET0_2((c)+1); - - #define SET1_4(c) \ - SET1_1(c);\ -- SET0_3(c+1); -+ SET0_3((c)+1); - - #define SET1_5(c) \ - SET1_1(c);\ -- SET0_4(c+1); -+ SET0_4((c)+1); - - #define SET1_6(c) \ - SET1_1(c);\ -- SET0_5(c+1); -+ SET0_5((c)+1); - - - #endif - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_macro.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_macro.h deleted file mode 100644 index 3a91bc2..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_macro.h +++ /dev/null @@ -1,33 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/macro.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/macro.h -@@ -7,29 +7,5 @@ - /** This macro permits to concat the names. */ - #define CONCAT(a,b) CONCAT2(a,b) - -- --/** Print a name as a string. */ --#define PRINTF_NAME(name) puts(#name); --#define PRINTF_NAME1(name) PRINTF_NAME(name) --#define PRINTF_NAME2(name) PRINTF_NAME1(name) -- -- --/** Compute Floor(a/b) with a and b positive integers, a can be zero. */ --#define DIV_FLOOR(a,b) ((a)/(b)) --#define DIV_CEIL1(a,b) (((a)/(b))+(((a)%(b))?1:0)) --/* Faster but overflow if (a+b-1) >= 2^x for x=size_of_the_type_in_bits */ --#define DIV_CEIL2(a,b) (((a)+(b)-1)/(b)) --/* Faster but incorrect only when a == 0 and b>1 */ --#define DIV_CEIL3(a,b) ((((a)-1)/(b))+1) --/** Compute Ceiling(a/b) with a and b positive integers, a can be zero. */ --#define DIV_CEIL DIV_CEIL2 -- -- --/** Return the minimum. */ --#define MINI(a,b) (((a)<(b))?(a):(b)) --/** Return the maximum. 
*/ --#define MAXI(a,b) (((a)>(b))?(a):(b)) -- -- - #endif - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_mul_gf2n.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_mul_gf2n.h deleted file mode 100644 index 326f857..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_mul_gf2n.h +++ /dev/null @@ -1,16 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/mul_gf2n.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/mul_gf2n.h -@@ -60,11 +60,7 @@ - - - /* Function mul in GF(2^x), then modular reduction */ --#define MUL_THEN_REM_GF2N void \ -- PREFIX_NAME(mul_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn], \ -- const uint64_t B[NB_WORD_GFqn]) --MUL_THEN_REM_GF2N; -+void PREFIX_NAME(mul_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn], const uint64_t B[NB_WORD_GFqn]); - #define mul_then_rem_gf2n PREFIX_NAME(mul_then_rem_gf2n) - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_parameters_HFE.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_parameters_HFE.h deleted file mode 100644 index e76f169..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_parameters_HFE.h +++ /dev/null @@ -1,13 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/parameters_HFE.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/parameters_HFE.h -@@ -11,9 +11,6 @@ - #define GFq 2U - #define Log2_q 1 - /* For HFE, the previous parameter is necessarily 2. */ -- -- /** This type stores an element of GF(q). */ -- typedef unsigned char gf2; - #endif - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_signHFE.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_signHFE.h deleted file mode 100644 index b2698c6..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_signHFE.h +++ /dev/null @@ -1,23 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/signHFE.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/signHFE.h -@@ -7,6 +7,7 @@ - #include "gf2nx.h" - #include "config_HFE.h" - #include "matrix_gf2.h" -+#include "sizes_HFE.h" - #include - - -@@ -30,7 +31,10 @@ - #endif - - #if ENABLED_SEED_SK -- UINT *sk_uncomp; -+ UINT sk_uncomp[NB_UINT_HFEVPOLY -+ +(LTRIANGULAR_NV_SIZE<<1) -+ +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t -+ +MATRIXnv_SIZE+MATRIXn_SIZE]; - #endif - } secret_key_HFE; - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_sqr_gf2n.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_sqr_gf2n.h deleted file mode 100644 index 4f3525a..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_sqr_gf2n.h +++ /dev/null @@ -1,20 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/sqr_gf2n.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/sqr_gf2n.h -@@ -36,14 +36,8 @@ - - - /* Function sqr in GF(2^x), then modular reduction */ --#define SQR_THEN_REM_GF2N void \ -- PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn]) --#define SQR_NOCST_THEN_REM_GF2N void \ -- PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], \ -- const uint64_t A[NB_WORD_GFqn]) --SQR_THEN_REM_GF2N; --SQR_NOCST_THEN_REM_GF2N; -+void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]); -+void 
PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]); - #define sqr_then_rem_gf2n PREFIX_NAME(sqr_then_rem_gf2n) - #define sqr_nocst_then_rem_gf2n PREFIX_NAME(sqr_nocst_then_rem_gf2n) - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2m.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2m.h deleted file mode 100644 index 154b646..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2m.h +++ /dev/null @@ -1,42 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/tools_gf2m.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/tools_gf2m.h -@@ -25,12 +25,13 @@ - /* The number of word that an element of GF(2^m) needs */ - #if (HFEmr) - #define NB_WORD_GF2m_TMP (HFEmq+1) -+ /* Mask to truncate the last word */ -+ #define MASK_GF2m ((UINT_1<<(HFEmr))-UINT_1) - #else - #define NB_WORD_GF2m_TMP HFEmq -+ #define MASK_GF2m UINT_M1 - #endif - --/* Mask to truncate the last word */ --#define MASK_GF2m maskUINT(HFEmr) - - #define HFEmq8 (HFEm>>3) - #define HFEmr8 (HFEm&7U) -@@ -75,19 +76,18 @@ - - #define isEqual_gf2m(a,b) f_ISEQUAL(a,b,NB_WORD_GF2m) - -+#define set0_gf2m(c) SET0((unsigned char *)(c),8*NB_WORD_GF2m) -+#define xorLoadMask1_gf2m(res,a,b) XORLOADMASK1((unsigned char *)(res),(unsigned char *)(a),b,8*NB_WORD_GF2m) -+ - #if (NB_WORD_GF2m<7) - #define add_gf2m CONCAT(CONCAT_NB_WORD_GF2m_SUP(ADD),_GF2X) - #define add2_gf2m CONCAT(CONCAT_NB_WORD_GF2m_SUP(ADD),_2_GF2X) - #define copy_gf2m CONCAT_NB_WORD_GF2m_SUP(COPY) -- #define set0_gf2m CONCAT_NB_WORD_GF2m_SUP(SET0_) -- #define xorLoadMask1_gf2m CONCAT_NB_WORD_GF2m_SUP(XORLOADMASK1_) - #define dotProduct_gf2_m CONCAT_NB_WORD_GF2m_SUP(DOTPRODUCT) - #else - #define add_gf2m(a,b,c) ADD_GF2X(a,b,c,NB_WORD_GF2m); - #define add2_gf2m(a,b) ADD_2_GF2X(a,b,NB_WORD_GF2m); - #define copy_gf2m(c,a) COPY(c,a,NB_WORD_GF2m) -- #define set0_gf2m(c) SET0(c,NB_WORD_GF2m) -- #define xorLoadMask1_gf2m(res,a,b) XORLOADMASK1(res,a,b,NB_WORD_GF2m) - #define dotProduct_gf2_m(res,a,b) DOTPRODUCT(res,a,b,NB_WORD_GF2m) - #endif - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2n.h b/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2n.h deleted file mode 100644 index bdd55b3..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_include_tools_gf2n.h +++ /dev/null @@ -1,52 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/include/tools_gf2n.h -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/include/tools_gf2n.h -@@ -52,13 +52,13 @@ - /* The number of word that an element of GF(2^n) needs */ - #if (HFEnr) - #define NB_WORD_GFqn_TMP (HFEnq+1) -+ /* Mask for arithmetic in GF(2^n) */ -+ #define MASK_GF2n ((UINT_1<<(HFEnr))-UINT_1) - #else - #define NB_WORD_GFqn_TMP HFEnq -+ #define MASK_GF2n UINT_M1 - #endif - --/* Mask for arithmetic in GF(2^n) */ --#define MASK_GF2n maskUINT(HFEnr) -- - #define HFEnr8 (HFEn&7) - #define MASK8_GF2n ((1U<>3) - #define HFEnvr8 (HFEnv&7) - #define MASK8_GF2nv ((1U<>3)+((HFEvr8)?1:0)) - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c deleted file mode 100644 index 4f2a7ee..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_changeVariablesMQS_gf2.c +++ /dev/null @@ -1,33 +0,0 @@ ---- 
upstream/Reference_Implementation/sign/GeMSS128/src/changeVariablesMQS_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/changeVariablesMQS_gf2.c -@@ -26,16 +26,14 @@ - */ - int PREFIX_NAME(changeVariablesMQS_simd_gf2)(mqsnv_gf2n MQS, cst_GLnv_gf2 S) - { -- UINT tmp[NB_WORD_GFqn]; -- mqsnv_gf2n MQS2, MQS2_cp; -+ UINT tmp[NB_WORD_GFqn]={0}; -+ /* Tmp matrix (n+v)*(n+v) of quadratic terms to compute S*Q */ -+ UINT MQS2[HFEnv*HFEnv*NB_WORD_GFqn]={0}; -+ UINT *MQS2_cp; - cst_mqsnv_gf2n MQS_cpi,MQS_cpj; - cst_GLnv_gf2 S_cpi,S_cpj; - unsigned int i,j; - -- /* Tmp matrix (n+v)*(n+v) of quadratic terms to compute S*Q */ -- MQS2=(UINT*)malloc(HFEnv*HFEnv*NB_WORD_GFqn*sizeof(UINT)); -- VERIFY_ALLOC_RET(MQS2); -- - /* To avoid the constant of MQS */ - MQS+=NB_WORD_GFqn; - -@@ -129,8 +127,6 @@ - S_cpj+=NB_WORD_GF2nv; - } - -- -- free(MQS2); - return 0; - } - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c deleted file mode 100644 index 406710e..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_chooseRootHFE_gf2nx.c +++ /dev/null @@ -1,141 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/chooseRootHFE_gf2nx.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/chooseRootHFE_gf2nx.c -@@ -29,7 +29,7 @@ - * @remark A part of the implementation is not in constant-time. - */ - int PREFIX_NAME(chooseRootHFE_gf2nx)(gf2n root, -- const complete_sparse_monic_gf2nx F, -+ complete_sparse_monic_gf2nx F, - cst_gf2n U) - { - #if (HFEDeg==1) -@@ -44,76 +44,74 @@ - unsigned int j,i,ind=0; - #endif - -- vec_gf2n roots; -+ UINT roots[HFEDeg * NB_WORD_GFqn] = {0}; - int l; - -- l=findRootsHFE_gf2nx(&roots,F,U); -+ l=findRootsHFE_gf2nx(roots,F,U); - -- if(!l) -+ if(l==0) - { - /* Zero root */ - return 0; -- } else -+ } -+ if(l==1) - { -- if(l==1) -- { -- /* One root */ -- copy_gf2n(root,roots); -- } else -- { -- /* Several roots */ -- #if QUARTZ_ROOT -- hash=(UINT*)malloc(l*SIZE_DIGEST_UINT*sizeof(UINT)); -- -- /* We hash each root */ -- for(i=0;i -+ hash[i*SIZE_DIGEST_UINT+j])) - { -- j=0; -- while((j -- hash[i*SIZE_DIGEST_UINT+j])) -- { -- ind=i; -- } -+ ind=i; - } -+ } - -- /* We choose the corresponding root */ -- copy_gf2n(root,roots+ind*NB_WORD_GFqn); -- -- free(hash); -- #else -+ /* We choose the corresponding root */ -+ copy_gf2n(root,roots+ind*NB_WORD_GFqn); - -- /* Sort the roots */ -- sort_gf2n(roots,l); -+ free(hash); -+ #else - -- #if FIRST_ROOT -- /* Choose the first root */ -- copy_gf2n(root,roots); -- #elif DETERMINIST_ROOT -- /* Choose a root with a determinist hash */ -- HASH((unsigned char*)hash, -- (unsigned char*)U,NB_BYTES_GFqn); -- copy_gf2n(root,roots+(hash[0]%l)*NB_WORD_GFqn); -- #endif -+ /* Sort the roots */ -+ sort_gf2n(roots,l); -+ -+ #if FIRST_ROOT -+ /* Choose the first root */ -+ copy_gf2n(root,roots); -+ #elif DETERMINIST_ROOT -+ /* Choose a root with a determinist hash */ -+ HASH((unsigned char*)hash, -+ (unsigned char*)U,NB_BYTES_GFqn); -+ copy_gf2n(root,roots+(hash[0]%l)*NB_WORD_GFqn); - #endif -- } -- free(roots); -- return l; -+ #endif - } -+ return l; - #endif - } - #endif - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQS_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQS_gf2.c deleted file mode 100644 index e321539..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_convMQS_gf2.c +++ /dev/null @@ -1,40 +0,0 @@ ---- 
upstream/Reference_Implementation/sign/GeMSS128/src/convMQS_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/convMQS_gf2.c -@@ -108,8 +108,7 @@ - unsigned int j; - - #if HFEmr8 -- uint8_t *pk_U=(uint8_t*)malloc(HFEmr8*NB_BYTES_EQUATION -- *sizeof(uint8_t)); -+ uint8_t pk_U[HFEmr8*NB_BYTES_EQUATION]={0}; - - convMQS_one_to_last_mr8_equations_gf2(pk_U,pk); - for(j=0;j>(j*HFENr8c)))<=LOST_BITS;--j,++k) -+ for(j=HFEnv-1;j>=(int)LOST_BITS;--j,++k) - { - pk2[k>>3]^=((pk[nb_bits>>3]>>(nb_bits&7))&ONE8)<<(k&7); - nb_bits+=j; -@@ -135,10 +135,15 @@ - */ - UINT PREFIX_NAME(convMQ_last_uncompressL_gf2)(uint64_t* pk2, const uint8_t* pk) - { -- const uint64_t *pk64; - unsigned int iq,ir,k,nb_bits; -+ uint64_t t1, t2; -+ const uint8_t *pk64 = pk; -+ #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) -+ const uint8_t *pk_end; -+ uint64_t end; -+ unsigned int l; -+ #endif - -- pk64=(uint64_t*)pk; - - nb_bits=1; - /* For each row */ -@@ -150,29 +155,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>(nb_bits&63))^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63))^(t2<<(64-(nb_bits&63))); - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } -- pk64+=iq+1; -+ pk64+=8*(iq+1); - pk2+=iq+1; - nb_bits+=(iq+1)<<6; - } -@@ -205,29 +218,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>3)&7) -- uint8_t *pk_end; -- uint64_t end; -- unsigned int l; -- #endif -- - #if LAST_ROW_R -- ir=LAST_ROW_R; - if(nb_bits&63) - { - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) -@@ -257,15 +268,18 @@ - - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - - #if (NB_WHOLE_BLOCKS>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -274,12 +288,13 @@ - pk2[k]^=end<<(64-(nb_bits&63)); - pk2[k+1]=end>>(nb_bits&63); - #else -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - -- if(((nb_bits&63)+ir)>64) 
-+ if(((nb_bits&63)+LAST_ROW_R)>64) - { - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -290,14 +305,18 @@ - #else - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -- if(((nb_bits&63)+ir)>64) -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); -+ if(((nb_bits&63)+LAST_ROW_R)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - #endif - } else -@@ -305,11 +324,12 @@ - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) - for(k=0;k>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -318,7 +338,8 @@ - #else - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - #endif - } -@@ -328,13 +349,16 @@ - #if (((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7) - for(k=0;k<(iq-1);++k) - { -- pk2[k]=(pk64[k]>>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - - end=0; -- pk_end=(uint8_t*)(pk64+k+1); -+ pk_end=pk64+8*(k+1); - for(l=0;l<(((NB_MONOMIAL_PK-LOST_BITS+7)>>3)&7);++l) - { - end^=((uint64_t)(pk_end[l]))<<(l<<3); -@@ -343,15 +367,18 @@ - #else - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - #endif - } else - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>(nb_bits&63))^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63))^(t2<<(64-(nb_bits&63))); - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } -- pk64+=iq+1; -+ pk64+=8*(iq+1); - pk2+=iq+1; - nb_bits+=(iq+1)<<6; - } -@@ -450,29 +486,34 @@ - { - for(k=0;k>(nb_bits&63)) -- ^(pk64[k+1]<<(64-(nb_bits&63))); -+ LOAD_UINT(t1, &pk64[8*k]) -+ LOAD_UINT(t2, &pk64[8*(k+1)]) -+ pk2[k]=(t1>>(nb_bits&63)) -+ ^(t2<<(64-(nb_bits&63))); - } - -- pk2[k]=pk64[k]>>(nb_bits&63); -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1>>(nb_bits&63); - if(((nb_bits&63)+ir)>64) - { -- pk2[k]^=pk64[k+1]<<(64-(nb_bits&63)); -+ LOAD_UINT(t1, &pk64[8*(k+1)]) -+ pk2[k]^=t1<<(64-(nb_bits&63)); - } - - if(((nb_bits&63)+ir)>=64) - { -- ++pk64; -+ pk64+=8; - } - } else - { - for(k=0;k<=iq;++k) - { -- pk2[k]=pk64[k]; -+ LOAD_UINT(t1, &pk64[8*k]) -+ pk2[k]=t1; - } - } - -- pk64+=iq; -+ pk64+=8*iq; - /* 0 padding on the last word */ - pk2[iq]&=(ONE64<>ir)&1);\ -+ pivot=1+~((((*S_cpj)>>ir)&1));\ - LOOPK; - - -@@ -44,7 
+44,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=1;\ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)});\ - \ - /* Here, the pivot is 1 if S is invertible */\ -@@ -158,7 +163,7 @@ - algorithm. */\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)});\ - \ - /* Next row */\ -@@ -214,7 +219,7 @@ - det_i&=(*S_cpi)>>ir; - #endif - -- return det_i; -+ return (gf2) det_i; - } - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantnv_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantnv_gf2.c deleted file mode 100644 index 4a86c2a..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_determinantnv_gf2.c +++ /dev/null @@ -1,66 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/determinantnv_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/determinantnv_gf2.c -@@ -26,7 +26,7 @@ - - #define ADDROW(LOOPK) \ - /* pivot */\ -- pivot=-(((*S_cpj)>>ir)&1);\ -+ pivot=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK; - - -@@ -44,7 +44,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=1;\ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)});\ - \ - /* Here, the pivot is 1 if S is invertible */\ -@@ -158,7 +163,7 @@ - algorithm. */\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)});\ - \ - /* Next row */\ -@@ -214,7 +219,7 @@ - det_i&=(*S_cpi)>>ir; - #endif - -- return det_i; -+ return (gf2) det_i; - } - - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_div_gf2nx.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_div_gf2nx.c deleted file mode 100644 index 19f80aa..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_div_gf2nx.c +++ /dev/null @@ -1,62 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/div_gf2nx.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/div_gf2nx.c -@@ -120,8 +120,11 @@ - } - - leading_coef=A+da*NB_WORD_GFqn; -- i=(db<<1)-da; -- i=MAXI(0,(int)i); -+ i=0; -+ if(2*db > da) -+ { -+ i = 2*db - da; -+ } - res=A+(da-db+i)*NB_WORD_GFqn; - - for(;i>=1;\ - }\ - }\ -- for(;jr>=1;\ - } - -- -- - #if (LEN_UNROLLED_64==1) - #define LOOPJR_UNROLLED_64 LOOPJR_NOCST_64 - #else - - #define LOOPJR_UNROLLED_64(START,NB_IT) \ -- for(jr=START;jr<(NB_IT-LEN_UNROLLED_64+1);jr+=LEN_UNROLLED_64)\ -+ for(jr=(START);jr<((NB_IT)-LEN_UNROLLED_64+1);jr+=LEN_UNROLLED_64)\ - {\ - for(h=0;h>=1;\ - }\ - }\ -- for(;jr>=1;\ -@@ -172,7 +150,7 @@ - #endif - - /* Constant cst_pk */ -- COPY_64bits_variables(c,(const UINT*)pk); -+ LOAD_UINT_ARRAY(c, pk, NB_WORD_EQ) - pk+=NB_BYTES_EQ; - - /* for each row of the quadratic matrix of pk, excepted the last block */ -@@ -186,7 +164,7 @@ - /* for each column of the quadratic matrix of pk */ - - /* xj=xi=1 */ -- XOR_ELEM(c,(const UINT*)pk); -+ XOR_ELEM(c,pk); - pk+=NB_BYTES_EQ; - - xj=xi>>1; -@@ -222,7 +200,7 @@ - /* for each column of the quadratic matrix of pk */ - - /* xj=xi=1 */ -- XOR_ELEM(c,(const UINT*)pk); -+ XOR_ELEM(c,pk); - pk+=NB_BYTES_EQ; - - xj=xi>>1; - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c 
b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c deleted file mode 100644 index 6b145cd..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_evalMQSv_gf2.c +++ /dev/null @@ -1,45 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/evalMQSv_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/evalMQSv_gf2.c -@@ -109,18 +109,6 @@ - /**************************************************************************/ - - --#if NB_VARr -- #define REM_X \ -- xi=m[i];\ -- for(j=0;j>j)&UINT_1);\ -- } --#else -- #define REM_X --#endif -- -- - - - /* Input: -@@ -139,16 +127,18 @@ - unsigned int i,j,k; - - /* Compute one time all -((xi>>1)&UINT_1) */ -+ i=0; - k=0; -- for(i=0;i>j)&UINT_1); -+ x[k]=(1+~((xi>>j)&UINT_1)); -+ ++k; - } -+ ++i; - } -- REM_X; - - /* Constant cst_pk */ - COPY_64bits_variables(c,pk); - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c deleted file mode 100644 index 0d8ff39..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_findRootsSplit_gf2nx.c +++ /dev/null @@ -1,74 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/findRootsSplit_gf2nx.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/findRootsSplit_gf2nx.c -@@ -41,7 +41,15 @@ - i=1; - /* (2^i) < deg does not require modular reduction by f */ - #if(HFEn<33) -- const unsigned int min=(deg<(1U< HFE_odd_degree) j=1; -+ #endif -+ /* Here a_vec = row i */ - a_vecj=alpha_vec+j*(HFEn-1)*NB_WORD_GFqn; - for(;j HFE_odd_degree) j=1; - #endif -+ /* Here a_vec = row i */ -+ a_vecj=alpha_vec+j*(HFEn-1)*NB_WORD_GFqn; -+ #if HFEDegJ -+ for(; j>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK;\ - LOOPKINV; - -@@ -49,7 +49,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=UINT_1;\ -- for(ir=0;ir>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask);\ - }\ - \ -@@ -133,7 +133,6 @@ - LOOPIR(HFEnr-1,SWAP_WORD(*S_cpj,*S_cpi),*S_cpj^=*S_cpi&mask); - - /* Step 2 */ -- bit_ir=UINT_1<<(HFEnr-1); - LOOPIR_DOWN_TO_UP(HFEnr); - #else - /* To begin to last row */ -@@ -178,16 +177,16 @@ - } - - #define LOOPIR_CST(NB_IT) \ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)\ - xorLoadMask1_gf2n(Sinv_cpi,Sinv_cpj,mask);\ - });\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)\ - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask);\ - });\ -@@ -314,7 +313,7 @@ - Sinv_cpj+=NB_WORD_GFqn; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -343,7 +342,7 @@ - Sinv_cpj+=NB_WORD_GFqn; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -381,7 +380,7 @@ - for(j=0;j>6])>>(j&63U))&1U); -+ mask=(1+~(((U[j>>6])>>(j&63U))&1U)); - xorLoadMask1_gf2n(Sinv_cpj,Sinv_cpi,mask); - - /* next row */ - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c deleted file mode 100644 index 3c998c6..0000000 --- 
a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_invMatrixnv_gf2.c +++ /dev/null @@ -1,94 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/invMatrixnv_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/invMatrixnv_gf2.c -@@ -28,7 +28,7 @@ - - #define ADDROW(LOOPK,LOOPKINV) \ - /* pivot */\ -- mask=-(((*S_cpj)>>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - LOOPK;\ - LOOPKINV; - -@@ -49,7 +49,7 @@ - - #define LOOPIR(NB_IT,LOOPK1,LOOPK2) \ - bit_ir=UINT_1;\ -- for(ir=0;ir>ir)&1);\ -+ mask=(1+~(((*S_cpj)>>ir)&1));\ - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask);\ - }\ - \ -@@ -133,7 +133,6 @@ - LOOPIR(HFEnvr-1,SWAP_WORD(*S_cpj,*S_cpi),*S_cpj^=*S_cpi&mask); - - /* Step 2 */ -- bit_ir=UINT_1<<(HFEnvr-1); - LOOPIR_DOWN_TO_UP(HFEnvr); - #else - /* To begin to last row */ -@@ -178,16 +177,16 @@ - } - - #define LOOPIR_CST(NB_IT) \ -- for(ir=0;ir>ir)&UINT_1)));\ -+ LOOPJ_CST({mask=(1+~(UINT_1-(((*S_cpi)>>ir)&UINT_1)));\ - LOOPK(XORLOADMASK1_1(S_cpi+k,S_cpj+k,mask);)\ - xorLoadMask1_gf2nv(Sinv_cpi,Sinv_cpj,mask);\ - });\ - \ - /* row j += (pivot_j) * row_i */\ -- LOOPJ_CST({mask=(-(((*S_cpj)>>ir)&UINT_1));\ -+ LOOPJ_CST({mask=(1+~(((*S_cpj)>>ir)&UINT_1));\ - LOOPK(XORLOADMASK1_1(S_cpj+k,S_cpi+k,mask);)\ - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask);\ - });\ -@@ -315,7 +314,7 @@ - Sinv_cpj+=NB_WORD_GF2nv; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -344,7 +343,7 @@ - Sinv_cpj+=NB_WORD_GF2nv; - L_cpj+=(j>>6)+1; - -- mask=(-(((*L_cpj)>>ir)&UINT_1)); -+ mask=(1+~(((*L_cpj)>>ir)&UINT_1)); - for(k=0;k<=iq;++k) - { - XORLOADMASK1_1(Sinv_cpj+k,Sinv_cpi+k,mask); -@@ -382,7 +381,7 @@ - for(j=0;j>6])>>(j&63U))&1U); -+ mask=(1+~(((U[j>>6])>>(j&63U))&1U)); - xorLoadMask1_gf2nv(Sinv_cpj,Sinv_cpi,mask); - - /* next row */ - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c deleted file mode 100644 index 9058654..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mixEquationsMQS_gf2.c +++ /dev/null @@ -1,20 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/mixEquationsMQS_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/mixEquationsMQS_gf2.c -@@ -51,14 +51,13 @@ - - for(i=0;i>(i<<3); -+ pk[i]=(uint8_t)(pk_last[NB_WORD_GF2m-1]>>(i<<3)); - } - #endif - } - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mul_gf2n.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mul_gf2n.c deleted file mode 100644 index 3c33c47..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_mul_gf2n.c +++ /dev/null @@ -1,213 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/mul_gf2n.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/mul_gf2n.c -@@ -28,7 +28,7 @@ - for(i=0;i>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A<>j)&ONE64); -+ mask_B=(1+~(((*B)>>j)&ONE64)); - /* k=0 */ - tmp_A=(*A)&mask_B; - C[0]^=tmp_A< -+#include - #include - - #include "sign_keypairHFE.h" -@@ -15,11 +14,7 @@ - * @param[out] sk The secret-key. - * @return Zero if the function runs correctly, non-zero else. 
- */ --#if SUPERCOP --int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) --#else --int PREFIX_NAME(crypto_sign_keypair)(unsigned char *pk, unsigned char *sk) --#endif -+int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) - { - return sign_keypairHFE(pk,(UINT*)sk); - } -@@ -34,18 +29,14 @@ - * @param[in] sk The secret-key. - * @return Zero if the function runs correctly, non-zero else. - */ --#if SUPERCOP - int crypto_sign( --#else --int PREFIX_NAME(crypto_sign)( --#endif -- unsigned char *sm, unsigned long long *smlen, -- const unsigned char *m, unsigned long long mlen, -- const unsigned char *sk) -+ uint8_t *sm, size_t *smlen, -+ const uint8_t *m, size_t mlen, -+ const uint8_t *sk) - { - *smlen=mlen+CRYPTO_BYTES; -- memcpy(sm+CRYPTO_BYTES,m,(size_t)mlen); -- return signHFE(sm,m,(size_t)mlen,(UINT*)sk); -+ memmove(sm+CRYPTO_BYTES,m,mlen); -+ return signHFE(sm,m,mlen,(UINT*)sk); - } - - -@@ -58,21 +49,35 @@ - * @param[in] pk The public-key. - * @return Zero if the function runs correctly, non-zero else. - */ --#if SUPERCOP - int crypto_sign_open( --#else --int PREFIX_NAME(crypto_sign_open)( --#endif -- unsigned char *m, unsigned long long *mlen, -- const unsigned char *sm, unsigned long long smlen, -- const unsigned char *pk) -+ uint8_t *m, size_t *mlen, -+ const uint8_t *sm, size_t smlen, -+ const uint8_t *pk) - { - int result; - *mlen=smlen-CRYPTO_BYTES; - result=sign_openHFE(sm+CRYPTO_BYTES,(size_t)(*mlen),sm,pk); - /* For compatibily with SUPERCOP, the memcpy is done only after sign_open */ -- memcpy(m,sm+CRYPTO_BYTES,(size_t)(*mlen)); -+ memmove(m,sm+CRYPTO_BYTES,(size_t)(*mlen)); - return result; - } - - -+int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk) -+{ -+ int result; -+ *siglen = CRYPTO_BYTES; -+ result = signHFE(sig,m,mlen,(UINT*)sk); -+ return result; -+} -+ -+int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) -+{ -+ int result; -+ if (siglen == CRYPTO_BYTES) { -+ result = sign_openHFE(m,mlen,sig,pk); -+ } else { -+ result = -1; -+ } -+ return result; -+} - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_signHFE.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_signHFE.c deleted file mode 100644 index 57c3fcd..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_signHFE.c +++ /dev/null @@ -1,220 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/signHFE.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/signHFE.c -@@ -83,8 +83,10 @@ - for(k1=1;k1sk_uncomp,UINT*,NB_UINT_HFEVPOLY -- +(LTRIANGULAR_NV_SIZE<<1) -- +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t -- +MATRIXnv_SIZE+MATRIXn_SIZE,sizeof(UINT)); - expandSeed((uint8_t*)(sk_HFE->sk_uncomp),(NB_UINT_HFEVPOLY - +(LTRIANGULAR_NV_SIZE<<1) - +(LTRIANGULAR_N_SIZE<<1)+SIZE_VECTOR_t)<<3, -@@ -313,7 +311,8 @@ - { - #if HFEv - cst_sparse_monic_gf2nx F_HFEv; -- UINT* F; -+ sparse_monic_gf2nx F; -+ sparse_monic_gf2nx F_cp; - unsigned int i; - #endif - -@@ -333,13 +332,10 @@ - #endif - - #if HFEv -+ F=sk_HFE->F_struct.poly; - F_HFEv=sk_HFE->F_HFEv; - -- ALIGNED_GFqn_MALLOC(F,UINT*,NB_UINT_HFEPOLY,sizeof(UINT)); -- VERIFY_ALLOC_RET(F); -- - #if (HFEDeg>1) -- UINT *F_cp; - unsigned int j; - - /* X^(2^0) */ -@@ -351,11 +347,11 @@ - { - /* Copy i quadratic terms */ - -+ j=0; - #if ENABLED_REMOVE_ODD_DEGREE -- for(j=(((1U<HFE_odd_degree) ++j; - #endif -+ for(;jHFE_odd_degree) ++j; - #endif -+ for(;jF_struct.poly=F; - #else - 
sk_HFE->F_struct.poly=sk_HFE->F_HFEv; - #endif -@@ -447,7 +442,7 @@ - #endif - - int nb_root; -- secret_key_HFE sk_HFE; -+ secret_key_HFE sk_HFE={0}; - - #if(HFEv) - UINT* F; -@@ -666,9 +661,6 @@ - if(nb_root<0) - { - /* Error from chooseRootHFE */ -- #if HFEv -- ALIGNED_GFqn_FREE(F); -- #endif - return nb_root; - } - -@@ -677,7 +669,7 @@ - /* Add the v bits to DR */ - #if HFEnr - DR[NB_WORD_GFqn-1]^=V[0]<>(64-HFEnr))^(V[i+1]<>(64-HFEnr); - #endif - #else -- for(i=0;i>(64-HFEnr))^(V[i+1]<>(64-HFEnr); - #endif - #else -- for(i=0;i1)) - uint64_t val; -@@ -578,10 +576,6 @@ - #endif - - -- #if (FORMAT_HYBRID_CPK8&&EVAL_HYBRID_CPK8_UNCOMP&&HFEmr8) -- free(pk_tmp); -- #endif -- - return ret; - } - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_sqr_gf2n.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_sqr_gf2n.c deleted file mode 100644 index 2b5fa23..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_sqr_gf2n.c +++ /dev/null @@ -1,223 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/sqr_gf2n.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/sqr_gf2n.c -@@ -78,44 +78,6 @@ - /***********************************************************************/ - /***********************************************************************/ - -- --/* When rem is a macro */ --#if (K3!=1) -- #define MUL_MOD_MACRO32(name_function,mul_function,rem_function) \ -- name_function\ -- {\ -- uint64_t res_mul,Q,R;\ -- mul_function;\ -- rem_function;\ -- } --#else -- #define MUL_MOD_MACRO32(name_function,mul_function,rem_function) \ -- name_function\ -- {\ -- uint64_t res_mul,Q;\ -- mul_function;\ -- rem_function;\ -- } --#endif -- --#define MUL_MOD_MACRO64(name_function,mul_function,rem_function,size) \ -- name_function\ -- {\ -- uint64_t res_mul[size],Q,R;\ -- mul_function;\ -- rem_function;\ -- } -- --/* HFEn == 64 */ --#define MUL_MOD_MACRO64_K64(name_function,mul_function,rem_function,size) \ -- name_function\ -- {\ -- uint64_t res_mul[size],R;\ -- mul_function;\ -- rem_function;\ -- } -- -- - #if HFEnr - #define INIT_Q(size2) uint64_t Q[size2]; - #else -@@ -123,81 +85,108 @@ - #define INIT_Q(size2) - #endif - --#if ((HFEn==312)||(HFEn==402)||(HFEn==544)) -- #define MUL_MOD_MACRO(name_function,mul_function,rem_function,size,size2) \ -- name_function\ -- {\ -- uint64_t res_mul[size];\ -- INIT_Q(size2);\ -- mul_function;\ -- rem_function;\ -+#if (REM_MACRO) -+ #if (NB_WORD_GFqn!=1) -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],R; -+ INIT_Q(NB_WORD_GFqn); -+ sqr_gf2x(res_mul,A); -+ REM_GF2N(res,res_mul,Q,R); - } --#else -- #define MUL_MOD_MACRO(name_function,mul_function,rem_function,size,size2) \ -- name_function\ -- {\ -- uint64_t res_mul[size],R;\ -- INIT_Q(size2);\ -- mul_function;\ -- rem_function;\ -+ -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],R; -+ INIT_Q(NB_WORD_GFqn); -+ sqr_nocst_gf2x(res_mul,A); -+ REM_GF2N(res,res_mul,Q,R); -+ } -+ #elif (HFEn<33) -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul,Q,R; -+ sqr_gf2x(&res_mul,A); -+ REM_GF2N(*res,res_mul,Q,R); - } --#endif - --/* When rem is a function */ --#define MUL_MOD_FUNCTION32(name_function,mul_function,rem_function) \ -- name_function\ -- {\ -- uint64_t res_mul;\ -- mul_function;\ -- rem_function;\ -- } -+ void 
PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul,Q,R; -+ sqr_nocst_gf2x(&res_mul,A); -+ REM_GF2N(*res,res_mul,Q,R); -+ } -+ #elif HFEnr -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],Q,R; -+ sqr_gf2x(res_mul,A); -+ REM_GF2N(*res,res_mul,Q,R); -+ } - --#define MUL_MOD_FUNCTION(name_function,mul_function,rem_function,size) \ -- name_function\ -- {\ -- uint64_t res_mul[size];\ -- mul_function;\ -- rem_function;\ -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],Q,R; -+ sqr_nocst_gf2x(res_mul,A); -+ REM_GF2N(*res,res_mul,Q,R); -+ } -+ #else -+/* HFEn == 64 */ -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],R; -+ sqr_gf2x(res_mul,A); -+ REM_GF2N(*res,res_mul,,R); - } - --#if (REM_MACRO) -- #if (NB_WORD_GFqn!=1) -- MUL_MOD_MACRO(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- REM_GF2N(res,res_mul,Q,R),NB_WORD_MUL,NB_WORD_GFqn); -- MUL_MOD_MACRO(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- REM_GF2N(res,res_mul,Q,R),NB_WORD_MUL,NB_WORD_GFqn); -- #elif (HFEn<33) -- MUL_MOD_MACRO32(SQR_THEN_REM_GF2N,sqr_gf2x(&res_mul,A), -- REM_GF2N(*res,res_mul,Q,R)); -- MUL_MOD_MACRO32(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(&res_mul,A), -- REM_GF2N(*res,res_mul,Q,R)); -- #elif HFEnr -- MUL_MOD_MACRO64(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- REM_GF2N(*res,res_mul,Q,R),NB_WORD_MUL); -- MUL_MOD_MACRO64(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- REM_GF2N(*res,res_mul,Q,R),NB_WORD_MUL); -- #else -- /* HFEn == 64 */ -- MUL_MOD_MACRO64_K64(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- REM_GF2N(*res,res_mul,,R),NB_WORD_MUL); -- MUL_MOD_MACRO64_K64(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- REM_GF2N(*res,res_mul,,R),NB_WORD_MUL); -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL],R; -+ sqr_nocst_gf2x(res_mul,A); -+ REM_GF2N(*res,res_mul,,R); -+ } - #endif - #elif (NB_WORD_GFqn!=1) -- MUL_MOD_FUNCTION(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- remsqr_gf2n(res,res_mul),NB_WORD_MUL); -- MUL_MOD_FUNCTION(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- remsqr_gf2n(res,res_mul),NB_WORD_MUL); -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL]; -+ sqr_gf2x(res_mul,A); -+ remsqr_gf2n(res,res_mul); -+ } -+ -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL]; -+ sqr_nocst_gf2x(res_mul,A); -+ remsqr_gf2n(res,res_mul); -+ } - #elif (HFEn<33) -- MUL_MOD_FUNCTION32(SQR_THEN_REM_GF2N,sqr_gf2x(&res_mul,A), -- remsqr_gf2n(res,&res_mul)); -- MUL_MOD_FUNCTION32(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(&res_mul,A), -- remsqr_gf2n(res,&res_mul)); -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul; -+ sqr_gf2x(&res_mul,A); -+ remsqr_gf2n(res,&res_mul); -+ } -+ -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul; -+ sqr_nocst_gf2x(&res_mul,A); -+ remsqr_gf2n(res,&res_mul); -+ } - #else -- MUL_MOD_FUNCTION(SQR_THEN_REM_GF2N,sqr_gf2x(res_mul,A), -- 
remsqr_gf2n(res,res_mul),NB_WORD_MUL); -- MUL_MOD_FUNCTION(SQR_NOCST_THEN_REM_GF2N,sqr_nocst_gf2x(res_mul,A), -- remsqr_gf2n(res,res_mul),NB_WORD_MUL); --#endif -+ void PREFIX_NAME(sqr_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL]; -+ sqr_gf2x(res_mul,A); -+ remsqr_gf2n(res,res_mul); -+ } - -+ void PREFIX_NAME(sqr_nocst_then_rem_gf2n)(uint64_t res[NB_WORD_GFqn], const uint64_t A[NB_WORD_GFqn]) -+ { -+ uint64_t res_mul[NB_WORD_MUL]; -+ sqr_nocst_gf2x(res_mul,A); -+ remsqr_gf2n(res,res_mul); -+ } -+#endif - - diff --git a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c b/gemss/patches/Reference_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c deleted file mode 100644 index 34feb72..0000000 --- a/gemss/patches/Reference_Implementation_sign_GeMSS128_src_vecMatProduct_gf2.c +++ /dev/null @@ -1,258 +0,0 @@ ---- upstream/Reference_Implementation/sign/GeMSS128/src/vecMatProduct_gf2.c -+++ upstream-patched/Reference_Implementation/sign/GeMSS128/src/vecMatProduct_gf2.c -@@ -9,11 +9,11 @@ - - /* for a block of bits of vec */ - #define LOOPIR_M(NB_IT) \ -- for(ir=0;ir>6;iq>ir;\ -- LOOPIR_START(NB_BITS_UINT);\ -- ir=0;\ -- }\ -- /* the last block */\ -- REM_START(LOOPIR_START);\ -+void PREFIX_NAME(vecMatProductv_64)(vecn_gf2 res, cst_vecn_gf2 vec, cst_Mn_gf2 S) { -+ cst_Mn_gf2 S_cp; -+ UINT bit_ir, vec_ir; -+ unsigned int iq,ir; -+ -+ /* initialization of res to 0 */ -+ set0_gf2n(res); -+ -+ S_cp=S; -+ /* for each bit of vec excepted the last block */ -+ iq = 0; -+ ir = 0; -+ while(NB_BITS_UINT*iq + ir < HFEv) -+ { -+ bit_ir = vec[iq]; -+ for(ir=0;(NB_BITS_UINT*iq + ir < HFEv) && (ir>=1; -+ } -+ ++iq; -+ } -+} -+ -+void PREFIX_NAME(vecMatProductn_64)(vecn_gf2 res, cst_vecn_gf2 vec, cst_Mn_gf2 S) { -+ cst_Mn_gf2 S_cp; -+ UINT bit_ir, vec_ir; -+ unsigned int iq,ir; -+ -+ /* initialization of res to 0 */ -+ set0_gf2n(res); -+ -+ S_cp=S; -+ /* for each bit of vec excepted the last block */ -+ for(iq=0;iq>6;iq>ir; -+ LOOPIR_START_N(NB_BITS_UINT); -+ ir=0; -+ } -+ /* the last block */ -+ REM_START_NV(LOOPIR_START_N); - } - - --VECMATPROD_START(PREFIX_NAME(vecMatProductnvn_start_64),set0_gf2n, -- LOOPIR_START_N,REM_START_NV,HFEnvq) --/* --VECMATPROD_START(PREFIX_NAME(vecMatProductn_start_64),set0_gf2n, -- LOOPIR_START_N,REM_START_N,HFEnq) --*/ - - - diff --git a/gemss/update_patches.sh b/gemss/update_patches.sh deleted file mode 100755 index 76a4d36..0000000 --- a/gemss/update_patches.sh +++ /dev/null @@ -1,47 +0,0 @@ -PYTHON=/usr/bin/python3 - -BASE=`dirname $0` -BASE=`cd ${BASE} && pwd` -cd ${BASE} - -PATCHES=${BASE}/patches - -VERSION=$(cat ${BASE}/VERSION) -V1=upstream -V2=upstream-patched - -if [ ! -e "${V1}" ] -then - echo "${BASE}/${V1} not found" - exit -fi - -if [ ! -e "${V2}" ] -then - echo "${BASE}/${V2} not found" - exit -fi - -rm -rf ${PATCHES} -mkdir -p ${PATCHES}/tmp/ - -diff -ru --no-dereference ${V1} ${V2} > ${PATCHES}/tmp/p - -( cd ${PATCHES}/tmp/ - splitpatch ${PATCHES}/tmp/p - rm ${PATCHES}/tmp/p - for X in * - do - Y=$(echo ${X} \ - | head -n 1 ${X} \ - | tr '\t' ' ' \ - | cut -d ' ' -f 2 \ - | cut -d'/' -f 2- \ - | tr '/' '_') - mv ${X} ${Y} - sed -i '1,2 s/\t.*$//' ${Y} - sed -i '$ s/diff.*//' ${Y} - done -) -mv ${PATCHES}/tmp/* ${PATCHES} -rm -rf ${PATCHES}/tmp/