Merge pull request #708 from argilo/pow-puppet

Use a puppet to pass positive values to volk_32f_x2_pow_32f
gnuradio · Dec 1, 2023 · 53cb677 · 53cb677
2 parents 2bb1be0 + e8b5c90
commit 53cb677
Show file tree

Hide file tree

Showing 3 changed files with 117 additions and 1 deletion.
diff --git a/kernels/volk/volk_32f_x2_pow_32f.h b/kernels/volk/volk_32f_x2_pow_32f.h
@@ -16,6 +16,8 @@
  *
  * c[i] = pow(a[i], b[i])
  *
+ * Note that the aVector values must be positive; otherwise the output may be inaccurate.
+ *
  * <b>Dispatcher Prototype</b>
  * \code
  * void volk_32f_x2_pow_32f(float* cVector, const float* bVector, const float* aVector,

diff --git a/kernels/volk/volk_32f_x2_powpuppet_32f.h b/kernels/volk/volk_32f_x2_powpuppet_32f.h
@@ -0,0 +1,114 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2023 Free Software Foundation, Inc.
+ *
+ * This file is part of VOLK
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ */
+
+
+#ifndef INCLUDED_volk_32f_x2_powpuppet_32f_H
+#define INCLUDED_volk_32f_x2_powpuppet_32f_H
+
+#include <math.h>
+#include <volk/volk.h>
+#include <volk/volk_32f_x2_pow_32f.h>
+
+/*
+ * Build a strictly positive copy of `input`, suitable as the base vector
+ * (aVector) handed to the volk_32f_x2_pow_32f kernels by the puppets below.
+ *
+ * Each element is replaced by its absolute value; elements whose absolute
+ * value compares equal to zero are replaced by 2.0f.
+ *
+ * input      - source values; num_points elements are read
+ * num_points - number of elements to convert
+ *
+ * Returns a buffer obtained from volk_malloc() that the caller must release
+ * with volk_free(), or a null pointer if the allocation failed.
+ */
+static inline float* make_positive(const float* input, unsigned int num_points)
+{
+    /* The cast is kept so the header also compiles when included from C++. */
+    float* output = (float*)volk_malloc(num_points * sizeof(float), volk_get_alignment());
+    if (!output) {
+        /* Allocation failed: bail out before the loop writes through null. */
+        return output;
+    }
+    for (unsigned int i = 0; i < num_points; i++) {
+        const float magnitude = fabsf(input[i]);
+        output[i] = (magnitude == 0) ? 2.0f : magnitude;
+    }
+    return output;
+}
+
+#if LV_HAVE_AVX2 && LV_HAVE_FMA
+/*
+ * Puppet for the aligned AVX2+FMA pow kernel: calls
+ * volk_32f_x2_pow_32f_a_avx2_fma with a temporary copy of aVector produced by
+ * make_positive(), then frees that copy.
+ */
+static inline void volk_32f_x2_powpuppet_32f_a_avx2_fma(float* cVector,
+                                                        const float* bVector,
+                                                        const float* aVector,
+                                                        unsigned int num_points)
+{
+    float* const positiveBase = make_positive(aVector, num_points);
+
+    volk_32f_x2_pow_32f_a_avx2_fma(cVector, bVector, positiveBase, num_points);
+    volk_free(positiveBase);
+}
+#endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for aligned */
+
+#ifdef LV_HAVE_AVX2
+/*
+ * Puppet for the aligned AVX2 pow kernel: calls volk_32f_x2_pow_32f_a_avx2
+ * with a temporary copy of aVector produced by make_positive(), then frees
+ * that copy.
+ */
+static inline void volk_32f_x2_powpuppet_32f_a_avx2(float* cVector,
+                                                    const float* bVector,
+                                                    const float* aVector,
+                                                    unsigned int num_points)
+{
+    float* const positiveBase = make_positive(aVector, num_points);
+
+    volk_32f_x2_pow_32f_a_avx2(cVector, bVector, positiveBase, num_points);
+    volk_free(positiveBase);
+}
+#endif /* LV_HAVE_AVX2 for aligned */
+
+#ifdef LV_HAVE_SSE4_1
+/*
+ * Puppet for the aligned SSE4.1 pow kernel: calls
+ * volk_32f_x2_pow_32f_a_sse4_1 with a temporary copy of aVector produced by
+ * make_positive(), then frees that copy.
+ */
+static inline void volk_32f_x2_powpuppet_32f_a_sse4_1(float* cVector,
+                                                      const float* bVector,
+                                                      const float* aVector,
+                                                      unsigned int num_points)
+{
+    float* const positiveBase = make_positive(aVector, num_points);
+
+    volk_32f_x2_pow_32f_a_sse4_1(cVector, bVector, positiveBase, num_points);
+    volk_free(positiveBase);
+}
+#endif /* LV_HAVE_SSE4_1 for aligned */
+
+#ifdef LV_HAVE_GENERIC
+/*
+ * Puppet for the generic pow kernel: calls volk_32f_x2_pow_32f_generic with a
+ * temporary copy of aVector produced by make_positive(), then frees that
+ * copy.
+ */
+static inline void volk_32f_x2_powpuppet_32f_generic(float* cVector,
+                                                     const float* bVector,
+                                                     const float* aVector,
+                                                     unsigned int num_points)
+{
+    float* const positiveBase = make_positive(aVector, num_points);
+
+    volk_32f_x2_pow_32f_generic(cVector, bVector, positiveBase, num_points);
+    volk_free(positiveBase);
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_SSE4_1
+/*
+ * Puppet for the unaligned SSE4.1 pow kernel: calls
+ * volk_32f_x2_pow_32f_u_sse4_1 with a temporary copy of aVector produced by
+ * make_positive(), then frees that copy.
+ */
+static inline void volk_32f_x2_powpuppet_32f_u_sse4_1(float* cVector,
+                                                      const float* bVector,
+                                                      const float* aVector,
+                                                      unsigned int num_points)
+{
+    float* const positiveBase = make_positive(aVector, num_points);
+
+    volk_32f_x2_pow_32f_u_sse4_1(cVector, bVector, positiveBase, num_points);
+    volk_free(positiveBase);
+}
+#endif /* LV_HAVE_SSE4_1 for unaligned */
+
+#if LV_HAVE_AVX2 && LV_HAVE_FMA
+/*
+ * Puppet for the unaligned AVX2+FMA pow kernel: calls
+ * volk_32f_x2_pow_32f_u_avx2_fma with a temporary copy of aVector produced by
+ * make_positive(), then frees that copy.
+ */
+static inline void volk_32f_x2_powpuppet_32f_u_avx2_fma(float* cVector,
+                                                        const float* bVector,
+                                                        const float* aVector,
+                                                        unsigned int num_points)
+{
+    float* const positiveBase = make_positive(aVector, num_points);
+
+    volk_32f_x2_pow_32f_u_avx2_fma(cVector, bVector, positiveBase, num_points);
+    volk_free(positiveBase);
+}
+#endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for unaligned */
+
+#ifdef LV_HAVE_AVX2
+/*
+ * Puppet for the unaligned AVX2 pow kernel: calls volk_32f_x2_pow_32f_u_avx2
+ * with a temporary copy of aVector produced by make_positive(), then frees
+ * that copy.
+ */
+static inline void volk_32f_x2_powpuppet_32f_u_avx2(float* cVector,
+                                                    const float* bVector,
+                                                    const float* aVector,
+                                                    unsigned int num_points)
+{
+    float* const positiveBase = make_positive(aVector, num_points);
+
+    volk_32f_x2_pow_32f_u_avx2(cVector, bVector, positiveBase, num_points);
+    volk_free(positiveBase);
+}
+#endif /* LV_HAVE_AVX2 for unaligned */
+
+#endif /* INCLUDED_volk_32f_x2_powpuppet_32f_H */
diff --git a/lib/kernel_tests.h b/lib/kernel_tests.h
@@ -85,7 +85,6 @@ std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
     QA(VOLK_INIT_TEST(volk_32fc_32f_add_32fc, test_params))
     QA(VOLK_INIT_TEST(volk_32f_log2_32f, test_params.make_absolute(1.5e-5)))
     QA(VOLK_INIT_TEST(volk_32f_expfast_32f, test_params_inacc_tenth))
-    QA(VOLK_INIT_TEST(volk_32f_x2_pow_32f, test_params_inacc))
     QA(VOLK_INIT_TEST(volk_32f_sin_32f, test_params_inacc))
     QA(VOLK_INIT_TEST(volk_32f_cos_32f, test_params_inacc))
     QA(VOLK_INIT_TEST(volk_32f_tan_32f, test_params_inacc))
@@ -168,6 +167,7 @@ std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
     QA(VOLK_INIT_TEST(volk_32f_tanh_32f, test_params_inacc))
     QA(VOLK_INIT_TEST(volk_32fc_x2_s32fc_multiply_conjugate_add_32fc, test_params))
     QA(VOLK_INIT_TEST(volk_32f_exp_32f, test_params))
+    QA(VOLK_INIT_PUPP(volk_32f_x2_powpuppet_32f, volk_32f_x2_pow_32f, test_params_inacc))
     QA(VOLK_INIT_PUPP(
         volk_32f_s32f_mod_rangepuppet_32f, volk_32f_s32f_s32f_mod_range_32f, test_params))
     QA(VOLK_INIT_PUPP(