Skip to content

Commit

Permalink
Merge pull request #708 from argilo/pow-puppet
Browse files Browse the repository at this point in the history
Use a puppet to pass positive values to volk_32f_x2_pow_32f
  • Loading branch information
jdemel authored Dec 1, 2023
2 parents 2bb1be0 + e8b5c90 commit 53cb677
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 1 deletion.
2 changes: 2 additions & 0 deletions kernels/volk/volk_32f_x2_pow_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*
* c[i] = pow(a[i], b[i])
*
* Note that the aVector values must be positive; otherwise the output may be inaccurate.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_32f_x2_pow_32f(float* cVector, const float* bVector, const float* aVector,
Expand Down
114 changes: 114 additions & 0 deletions kernels/volk/volk_32f_x2_powpuppet_32f.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/* -*- c++ -*- */
/*
* Copyright 2023 Free Software Foundation, Inc.
*
* This file is part of VOLK
*
* SPDX-License-Identifier: LGPL-3.0-or-later
*/


#ifndef INCLUDED_volk_32f_x2_powpuppet_32f_H
#define INCLUDED_volk_32f_x2_powpuppet_32f_H

#include <math.h>
#include <volk/volk.h>
#include <volk/volk_32f_x2_pow_32f.h>

/*
 * Build a freshly allocated copy of `input` that contains no zero and no
 * negative values, suitable as the base vector of the pow kernels.
 *
 * Each element is replaced by its absolute value; elements whose absolute
 * value compares equal to zero are replaced by 2.0f.
 *
 * input      - source samples, at least num_points floats
 * num_points - number of elements to convert
 *
 * Returns a buffer obtained from volk_malloc(); the puppets in this file
 * release it with volk_free().
 *
 * NOTE(review): the result of volk_malloc() is written to without a NULL
 * check -- confirm that this is acceptable for test-only code.
 */
static inline float* make_positive(const float* input, unsigned int num_points)
{
    float* positive =
        (float*)volk_malloc(num_points * sizeof(float), volk_get_alignment());

    for (unsigned int idx = 0; idx < num_points; ++idx) {
        const float magnitude = fabsf(input[idx]);
        /* Zero is not a usable base here, so substitute a fixed positive value. */
        positive[idx] = (magnitude == 0) ? 2.0f : magnitude;
    }

    return positive;
}

#if LV_HAVE_AVX2 && LV_HAVE_FMA
/*
 * Puppet around volk_32f_x2_pow_32f_a_avx2_fma.
 *
 * The kernel is invoked with a copy of aVector produced by make_positive()
 * instead of aVector itself; the temporary copy is freed before returning.
 * cVector, bVector and num_points are forwarded unchanged.
 */
static inline void volk_32f_x2_powpuppet_32f_a_avx2_fma(float* cVector,
                                                        const float* bVector,
                                                        const float* aVector,
                                                        unsigned int num_points)
{
    float* const positive_bases = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_a_avx2_fma(cVector, bVector, positive_bases, num_points);

    volk_free(positive_bases);
}
#endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for aligned */

#ifdef LV_HAVE_AVX2
/*
 * Puppet around volk_32f_x2_pow_32f_a_avx2.
 *
 * Substitutes a make_positive() copy of aVector for the kernel's third
 * argument, then releases that copy. All other arguments pass through as-is.
 */
static inline void volk_32f_x2_powpuppet_32f_a_avx2(float* cVector,
                                                    const float* bVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    float* const positive_bases = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_a_avx2(cVector, bVector, positive_bases, num_points);

    volk_free(positive_bases);
}
#endif /* LV_HAVE_AVX2 for aligned */

#ifdef LV_HAVE_SSE4_1
/*
 * Puppet around volk_32f_x2_pow_32f_a_sse4_1.
 *
 * Runs the kernel on a make_positive() copy of aVector rather than on
 * aVector directly, and frees the copy once the kernel has returned.
 */
static inline void volk_32f_x2_powpuppet_32f_a_sse4_1(float* cVector,
                                                      const float* bVector,
                                                      const float* aVector,
                                                      unsigned int num_points)
{
    float* const positive_bases = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_a_sse4_1(cVector, bVector, positive_bases, num_points);

    volk_free(positive_bases);
}
#endif /* LV_HAVE_SSE4_1 for aligned */

#ifdef LV_HAVE_GENERIC
/*
 * Puppet around volk_32f_x2_pow_32f_generic.
 *
 * The generic kernel receives a make_positive() copy of aVector in place of
 * the caller's data; the copy is released with volk_free() afterwards.
 */
static inline void volk_32f_x2_powpuppet_32f_generic(float* cVector,
                                                     const float* bVector,
                                                     const float* aVector,
                                                     unsigned int num_points)
{
    float* const positive_bases = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_generic(cVector, bVector, positive_bases, num_points);

    volk_free(positive_bases);
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_SSE4_1
/*
 * Puppet around volk_32f_x2_pow_32f_u_sse4_1.
 *
 * Calls the kernel with a make_positive() copy of aVector as its third
 * argument and frees that copy before returning.
 */
static inline void volk_32f_x2_powpuppet_32f_u_sse4_1(float* cVector,
                                                      const float* bVector,
                                                      const float* aVector,
                                                      unsigned int num_points)
{
    float* const positive_bases = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_u_sse4_1(cVector, bVector, positive_bases, num_points);

    volk_free(positive_bases);
}
#endif /* LV_HAVE_SSE4_1 for unaligned */

#if LV_HAVE_AVX2 && LV_HAVE_FMA
/*
 * Puppet around volk_32f_x2_pow_32f_u_avx2_fma.
 *
 * A make_positive() copy of aVector is handed to the kernel instead of the
 * original vector; the copy is freed once the kernel call completes.
 */
static inline void volk_32f_x2_powpuppet_32f_u_avx2_fma(float* cVector,
                                                        const float* bVector,
                                                        const float* aVector,
                                                        unsigned int num_points)
{
    float* const positive_bases = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_u_avx2_fma(cVector, bVector, positive_bases, num_points);

    volk_free(positive_bases);
}
#endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for unaligned */

#ifdef LV_HAVE_AVX2
/*
 * Puppet around volk_32f_x2_pow_32f_u_avx2.
 *
 * Forwards cVector, bVector and num_points unchanged, but replaces aVector
 * with a temporary make_positive() copy that is freed after the call.
 */
static inline void volk_32f_x2_powpuppet_32f_u_avx2(float* cVector,
                                                    const float* bVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    float* const positive_bases = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_u_avx2(cVector, bVector, positive_bases, num_points);

    volk_free(positive_bases);
}
#endif /* LV_HAVE_AVX2 for unaligned */

#endif /* INCLUDED_volk_32f_x2_powpuppet_32f_H */
2 changes: 1 addition & 1 deletion lib/kernel_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
QA(VOLK_INIT_TEST(volk_32fc_32f_add_32fc, test_params))
QA(VOLK_INIT_TEST(volk_32f_log2_32f, test_params.make_absolute(1.5e-5)))
QA(VOLK_INIT_TEST(volk_32f_expfast_32f, test_params_inacc_tenth))
QA(VOLK_INIT_TEST(volk_32f_x2_pow_32f, test_params_inacc))
QA(VOLK_INIT_TEST(volk_32f_sin_32f, test_params_inacc))
QA(VOLK_INIT_TEST(volk_32f_cos_32f, test_params_inacc))
QA(VOLK_INIT_TEST(volk_32f_tan_32f, test_params_inacc))
Expand Down Expand Up @@ -168,6 +167,7 @@ std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
QA(VOLK_INIT_TEST(volk_32f_tanh_32f, test_params_inacc))
QA(VOLK_INIT_TEST(volk_32fc_x2_s32fc_multiply_conjugate_add_32fc, test_params))
QA(VOLK_INIT_TEST(volk_32f_exp_32f, test_params))
QA(VOLK_INIT_PUPP(volk_32f_x2_powpuppet_32f, volk_32f_x2_pow_32f, test_params_inacc))
QA(VOLK_INIT_PUPP(
volk_32f_s32f_mod_rangepuppet_32f, volk_32f_s32f_s32f_mod_range_32f, test_params))
QA(VOLK_INIT_PUPP(
Expand Down

0 comments on commit 53cb677

Please sign in to comment.