Skip to content

Commit

Permalink
Merge pull request #727 from argilo/remove-vcvtrq-asm
Browse files Browse the repository at this point in the history
Remove inline assembler from volk_32fc_convert_16ic_neon
  • Loading branch information
jdemel authored Jan 7, 2024
2 parents 863aff5 + 0b9dc5f commit 67cf98a
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
22 changes: 11 additions & 11 deletions kernels/volk/volk_32fc_convert_16ic.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,6 @@ static inline void volk_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector,
#if LV_HAVE_NEONV7
#include <arm_neon.h>

/*
 * VCVTRQ_S32_F32(result, value)
 *
 * Lane-by-lane float -> signed 32-bit integer conversion of a 4-element
 * vector. One VCVTR.S32.F32 instruction is issued for each of lanes 0..3,
 * reading value[i] and writing result[i]; both operands are bound with the
 * "t" register constraint. VCVTR is used because it takes the current
 * rounding mode into account (as rintf does), per the note at its use in
 * volk_32fc_convert_16ic_neon.
 *
 * NOTE: the expansion is four independent statements with no
 * do { } while (0) wrapper, so only use it where a statement sequence is
 * valid (e.g. not as the unbraced body of an if/else). The last statement
 * already ends in ';', so writing `VCVTRQ_S32_F32(a, b);` leaves a harmless
 * empty statement behind.
 */
#define VCVTRQ_S32_F32(result, value) \
__VOLK_ASM("VCVTR.S32.F32 %0, %1" : "=t"(result[0]) : "t"(value[0]) :); \
__VOLK_ASM("VCVTR.S32.F32 %0, %1" : "=t"(result[1]) : "t"(value[1]) :); \
__VOLK_ASM("VCVTR.S32.F32 %0, %1" : "=t"(result[2]) : "t"(value[2]) :); \
__VOLK_ASM("VCVTR.S32.F32 %0, %1" : "=t"(result[3]) : "t"(value[3]) :);

static inline void volk_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
const lv_32fc_t* inputVector,
unsigned int num_points)
Expand All @@ -173,7 +167,8 @@ static inline void volk_32fc_convert_16ic_neon(lv_16sc_t* outputVector,

const float32x4_t min_val = vmovq_n_f32(min_val_f);
const float32x4_t max_val = vmovq_n_f32(max_val_f);
float32x4_t ret1, ret2, a, b;
float32x4_t half = vdupq_n_f32(0.5f);
float32x4_t ret1, ret2, a, b, sign, PlusHalf, Round;

int32x4_t toint_a = { 0, 0, 0, 0 };
int32x4_t toint_b = { 0, 0, 0, 0 };
Expand All @@ -190,9 +185,15 @@ static inline void volk_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
ret2 = vmaxq_f32(vminq_f32(b, max_val), min_val);

// vcvtr takes into account the current rounding mode (as does rintf)
VCVTRQ_S32_F32(toint_a, ret1);
VCVTRQ_S32_F32(toint_b, ret2);
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
PlusHalf = vaddq_f32(ret1, half);
Round = vsubq_f32(PlusHalf, sign);
toint_a = vcvtq_s32_f32(Round);

sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret2), 31)));
PlusHalf = vaddq_f32(ret2, half);
Round = vsubq_f32(PlusHalf, sign);
toint_b = vcvtq_s32_f32(Round);

intInputVal1 = vqmovn_s32(toint_a);
intInputVal2 = vqmovn_s32(toint_b);
Expand All @@ -212,7 +213,6 @@ static inline void volk_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
}
}

#undef VCVTRQ_S32_F32
#endif /* LV_HAVE_NEONV7 */

#if LV_HAVE_NEONV8
Expand Down
2 changes: 1 addition & 1 deletion lib/kernel_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
QA(VOLK_INIT_TEST(volk_32f_s32f_convert_32i, test_params.make_tol(1)))
QA(VOLK_INIT_TEST(volk_32f_convert_64f, test_params))
QA(VOLK_INIT_TEST(volk_32f_s32f_convert_8i, test_params.make_tol(1)))
QA(VOLK_INIT_TEST(volk_32fc_convert_16ic, test_params))
QA(VOLK_INIT_TEST(volk_32fc_convert_16ic, test_params.make_tol(1)))
QA(VOLK_INIT_TEST(volk_32fc_s32f_power_spectrum_32f, test_params.make_tol(2e-6)))
QA(VOLK_INIT_TEST(volk_32fc_x2_square_dist_32f, test_params))
QA(VOLK_INIT_TEST(volk_32fc_x2_s32f_square_dist_scalar_mult_32f, test_params))
Expand Down

0 comments on commit 67cf98a

Please sign in to comment.