diff --git a/kernels/volk/volk_32fc_convert_16ic.h b/kernels/volk/volk_32fc_convert_16ic.h index d1161653..a38cce64 100644 --- a/kernels/volk/volk_32fc_convert_16ic.h +++ b/kernels/volk/volk_32fc_convert_16ic.h @@ -150,12 +150,6 @@ static inline void volk_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, #if LV_HAVE_NEONV7 #include -#define VCVTRQ_S32_F32(result, value) \ - __VOLK_ASM("VCVTR.S32.F32 %0, %1" : "=t"(result[0]) : "t"(value[0]) :); \ - __VOLK_ASM("VCVTR.S32.F32 %0, %1" : "=t"(result[1]) : "t"(value[1]) :); \ - __VOLK_ASM("VCVTR.S32.F32 %0, %1" : "=t"(result[2]) : "t"(value[2]) :); \ - __VOLK_ASM("VCVTR.S32.F32 %0, %1" : "=t"(result[3]) : "t"(value[3]) :); - static inline void volk_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) @@ -173,7 +167,8 @@ static inline void volk_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const float32x4_t min_val = vmovq_n_f32(min_val_f); const float32x4_t max_val = vmovq_n_f32(max_val_f); - float32x4_t ret1, ret2, a, b; + float32x4_t half = vdupq_n_f32(0.5f); + float32x4_t ret1, ret2, a, b, sign, PlusHalf, Round; int32x4_t toint_a = { 0, 0, 0, 0 }; int32x4_t toint_b = { 0, 0, 0, 0 }; @@ -190,9 +185,15 @@ static inline void volk_32fc_convert_16ic_neon(lv_16sc_t* outputVector, ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val); ret2 = vmaxq_f32(vminq_f32(b, max_val), min_val); - // vcvtr takes into account the current rounding mode (as does rintf) - VCVTRQ_S32_F32(toint_a, ret1); - VCVTRQ_S32_F32(toint_b, ret2); + sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31))); + PlusHalf = vaddq_f32(ret1, half); + Round = vsubq_f32(PlusHalf, sign); + toint_a = vcvtq_s32_f32(Round); + + sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret2), 31))); + PlusHalf = vaddq_f32(ret2, half); + Round = vsubq_f32(PlusHalf, sign); + toint_b = vcvtq_s32_f32(Round); intInputVal1 = vqmovn_s32(toint_a); intInputVal2 = vqmovn_s32(toint_b); @@ -212,7 +213,6 @@ static inline void volk_32fc_convert_16ic_neon(lv_16sc_t* outputVector, } } -#undef VCVTRQ_S32_F32 #endif /* LV_HAVE_NEONV7 */ #if LV_HAVE_NEONV8 diff --git a/lib/kernel_tests.h b/lib/kernel_tests.h index 62b4fb78..16c79c36 100644 --- a/lib/kernel_tests.h +++ b/lib/kernel_tests.h @@ -126,7 +126,7 @@ std::vector init_test_list(volk_test_params_t test_params) QA(VOLK_INIT_TEST(volk_32f_s32f_convert_32i, test_params.make_tol(1))) QA(VOLK_INIT_TEST(volk_32f_convert_64f, test_params)) QA(VOLK_INIT_TEST(volk_32f_s32f_convert_8i, test_params.make_tol(1))) - QA(VOLK_INIT_TEST(volk_32fc_convert_16ic, test_params)) + QA(VOLK_INIT_TEST(volk_32fc_convert_16ic, test_params.make_tol(1))) QA(VOLK_INIT_TEST(volk_32fc_s32f_power_spectrum_32f, test_params.make_tol(2e-6))) QA(VOLK_INIT_TEST(volk_32fc_x2_square_dist_32f, test_params)) QA(VOLK_INIT_TEST(volk_32fc_x2_s32f_square_dist_scalar_mult_32f, test_params))