Skip to content

Commit

Permalink
Merge pull request #709 from argilo/fix-truncate-toward-zero
Browse files (browse the repository at this point in the history)
Fix truncate-toward-zero distortion
  • Loading branch information
jdemel authored Dec 1, 2023
2 parents 56a893c + 4e12ebf commit 2bb1be0
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 19 deletions.
18 changes: 9 additions & 9 deletions kernels/volk/volk_32f_x2_dot_prod_16i.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ static inline void volk_32f_x2_dot_prod_16i_generic(int16_t* result,
dotProduct += ((*aPtr++) * (*bPtr++));
}

*result = (int16_t)dotProduct;
*result = (int16_t)rintf(dotProduct);
}

#endif /*LV_HAVE_GENERIC*/
Expand Down Expand Up @@ -141,7 +141,7 @@ static inline void volk_32f_x2_dot_prod_16i_a_sse(int16_t* result,
dotProduct += ((*aPtr++) * (*bPtr++));
}

*result = (short)dotProduct;
*result = (short)rintf(dotProduct);
}

#endif /*LV_HAVE_SSE*/
Expand Down Expand Up @@ -213,7 +213,7 @@ static inline void volk_32f_x2_dot_prod_16i_a_avx2_fma(int16_t* result,
dotProduct += ((*aPtr++) * (*bPtr++));
}

*result = (short)dotProduct;
*result = (short)rintf(dotProduct);
}

#endif /*LV_HAVE_AVX2 && LV_HAVE_FMA*/
Expand Down Expand Up @@ -291,7 +291,7 @@ static inline void volk_32f_x2_dot_prod_16i_a_avx(int16_t* result,
dotProduct += ((*aPtr++) * (*bPtr++));
}

*result = (short)dotProduct;
*result = (short)rintf(dotProduct);
}

#endif /*LV_HAVE_AVX*/
Expand Down Expand Up @@ -370,7 +370,7 @@ static inline void volk_32f_x2_dot_prod_16i_a_avx512f(int16_t* result,
dotProduct += ((*aPtr++) * (*bPtr++));
}

*result = (short)dotProduct;
*result = (short)rintf(dotProduct);
}

#endif /*LV_HAVE_AVX512F*/
Expand Down Expand Up @@ -444,7 +444,7 @@ static inline void volk_32f_x2_dot_prod_16i_u_sse(int16_t* result,
dotProduct += ((*aPtr++) * (*bPtr++));
}

*result = (short)dotProduct;
*result = (short)rintf(dotProduct);
}

#endif /*LV_HAVE_SSE*/
Expand Down Expand Up @@ -516,7 +516,7 @@ static inline void volk_32f_x2_dot_prod_16i_u_avx2_fma(int16_t* result,
dotProduct += ((*aPtr++) * (*bPtr++));
}

*result = (short)dotProduct;
*result = (short)rintf(dotProduct);
}

#endif /*LV_HAVE_AVX2 && LV_HAVE_FMA*/
Expand Down Expand Up @@ -594,7 +594,7 @@ static inline void volk_32f_x2_dot_prod_16i_u_avx(int16_t* result,
dotProduct += ((*aPtr++) * (*bPtr++));
}

*result = (short)dotProduct;
*result = (short)rintf(dotProduct);
}

#endif /*LV_HAVE_AVX*/
Expand Down Expand Up @@ -673,7 +673,7 @@ static inline void volk_32f_x2_dot_prod_16i_u_avx512f(int16_t* result,
dotProduct += ((*aPtr++) * (*bPtr++));
}

*result = (short)dotProduct;
*result = (short)rintf(dotProduct);
}

#endif /*LV_HAVE_AVX512F*/
Expand Down
18 changes: 8 additions & 10 deletions kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,

iValue = _mm256_mul_ps(iValue, vScalar);

iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
a = _mm256_cvtps_epi32(iValue);
a = _mm256_packs_epi32(a, a);
a = _mm256_permutevar8x32_epi32(a, idx);
Expand All @@ -113,7 +112,7 @@ volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
number = eighthPoints * 8;
iBufferPtr = &iBuffer[number];
for (; number < num_points; number++) {
*iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
*iBufferPtr++ = (int16_t)rintf(*complexVectorPtr++ * scalar);
complexVectorPtr++;
}
}
Expand Down Expand Up @@ -155,16 +154,16 @@ volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* iBuffer,
iValue = _mm_mul_ps(iValue, vScalar);

_mm_store_ps(floatBuffer, iValue);
*iBufferPtr++ = (int16_t)(floatBuffer[0]);
*iBufferPtr++ = (int16_t)(floatBuffer[1]);
*iBufferPtr++ = (int16_t)(floatBuffer[2]);
*iBufferPtr++ = (int16_t)(floatBuffer[3]);
*iBufferPtr++ = (int16_t)rintf(floatBuffer[0]);
*iBufferPtr++ = (int16_t)rintf(floatBuffer[1]);
*iBufferPtr++ = (int16_t)rintf(floatBuffer[2]);
*iBufferPtr++ = (int16_t)rintf(floatBuffer[3]);
}

number = quarterPoints * 4;
iBufferPtr = &iBuffer[number];
for (; number < num_points; number++) {
*iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
*iBufferPtr++ = (int16_t)rintf(*complexVectorPtr++ * scalar);
complexVectorPtr++;
}
}
Expand All @@ -184,7 +183,7 @@ volk_32fc_s32f_deinterleave_real_16i_generic(int16_t* iBuffer,
int16_t* iBufferPtr = iBuffer;
unsigned int number = 0;
for (number = 0; number < num_points; number++) {
*iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
*iBufferPtr++ = (int16_t)rintf(*complexVectorPtr++ * scalar);
complexVectorPtr++;
}
}
Expand Down Expand Up @@ -235,7 +234,6 @@ volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,

iValue = _mm256_mul_ps(iValue, vScalar);

iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
a = _mm256_cvtps_epi32(iValue);
a = _mm256_packs_epi32(a, a);
a = _mm256_permutevar8x32_epi32(a, idx);
Expand All @@ -248,7 +246,7 @@ volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
number = eighthPoints * 8;
iBufferPtr = &iBuffer[number];
for (; number < num_points; number++) {
*iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
*iBufferPtr++ = (int16_t)rintf(*complexVectorPtr++ * scalar);
complexVectorPtr++;
}
}
Expand Down

0 comments on commit 2bb1be0

Please sign in to comment.