fix: correct unpacking of q_float in to_float_iq function

This commit is contained in:
BENEDEK László 2025-05-09 15:07:14 +02:00
parent 125cd1e818
commit 8bf5a26bd8

View File

@@ -48,7 +48,7 @@ __always_inline void to_float_iq(const int16_t* i_c, const int16_t* q_c, float*
q_float_hi = _mm256_mul_ps(q_float_hi, scale); q_float_hi = _mm256_mul_ps(q_float_hi, scale);
// interleave and store // interleave and store
__m256 interleaved_lo0 = _mm256_unpacklo_ps(i_float_lo, q_float_hi); __m256 interleaved_lo0 = _mm256_unpacklo_ps(i_float_lo, q_float_lo);
__m256 interleaved_lo1 = _mm256_unpackhi_ps(i_float_lo, q_float_lo); __m256 interleaved_lo1 = _mm256_unpackhi_ps(i_float_lo, q_float_lo);
__m256 interleaved_hi0 = _mm256_unpacklo_ps(i_float_hi, q_float_hi); __m256 interleaved_hi0 = _mm256_unpacklo_ps(i_float_hi, q_float_hi);
@@ -58,19 +58,6 @@ __always_inline void to_float_iq(const int16_t* i_c, const int16_t* q_c, float*
_mm256_storeu_ps(&output[i * 2 + 8], _mm256_permute2f128_ps(interleaved_lo0, interleaved_lo1, 0x31)); _mm256_storeu_ps(&output[i * 2 + 8], _mm256_permute2f128_ps(interleaved_lo0, interleaved_lo1, 0x31));
_mm256_storeu_ps(&output[i * 2 + 16], _mm256_permute2f128_ps(interleaved_hi0, interleaved_hi1, 0x20)); _mm256_storeu_ps(&output[i * 2 + 16], _mm256_permute2f128_ps(interleaved_hi0, interleaved_hi1, 0x20));
_mm256_storeu_ps(&output[i * 2 + 24], _mm256_permute2f128_ps(interleaved_hi0, interleaved_hi1, 0x31)); _mm256_storeu_ps(&output[i * 2 + 24], _mm256_permute2f128_ps(interleaved_hi0, interleaved_hi1, 0x31));
// float i_output[16];
// float q_output[16];
// _mm256_storeu_ps(&i_output[i], i_float_lo);
// _mm256_storeu_ps(&i_output[i + 8], i_float_hi);
// _mm256_storeu_ps(&q_output[i], q_float_lo);
// _mm256_storeu_ps(&q_output[i + 8], q_float_hi);
// for (int j = 0; j < 16; j++) {
// output[(i + j) * 2 + 0] = i_output[j];
// output[(i + j) * 2 + 1] = q_output[j];
// }
} }
for (; i < len; ++i) { for (; i < len; ++i) {