1 #ifndef INCLUDED_volk_16i_s32f_convert_32f_a_H
2 #define INCLUDED_volk_16i_s32f_convert_32f_a_H
/* Declares _mm_cvtepi16_epi32 (SSE4.1) and the SSE/SSE2 intrinsics used below. */
#include <smmintrin.h>

/*!
 * \brief Converts 16-bit signed integers to floats and divides each by
 *        \p scalar, processing eight points per iteration with SSE4.1.
 *
 * Unaligned loads and stores are used, so this function itself places no
 * alignment requirement on either buffer.
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer holding num_points int16_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector,
                                                      const int16_t* inputVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;

    /* The vector path multiplies by the reciprocal instead of dividing. */
    const __m128 invScalar = _mm_set_ps1(1.0f / scalar);

    for (; number < eighthPoints; number++) {
        /* Load eight 16-bit values. */
        __m128i inputVal = _mm_loadu_si128((const __m128i*)inputPtr);

        /* Shift right by 8 bytes so the upper four values sit in the low half. */
        __m128i inputVal2 = _mm_srli_si128(inputVal, 8);

        /* Sign-extend the low four 16-bit lanes of each register to 32 bits. */
        inputVal = _mm_cvtepi16_epi32(inputVal);
        inputVal2 = _mm_cvtepi16_epi32(inputVal2);

        /* Points 0..3 of this group. */
        __m128 ret = _mm_cvtepi32_ps(inputVal);
        ret = _mm_mul_ps(ret, invScalar);
        _mm_storeu_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;

        /* Points 4..7 of this group. */
        ret = _mm_cvtepi32_ps(inputVal2);
        ret = _mm_mul_ps(ret, invScalar);
        _mm_storeu_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;

        inputPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) elements; this path
     * divides by scalar directly rather than multiplying by the reciprocal. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) / scalar;
    }
}
62 #include <xmmintrin.h>
/*!
 * \brief Converts 16-bit signed integers to floats and divides each by
 *        \p scalar, processing four points per iteration with SSE.
 *
 * The result is written with an unaligned store, so this function itself
 * places no alignment requirement on the output buffer.
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer holding num_points int16_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector,
                                                   const int16_t* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;

    /* The vector path multiplies by the reciprocal instead of dividing. */
    const __m128 invScalar = _mm_set_ps1(1.0f / scalar);

    for (; number < quarterPoints; number++) {
        /* _mm_set_ps takes its arguments highest lane first, so element 0
         * goes last to land in the lowest lane. */
        __m128 ret = _mm_set_ps((float)(inputPtr[3]),
                                (float)(inputPtr[2]),
                                (float)(inputPtr[1]),
                                (float)(inputPtr[0]));

        ret = _mm_mul_ps(ret, invScalar);
        _mm_storeu_ps(outputVectorPtr, ret);

        inputPtr += 4;
        outputVectorPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) elements; this path
     * divides by scalar directly rather than multiplying by the reciprocal. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]) / scalar;
    }
}
97 #ifdef LV_HAVE_GENERIC
105 static inline void volk_16i_s32f_convert_32f_a_generic(
float* outputVector,
const int16_t* inputVector,
const float scalar,
unsigned int num_points){
106 float* outputVectorPtr = outputVector;
107 const int16_t* inputVectorPtr = inputVector;
108 unsigned int number = 0;
110 for(number = 0; number < num_points; number++){
111 *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;