GNU Radio 3.4.0 C++ API
|
00001 #ifndef INCLUDED_volk_32f_index_max_16u_a16_H 00002 #define INCLUDED_volk_32f_index_max_16u_a16_H 00003 00004 #include <volk/volk_common.h> 00005 #include <inttypes.h> 00006 #include <stdio.h> 00007 00008 #if LV_HAVE_SSE4_1 00009 #include<smmintrin.h> 00010 00011 static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { 00012 if(num_points > 0){ 00013 unsigned int number = 0; 00014 const unsigned int quarterPoints = num_points / 4; 00015 00016 float* inputPtr = (float*)src0; 00017 00018 __m128 indexIncrementValues = _mm_set1_ps(4); 00019 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4); 00020 00021 float max = src0[0]; 00022 float index = 0; 00023 __m128 maxValues = _mm_set1_ps(max); 00024 __m128 maxValuesIndex = _mm_setzero_ps(); 00025 __m128 compareResults; 00026 __m128 currentValues; 00027 00028 float maxValuesBuffer[4] __attribute__((aligned(16))); 00029 float maxIndexesBuffer[4] __attribute__((aligned(16))); 00030 00031 for(;number < quarterPoints; number++){ 00032 00033 currentValues = _mm_load_ps(inputPtr); inputPtr += 4; 00034 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues); 00035 00036 compareResults = _mm_cmpgt_ps(maxValues, currentValues); 00037 00038 maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults); 00039 maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults); 00040 } 00041 00042 // Calculate the largest value from the remaining 4 points 00043 _mm_store_ps(maxValuesBuffer, maxValues); 00044 _mm_store_ps(maxIndexesBuffer, maxValuesIndex); 00045 00046 for(number = 0; number < 4; number++){ 00047 if(maxValuesBuffer[number] > max){ 00048 index = maxIndexesBuffer[number]; 00049 max = maxValuesBuffer[number]; 00050 } 00051 } 00052 00053 number = quarterPoints * 4; 00054 for(;number < num_points; number++){ 00055 if(src0[number] > max){ 00056 index = number; 00057 max = src0[number]; 00058 } 00059 } 00060 target[0] = (unsigned int)index; 00061 } 00062 } 00063 00064 #endif /*LV_HAVE_SSE4_1*/ 00065 00066 #if LV_HAVE_SSE 00067 #include<xmmintrin.h> 00068 00069 static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const float* src0, unsigned int num_points) { 00070 if(num_points > 0){ 00071 unsigned int number = 0; 00072 const unsigned int quarterPoints = num_points / 4; 00073 00074 float* inputPtr = (float*)src0; 00075 00076 __m128 indexIncrementValues = _mm_set1_ps(4); 00077 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4); 00078 00079 float max = src0[0]; 00080 float index = 0; 00081 __m128 maxValues = _mm_set1_ps(max); 00082 __m128 maxValuesIndex = _mm_setzero_ps(); 00083 __m128 compareResults; 00084 __m128 currentValues; 00085 00086 float maxValuesBuffer[4] __attribute__((aligned(16))); 00087 float maxIndexesBuffer[4] __attribute__((aligned(16))); 00088 00089 for(;number < quarterPoints; number++){ 00090 00091 currentValues = _mm_load_ps(inputPtr); inputPtr += 4; 00092 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues); 00093 00094 compareResults = _mm_cmpgt_ps(maxValues, currentValues); 00095 00096 maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes)); 00097 00098 maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues)); 00099 } 00100 00101 // Calculate the largest value from the remaining 4 points 00102 _mm_store_ps(maxValuesBuffer, maxValues); 00103 _mm_store_ps(maxIndexesBuffer, maxValuesIndex); 00104 00105 for(number = 0; number < 4; number++){ 00106 if(maxValuesBuffer[number] > max){ 00107 index = maxIndexesBuffer[number]; 00108 max = maxValuesBuffer[number]; 00109 } 00110 } 00111 00112 number = quarterPoints * 4; 00113 for(;number < num_points; number++){ 00114 if(src0[number] > max){ 00115 index = number; 00116 max = src0[number]; 00117 } 00118 } 00119 target[0] = (unsigned int)index; 00120 } 00121 } 00122 00123 #endif /*LV_HAVE_SSE*/ 00124 00125 #if LV_HAVE_GENERIC 00126 static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, const float* src0, unsigned int num_points) { 00127 if(num_points > 0){ 00128 float max = src0[0]; 00129 unsigned int index = 0; 00130 00131 int i = 1; 00132 00133 for(; i < num_points; ++i) { 00134 00135 if(src0[i] > max){ 00136 index = i; 00137 max = src0[i]; 00138 } 00139 00140 } 00141 target[0] = index; 00142 } 00143 } 00144 00145 #endif /*LV_HAVE_GENERIC*/ 00146 00147 00148 #endif /*INCLUDED_volk_32f_index_max_16u_a16_H*/