GNU Radio 3.4.0 C++ API
volk_32f_index_max_16u_a16.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_index_max_16u_a16_H
00002 #define INCLUDED_volk_32f_index_max_16u_a16_H
00003 
00004 #include <volk/volk_common.h>
00005 #include <inttypes.h>
00006 #include <stdio.h>
00007 
00008 #if LV_HAVE_SSE4_1
00009 #include<smmintrin.h>
00010 
/*!
  \brief Finds the index of the largest value in src0 (SSE4.1 implementation).

  Ties are resolved in favour of the lowest index and NaN elements never
  replace the running maximum (unless src0[0] itself is NaN, in which case
  index 0 is reported), matching the element-by-element scan
  "if(src0[i] > max)".

  \param target     Receives the index of the maximum in target[0]. It is left
                    untouched when num_points is 0.
  \param src0       Input vector. It is read with _mm_load_ps, so it must be
                    16-byte aligned whenever num_points >= 4.
  \param num_points Number of floats in src0.

  \note Lane indexes are tracked as floats, so reported indexes are only exact
        while they are below 2^24.
*/
static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
  if(num_points > 0){
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* inputPtr = src0;

    const __m128 indexIncrementValues = _mm_set1_ps(4);
    // Lanes start at {-4,-3,-2,-1} so the first increment yields {0,1,2,3}
    __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);

    float max = src0[0];
    float index = 0;
    // Every lane starts from (src0[0], index 0)
    __m128 maxValues = _mm_set1_ps(max);
    __m128 maxValuesIndex = _mm_setzero_ps();
    __m128 compareResults;
    __m128 currentValues;

    float maxValuesBuffer[4] __attribute__((aligned(16)));
    float maxIndexesBuffer[4] __attribute__((aligned(16)));

    for(;number < quarterPoints; number++){

      currentValues  = _mm_load_ps(inputPtr); inputPtr += 4;
      currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);

      // Mask is set only where the new value is strictly greater, so an equal
      // value (or a NaN) keeps the earlier maximum and its index
      compareResults = _mm_cmpgt_ps(currentValues, maxValues);

      // blendv picks the second operand in lanes where the mask is set
      maxValuesIndex = _mm_blendv_ps(maxValuesIndex, currentIndexes, compareResults);
      maxValues      = _mm_blendv_ps(maxValues, currentValues, compareResults);
    }

    // Reduce the four per-lane maxima to a single maximum
    _mm_store_ps(maxValuesBuffer, maxValues);
    _mm_store_ps(maxIndexesBuffer, maxValuesIndex);

    for(number = 0; number < 4; number++){
      // On equal values prefer the lane holding the smaller index
      if(maxValuesBuffer[number] > max ||
         (maxValuesBuffer[number] == max && maxIndexesBuffer[number] < index)){
        index = maxIndexesBuffer[number];
        max = maxValuesBuffer[number];
      }
    }

    // Handle the (num_points % 4) points that did not fill a whole vector
    number = quarterPoints * 4;
    for(;number < num_points; number++){
      if(src0[number] > max){
        index = (float)number;
        max = src0[number];
      }
    }
    target[0] = (unsigned int)index;
  }
}
00063 
00064 #endif /*LV_HAVE_SSE4_1*/
00065 
00066 #if LV_HAVE_SSE
00067 #include<xmmintrin.h>
00068 
/*!
  \brief Finds the index of the largest value in src0 (SSE implementation).

  Ties are resolved in favour of the lowest index and NaN elements never
  replace the running maximum (unless src0[0] itself is NaN, in which case
  index 0 is reported), matching the element-by-element scan
  "if(src0[i] > max)".

  \param target     Receives the index of the maximum in target[0]. It is left
                    untouched when num_points is 0.
  \param src0       Input vector. It is read with _mm_load_ps, so it must be
                    16-byte aligned whenever num_points >= 4.
  \param num_points Number of floats in src0.

  \note Lane indexes are tracked as floats, so reported indexes are only exact
        while they are below 2^24.
*/
static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const float* src0, unsigned int num_points) {
  if(num_points > 0){
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* inputPtr = src0;

    const __m128 indexIncrementValues = _mm_set1_ps(4);
    // Lanes start at {-4,-3,-2,-1} so the first increment yields {0,1,2,3}
    __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);

    float max = src0[0];
    float index = 0;
    // Every lane starts from (src0[0], index 0)
    __m128 maxValues = _mm_set1_ps(max);
    __m128 maxValuesIndex = _mm_setzero_ps();
    __m128 compareResults;
    __m128 currentValues;

    float maxValuesBuffer[4] __attribute__((aligned(16)));
    float maxIndexesBuffer[4] __attribute__((aligned(16)));

    for(;number < quarterPoints; number++){

      currentValues  = _mm_load_ps(inputPtr); inputPtr += 4;
      currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);

      // Mask is all-ones only where the new value is strictly greater, so an
      // equal value (or a NaN) keeps the earlier maximum and its index
      compareResults = _mm_cmpgt_ps(currentValues, maxValues);

      // Bitwise select: (mask & new) | (~mask & old)
      maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, currentIndexes) , _mm_andnot_ps(compareResults, maxValuesIndex));

      maxValues      = _mm_or_ps(_mm_and_ps(compareResults, currentValues) , _mm_andnot_ps(compareResults, maxValues));
    }

    // Reduce the four per-lane maxima to a single maximum
    _mm_store_ps(maxValuesBuffer, maxValues);
    _mm_store_ps(maxIndexesBuffer, maxValuesIndex);

    for(number = 0; number < 4; number++){
      // On equal values prefer the lane holding the smaller index
      if(maxValuesBuffer[number] > max ||
         (maxValuesBuffer[number] == max && maxIndexesBuffer[number] < index)){
        index = maxIndexesBuffer[number];
        max = maxValuesBuffer[number];
      }
    }

    // Handle the (num_points % 4) points that did not fill a whole vector
    number = quarterPoints * 4;
    for(;number < num_points; number++){
      if(src0[number] > max){
        index = (float)number;
        max = src0[number];
      }
    }
    target[0] = (unsigned int)index;
  }
}
00122 
00123 #endif /*LV_HAVE_SSE*/
00124 
00125 #if LV_HAVE_GENERIC
/*!
  \brief Finds the index of the largest value in src0 (portable implementation).

  Scans the vector once and keeps the first position whose value is strictly
  greater than everything seen before it, so ties resolve to the lowest index.

  \param target     Receives the index of the maximum in target[0]. It is left
                    untouched when num_points is 0.
  \param src0       Input vector of num_points floats.
  \param num_points Number of floats in src0.
*/
static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, const float* src0, unsigned int num_points) {
  if(num_points > 0){
    float max = src0[0];
    unsigned int index = 0;

    // Unsigned counter: matches the type of num_points and cannot hit signed
    // overflow when num_points exceeds INT_MAX
    unsigned int i;

    for(i = 1; i < num_points; ++i) {

      if(src0[i] > max){
        index = i;
        max = src0[i];
      }

    }
    target[0] = index;
  }
}
00144 
00145 #endif /*LV_HAVE_GENERIC*/
00146 
00147 
00148 #endif /*INCLUDED_volk_32f_index_max_16u_a16_H*/