GNU Radio 3.4.0 C++ API
volk_32f_stddev_and_mean_32f_x2_a16.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H
00002 #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <math.h>
00007 
00008 #if LV_HAVE_SSE4_1
00009 #include <smmintrin.h>
00010 /*!
00011   \brief Calculates the standard deviation and mean of the input buffer
00012   \param stddev The calculated standard deviation
00013   \param mean The mean of the input buffer
00014   \param inputBuffer The buffer of points to calculate the std deviation for
00015   \param num_points The number of values in input buffer to used in the stddev and mean calculations
00016 */
00017 static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
00018   float returnValue = 0;
00019   float newMean = 0;
00020   if(num_points > 0){
00021     unsigned int number = 0;
00022     const unsigned int sixteenthPoints = num_points / 16;
00023 
00024     const float* aPtr = inputBuffer;
00025     float meanBuffer[4] __attribute__((aligned(128)));
00026     float squareBuffer[4] __attribute__((aligned(128)));
00027 
00028     __m128 accumulator = _mm_setzero_ps();
00029     __m128 squareAccumulator = _mm_setzero_ps();
00030     __m128 aVal1, aVal2, aVal3, aVal4;
00031     __m128 cVal1, cVal2, cVal3, cVal4;
00032     for(;number < sixteenthPoints; number++) {
00033       aVal1 = _mm_load_ps(aPtr); aPtr += 4;   
00034       cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
00035       accumulator = _mm_add_ps(accumulator, aVal1);  // accumulator += x
00036 
00037       aVal2 = _mm_load_ps(aPtr); aPtr += 4;
00038       cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
00039       accumulator = _mm_add_ps(accumulator, aVal2);  // accumulator += x
00040 
00041       aVal3 = _mm_load_ps(aPtr); aPtr += 4;
00042       cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
00043       accumulator = _mm_add_ps(accumulator, aVal3);  // accumulator += x
00044 
00045       aVal4 = _mm_load_ps(aPtr); aPtr += 4;
00046       cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
00047       accumulator = _mm_add_ps(accumulator, aVal4);  // accumulator += x
00048 
00049       cVal1 = _mm_or_ps(cVal1, cVal2);
00050       cVal3 = _mm_or_ps(cVal3, cVal4);
00051       cVal1 = _mm_or_ps(cVal1, cVal3);
00052 
00053       squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
00054     }
00055     _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
00056     _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container  
00057     newMean = meanBuffer[0];
00058     newMean += meanBuffer[1];
00059     newMean += meanBuffer[2];
00060     newMean += meanBuffer[3];
00061     returnValue = squareBuffer[0];
00062     returnValue += squareBuffer[1];
00063     returnValue += squareBuffer[2];
00064     returnValue += squareBuffer[3];
00065   
00066     number = sixteenthPoints * 16;
00067     for(;number < num_points; number++){
00068       returnValue += (*aPtr) * (*aPtr);
00069       newMean += *aPtr++;
00070     }
00071     newMean /= num_points;
00072     returnValue /= num_points;
00073     returnValue -= (newMean * newMean);
00074     returnValue = sqrt(returnValue);
00075   }
00076   *stddev = returnValue;
00077   *mean = newMean;
00078 }
00079 #endif /* LV_HAVE_SSE4_1 */
00080 
00081 #if LV_HAVE_SSE
00082 #include <xmmintrin.h>
00083 /*!
00084   \brief Calculates the standard deviation and mean of the input buffer
00085   \param stddev The calculated standard deviation
00086   \param mean The mean of the input buffer
00087   \param inputBuffer The buffer of points to calculate the std deviation for
00088   \param num_points The number of values in input buffer to used in the stddev and mean calculations
00089 */
00090 static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
00091   float returnValue = 0;
00092   float newMean = 0;
00093   if(num_points > 0){
00094     unsigned int number = 0;
00095     const unsigned int quarterPoints = num_points / 4;
00096 
00097     const float* aPtr = inputBuffer;
00098     float meanBuffer[4] __attribute__((aligned(128)));
00099     float squareBuffer[4] __attribute__((aligned(128)));
00100 
00101     __m128 accumulator = _mm_setzero_ps();
00102     __m128 squareAccumulator = _mm_setzero_ps();
00103     __m128 aVal = _mm_setzero_ps();
00104     for(;number < quarterPoints; number++) {
00105       aVal = _mm_load_ps(aPtr);                     // aVal = x
00106       accumulator = _mm_add_ps(accumulator, aVal);  // accumulator += x
00107       aVal = _mm_mul_ps(aVal, aVal);                // squareAccumulator += x^2
00108       squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
00109       aPtr += 4;
00110     }
00111     _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
00112     _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container  
00113     newMean = meanBuffer[0];
00114     newMean += meanBuffer[1];
00115     newMean += meanBuffer[2];
00116     newMean += meanBuffer[3];
00117     returnValue = squareBuffer[0];
00118     returnValue += squareBuffer[1];
00119     returnValue += squareBuffer[2];
00120     returnValue += squareBuffer[3];
00121   
00122     number = quarterPoints * 4;
00123     for(;number < num_points; number++){
00124       returnValue += (*aPtr) * (*aPtr);
00125       newMean += *aPtr++;
00126     }
00127     newMean /= num_points;
00128     returnValue /= num_points;
00129     returnValue -= (newMean * newMean);
00130     returnValue = sqrt(returnValue);
00131   }
00132   *stddev = returnValue;
00133   *mean = newMean;
00134 }
00135 #endif /* LV_HAVE_SSE */
00136 
00137 #if LV_HAVE_GENERIC
00138 /*!
00139   \brief Calculates the standard deviation and mean of the input buffer
00140   \param stddev The calculated standard deviation
00141   \param mean The mean of the input buffer
00142   \param inputBuffer The buffer of points to calculate the std deviation for
00143   \param num_points The number of values in input buffer to used in the stddev and mean calculations
00144 */
00145 static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
00146   float returnValue = 0;
00147   float newMean = 0;
00148   if(num_points > 0){
00149     const float* aPtr = inputBuffer;
00150     unsigned int number = 0;
00151     
00152     for(number = 0; number < num_points; number++){
00153       returnValue += (*aPtr) * (*aPtr);
00154       newMean += *aPtr++;
00155     }
00156     newMean /= num_points;
00157     returnValue /= num_points;
00158     returnValue -= (newMean * newMean);
00159     returnValue = sqrt(returnValue);
00160   }
00161   *stddev = returnValue;
00162   *mean = newMean;
00163 }
00164 #endif /* LV_HAVE_GENERIC */
00165 
00166 
00167 
00168 
00169 #endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H */