fast_convert.h

00001 /*
00002  * SpanDSP - a series of DSP components for telephony
00003  *
00004  * fast_convert.h - Quick ways to convert floating point numbers to integers
00005  *
00006  * Written by Steve Underwood <steveu@coppice.org>
00007  *
00008  * Copyright (C) 2009 Steve Underwood
00009  *
00010  * All rights reserved.
00011  *
00012  * This program is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU Lesser General Public License version 2.1,
00014  * as published by the Free Software Foundation.
00015  *
00016  * This program is distributed in the hope that it will be useful,
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  * GNU Lesser General Public License for more details.
00020  *
00021  * You should have received a copy of the GNU Lesser General Public
00022  * License along with this program; if not, write to the Free Software
00023  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00024  *
00025  * $Id: fast_convert.h,v 1.7 2009/04/18 03:18:41 steveu Exp $
00026  */
00027 
00028 #if !defined(_SPANDSP_FAST_CONVERT_H_)
00029 #define _SPANDSP_FAST_CONVERT_H_
00030 
00031 #if defined(__cplusplus)
00032 extern "C"
00033 {
00034 #endif
00035 
00036 /* The following code, to handle issues with lrint() and lrintf() on various
00037  * platforms, is adapted from similar code in libsndfile, which is:
00038  *
00039  * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
00040  *
00041  * This program is free software; you can redistribute it and/or modify
00042  * it under the terms of the GNU Lesser General Public License as published by
00043  * the Free Software Foundation; either version 2.1 of the License, or
00044  * (at your option) any later version.
00045  *
00046  * This program is distributed in the hope that it will be useful,
00047  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00048  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00049  * GNU Lesser General Public License for more details.
00050  */
00051 
00052 /*
00053  *    On Intel Pentium processors (especially PIII and probably P4), converting
00054  *    from float to int is very slow. To meet the C specs, the code produced by
00055  *    most C compilers targeting Pentium needs to change the FPU rounding mode
00056  *    before the float to int conversion is performed.
00057  *
00058  *    Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
00059  *    is this flushing of the pipeline which is so slow.
00060  *
00061  *    Fortunately the ISO C99 specification defines the functions lrint, lrintf,
00062  *    llrint and llrintf which fix this problem as a side effect.
00063  *
00064  *    On Unix-like systems, the configure process should have detected the
00065  *    presence of these functions. If they weren't found we have to replace them
00066  *    here with a standard C cast.
00067  */
00068 
00069 /*
00070  *    The C99 prototypes for these functions are as follows:
00071  *
00072  *        float rintf(float x);
00073  *        double rint(double x);
00074  *        long int lrintf(float x);
00075  *        long int lrint(double x);
00076  *        long long int llrintf(float x);
00077  *        long long int llrint(double x);
00078  *
00079  *    The presence of the required functions is detected during the configure
00080  *    process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
00081  *    the config file.
00082  */
00083 
00084 #if defined(__CYGWIN__)
00085 #if !defined(__cplusplus)
00086     /*
00087      *    CYGWIN has lrint and lrintf functions, but they are slow and buggy:
00088      *        http://sourceware.org/ml/cygwin/2005-06/msg00153.html
00089      *        http://sourceware.org/ml/cygwin/2005-09/msg00047.html
00090      *    The latest version of cygwin seems to have made no effort to fix this.
00091      *    These replacement functions (pulled from the Public Domain MinGW
00092      *    math.h header) replace the native versions.
00093      */
/*
 * Cygwin replacement for the C99 lrint(): convert a double to an integer
 * with the x87 FISTP instruction, which rounds using the FPU's current
 * rounding mode.
 *
 * FISTPL stores exactly 32 bits. The result is therefore captured in a
 * 32-bit int and widened on return; storing straight into a "long int"
 * would leave the upper half uninitialised wherever long is wider than
 * 32 bits.
 *
 * x        The value to convert.
 * returns  x converted to an integer by the FPU.
 */
static __inline__ long int lrint(double x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)       /* x is supplied on the top of the x87 stack... */
        : "st"          /* ...and is popped by FISTP */
    );

    return (long int) rounded;
}
00108 
/*
 * Cygwin replacement for the C99 lrintf(): convert a float to an integer
 * with the x87 FISTP instruction, which rounds using the FPU's current
 * rounding mode.
 *
 * FISTPL stores exactly 32 bits. The result is therefore captured in a
 * 32-bit int and widened on return; storing straight into a "long int"
 * would leave the upper half uninitialised wherever long is wider than
 * 32 bits.
 *
 * x        The value to convert.
 * returns  x converted to an integer by the FPU.
 */
static __inline__ long int lrintf(float x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)       /* x is supplied on the top of the x87 stack... */
        : "st"          /* ...and is popped by FISTP */
    );

    return (long int) rounded;
}
00122 #endif
00123 
00124     /* The fastest way to convert is the equivalent of lrint() */
/*
 * Fast double to integer conversion for Cygwin, using the x87 FISTP
 * instruction. Rounding follows the FPU's current rounding mode, so this
 * behaves like lrint().
 *
 * FISTPL stores exactly 32 bits. The result is therefore captured in a
 * 32-bit int and widened on return; storing straight into a "long int"
 * would leave the upper half uninitialised wherever long is wider than
 * 32 bits.
 *
 * x        The value to convert.
 * returns  x converted to an integer by the FPU.
 */
static __inline__ long int lfastrint(double x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)       /* x is supplied on the top of the x87 stack... */
        : "st"          /* ...and is popped by FISTP */
    );

    return (long int) rounded;
}
00139 
/*
 * Fast float to integer conversion for Cygwin, using the x87 FISTP
 * instruction. Rounding follows the FPU's current rounding mode, so this
 * behaves like lrintf().
 *
 * FISTPL stores exactly 32 bits. The result is therefore captured in a
 * 32-bit int and widened on return; storing straight into a "long int"
 * would leave the upper half uninitialised wherever long is wider than
 * 32 bits.
 *
 * x        The value to convert.
 * returns  x converted to an integer by the FPU.
 */
static __inline__ long int lfastrintf(float x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)       /* x is supplied on the top of the x87 stack... */
        : "st"          /* ...and is popped by FISTP */
    );

    return (long int) rounded;
}
00153 #elif defined(__GNUC__)
00154 
00155 #if defined(__i386__)
00156     /* These routines are guaranteed fast on an i386 machine. Using the built in
00157        lrint() and lrintf() should be similar, but they may not always be enabled.
00158        Sometimes, especially with "-O0", you might get slow calls to routines. */
/*
 * Fast double to integer conversion for i386 builds with GCC.
 *
 * A single x87 FISTP stores the value held on the top of the FPU stack as
 * an integer, rounding with the FPU's current rounding mode.
 *
 * x        The value to convert.
 * returns  x converted to an integer by the FPU.
 */
static __inline__ long int lfastrint(double x)
{
    long int converted;

    /* "t" places x in st(0); the "st" clobber records that FISTP pops it. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (converted) : "t" (x) : "st");
    return converted;
}
00173 
/*
 * Fast float to integer conversion for i386 builds with GCC.
 *
 * A single x87 FISTP stores the value held on the top of the FPU stack as
 * an integer, rounding with the FPU's current rounding mode.
 *
 * x        The value to convert.
 * returns  x converted to an integer by the FPU.
 */
static __inline__ long int lfastrintf(float x)
{
    long int converted;

    /* "t" places x in st(0); the "st" clobber records that FISTP pops it. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (converted) : "t" (x) : "st");
    return converted;
}
00187 #elif defined(__x86_64__)
00188     /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
00189        double or float to an int. It looks like the design of the x86_64 took account
00190        of the default behaviour specified for C. */
/*
 * Fast double to integer conversion for x86_64 builds with GCC.
 *
 * This is a plain C conversion, so the fractional part is discarded
 * (truncation towards zero) rather than rounded.
 *
 * x        The value to convert.
 * returns  x with its fractional part discarded.
 */
static __inline__ long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
00195 
/*
 * Fast float to integer conversion for x86_64 builds with GCC.
 *
 * This is a plain C conversion, so the fractional part is discarded
 * (truncation towards zero) rather than rounded.
 *
 * x        The value to convert.
 * returns  x with its fractional part discarded.
 */
static __inline__ long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
00200 #elif defined(__ppc__)  ||   defined(__powerpc__)
00201     static __inline__ long int lfastrint(register double x)
00202     {
00203         int res[2];
00204 
00205         __asm__ __volatile__
00206         (
00207             "fctiw %1, %1\n\t"
00208             "stfd %1, %0"
00209             : "=m" (res)    /* Output */
00210             : "f" (x)       /* Input */
00211             : "memory"
00212         );
00213 
00214         return res[1];
00215     }
00216 
00217     static __inline__ long int lfastrintf(register float x)
00218     {
00219         int res[2];
00220 
00221         __asm__ __volatile__
00222         (
00223             "fctiw %1, %1\n\t"
00224             "stfd %1, %0"
00225             : "=m" (res)    /* Output */
00226             : "f" (x)       /* Input */
00227             : "memory"
00228         );
00229 
00230         return res[1];
00231     }
00232 #else
00233     /* Fallback routines, for unrecognised platforms */
/*
 * Fallback double to integer conversion for GCC targets with no dedicated
 * routine.
 *
 * This is a plain C conversion, so the fractional part is discarded
 * (truncation towards zero) rather than rounded.
 *
 * x        The value to convert.
 * returns  x with its fractional part discarded.
 */
static __inline__ long int lfastrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
00238 
/*
 * Fallback float to integer conversion for GCC targets with no dedicated
 * routine.
 *
 * This is a plain C conversion, so the fractional part is discarded
 * (truncation towards zero) rather than rounded.
 *
 * x        The value to convert.
 * returns  x with its fractional part discarded.
 */
static __inline__ long int lfastrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
00243 #endif
00244 
00245 #elif defined(_M_IX86)
00246     /* Visual Studio i386 */
00247     /*
00248      *    Win32 doesn't seem to have the lrint() and lrintf() functions.
00249      *    Therefore implement inline versions of these functions here.
00250      */
00251 
00252     __inline long int lrint(double x)
00253     {
00254         long int i;
00255 
00256         _asm
00257         {
00258             fld x
00259             fistp i
00260         };
00261         return i;
00262     }
00263 
00264     __inline long int lrintf(float x)
00265     {
00266         long int i;
00267 
00268         _asm
00269         {
00270             fld x
00271             fistp i
00272         };
00273         return i;
00274     }
00275 
00276     __inline float rintf(float flt)
00277     {
00278         _asm
00279         {       fld flt
00280                 frndint
00281         }
00282     }
00283 
00284     __inline double rint(double dbl)
00285     {
00286         _asm 
00287         {
00288             fld dbl
00289             frndint
00290         }
00291     }
00292 
00293     __inline long int lfastrint(double x)
00294     {
00295         long int i;
00296 
00297         _asm
00298         {
00299             fld x
00300             fistp i
00301         };
00302         return i;
00303     }
00304 
00305     __inline long int lfastrintf(float x)
00306     {
00307         long int i;
00308 
00309         _asm
00310         {
00311             fld x
00312             fistp i
00313         };
00314         return i;
00315     }
00316 #elif defined(_M_X64)
00317     /* Visual Studio x86_64 */
00318     /* x86_64 machines will do best with a simple assignment. */
00319 #include <intrin.h>
00320 
/*
 * lrint() for 64-bit Visual Studio builds, using the SSE2 scalar
 * conversion, which rounds with the current MXCSR rounding mode.
 *
 * The argument is loaded with _mm_load_sd(), which reads exactly one
 * double. _mm_loadu_pd() reads two, and so would read 8 bytes beyond the
 * single double parameter.
 *
 * Declared static, like the routines for the other compilers in this
 * header, so each translation unit gets its own private copy.
 *
 * x        The value to convert.
 * returns  x converted to an integer, narrowed to long int.
 */
static __inline long int lrint(double x)
{
    return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
}
00325 
/*
 * lrintf() for 64-bit Visual Studio builds, using the SSE scalar
 * conversion, which rounds with the current MXCSR rounding mode.
 *
 * x        The value to convert.
 * returns  x converted to an integer.
 */
__inline long int lrintf(float x)
{
    const float *source = (const float *) &x;
    const __m128 packed = _mm_load_ss(source);

    return _mm_cvt_ss2si(packed);
}
00330 
00331     __inline long int lfastrint(double x)
00332     {
00333         return (long int) (x);
00334     }
00335 
00336     __inline long int lfastrintf(float x)
00337     {
00338         return (long int) (x);
00339     }
00340 #elif defined(__MWERKS__)  &&  defined(macintosh)
00341     /* This MacOS 9 solution was provided by Stephane Letz */
00342 
00343     long int __inline__ lfastrint(register double x)
00344     {
00345         long int res[2];
00346 
00347         asm
00348         {
00349             fctiw x, x
00350             stfd x, res
00351         }
00352         return res[1];
00353     }
00354 
00355     long int __inline__ lfastrintf(register float x)
00356     {
00357         long int res[2];
00358 
00359         asm
00360         {
00361             fctiw x, x
00362             stfd x, res
00363         }
00364         return res[1];
00365     }
00366 #elif defined(__MACH__)  &&  defined(__APPLE__)  &&  (defined(__ppc__)  ||  defined(__powerpc__))
00367     /* For Apple Mac OS/X - do recent versions still need this? */
00368 
00369     static __inline__ long int lfastrint(register double x)
00370     {
00371         int res[2];
00372 
00373         __asm__ __volatile__
00374         (
00375             "fctiw %1, %1\n\t"
00376             "stfd %1, %0"
00377             : "=m" (res)    /* Output */
00378             : "f" (x)       /* Input */
00379             : "memory"
00380         );
00381 
00382         return res[1];
00383     }
00384 
00385     static __inline__ long int lfastrintf(register float x)
00386     {
00387         int res[2];
00388 
00389         __asm__ __volatile__
00390         (
00391             "fctiw %1, %1\n\t"
00392             "stfd %1, %0"
00393             : "=m" (res)    /* Output */
00394             : "f" (x)       /* Input */
00395             : "memory"
00396         );
00397 
00398         return res[1];
00399     }
00400 #else
00401     /* There is nothing else to do, but use a simple casting operation, instead of a real
00402        rint() type function. Since we are only trying to use rint() to speed up conversions,
00403        the accuracy issues related to changing the rounding scheme are of little concern
00404        to us. */
00405 
00406     #if !defined(__sgi)
00407         #warning "No usable lrint() and lrintf() functions available."
00408         #warning "Replacing these functions with a simple C cast."
00409     #endif
00410 
/*
 * Stand-in for lrint() on platforms with no usable native version.
 *
 * This is a plain C conversion, so the fractional part is discarded
 * (truncation towards zero) rather than rounded.
 *
 * x        The value to convert.
 * returns  x with its fractional part discarded.
 */
static __inline__ long int lrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
00415 
/*
 * Stand-in for lrintf() on platforms with no usable native version.
 *
 * This is a plain C conversion, so the fractional part is discarded
 * (truncation towards zero) rather than rounded.
 *
 * x        The value to convert.
 * returns  x with its fractional part discarded.
 */
static __inline__ long int lrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
00420 
/*
 * Double to integer conversion for platforms with no faster routine.
 *
 * This is a plain C conversion, so the fractional part is discarded
 * (truncation towards zero) rather than rounded.
 *
 * x        The value to convert.
 * returns  x with its fractional part discarded.
 */
static __inline__ long int lfastrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
00425 
/*
 * Float to integer conversion for platforms with no faster routine.
 *
 * This is a plain C conversion, so the fractional part is discarded
 * (truncation towards zero) rather than rounded.
 *
 * x        The value to convert.
 * returns  x with its fractional part discarded.
 */
static __inline__ long int lfastrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
00430 #endif
00431 
00432 #if defined(__cplusplus)
00433 }
00434 #endif
00435 
00436 #endif
00437 
00438 /*- End of file ------------------------------------------------------------*/

Generated on Tue Aug 4 03:35:57 2009 for spandsp by  doxygen 1.5.9