NFFT  3.3.1
nfsft_benchomp_detail.c
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 #include <stdio.h>
00020 #include <math.h>
00021 #include <string.h>
00022 #include <stdlib.h>
00023 #include <complex.h>
00024 
00025 #include "nfft3.h"
00026 #include "infft.h"
00027 #ifdef _OPENMP
00028 #include <omp.h>
00029 #endif
00030 
00031 void bench_openmp_readfile(FILE *infile, int *trafo_adjoint, int *N, int *M, double **x, C **f_hat, C **f)
00032 {
00033   double re,im;
00034   int k, n, j, t;
00035   nfsft_plan plan;
00036 
00037   fscanf(infile, "%d %d %d", trafo_adjoint, N, M);
00038   *x = (double *)nfft_malloc(2*(*M)*sizeof(double));
00039   *f_hat = (C*)nfft_malloc((2*(*N)+2) * (2*(*N)+2) * sizeof(C));
00040   *f = (C*)nfft_malloc((*M)*sizeof(C));
00041 
00042   memset(*f_hat,0U,(2*(*N)+2) * (2*(*N)+2) * sizeof(C));
00043   memset(*f,0U,(*M)*sizeof(C));
00044 
00045 #ifdef _OPENMP
00046   fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
00047 #else
00048   fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
00049 #endif
00050 
00051   nfsft_init_guru(&plan, *N, *M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
00052     NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
00053     PRE_PHI_HUT | FFTW_INIT | FFT_OUT_OF_PLACE, 6);
00054 
00055 #ifdef _OPENMP
00056   fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
00057 #else
00058   fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
00059 #endif
00060 
00061   for (j=0; j < *M; j++)
00062     for (t=0; t < 2; t++)
00063       fscanf(infile, "%lg", (*x)+2*j+t);
00064 
00065   if (trafo_adjoint==0)
00066   {
00067     for (k = 0; k <= *N; k++)
00068       for (n = -k; n <= k; n++)
00069       {
00070         fscanf(infile, "%lg %lg", &re, &im);
00071         (*f_hat)[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
00072       }
00073   }
00074   else
00075   {
00076     for (j=0; j < *M; j++)
00077     {
00078       fscanf(infile, "%lg %lg", &re, &im);
00079       (*f)[j] = re + _Complex_I * im;
00080     }
00081   }
00082 
00083   nfsft_finalize(&plan);
00084 }
00085 
00086 void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
00087 {
00088   nfsft_plan plan;
00089   int k, n;
00090 //  int N, M, trafo_adjoint;
00091   int t, j;
00092   ticks t0, t1;
00093   double tt_total, tt_pre;
00094 
00095 //  fscanf(infile, "%d %d %d", &trafo_adjoint, &N, &M);
00096 
00097 /*#ifdef _OPENMP
00098   fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
00099 #else
00100   fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
00101 #endif*/
00102 
00103   /* precomputation (for fast polynomial transform) */
00104 //  nfsft_precompute(N,1000.0,0U,0U);
00105 
00106   /* Initialize transform plan using the guru interface. All input and output
00107    * arrays are allocated by nfsft_init_guru(). Computations are performed with
00108    * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical
00109    * Fourier coefficients is preserved during transformations. The NFFT uses a
00110    * cut-off parameter m = 6. See the NFFT 3 manual for details.
00111    */
00112   nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
00113     NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
00114     PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m);
00115 
00116 /*#ifdef _OPENMP
00117   fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
00118 #else
00119   fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
00120 #endif*/
00121 
00122   for (j=0; j < plan.M_total; j++)
00123   {
00124     for (t=0; t < 2; t++)
00125  //     fscanf(infile, "%lg", plan.x+2*j+t);
00126       plan.x[2*j+t] = x[2*j+t];
00127   }
00128 
00129   if (trafo_adjoint==0)
00130   {
00131     memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
00132     for (k = 0; k <= plan.N; k++)
00133       for (n = -k; n <= k; n++)
00134       {
00135 //        fscanf(infile, "%lg %lg", &re, &im);
00136 //        plan.f_hat[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
00137         plan.f_hat[NFSFT_INDEX(k,n,&plan)] = f_hat[NFSFT_INDEX(k,n,&plan)];
00138       }
00139   }
00140   else
00141   {
00142     for (j=0; j < plan.M_total; j++)
00143     {
00144 //      fscanf(infile, "%lg %lg", &re, &im);
00145 //      plan.f[j] = re + _Complex_I * im;
00146       plan.f[j] = f[j];
00147     }
00148 
00149     memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
00150   }
00151 
00152   t0 = getticks();
00153   /* precomputation (for NFFT, node-dependent) */
00154   nfsft_precompute_x(&plan);
00155   t1 = getticks();
00156   tt_pre = nfft_elapsed_seconds(t1,t0);
00157 
00158   if (trafo_adjoint==0)
00159     nfsft_trafo(&plan);
00160   else
00161     nfsft_adjoint(&plan);
00162   t1 = getticks();
00163   tt_total = nfft_elapsed_seconds(t1,t0);
00164 
00165 #ifndef MEASURE_TIME
00166   plan.MEASURE_TIME_t[0] = 0.0;
00167   plan.MEASURE_TIME_t[2] = 0.0;
00168 #endif
00169 
00170 #ifndef MEASURE_TIME_FFTW
00171   plan.MEASURE_TIME_t[1] = 0.0;
00172 #endif
00173 
00174   printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_pre, plan.MEASURE_TIME_t[0], plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2], tt_total);
00175 
00177   nfsft_finalize(&plan);
00178 }
00179 
00180 int main(int argc, char **argv)
00181 {
00182   int m, nfsft_flags, psi_flags;
00183   int nrepeat;
00184   int trafo_adjoint, N, M, r;
00185   double *x;
00186   C *f_hat, *f;
00187 #ifdef _OPENMP
00188   int nthreads;
00189 
00190   if (argc != 6)
00191     return 1;
00192 
00193   nthreads = atoi(argv[5]);
00194   fftw_init_threads();
00195   omp_set_num_threads(nthreads);
00196 #else
00197   if (argc != 5)
00198     return 1;
00199 #endif
00200 
00201   m = atoi(argv[1]);
00202   nfsft_flags = atoi(argv[2]);
00203   psi_flags = atoi(argv[3]);
00204   nrepeat = atoi(argv[4]);
00205 
00206   bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f);
00207 
00208   /* precomputation (for fast polynomial transform) */
00209   nfsft_precompute(N,1000.0,0U,0U);
00210 
00211   for (r = 0; r < nrepeat; r++)
00212     bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags);
00213 
00214   return 0;
00215 }