![]() |
NFFT
3.3.1
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 #include <stdio.h> 00019 #include <stdlib.h> 00020 #include <string.h> 00021 #include <unistd.h> 00022 00023 #include "config.h" 00024 00025 #include "nfft3.h" 00026 #include "infft.h" 00027 00028 #define NREPEAT 5 00029 00030 #if defined(_WIN32) || defined(_WIN64) 00031 const char *CMD_CREATEDATASET = "nfsft_benchomp_createdataset.exe"; 00032 const char *CMD_DETAIL_SINGLE = "nfsft_benchomp_detail_single.exe"; 00033 const char *CMD_DETAIL_THREADS = "nfsft_benchomp_detail_threads.exe"; 00034 #else 00035 const char *CMD_CREATEDATASET = "./nfsft_benchomp_createdataset"; 00036 const char *CMD_DETAIL_SINGLE = "./nfsft_benchomp_detail_single"; 00037 const char *CMD_DETAIL_THREADS = "./nfsft_benchomp_detail_threads"; 00038 #endif 00039 00040 static FILE* file_out_tex = NULL; 00041 00042 int get_nthreads_array(int **arr) 00043 { 00044 int max_threads = X(get_num_threads)(); 00045 int alloc_num = 2; 00046 int k; 00047 int ret_number = 0; 00048 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0; 00049 00050 if (max_threads <= 5) 00051 { 00052 *arr = (int*) malloc(max_threads*sizeof(int)); 00053 for (k = 0; k < max_threads; k++) 00054 *(*arr + k) = k+1; 00055 return max_threads; 00056 } 00057 00058 for (k = 1; k <= max_threads; k*=2, alloc_num++); 00059 00060 *arr = (int*) malloc(alloc_num*sizeof(int)); 00061 00062 for (k = 1; k <= max_threads; k*=2) 00063 { 00064 if (k != max_threads && 2*k > max_threads && max_threads_pw2) 00065 { 00066 *(*arr + ret_number) = max_threads/2; 00067 ret_number++; 00068 } 00069 00070 *(*arr + ret_number) = k; 00071 ret_number++; 00072 00073 if (k != max_threads && 2*k > max_threads) 00074 { 00075 *(*arr + ret_number) = max_threads; 00076 ret_number++; 00077 break; 00078 } 00079 } 00080 00081 return ret_number; 00082 } 00083 00084 00085 void check_result_value(const int val, const int ok, const char *msg) 00086 { 00087 if (val != ok) 00088 { 00089 fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok); 00090 00091 exit(1); 00092 } 00093 } 00094 00095 void run_test_create(int trafo_adjoint, int N, int M) 00096 { 00097 char cmd[1025]; 00098 00099 snprintf(cmd, 1024, "%s %d %d %d > nfsft_benchomp_test.data", CMD_CREATEDATASET, trafo_adjoint, N, M); 00100 fprintf(stderr, "%s\n", cmd); 00101 check_result_value(system(cmd), 0, "createdataset"); 00102 } 00103 00104 void run_test_init_output() 00105 { 00106 FILE *f = fopen("nfsft_benchomp_test.result", "w"); 00107 if (f!= NULL) 00108 fclose(f); 00109 } 00110 00111 typedef struct 00112 { 00113 int trafo_adjoint; 00114 int N; 00115 int M; 00116 int m; 00117 int nfsft_flags; 00118 int psi_flags; 00119 } s_param; 00120 00121 typedef struct 00122 { 00123 double avg; 00124 double min; 00125 double max; 00126 } s_resval; 00127 00128 typedef struct 00129 { 00130 int nthreads; 00131 s_resval resval[6]; 00132 } s_result; 00133 00134 typedef struct 00135 { 00136 s_param param; 00137 s_result *results; 00138 int nresults; 00139 } s_testset; 00140 00141 void run_test(s_resval *res, int nrepeat, int m, int nfsft_flags, int psi_flags, int nthreads) 00142 { 00143 FILE *f; 00144 char cmd[1025]; 00145 int r,t; 00146 00147 for (t = 0; t < 6; t++) 00148 { 00149 res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0; 00150 } 00151 00152 if (nthreads < 2) 00153 snprintf(cmd, 1024, "%s %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_SINGLE, m, nfsft_flags, psi_flags, nrepeat); 00154 else 00155 snprintf(cmd, 1024, "%s %d %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_THREADS, m, nfsft_flags, psi_flags, nrepeat, nthreads); 00156 fprintf(stderr, "%s\n", cmd); 00157 00158 check_result_value(system(cmd), 0, cmd); 00159 00160 f = fopen("nfsft_benchomp_test.out", "r"); 00161 for (r = 0; r < nrepeat; r++) 00162 { 00163 int retval; 00164 double v[6]; 00165 // FILE *f; 00166 // check_result_value(system(cmd), 0, cmd); 00167 // f = fopen("nfsft_benchomp_test.out", "r"); 00168 retval = fscanf(f, "%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5); 00169 check_result_value(retval, 6, "read nfsft_benchomp_test.out"); 00170 // fclose(f); 00171 // fprintf(stderr, "%.3e %.3e %.3e %.3e %.3e %.3e\n", v[0], v[1], v[2], v[3], v[4], v[5]); 00172 for (t = 0; t < 6; t++) 00173 { 00174 res[t].avg += v[t]; 00175 if (res[t].min > v[t]) 00176 res[t].min = v[t]; 00177 if (res[t].max < v[t]) 00178 res[t].max = v[t]; 00179 } 00180 } 00181 fclose(f); 00182 00183 for (t = 0; t < 6; t++) 00184 res[t].avg /= nrepeat; 00185 00186 fprintf(stderr, "%d %d: ", nthreads, nrepeat); 00187 for (t = 0; t < 6; t++) 00188 fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max); 00189 fprintf(stderr, "\n"); 00190 } 00191 00192 const char *get_psi_string(int flags) 00193 { 00194 if (flags & PRE_PSI) 00195 return "prepsi"; 00196 else if (flags & PRE_ONE_PSI) 00197 return "unknownPSI"; 00198 00199 return "nopsi"; 00200 } 00201 const char *get_sort_string(int flags) 00202 { 00203 if (flags & NFFT_SORT_NODES) 00204 return "sorted"; 00205 00206 return "unsorted"; 00207 } 00208 00209 const char *get_adjoint_omp_string(int flags) 00210 { 00211 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT) 00212 return "blockwise"; 00213 00214 return ""; 00215 } 00216 00217 #define MASK_TA (1U<<1) 00218 #define MASK_N (1U<<2) 00219 #define MASK_M (1U<<4) 00220 #define MASK_WINM (1U<<5) 00221 #define MASK_FLAGS_PSI (1U<<6) 00222 #define MASK_FLAGS_SORT (1U<<7) 00223 #define MASK_FLAGS_BW (1U<<8) 00224 #define MASK_FLAGS_FPT (1U<<9) 00225 00226 unsigned int determine_different_parameters(s_testset *testsets, int ntestsets) 00227 { 00228 int t; 00229 unsigned int mask = 0; 00230 00231 if (ntestsets < 2) 00232 return 0; 00233 00234 for (t = 1; t < ntestsets; t++) 00235 { 00236 if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint) 00237 mask |= MASK_TA; 00238 if (testsets[t-1].param.N != testsets[t].param.N) 00239 mask |= MASK_N; 00240 if (testsets[t-1].param.M != testsets[t].param.M) 00241 mask |= MASK_M; 00242 if (testsets[t-1].param.m != testsets[t].param.m) 00243 mask |= MASK_WINM; 00244 if ((testsets[t-1].param.psi_flags & PRE_ONE_PSI) != (testsets[t].param.psi_flags & PRE_ONE_PSI)) 00245 mask |= MASK_FLAGS_PSI; 00246 if ((testsets[t-1].param.psi_flags & NFFT_SORT_NODES) != (testsets[t].param.psi_flags & NFFT_SORT_NODES)) 00247 mask |= MASK_FLAGS_SORT; 00248 if ((testsets[t-1].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT)) 00249 mask |= MASK_FLAGS_BW; 00250 if ((testsets[t-1].param.nfsft_flags & NFSFT_USE_DPT) != (testsets[t].param.nfsft_flags & NFSFT_USE_DPT)) 00251 mask |= MASK_FLAGS_FPT; 00252 } 00253 00254 return mask; 00255 } 00256 00257 void get_plot_title(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask) 00258 { 00259 unsigned int mask = ~diff_mask; 00260 int offset = 0; 00261 int len; 00262 00263 len = snprintf(outstr, maxlen, "%s", hostname); 00264 if (len < 0 || len+offset >= maxlen-1) return; 00265 offset += len; 00266 00267 if (mask & MASK_TA) 00268 { 00269 len = snprintf(outstr+offset, maxlen-offset, " $\\mathrm{NFSFT}%s$", param.trafo_adjoint==0?"":"^\\top"); 00270 if (len < 0 || len+offset >= maxlen-1) return; 00271 offset += len; 00272 } 00273 00274 if (mask & MASK_N) 00275 { 00276 len = snprintf(outstr+offset, maxlen-offset, " N=%d", param.N); 00277 if (len < 0 || len+offset >= maxlen-1) return; 00278 offset += len; 00279 } 00280 00281 if (mask & MASK_M) 00282 { 00283 len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M); 00284 if (len < 0 || len+offset >= maxlen-1) return; 00285 offset += len; 00286 } 00287 00288 if (mask & MASK_WINM) 00289 { 00290 len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m); 00291 if (len < 0 || len+offset >= maxlen-1) return; 00292 offset += len; 00293 } 00294 00295 if (mask & MASK_FLAGS_PSI) 00296 { 00297 len = snprintf(outstr+offset, maxlen-offset, " %s", get_psi_string(param.psi_flags)); 00298 if (len < 0 || len+offset >= maxlen-1) return; 00299 offset += len; 00300 } 00301 00302 if (mask & MASK_FLAGS_SORT) 00303 { 00304 len = snprintf(outstr+offset, maxlen-offset, " %s", get_sort_string(param.psi_flags)); 00305 if (len < 0 || len+offset >= maxlen-1) return; 00306 offset += len; 00307 } 00308 00309 if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.psi_flags)) > 0) 00310 { 00311 len = snprintf(outstr+offset, maxlen-offset, " %s", get_adjoint_omp_string(param.psi_flags)); 00312 if (len < 0 || len+offset >= maxlen-1) return; 00313 offset += len; 00314 } 00315 00316 if (mask & MASK_FLAGS_FPT) 00317 { 00318 len = snprintf(outstr+offset, maxlen-offset, param.nfsft_flags & NFSFT_USE_DPT ? " DPT" : ""); 00319 if (len < 0 || len+offset >= maxlen-1) return; 00320 offset += len; 00321 } 00322 00323 } 00324 00325 void print_output_speedup_total_tref(FILE *out, s_testset *testsets, int ntestsets, int use_tref, double tref) 00326 { 00327 int i, t; 00328 char hostname[1025]; 00329 char plottitle[1025]; 00330 unsigned int diff_mask = determine_different_parameters(testsets, ntestsets); 00331 00332 #ifdef HAVE_GETHOSTNAME 00333 if (gethostname(hostname, 1024) != 0) 00334 #endif 00335 strncpy(hostname, "unnamed", 1024); 00336 00337 get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask); 00338 00339 fprintf(out, "\\begin{tikzpicture}\n"); 00340 fprintf(out, "\\begin{axis}["); 00341 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, "); 00342 fprintf(out, " title={%s}", plottitle); 00343 fprintf(out, " ]\n"); 00344 00345 for (t = 0; t < ntestsets; t++) 00346 { 00347 s_testset testset = testsets[t]; 00348 fprintf(stderr, "%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags)); 00349 fprintf(stderr, "\n"); 00350 00351 fprintf(out, "\\addplot coordinates {"); 00352 for (i = 0; i < testset.nresults; i++) 00353 if (use_tref == 1) 00354 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg); 00355 else 00356 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg); 00357 fprintf(out, "};\n"); 00358 00359 for (i = 0; i < testset.nresults; i++) 00360 if (use_tref == 1) 00361 fprintf(stderr, "%d:%.3f ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg); 00362 else 00363 fprintf(stderr, "%d:%.3f ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg); 00364 fprintf(stderr, "\n\n"); 00365 } 00366 00367 fprintf(out, "\\legend{{"); 00368 for (t = 0; t < ntestsets; t++) 00369 { 00370 char title[256]; 00371 if (t > 0) 00372 fprintf(out, "},{"); 00373 get_plot_title(title, 255, "", testsets[t].param, ~(diff_mask)); 00374 fprintf(out, "%s", title); 00375 } 00376 fprintf(out, "}}\n"); 00377 fprintf(out, "\\end{axis}\n"); 00378 fprintf(out, "\\end{tikzpicture}\n"); 00379 fprintf(out, "\n\n"); 00380 00381 fflush(out); 00382 } 00383 00384 void print_output_speedup_total(FILE *out, s_testset *testsets, int ntestsets, int use_tref) 00385 { 00386 double tref = 1.0/0.0; 00387 int t, k; 00388 00389 if (use_tref == 1) 00390 for (t = 0; t < ntestsets; t++) 00391 for (k = 0; k < testsets[t].nresults; k++) 00392 if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref) 00393 tref = testsets[t].results[k].resval[5].avg; 00394 00395 print_output_speedup_total_tref(out, testsets, ntestsets, use_tref, tref); 00396 } 00397 00398 void print_output_histo_PENRT(FILE *out, s_testset testset) 00399 { 00400 int i, size = testset.nresults; 00401 char hostname[1025]; 00402 00403 #ifdef HAVE_GETHOSTNAME 00404 if (gethostname(hostname, 1024) != 0) 00405 #endif 00406 strncpy(hostname, "unnamed", 1024); 00407 00408 fprintf(out, "\\begin{tikzpicture}\n"); 00409 fprintf(out, "\\begin{axis}["); 00410 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, "); 00411 fprintf(out, "symbolic x coords={"); 00412 for (i = 0; i < size; i++) 00413 if (i > 0) 00414 fprintf(out, ",%d", testset.results[i].nthreads); 00415 else 00416 fprintf(out, "%d", testset.results[i].nthreads); 00417 00418 fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, "); 00419 fprintf(out, " title={%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags)); 00420 fprintf(out, " ]\n"); 00421 fprintf(out, "\\addplot coordinates {"); 00422 for (i = 0; i < size; i++) 00423 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg); 00424 fprintf(out, "};\n"); 00425 00426 fprintf(out, "\\addplot coordinates {"); 00427 for (i = 0; i < size; i++) 00428 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg); 00429 fprintf(out, "};\n"); 00430 00431 fprintf(out, "\\addplot coordinates {"); 00432 for (i = 0; i < size; i++) 00433 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg); 00434 fprintf(out, "};\n"); 00435 00436 fprintf(out, "\\addplot coordinates {"); 00437 for (i = 0; i < size; i++) 00438 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg); 00439 fprintf(out, "};\n"); 00440 00441 fprintf(out, "\\addplot coordinates {"); 00442 for (i = 0; i < size; i++) 00443 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg); 00444 fprintf(out, "};\n"); 00445 fprintf(out, "\\legend{%s,%s,$\\mathrm{NFFT}%s$,rest,total}\n", testset.param.nfsft_flags & NFSFT_USE_DPT ? "DPT" : "FPT", testset.param.trafo_adjoint==0?"c2e":"$\\mathrm{c2e}^\\top$", testset.param.trafo_adjoint==0?"":"^\\top"); 00446 fprintf(out, "\\end{axis}\n"); 00447 fprintf(out, "\\end{tikzpicture}\n"); 00448 fprintf(out, "\n\n"); 00449 00450 fflush(out); 00451 } 00452 00453 void run_testset(s_testset *testset, int trafo_adjoint, int N, int M, int m, int nfsft_flags, int psi_flags, int *nthreads_array, int n_threads_array_size) 00454 { 00455 int i; 00456 testset->param.trafo_adjoint = trafo_adjoint; 00457 testset->param.N = N; 00458 testset->param.M = M; 00459 testset->param.m = m; 00460 testset->param.nfsft_flags = nfsft_flags; 00461 testset->param.psi_flags = psi_flags; 00462 00463 testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result)); 00464 testset->nresults = n_threads_array_size; 00465 00466 run_test_create(testset->param.trafo_adjoint, testset->param.N, testset->param.M); 00467 for (i = 0; i < n_threads_array_size; i++) 00468 { 00469 testset->results[i].nthreads = nthreads_array[i]; 00470 run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.nfsft_flags, testset->param.psi_flags, testset->results[i].nthreads = nthreads_array[i]); 00471 } 00472 00473 } 00474 00475 void test1(int *nthreads_array, int n_threads_array_size, int m) 00476 { 00477 s_testset testsets[4]; 00478 00479 run_testset(&testsets[0], 0, 1024, 1000000, m, 0, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00480 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00481 print_output_histo_PENRT(file_out_tex, testsets[0]); 00482 #endif 00483 00484 run_testset(&testsets[1], 1, 1024, 1000000, m, 0, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size); 00485 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00486 print_output_histo_PENRT(file_out_tex, testsets[1]); 00487 #endif 00488 00489 print_output_speedup_total(file_out_tex, testsets, 2, 0); 00490 00491 run_testset(&testsets[2], 0, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00492 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00493 print_output_histo_PENRT(file_out_tex, testsets[2]); 00494 #endif 00495 00496 run_testset(&testsets[3], 1, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size); 00497 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00498 print_output_histo_PENRT(file_out_tex, testsets[3]); 00499 #endif 00500 00501 print_output_speedup_total(file_out_tex, testsets+2, 2, 0); 00502 } 00503 00504 int main(int argc, char** argv) 00505 { 00506 int *nthreads_array; 00507 int n_threads_array_size = get_nthreads_array(&nthreads_array); 00508 int k; 00509 00510 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW) 00511 fprintf(stderr, "WARNING: Detailed time measurements for NFSFT are not activated.\n"); 00512 fprintf(stderr, "For more detailed plots, please re-run the configure script with options\n"); 00513 fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-nfsft --enable-openmp\n"); 00514 fprintf(stderr, "and run \"make clean all\"\n\n"); 00515 #endif 00516 00517 for (k = 0; k < n_threads_array_size; k++) 00518 fprintf(stderr, "%d ", nthreads_array[k]); 00519 fprintf(stderr, "\n"); 00520 00521 file_out_tex = fopen("nfsft_benchomp_results_plots.tex", "w"); 00522 00523 test1(nthreads_array, n_threads_array_size, 2); 00524 test1(nthreads_array, n_threads_array_size, 4); 00525 test1(nthreads_array, n_threads_array_size, 6); 00526 test1(nthreads_array, n_threads_array_size, 8); 00527 00528 fclose(file_out_tex); 00529 00530 return 0; 00531 }