![]() |
NFFT
3.3.1
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00025 #include "config.h" 00026 00027 #include <stdlib.h> 00028 #include <math.h> 00029 #ifdef HAVE_COMPLEX_H 00030 #include <complex.h> 00031 #endif 00032 00033 #include "nfft3.h" 00034 #include "fastsum.h" 00035 #include "infft.h" 00036 00037 // Required for test if (ths->k == one_over_x) 00038 #include "kernels.h" 00039 00046 static int max_i(int a, int b) 00047 { 00048 return a >= b ? a : b; 00049 } 00050 00052 static R fak(int n) 00053 { 00054 if (n <= 1) 00055 return K(1.0); 00056 else 00057 return (R)(n) * fak(n - 1); 00058 } 00059 00061 static R binom(int n, int m) 00062 { 00063 return fak(n) / fak(m) / fak(n - m); 00064 } 00065 00067 static R BasisPoly(int m, int r, R xx) 00068 { 00069 int k; 00070 R sum = K(0.0); 00071 00072 for (k = 0; k <= m - r; k++) 00073 { 00074 sum += binom(m + k, k) * POW((xx + K(1.0)) / K(2.0), (R) k); 00075 } 00076 return sum * POW((xx + K(1.0)), (R) r) * POW(K(1.0) - xx, (R) (m + 1)) 00077 / (R)(1 << (m + 1)) / fak(r); /* 1<<(m+1) = 2^(m+1) */ 00078 } 00079 00081 C regkern(kernel k, R xx, int p, const R *param, R a, R b) 00082 { 00083 int r; 00084 C sum = K(0.0); 00085 00086 if (xx < -K(0.5)) 00087 xx = -K(0.5); 00088 if (xx > K(0.5)) 00089 xx = K(0.5); 00090 if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b)) 00091 { 00092 return k(xx, 0, param); 00093 } 00094 else if (xx < -K(0.5) + b) 00095 { 00096 sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0) 00097 * BasisPoly(p - 1, 0, K(2.0) * xx / b + (K(1.0) - b) / b); 00098 for (r = 0; r < p; r++) 00099 { 00100 sum += POW(-b / K(2.0), (R) r) * k(-K(0.5) + b, r, param) 00101 * BasisPoly(p - 1, r, -K(2.0) * xx / b + (b - K(1.0)) / b); 00102 } 00103 return sum; 00104 } 00105 else if ((xx > -a) && (xx < a)) 00106 { 00107 for (r = 0; r < p; r++) 00108 { 00109 sum += 00110 POW(a, (R) r) 00111 * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a) 00112 + k(a, r, param) * BasisPoly(p - 1, r, -xx / a) 00113 * (r & 1 ? -1 : 1)); 00114 } 00115 return sum; 00116 } 00117 else if (xx > K(0.5) - b) 00118 { 00119 sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0) 00120 * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b); 00121 for (r = 0; r < p; r++) 00122 { 00123 sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param) 00124 * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b); 00125 } 00126 return sum; 00127 } 00128 return k(xx, 0, param); 00129 } 00130 00134 static C regkern1(kernel k, R xx, int p, const R *param, R a, R b) 00135 { 00136 int r; 00137 C sum = K(0.0); 00138 00139 if (xx < -K(0.5)) 00140 xx = -K(0.5); 00141 if (xx > K(0.5)) 00142 xx = K(0.5); 00143 if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b)) 00144 { 00145 return k(xx, 0, param); 00146 } 00147 else if ((xx > -a) && (xx < a)) 00148 { 00149 for (r = 0; r < p; r++) 00150 { 00151 sum += 00152 POW(a, (R) r) 00153 * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a) 00154 + k(a, r, param) * BasisPoly(p - 1, r, -xx / a) 00155 * (r & 1 ? -1 : 1)); 00156 } 00157 return sum; 00158 } 00159 else if (xx < -K(0.5) + b) 00160 { 00161 for (r = 0; r < p; r++) 00162 { 00163 sum += POW(b, (R) r) 00164 * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx + K(0.5)) / b) 00165 + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx + K(0.5)) / b) 00166 * (r & 1 ? -1 : 1)); 00167 } 00168 return sum; 00169 } 00170 else if (xx > K(0.5) - b) 00171 { 00172 for (r = 0; r < p; r++) 00173 { 00174 sum += POW(b, (R) r) 00175 * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx - K(0.5)) / b) 00176 + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx - K(0.5)) / b) 00177 * (r & 1 ? -1 : 1)); 00178 } 00179 return sum; 00180 } 00181 return k(xx, 0, param); 00182 } 00183 00185 //static C regkern2(kernel k, R xx, int p, const R *param, R a, R b) 00186 //{ 00187 // int r; 00188 // C sum = K(0.0); 00189 // 00190 // xx = FABS(xx); 00191 // 00192 // if (xx > K(0.5)) 00193 // { 00194 // for (r = 0; r < p; r++) 00195 // { 00196 // sum += POW(b, (R) r) * k(K(0.5) - b, r, param) 00197 // * (BasisPoly(p - 1, r, 0) + BasisPoly(p - 1, r, 0)); 00198 // } 00199 // return sum; 00200 // } 00201 // else if ((a <= xx) && (xx <= K(0.5) - b)) 00202 // { 00203 // return k(xx, 0, param); 00204 // } 00205 // else if (xx < a) 00206 // { 00207 // for (r = 0; r < p; r++) 00208 // { 00209 // sum += POW(-a, (R) r) * k(a, r, param) 00210 // * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a)); 00211 // } 00212 // return sum; 00213 // } 00214 // else if ((K(0.5) - b < xx) && (xx <= K(0.5))) 00215 // { 00216 // for (r = 0; r < p; r++) 00217 // { 00218 // sum += POW(b, (R) r) * k(K(0.5) - b, r, param) 00219 // * (BasisPoly(p - 1, r, (xx - K(0.5)) / b) 00220 // + BasisPoly(p - 1, r, -(xx - K(0.5)) / b)); 00221 // } 00222 // return sum; 00223 // } 00224 // return K(0.0); 00225 //} 00226 00230 static C regkern3(kernel k, R xx, int p, const R *param, R a, R b) 00231 { 00232 int r; 00233 C sum = K(0.0); 00234 00235 xx = FABS(xx); 00236 00237 if (xx >= K(0.5)) 00238 { 00239 /*return kern(typ,c,0,K(0.5));*/ 00240 xx = K(0.5); 00241 } 00242 /* else */ 00243 if ((a <= xx) && (xx <= K(0.5) - b)) 00244 { 00245 return k(xx, 0, param); 00246 } 00247 else if (xx < a) 00248 { 00249 for (r = 0; r < p; r++) 00250 { 00251 sum += POW(-a, (R) r) * k(a, r, param) 00252 * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a)); 00253 } 00254 /*sum=kern(typ,c,0,xx); */ 00255 return sum; 00256 } 00257 else if ((K(0.5) - b < xx) && (xx <= K(0.5))) 00258 { 00259 sum = k(K(0.5), 0, param) * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b); 00260 /* sum=regkern2(typ,c,p,a,b, K(0.5))*BasisPoly(p-1,0,-K(2.0)*xx/b+(K(1.0)-b)/b); */ 00261 for (r = 0; r < p; r++) 00262 { 00263 sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param) 00264 * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b); 00265 } 00266 return sum; 00267 } 00268 return K(0.0); 00269 } 00270 00272 //static C linintkern(const R x, const C *Add, const int Ad, const R a) 00273 //{ 00274 // R c, c1, c3; 00275 // int r; 00276 // C f1, f2; 00277 // 00278 // c = x * Ad / a; 00279 // r = (int)(LRINT(c)); 00280 // r = abs(r); 00281 // f1 = Add[r]; 00282 // f2 = Add[r + 1]; 00283 // c = FABS(c); 00284 // c1 = c - r; 00285 // c3 = c1 - K(1.0); 00286 // return (-f1 * c3 + f2 * c1); 00287 //} 00288 // 00289 //static C quadrintkern(const R x, const C *Add, const int Ad, const R a) 00290 //{ 00291 // R c, c1, c2, c3; 00292 // int r; 00293 // C f0, f1, f2; 00294 // 00295 // c = x * Ad / a; 00296 // r = (int)(LRINT(c)); 00297 // r = abs(r); 00298 // if (r == 0) 00299 // { 00300 // f0 = Add[r + 1]; 00301 // f1 = Add[r]; 00302 // f2 = Add[r + 1]; 00303 // } 00304 // else 00305 // { 00306 // f0 = Add[r - 1]; 00307 // f1 = Add[r]; 00308 // f2 = Add[r + 1]; 00309 // } 00310 // c = FABS(c); 00311 // c1 = c - r; 00312 // c2 = c1 + K(1.0); 00313 // c3 = c1 - K(1.0); 00314 // return (f0 * c1 * c3 / K(2.0) - f1 * c2 * c3 + f2 * c2 * c1 / K(2.0)); 00315 //} 00316 00318 C kubintkern(const R x, const C *Add, const int Ad, const R a) 00319 { 00320 R c, c1, c2, c3, c4; 00321 int r; 00322 C f0, f1, f2, f3; 00323 c = x * (R)(Ad) / a; 00324 r = (int)(LRINT(c)); 00325 r = abs(r); 00326 if (r == 0) 00327 { 00328 f0 = Add[r + 1]; 00329 f1 = Add[r]; 00330 f2 = Add[r + 1]; 00331 f3 = Add[r + 2]; 00332 } 00333 else 00334 { 00335 f0 = Add[r - 1]; 00336 f1 = Add[r]; 00337 f2 = Add[r + 1]; 00338 f3 = Add[r + 2]; 00339 } 00340 c = FABS(c); 00341 c1 = c - (R)(r); 00342 c2 = c1 + K(1.0); 00343 c3 = c1 - K(1.0); 00344 c4 = c1 - K(2.0); 00345 /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2- 00346 f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */ 00347 return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0) 00348 - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0)); 00349 } 00350 00352 static C kubintkern1(const R x, const C *Add, const int Ad, const R a) 00353 { 00354 R c, c1, c2, c3, c4; 00355 int r; 00356 C f0, f1, f2, f3; 00357 Add += 2; 00358 c = (x + a) * (R)(Ad) / K(2.0) / a; 00359 r = (int)(LRINT(c)); 00360 r = abs(r); 00361 /*if (r==0) {f0=Add[r];f1=Add[r];f2=Add[r+1];f3=Add[r+2];} 00362 else */ 00363 { 00364 f0 = Add[r - 1]; 00365 f1 = Add[r]; 00366 f2 = Add[r + 1]; 00367 f3 = Add[r + 2]; 00368 } 00369 c = FABS(c); 00370 c1 = c - (R)(r); 00371 c2 = c1 + K(1.0); 00372 c3 = c1 - K(1.0); 00373 c4 = c1 - K(2.0); 00374 /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2- 00375 f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */ 00376 return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0) 00377 - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0)); 00378 } 00379 00381 static void quicksort(int d, int t, R *x, C *alpha, int N) 00382 { 00383 int lpos = 0; 00384 int rpos = N - 1; 00385 /*R pivot=x[((N-1)/2)*d+t];*/ 00386 R pivot = x[(N / 2) * d + t]; 00387 00388 int k; 00389 R temp1; 00390 C temp2; 00391 00392 while (lpos <= rpos) 00393 { 00394 while (x[lpos * d + t] < pivot) 00395 lpos++; 00396 while (x[rpos * d + t] > pivot) 00397 rpos--; 00398 if (lpos <= rpos) 00399 { 00400 for (k = 0; k < d; k++) 00401 { 00402 temp1 = x[lpos * d + k]; 00403 x[lpos * d + k] = x[rpos * d + k]; 00404 x[rpos * d + k] = temp1; 00405 } 00406 temp2 = alpha[lpos]; 00407 alpha[lpos] = alpha[rpos]; 00408 alpha[rpos] = temp2; 00409 00410 lpos++; 00411 rpos--; 00412 } 00413 } 00414 if (0 < rpos) 00415 quicksort(d, t, x, alpha, rpos + 1); 00416 if (lpos < N - 1) 00417 quicksort(d, t, x + lpos * d, alpha + lpos, N - lpos); 00418 } 00419 00421 static void BuildBox(fastsum_plan *ths) 00422 { 00423 int t, l; 00424 int *box_index; 00425 R val[ths->d]; 00426 00427 box_index = (int *) NFFT(malloc)((size_t)(ths->box_count) * sizeof(int)); 00428 for (t = 0; t < ths->box_count; t++) 00429 box_index[t] = 0; 00430 00431 for (l = 0; l < ths->N_total; l++) 00432 { 00433 int ind = 0; 00434 for (t = 0; t < ths->d; t++) 00435 { 00436 val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0); 00437 ind *= ths->box_count_per_dim; 00438 ind += (int) (val[t] / ths->eps_I); 00439 } 00440 box_index[ind]++; 00441 } 00442 00443 ths->box_offset[0] = 0; 00444 for (t = 1; t <= ths->box_count; t++) 00445 { 00446 ths->box_offset[t] = ths->box_offset[t - 1] + box_index[t - 1]; 00447 box_index[t - 1] = ths->box_offset[t - 1]; 00448 } 00449 00450 for (l = 0; l < ths->N_total; l++) 00451 { 00452 int ind = 0; 00453 for (t = 0; t < ths->d; t++) 00454 { 00455 val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0); 00456 ind *= ths->box_count_per_dim; 00457 ind += (int) (val[t] / ths->eps_I); 00458 } 00459 00460 ths->box_alpha[box_index[ind]] = ths->alpha[l]; 00461 00462 for (t = 0; t < ths->d; t++) 00463 { 00464 ths->box_x[ths->d * box_index[ind] + t] = ths->x[ths->d * l + t]; 00465 } 00466 box_index[ind]++; 00467 } 00468 NFFT(free)(box_index); 00469 } 00470 00472 static inline C calc_SearchBox(int d, R *y, R *x, C *alpha, int start, 00473 int end_lt, const C *Add, const int Ad, int p, R a, const kernel k, 00474 const R *param, const unsigned flags) 00475 { 00476 C result = K(0.0); 00477 00478 int m, l; 00479 R r; 00480 00481 for (m = start; m < end_lt; m++) 00482 { 00483 if (d == 1) 00484 { 00485 r = y[0] - x[m]; 00486 } 00487 else 00488 { 00489 r = K(0.0); 00490 for (l = 0; l < d; l++) 00491 r += (y[l] - x[m * d + l]) * (y[l] - x[m * d + l]); 00492 r = SQRT(r); 00493 } 00494 if (FABS(r) < a) 00495 { 00496 result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */ 00497 if (d == 1) 00498 { 00499 if (flags & EXACT_NEARFIELD) 00500 result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D) */ 00501 else 00502 result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */ 00503 } 00504 else 00505 { 00506 if (flags & EXACT_NEARFIELD) 00507 result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD) */ 00508 else 00509 #if defined(NF_KUB) 00510 result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */ 00511 #elif defined(NF_QUADR) 00512 result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */ 00513 #elif defined(NF_LIN) 00514 result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */ 00515 #else 00516 #error define interpolation method 00517 #endif 00518 } 00519 } 00520 } 00521 return result; 00522 } 00523 00525 static C SearchBox(R *y, fastsum_plan *ths) 00526 { 00527 C val = K(0.0); 00528 int t; 00529 int y_multiind[ths->d]; 00530 int multiindex[ths->d]; 00531 int y_ind; 00532 00533 for (t = 0; t < ths->d; t++) 00534 { 00535 y_multiind[t] = (int)(LRINT((y[t] + K(0.25) - ths->eps_B / K(2.0)) / ths->eps_I)); 00536 } 00537 00538 if (ths->d == 1) 00539 { 00540 for (y_ind = max_i(0, y_multiind[0] - 1); 00541 y_ind < ths->box_count_per_dim && y_ind <= y_multiind[0] + 1; y_ind++) 00542 { 00543 val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha, 00544 ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add, ths->Ad, 00545 ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags); 00546 } 00547 } 00548 else if (ths->d == 2) 00549 { 00550 for (multiindex[0] = max_i(0, y_multiind[0] - 1); 00551 multiindex[0] < ths->box_count_per_dim 00552 && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++) 00553 for (multiindex[1] = max_i(0, y_multiind[1] - 1); 00554 multiindex[1] < ths->box_count_per_dim 00555 && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++) 00556 { 00557 y_ind = (ths->box_count_per_dim * multiindex[0]) + multiindex[1]; 00558 val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha, 00559 ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add, 00560 ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags); 00561 } 00562 } 00563 else if (ths->d == 3) 00564 { 00565 for (multiindex[0] = max_i(0, y_multiind[0] - 1); 00566 multiindex[0] < ths->box_count_per_dim 00567 && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++) 00568 for (multiindex[1] = max_i(0, y_multiind[1] - 1); 00569 multiindex[1] < ths->box_count_per_dim 00570 && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++) 00571 for (multiindex[2] = max_i(0, y_multiind[2] - 1); 00572 multiindex[2] < ths->box_count_per_dim 00573 && multiindex[2] <= y_multiind[2] + 1; multiindex[2]++) 00574 { 00575 y_ind = ((ths->box_count_per_dim * multiindex[0]) + multiindex[1]) 00576 * ths->box_count_per_dim + multiindex[2]; 00577 val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha, 00578 ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add, 00579 ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, 00580 ths->flags); 00581 } 00582 } 00583 else 00584 { 00585 exit(EXIT_FAILURE); 00586 } 00587 return val; 00588 } 00589 00591 static void BuildTree(int d, int t, R *x, C *alpha, int N) 00592 { 00593 if (N > 1) 00594 { 00595 int m = N / 2; 00596 00597 quicksort(d, t, x, alpha, N); 00598 00599 BuildTree(d, (t + 1) % d, x, alpha, m); 00600 BuildTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), N - m - 1); 00601 } 00602 } 00603 00605 static C SearchTree(const int d, const int t, const R *x, const C *alpha, 00606 const R *xmin, const R *xmax, const int N, const kernel k, const R *param, 00607 const int Ad, const C *Add, const int p, const unsigned flags) 00608 { 00609 if (N == 0) 00610 { 00611 return K(0.0); 00612 } 00613 else 00614 { 00615 int m = N / 2; 00616 R Min = xmin[t]; 00617 R Max = xmax[t]; 00618 R Median = x[m * d + t]; 00619 R a = FABS(Max - Min) / 2; 00620 int l; 00621 int E = 0; 00622 R r; 00623 00624 if (Min > Median) 00625 return SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin, 00626 xmax, N - m - 1, k, param, Ad, Add, p, flags); 00627 else if (Max < Median) 00628 return SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad, 00629 Add, p, flags); 00630 else 00631 { 00632 C result = K(0.0); 00633 E = 0; 00634 00635 for (l = 0; l < d; l++) 00636 { 00637 if (x[m * d + l] > xmin[l] && x[m * d + l] < xmax[l]) 00638 E++; 00639 } 00640 00641 if (E == d) 00642 { 00643 if (d == 1) 00644 { 00645 r = xmin[0] + a - x[m]; /* remember: xmin+a = y */ 00646 } 00647 else 00648 { 00649 r = K(0.0); 00650 for (l = 0; l < d; l++) 00651 r += (xmin[l] + a - x[m * d + l]) * (xmin[l] + a - x[m * d + l]); /* remember: xmin+a = y */ 00652 r = SQRT(r); 00653 } 00654 if (FABS(r) < a) 00655 { 00656 result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */ 00657 if (d == 1) 00658 { 00659 if (flags & EXACT_NEARFIELD) 00660 result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D) */ 00661 else 00662 result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */ 00663 } 00664 else 00665 { 00666 if (flags & EXACT_NEARFIELD) 00667 result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD) */ 00668 else 00669 #if defined(NF_KUB) 00670 result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */ 00671 #elif defined(NF_QUADR) 00672 result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */ 00673 #elif defined(NF_LIN) 00674 result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */ 00675 #else 00676 #error define interpolation method 00677 #endif 00678 } 00679 } 00680 } 00681 result += SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin, 00682 xmax, N - m - 1, k, param, Ad, Add, p, flags) 00683 + SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad, Add, 00684 p, flags); 00685 return result; 00686 } 00687 } 00688 } 00689 00691 void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, 00692 kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B) 00693 { 00694 int t; 00695 int N[d], n[d]; 00696 int n_total; 00697 unsigned sort_flags_trafo = 0U; 00698 unsigned sort_flags_adjoint = 0U; 00699 #ifdef _OPENMP 00700 int nthreads = NFFT(get_num_threads)(); 00701 #endif 00702 00703 if (d > 1) 00704 { 00705 sort_flags_trafo = NFFT_SORT_NODES; 00706 #ifdef _OPENMP 00707 sort_flags_adjoint = NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT; 00708 #else 00709 sort_flags_adjoint = NFFT_SORT_NODES; 00710 #endif 00711 } 00712 00713 ths->d = d; 00714 00715 ths->N_total = N_total; 00716 ths->M_total = M_total; 00717 00718 ths->x = (R *) NFFT(malloc)((size_t)(d * N_total) * (sizeof(R))); 00719 ths->alpha = (C *) NFFT(malloc)((size_t)(N_total) * (sizeof(C))); 00720 00721 ths->y = (R *) NFFT(malloc)((size_t)(d * M_total) * (sizeof(R))); 00722 ths->f = (C *) NFFT(malloc)((size_t)(M_total) * (sizeof(C))); 00723 00724 ths->k = k; 00725 ths->kernel_param = param; 00726 00727 ths->flags = flags; 00728 00729 ths->p = p; 00730 ths->eps_I = eps_I; /* =(R)ths->p/(R)nn; */ 00731 ths->eps_B = eps_B; /* =K(1.0)/K(16.0); */ 00734 if (!(ths->flags & EXACT_NEARFIELD)) 00735 { 00736 if (ths->d == 1) 00737 { 00738 ths->Ad = 4 * (ths->p) * (ths->p); 00739 ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 5) * (sizeof(C))); 00740 } 00741 else 00742 { 00743 if (ths->k == one_over_x) 00744 { 00745 R delta = K(1e-8); 00746 switch (p) 00747 { 00748 case 2: 00749 delta = K(1e-3); 00750 break; 00751 case 3: 00752 delta = K(1e-4); 00753 break; 00754 case 4: 00755 delta = K(1e-5); 00756 break; 00757 case 5: 00758 delta = K(1e-6); 00759 break; 00760 case 6: 00761 delta = K(1e-6); 00762 break; 00763 case 7: 00764 delta = K(1e-7); 00765 break; 00766 default: 00767 delta = K(1e-8); 00768 } 00769 00770 #if defined(NF_KUB) 00771 ths->Ad = max_i(10, (int)(LRINT(CEIL(K(1.4) / POW(delta, K(1.0) / K(4.0)))))); 00772 ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C))); 00773 #elif defined(NF_QUADR) 00774 ths->Ad = (int)(LRINT(CEIL(K(2.2)/POW(delta,K(1.0)/K(3.0))))); 00775 ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C))); 00776 #elif defined(NF_LIN) 00777 ths->Ad = (int)(LRINT(CEIL(K(1.7)/pow(delta,K(1.0)/K(2.0))))); 00778 ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C))); 00779 #else 00780 #error define NF_LIN or NF_QUADR or NF_KUB 00781 #endif 00782 } 00783 else 00784 { 00785 ths->Ad = 2 * (ths->p) * (ths->p); 00786 ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C))); 00787 } 00788 } 00789 } 00790 00792 ths->n = nn; 00793 for (t = 0; t < d; t++) 00794 { 00795 N[t] = nn; 00796 n[t] = 2 * nn; 00797 } 00798 NFFT(init_guru)(&(ths->mv1), d, N, N_total, n, m, 00799 sort_flags_adjoint | 00800 PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | MALLOC_F | FFTW_INIT 00801 | FFT_OUT_OF_PLACE, 00802 FFTW_MEASURE | FFTW_DESTROY_INPUT); 00803 NFFT(init_guru)(&(ths->mv2), d, N, M_total, n, m, 00804 sort_flags_trafo | 00805 PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | MALLOC_F | FFTW_INIT 00806 | FFT_OUT_OF_PLACE, 00807 FFTW_MEASURE | FFTW_DESTROY_INPUT); 00808 00810 n_total = 1; 00811 for (t = 0; t < d; t++) 00812 n_total *= nn; 00813 00814 ths->b = (C*) NFFT(malloc)((size_t)(n_total) * sizeof(C)); 00815 #ifdef _OPENMP 00816 #pragma omp critical (nfft_omp_critical_fftw_plan) 00817 { 00818 FFTW(plan_with_nthreads)(nthreads); 00819 #endif 00820 00821 ths->fft_plan = FFTW(plan_dft)(d, N, ths->b, ths->b, FFTW_FORWARD, 00822 FFTW_ESTIMATE); 00823 00824 #ifdef _OPENMP 00825 } 00826 #endif 00827 00828 if (ths->flags & NEARFIELD_BOXES) 00829 { 00830 ths->box_count_per_dim = (int)(LRINT(FLOOR((K(0.5) - ths->eps_B) / ths->eps_I))) + 1; 00831 ths->box_count = 1; 00832 for (t = 0; t < ths->d; t++) 00833 ths->box_count *= ths->box_count_per_dim; 00834 00835 ths->box_offset = (int *) NFFT(malloc)((size_t)(ths->box_count + 1) * sizeof(int)); 00836 00837 ths->box_alpha = (C *) NFFT(malloc)((size_t)(ths->N_total) * (sizeof(C))); 00838 00839 ths->box_x = (R *) NFFT(malloc)((size_t)(ths->d * ths->N_total) * sizeof(R)); 00840 } 00841 } 00842 00844 void fastsum_finalize(fastsum_plan *ths) 00845 { 00846 NFFT(free)(ths->x); 00847 NFFT(free)(ths->alpha); 00848 NFFT(free)(ths->y); 00849 NFFT(free)(ths->f); 00850 00851 if (!(ths->flags & EXACT_NEARFIELD)) 00852 NFFT(free)(ths->Add); 00853 00854 NFFT(finalize)(&(ths->mv1)); 00855 NFFT(finalize)(&(ths->mv2)); 00856 00857 #ifdef _OPENMP 00858 #pragma omp critical (nfft_omp_critical_fftw_plan) 00859 { 00860 #endif 00861 FFTW(destroy_plan)(ths->fft_plan); 00862 #ifdef _OPENMP 00863 } 00864 #endif 00865 00866 NFFT(free)(ths->b); 00867 00868 if (ths->flags & NEARFIELD_BOXES) 00869 { 00870 NFFT(free)(ths->box_offset); 00871 NFFT(free)(ths->box_alpha); 00872 NFFT(free)(ths->box_x); 00873 } 00874 } 00875 00877 void fastsum_exact(fastsum_plan *ths) 00878 { 00879 int j, k; 00880 int t; 00881 R r; 00882 00883 #ifdef _OPENMP 00884 #pragma omp parallel for default(shared) private(j,k,t,r) 00885 #endif 00886 for (j = 0; j < ths->M_total; j++) 00887 { 00888 ths->f[j] = K(0.0); 00889 for (k = 0; k < ths->N_total; k++) 00890 { 00891 if (ths->d == 1) 00892 r = ths->y[j] - ths->x[k]; 00893 else 00894 { 00895 r = K(0.0); 00896 for (t = 0; t < ths->d; t++) 00897 r += (ths->y[j * ths->d + t] - ths->x[k * ths->d + t]) 00898 * (ths->y[j * ths->d + t] - ths->x[k * ths->d + t]); 00899 r = SQRT(r); 00900 } 00901 ths->f[j] += ths->alpha[k] * ths->k(r, 0, ths->kernel_param); 00902 } 00903 } 00904 } 00905 00907 void fastsum_precompute(fastsum_plan *ths) 00908 { 00909 int j, k, t; 00910 int n_total; 00911 #ifdef MEASURE_TIME 00912 ticks t0, t1; 00913 #endif 00914 00915 ths->MEASURE_TIME_t[0] = K(0.0); 00916 ths->MEASURE_TIME_t[1] = K(0.0); 00917 ths->MEASURE_TIME_t[2] = K(0.0); 00918 ths->MEASURE_TIME_t[3] = K(0.0); 00919 00920 #ifdef MEASURE_TIME 00921 t0 = getticks(); 00922 #endif 00923 00924 if (ths->flags & NEARFIELD_BOXES) 00925 { 00926 BuildBox(ths); 00927 } 00928 else 00929 { 00931 BuildTree(ths->d, 0, ths->x, ths->alpha, ths->N_total); 00932 } 00933 00934 #ifdef MEASURE_TIME 00935 t1 = getticks(); 00936 ths->MEASURE_TIME_t[3] += nfft_elapsed_seconds(t1,t0); 00937 #endif 00938 00939 #ifdef MEASURE_TIME 00940 t0 = getticks(); 00941 #endif 00942 00943 if (!(ths->flags & EXACT_NEARFIELD)) 00944 { 00945 if (ths->d == 1) 00946 #ifdef _OPENMP 00947 #pragma omp parallel for default(shared) private(k) 00948 #endif 00949 for (k = -ths->Ad / 2 - 2; k <= ths->Ad / 2 + 2; k++) 00950 ths->Add[k + ths->Ad / 2 + 2] = regkern1(ths->k, 00951 ths->eps_I * (R) k / (R)(ths->Ad) * K(2.0), ths->p, ths->kernel_param, 00952 ths->eps_I, ths->eps_B); 00953 else 00954 #ifdef _OPENMP 00955 #pragma omp parallel for default(shared) private(k) 00956 #endif 00957 for (k = 0; k <= ths->Ad + 2; k++) 00958 ths->Add[k] = regkern3(ths->k, ths->eps_I * (R) k / (R)(ths->Ad), ths->p, 00959 ths->kernel_param, ths->eps_I, ths->eps_B); 00960 } 00961 #ifdef MEASURE_TIME 00962 t1 = getticks(); 00963 ths->MEASURE_TIME_t[0] += NFFT(elapsed_seconds)(t1,t0); 00964 #endif 00965 00966 #ifdef MEASURE_TIME 00967 t0 = getticks(); 00968 #endif 00969 00970 for (k = 0; k < ths->mv1.M_total; k++) 00971 for (t = 0; t < ths->mv1.d; t++) 00972 ths->mv1.x[ths->mv1.d * k + t] = -ths->x[ths->mv1.d * k + t]; /* note the factor -1 for transposed transform instead of adjoint*/ 00973 00975 if (ths->mv1.flags & PRE_LIN_PSI) 00976 NFFT(precompute_lin_psi)(&(ths->mv1)); 00977 00978 if (ths->mv1.flags & PRE_PSI) 00979 NFFT(precompute_psi)(&(ths->mv1)); 00980 00981 if (ths->mv1.flags & PRE_FULL_PSI) 00982 NFFT(precompute_full_psi)(&(ths->mv1)); 00983 #ifdef MEASURE_TIME 00984 t1 = getticks(); 00985 ths->MEASURE_TIME_t[1] += nfft_elapsed_seconds(t1,t0); 00986 #endif 00987 00989 for (k = 0; k < ths->mv1.M_total; k++) 00990 ths->mv1.f[k] = ths->alpha[k]; 00991 00992 #ifdef MEASURE_TIME 00993 t0 = getticks(); 00994 #endif 00995 00996 for (j = 0; j < ths->mv2.M_total; j++) 00997 for (t = 0; t < ths->mv2.d; t++) 00998 ths->mv2.x[ths->mv2.d * j + t] = -ths->y[ths->mv2.d * j + t]; /* note the factor -1 for conjugated transform instead of standard*/ 00999 01001 if (ths->mv2.flags & PRE_LIN_PSI) 01002 NFFT(precompute_lin_psi)(&(ths->mv2)); 01003 01004 if (ths->mv2.flags & PRE_PSI) 01005 NFFT(precompute_psi)(&(ths->mv2)); 01006 01007 if (ths->mv2.flags & PRE_FULL_PSI) 01008 NFFT(precompute_full_psi)(&(ths->mv2)); 01009 #ifdef MEASURE_TIME 01010 t1 = getticks(); 01011 ths->MEASURE_TIME_t[2] += NFFT(elapsed_seconds)(t1,t0); 01012 #endif 01013 01014 #ifdef MEASURE_TIME 01015 t0 = getticks(); 01016 #endif 01017 01018 n_total = 1; 01019 for (t = 0; t < ths->d; t++) 01020 n_total *= ths->n; 01021 01022 #ifdef _OPENMP 01023 #pragma omp parallel for default(shared) private(j,k,t) 01024 #endif 01025 for (j = 0; j < n_total; j++) 01026 { 01027 if (ths->d == 1) 01028 ths->b[j] = regkern1(ths->k, (R) j / (R)(ths->n) - K(0.5), ths->p, 01029 ths->kernel_param, ths->eps_I, ths->eps_B) / (R)(n_total); 01030 else 01031 { 01032 k = j; 01033 ths->b[j] = K(0.0); 01034 for (t = 0; t < ths->d; t++) 01035 { 01036 ths->b[j] += ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5)) 01037 * ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5)); 01038 k = k / (ths->n); 01039 } 01040 ths->b[j] = regkern3(ths->k, SQRT(CREAL(ths->b[j])), ths->p, ths->kernel_param, 01041 ths->eps_I, ths->eps_B) / (R)(n_total); 01042 } 01043 } 01044 01045 NFFT(fftshift_complex)(ths->b, (int)(ths->mv1.d), ths->mv1.N); 01046 FFTW(execute)(ths->fft_plan); 01047 NFFT(fftshift_complex)(ths->b, (int)(ths->mv1.d), ths->mv1.N); 01048 #ifdef MEASURE_TIME 01049 t1 = getticks(); 01050 ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0); 01051 #endif 01052 } 01053 01055 void fastsum_trafo(fastsum_plan *ths) 01056 { 01057 int j, k, t; 01058 #ifdef MEASURE_TIME 01059 ticks t0, t1; 01060 #endif 01061 01062 ths->MEASURE_TIME_t[4] = K(0.0); 01063 ths->MEASURE_TIME_t[5] = K(0.0); 01064 ths->MEASURE_TIME_t[6] = K(0.0); 01065 ths->MEASURE_TIME_t[7] = K(0.0); 01066 01067 #ifdef MEASURE_TIME 01068 t0 = getticks(); 01069 #endif 01070 01071 NFFT(adjoint)(&(ths->mv1)); 01072 #ifdef MEASURE_TIME 01073 t1 = getticks(); 01074 ths->MEASURE_TIME_t[4] += NFFT(elapsed_seconds)(t1,t0); 01075 #endif 01076 01077 #ifdef MEASURE_TIME 01078 t0 = getticks(); 01079 #endif 01080 01081 #ifdef _OPENMP 01082 #pragma omp parallel for default(shared) private(k) 01083 #endif 01084 for (k = 0; k < ths->mv2.N_total; k++) 01085 ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k]; 01086 #ifdef MEASURE_TIME 01087 t1 = getticks(); 01088 ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0); 01089 #endif 01090 01091 #ifdef MEASURE_TIME 01092 t0 = getticks(); 01093 #endif 01094 01095 NFFT(trafo)(&(ths->mv2)); 01096 #ifdef MEASURE_TIME 01097 t1 = getticks(); 01098 ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0); 01099 #endif 01100 01101 #ifdef MEASURE_TIME 01102 t0 = getticks(); 01103 #endif 01104 01105 #ifdef _OPENMP 01106 #pragma omp parallel for default(shared) private(j,k,t) 01107 #endif 01108 for (j = 0; j < ths->M_total; j++) 01109 { 01110 R ymin[ths->d], ymax[ths->d]; 01112 if (ths->flags & NEARFIELD_BOXES) 01113 { 01114 ths->f[j] = ths->mv2.f[j] + SearchBox(ths->y + ths->d * j, ths); 01115 } 01116 else 01117 { 01118 for (t = 0; t < ths->d; t++) 01119 { 01120 ymin[t] = ths->y[ths->d * j + t] - ths->eps_I; 01121 ymax[t] = ths->y[ths->d * j + t] + ths->eps_I; 01122 } 01123 ths->f[j] = ths->mv2.f[j] 01124 + SearchTree(ths->d, 0, ths->x, ths->alpha, ymin, ymax, ths->N_total, 01125 ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags); 01126 } 01127 /* ths->f[j] = ths->mv2.f[j]; */ 01128 /* ths->f[j] = SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags); */ 01129 } 01130 01131 #ifdef MEASURE_TIME 01132 t1 = getticks(); 01133 ths->MEASURE_TIME_t[7] += NFFT(elapsed_seconds)(t1,t0); 01134 #endif 01135 } 01136 /* \} */ 01137 01138 /* fastsum.c */