NFFT  3.5.0
nfsft.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
25 #include "config.h"
26 
27 /* Include standard C headers. */
28 #include <math.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #ifdef HAVE_COMPLEX_H
32 #include <complex.h>
33 #endif
34 
35 #ifdef _OPENMP
36 #include <omp.h>
37 #endif
38 
39 /* Include NFFT3 utilities header. */
40 
41 /* Include NFFT3 library header. */
42 #include "nfft3.h"
43 
44 #include "infft.h"
45 
46 /* Include private associated Legendre functions header. */
47 #include "legendre.h"
48 
49 /* Include private API header. */
50 #include "api.h"
51 
52 #ifdef _OPENMP
53 #include "../fpt/fpt.h"
54 #endif
55 
65 #define NFSFT_DEFAULT_NFFT_CUTOFF 6
66 
72 #define NFSFT_DEFAULT_THRESHOLD 1000
73 
79 #define NFSFT_BREAK_EVEN 5
80 
87 #ifdef _OPENMP
88  static struct nfsft_wisdom wisdom = {false,0U,-1,-1,0,0,0,0,0,0};
89 #else
90  static struct nfsft_wisdom wisdom = {false,0U,-1,-1,0,0,0,0,0};
91 #endif
92 
115 static inline void c2e(nfsft_plan *plan)
116 {
117  int k;
118  int n;
119  double _Complex last;
120  double _Complex act;
121  double _Complex *xp;
122  double _Complex *xm;
123  int low;
124  int up;
125  int lowe;
126  int upe;
128  /* Set the first row to order to zero since it is unused. */
129  memset(plan->f_hat_intern,0U,(2*plan->N+2)*sizeof(double _Complex));
130 
131  /* Determine lower and upper bounds for loop processing even terms. */
132  lowe = -plan->N + (plan->N%2);
133  upe = -lowe;
134 
135  /* Process even terms. */
136  for (n = lowe; n <= upe; n += 2)
137  {
138  /* Compute new coefficients \f$\left(c_k^n\right)_{k=-M,\ldots,M}\f$ from
139  * old coefficients $\left(b_k^n\right)_{k=0,\ldots,M}$. */
140  xm = &(plan->f_hat_intern[NFSFT_INDEX(-1,n,plan)]);
141  xp = &(plan->f_hat_intern[NFSFT_INDEX(+1,n,plan)]);
142  for(k = 1; k <= plan->N; k++)
143  {
144  *xp *= 0.5;
145  *xm-- = *xp++;
146  }
147  /* Set the first coefficient in the array corresponding to this order to
148  * zero since it is unused. */
149  *xm = 0.0;
150  }
151 
152  /* Determine lower and upper bounds for loop processing odd terms. */
153  low = -plan->N + (1-plan->N%2);
154  up = -low;
155 
156  /* Process odd terms incorporating the additional sine term
157  * \f$\sin \vartheta\f$. */
158  for (n = low; n <= up; n += 2)
159  {
160  /* Compute new coefficients \f$\left(c_k^n\right)_{k=-M,\ldots,M}\f$ from
161  * old coefficients $\left(b_k^n\right)_{k=0,\ldots,M-1}$ incorporating
162  * the additional term \f$\sin \vartheta\f$. */
163  plan->f_hat_intern[NFSFT_INDEX(0,n,plan)] *= 2.0;
164  xp = &(plan->f_hat_intern[NFSFT_INDEX(-plan->N-1,n,plan)]);
165  /* Set the first coefficient in the array corresponding to this order to zero
166  * since it is unused. */
167  *xp++ = 0.0;
168  xm = &(plan->f_hat_intern[NFSFT_INDEX(plan->N,n,plan)]);
169  last = *xm;
170  *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
171  *xp++ = -(*xm--);
172  for (k = plan->N-1; k > 0; k--)
173  {
174  act = *xm;
175  *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
176  *xp++ = -(*xm--);
177  last = act;
178  }
179  *xm = 0.0;
180  }
181 }
182 
193 static inline void c2e_transposed(nfsft_plan *plan)
194 {
195  int k;
196  int n;
197  double _Complex last;
198  double _Complex act;
199  double _Complex *xp;
200  double _Complex *xm;
201  int low;
202  int up;
203  int lowe;
204  int upe;
206  /* Determine lower and upper bounds for loop processing even terms. */
207  lowe = -plan->N + (plan->N%2);
208  upe = -lowe;
209 
210  /* Process even terms. */
211  for (n = lowe; n <= upe; n += 2)
212  {
213  /* Compute new coefficients \f$\left(b_k^n\right)_{k=0,\ldots,M}\f$ from
214  * old coefficients $\left(c_k^n\right)_{k=-M,\ldots,M}$. */
215  xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
216  xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
217  for(k = 1; k <= plan->N; k++)
218  {
219  *xp += *xm--;
220  *xp++ *= 0.5;;
221  }
222  }
223 
224  /* Determine lower and upper bounds for loop processing odd terms. */
225  low = -plan->N + (1-plan->N%2);
226  up = -low;
227 
228  /* Process odd terms. */
229  for (n = low; n <= up; n += 2)
230  {
231  /* Compute new coefficients \f$\left(b_k^n\right)_{k=0,\ldots,M-1}\f$ from
232  * old coefficients $\left(c_k^n\right)_{k=0,\ldots,M-1}$. */
233  xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
234  xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
235  for(k = 1; k <= plan->N; k++)
236  {
237  *xp++ -= *xm--;
238  }
239 
240  plan->f_hat[NFSFT_INDEX(0,n,plan)] =
241  -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(1,n,plan)];
242  last = plan->f_hat[NFSFT_INDEX(1,n,plan)];
243  plan->f_hat[NFSFT_INDEX(1,n,plan)] =
244  -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(2,n,plan)];
245 
246  xp = &(plan->f_hat[NFSFT_INDEX(2,n,plan)]);
247  for (k = 2; k < plan->N; k++)
248  {
249  act = *xp;
250  *xp = -0.25 * _Complex_I * (xp[1] - last);
251  xp++;
252  last = act;
253  }
254  *xp = 0.25 * _Complex_I * last;
255 
256  plan->f_hat[NFSFT_INDEX(0,n,plan)] *= 2.0;
257  }
258 }
259 
260 void nfsft_init(nfsft_plan *plan, int N, int M)
261 {
262  /* Call nfsft_init_advanced with default flags. */
265 }
266 
267 void nfsft_init_advanced(nfsft_plan* plan, int N, int M,
268  unsigned int flags)
269 {
270  /* Call nfsft_init_guru with the flags and default NFFT cut-off. */
271  nfsft_init_guru(plan, N, M, flags, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | NFFT_OMP_BLOCKWISE_ADJOINT,
273 }
274 
275 void nfsft_init_guru(nfsft_plan *plan, int N, int M, unsigned int flags,
276  unsigned int nfft_flags, int nfft_cutoff)
277 {
278  int *nfft_size; /*< NFFT size */
279  int *fftw_size; /*< FFTW size */
280 
281  /* Save the flags in the plan. */
282  plan->flags = flags;
283 
284  /* Save the bandwidth N and the number of samples M in the plan. */
285  plan->N = N;
286  plan->M_total = M;
287 
288  /* Calculate the next greater power of two with respect to the bandwidth N
289  * and the corresponding exponent. */
290  //next_power_of_2_exp_int(plan->N,&plan->NPT,&plan->t);
291 
292  /* Save length of array of Fourier coefficients. Owing to the data layout the
293  * length is (2N+2)(2N+2) */
294  plan->N_total = (2*plan->N+2)*(2*plan->N+2);
295 
296  /* Allocate memory for auxilliary array of spherical Fourier coefficients,
297  * if neccesary. */
298  if (plan->flags & NFSFT_PRESERVE_F_HAT)
299  {
300  plan->f_hat_intern = (double _Complex*) nfft_malloc(plan->N_total*
301  sizeof(double _Complex));
302  }
303 
304  /* Allocate memory for spherical Fourier coefficients, if neccesary. */
305  if (plan->flags & NFSFT_MALLOC_F_HAT)
306  {
307  plan->f_hat = (double _Complex*) nfft_malloc(plan->N_total*
308  sizeof(double _Complex));
309  }
310 
311  /* Allocate memory for samples, if neccesary. */
312  if (plan->flags & NFSFT_MALLOC_F)
313  {
314  plan->f = (double _Complex*) nfft_malloc(plan->M_total*sizeof(double _Complex));
315  }
316 
317  /* Allocate memory for nodes, if neccesary. */
318  if (plan->flags & NFSFT_MALLOC_X)
319  {
320  plan->x = (double*) nfft_malloc(plan->M_total*2*sizeof(double));
321  }
322 
323  /* Check if fast algorithm is activated. */
324  if (plan->flags & NFSFT_NO_FAST_ALGORITHM)
325  {
326  }
327  else
328  {
329  nfft_size = (int*)nfft_malloc(2*sizeof(int));
330  fftw_size = (int*)nfft_malloc(2*sizeof(int));
331 
333  nfft_size[0] = 2*plan->N+2;
334  nfft_size[1] = 2*plan->N+2;
335  fftw_size[0] = 4*plan->N;
336  fftw_size[1] = 4*plan->N;
337 
339  nfft_init_guru(&plan->plan_nfft, 2, nfft_size, plan->M_total, fftw_size,
340  nfft_cutoff, nfft_flags,
341  FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
342 
343  /* Assign angle array. */
344  plan->plan_nfft.x = plan->x;
345  /* Assign result array. */
346  plan->plan_nfft.f = plan->f;
347  /* Assign Fourier coefficients array. */
348  plan->plan_nfft.f_hat = plan->f_hat;
349 
352  /* Precompute. */
353  //nfft_precompute_one_psi(&plan->plan_nfft);
354 
355  /* Free auxilliary arrays. */
356  nfft_free(nfft_size);
357  nfft_free(fftw_size);
358  }
359 
360  plan->mv_trafo = (void (*) (void* ))nfsft_trafo;
361  plan->mv_adjoint = (void (*) (void* ))nfsft_adjoint;
362 }
363 
364 void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags,
365  unsigned int fpt_flags)
366 {
367  int n; /*< The order n */
368 
369  /* Check if already initialized. */
370  if (wisdom.initialized == true)
371  {
372  return;
373  }
374 
375 #ifdef _OPENMP
376  #pragma omp parallel default(shared)
377  {
378  int nthreads = omp_get_num_threads();
379  #pragma omp single
380  {
381  wisdom.nthreads = nthreads;
382  }
383  }
384 #endif
385 
386  /* Save the precomputation flags. */
387  wisdom.flags = nfsft_flags;
388 
389  /* Compute and save N_max = 2^{\ceil{log_2 N}} as next greater
390  * power of two with respect to N. */
391  X(next_power_of_2_exp_int)(N,&wisdom.N_MAX,&wisdom.T_MAX);
392 
393  /* Check, if precomputation for direct algorithms needs to be performed. */
394  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
395  {
396  wisdom.alpha = NULL;
397  wisdom.beta = NULL;
398  wisdom.gamma = NULL;
399  }
400  else
401  {
402  /* Allocate memory for three-term recursion coefficients. */
403  wisdom.alpha = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
404  sizeof(double));
405  wisdom.beta = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
406  sizeof(double));
407  wisdom.gamma = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
408  sizeof(double));
410  /* Compute three-term recurrence coefficients alpha_k^n, beta_k^n, and
411  * gamma_k^n. */
412  alpha_al_all(wisdom.alpha,wisdom.N_MAX);
413  beta_al_all(wisdom.beta,wisdom.N_MAX);
414  gamma_al_all(wisdom.gamma,wisdom.N_MAX);
415  }
416 
417  /* Check, if precomputation for fast algorithms needs to be performed. */
418  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
419  {
420  }
421  else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
422  {
423  /* Precompute data for DPT/FPT. */
424 
425  /* Check, if recursion coefficients have already been calculated. */
426  if (wisdom.alpha != NULL)
427  {
428 #ifdef _OPENMP
429  #pragma omp parallel default(shared) private(n)
430  {
431  int nthreads = omp_get_num_threads();
432  int threadid = omp_get_thread_num();
433  #pragma omp single
434  {
435  wisdom.nthreads = nthreads;
436  wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
437  }
438 
439  if (threadid == 0)
440  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
441  fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
442  else
443  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
444  fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA | FPT_NO_INIT_FPT_DATA);
445 
446  #pragma omp barrier
447 
448  if (threadid != 0)
449  wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
450 
451  /* Perform the first part of precomputation which contains most allocations in one thread */
452  #pragma omp master
453  {
454  for (int n = 0; n <= wisdom.N_MAX; n++)
455  fpt_precompute_1(wisdom.set_threads[0],n,n);
456  }
457  #pragma omp barrier
458 
459  #pragma omp for private(n) schedule(dynamic)
460  for (n = 0; n <= wisdom.N_MAX; n++)
461  fpt_precompute_2(wisdom.set_threads[threadid],n,&wisdom.alpha[ROW(n)],
462  &wisdom.beta[ROW(n)], &wisdom.gamma[ROW(n)],n,kappa);
463  }
464 #else
465  /* Use the recursion coefficients to precompute FPT data using persistent
466  * arrays. */
467  wisdom.set = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
468  fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
469  for (n = 0; n <= wisdom.N_MAX; n++)
470  {
471  /*fprintf(stderr,"%d\n",n);
472  fflush(stderr);*/
473  /* Precompute data for FPT transformation for order n. */
474  fpt_precompute(wisdom.set,n,&wisdom.alpha[ROW(n)],&wisdom.beta[ROW(n)],
475  &wisdom.gamma[ROW(n)],n,kappa);
476  }
477 #endif
478  }
479  else
480  {
481 #ifdef _OPENMP
482  #pragma omp parallel default(shared) private(n)
483  {
484  double *alpha, *beta, *gamma;
485  int nthreads = omp_get_num_threads();
486  int threadid = omp_get_thread_num();
487  #pragma omp single
488  {
489  wisdom.nthreads = nthreads;
490  wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
491  }
492 
493  alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
494  beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
495  gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
496 
497  if (threadid == 0)
498  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
499  fpt_flags | FPT_AL_SYMMETRY);
500  else
501  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
502  fpt_flags | FPT_AL_SYMMETRY | FPT_NO_INIT_FPT_DATA);
503 
504  #pragma omp barrier
505 
506  if (threadid != 0)
507  wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
508 
509  /* Perform the first part of precomputation which contains most allocations in one thread */
510  #pragma omp master
511  {
512  for (int n = 0; n <= wisdom.N_MAX; n++)
513  fpt_precompute_1(wisdom.set_threads[0],n,n);
514  }
515  #pragma omp barrier
516 
517  #pragma omp for private(n) schedule(dynamic)
518  for (n = 0; n <= wisdom.N_MAX; n++)
519  {
520  alpha_al_row(alpha,wisdom.N_MAX,n);
521  beta_al_row(beta,wisdom.N_MAX,n);
522  gamma_al_row(gamma,wisdom.N_MAX,n);
523 
524  /* Precompute data for FPT transformation for order n. */
525  fpt_precompute_2(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
526  kappa);
527  }
528  /* Free auxilliary arrays. */
529  nfft_free(alpha);
530  nfft_free(beta);
531  nfft_free(gamma);
532  }
533 #else
534  /* Allocate memory for three-term recursion coefficients. */
535  double *alpha, *beta, *gamma;
536  alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
537  beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
538  gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
539  wisdom.set = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
540  fpt_flags | FPT_AL_SYMMETRY);
541  for (n = 0; n <= wisdom.N_MAX; n++)
542  {
543  /*fprintf(stderr,"%d NO_DIRECT\n",n);
544  fflush(stderr);*/
545  /* Compute three-term recurrence coefficients alpha_k^n, beta_k^n, and
546  * gamma_k^n. */
547  alpha_al_row(alpha,wisdom.N_MAX,n);
548  beta_al_row(beta,wisdom.N_MAX,n);
549  gamma_al_row(gamma,wisdom.N_MAX,n);
550 
551  /* Precompute data for FPT transformation for order n. */
552  fpt_precompute(wisdom.set,n,alpha,beta,gamma,n,
553  kappa);
554  }
555  /* Free auxilliary arrays. */
556  nfft_free(alpha);
557  nfft_free(beta);
558  nfft_free(gamma);
559 #endif
560  }
561  }
562 
563  /* Wisdom has been initialised. */
564  wisdom.initialized = true;
565 }
566 
567 void nfsft_forget(void)
568 {
569  /* Check if wisdom has been initialised. */
570  if (wisdom.initialized == false)
571  {
572  /* Nothing to do. */
573  return;
574  }
575 
576  /* Check, if precomputation for direct algorithms has been performed. */
577  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
578  {
579  }
580  else
581  {
582  /* Free arrays holding three-term recurrence coefficients. */
583  nfft_free(wisdom.alpha);
584  nfft_free(wisdom.beta);
585  nfft_free(wisdom.gamma);
586  wisdom.alpha = NULL;
587  wisdom.beta = NULL;
588  wisdom.gamma = NULL;
589  }
590 
591  /* Check, if precomputation for fast algorithms has been performed. */
592  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
593  {
594  }
595  else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
596  {
597 #ifdef _OPENMP
598  int k;
599  for (k = 0; k < wisdom.nthreads; k++)
600  fpt_finalize(wisdom.set_threads[k]);
601  nfft_free(wisdom.set_threads);
602 #else
603  /* Free precomputed data for FPT transformation. */
604  fpt_finalize(wisdom.set);
605 #endif
606  }
607 
608  /* Wisdom is now uninitialised. */
609  wisdom.initialized = false;
610 }
611 
612 
614 {
615  if (!plan)
616  return;
617 
618  if (!(plan->flags & NFSFT_NO_FAST_ALGORITHM))
619  {
620  /* Finalise the nfft plan. */
621  nfft_finalize(&plan->plan_nfft);
622  }
623 
624  /* De-allocate memory for auxilliary array of spherical Fourier coefficients,
625  * if neccesary. */
626  if (plan->flags & NFSFT_PRESERVE_F_HAT)
627  {
628  nfft_free(plan->f_hat_intern);
629  }
630 
631  /* De-allocate memory for spherical Fourier coefficients, if necessary. */
632  if (plan->flags & NFSFT_MALLOC_F_HAT)
633  {
634  //fprintf(stderr,"deallocating f_hat\n");
635  nfft_free(plan->f_hat);
636  }
637 
638  /* De-allocate memory for samples, if neccesary. */
639  if (plan->flags & NFSFT_MALLOC_F)
640  {
641  //fprintf(stderr,"deallocating f\n");
642  nfft_free(plan->f);
643  }
644 
645  /* De-allocate memory for nodes, if neccesary. */
646  if (plan->flags & NFSFT_MALLOC_X)
647  {
648  //fprintf(stderr,"deallocating x\n");
649  nfft_free(plan->x);
650  }
651 }
652 
653 static void nfsft_set_f_nan(nfsft_plan *plan)
654 {
655  int m;
656  double nan_value = nan("");
657  for (m = 0; m < plan->M_total; m++)
658  plan->f[m] = nan_value;
659 }
660 
661 void nfsft_trafo_direct(nfsft_plan *plan)
662 {
663  int m; /*< The node index */
664  int k; /*< The degree k */
665  int n; /*< The order n */
666  int n_abs; /*< The absolute value of the order n, ie n_abs = |n| */
667  double *alpha; /*< Pointer to current three-term recurrence
668  coefficient alpha_k^n for associated Legendre
669  functions P_k^n */
670  double *gamma; /*< Pointer to current three-term recurrence
671  coefficient beta_k^n for associated Legendre
672  functions P_k^n */
673  double _Complex *a; /*< Pointer to auxilliary array for Clenshaw algor. */
674  double stheta; /*< Current angle theta for Clenshaw algorithm */
675  double sphi; /*< Current angle phi for Clenshaw algorithm */
676 
677 #ifdef MEASURE_TIME
678  plan->MEASURE_TIME_t[0] = 0.0;
679  plan->MEASURE_TIME_t[1] = 0.0;
680  plan->MEASURE_TIME_t[2] = 0.0;
681 #endif
682 
683  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
684  {
685  nfsft_set_f_nan(plan);
686  return;
687  }
688 
689  /* Copy spherical Fourier coefficients, if necessary. */
690  if (plan->flags & NFSFT_PRESERVE_F_HAT)
691  {
692  memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
693  sizeof(double _Complex));
694  }
695  else
696  {
697  plan->f_hat_intern = plan->f_hat;
698  }
699 
700  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
701  * multiply spherical Fourier coefficients with corresponding normalization
702  * weight. */
703  if (plan->flags & NFSFT_NORMALIZED)
704  {
705  /* Traverse Fourier coefficients array. */
706 #ifdef _OPENMP
707  #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
708 #endif
709  for (k = 0; k <= plan->N; k++)
710  {
711  for (n = -k; n <= k; n++)
712  {
713  /* Multiply with normalization weight. */
714  plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
715  sqrt((2*k+1)/(4.0*KPI));
716  }
717  }
718  }
719 
720  /* Distinguish by bandwidth M. */
721  if (plan->N == 0)
722  {
723  /* N = 0 */
724 
725  /* Constant function */
726  for (m = 0; m < plan->M_total; m++)
727  {
728  plan->f[m] = plan->f_hat_intern[NFSFT_INDEX(0,0,plan)];
729  }
730  }
731  else
732  {
733  /* N > 0 */
734 
735  /* Evaluate
736  * \sum_{k=0}^N \sum_{n=-k}^k a_k^n P_k^{|n|}(cos theta_m) e^{i n phi_m}
737  * = \sum_{n=-N}^N \sum_{k=|n|}^N a_k^n P_k^{|n|}(cos theta_m)
738  * e^{i n phi_m}.
739  */
740 #ifdef _OPENMP
741  #pragma omp parallel for default(shared) private(m,stheta,sphi,n,a,n_abs,alpha,gamma,k)
742 #endif
743  for (m = 0; m < plan->M_total; m++)
744  {
745  /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
746  stheta = cos(2.0*KPI*plan->x[2*m+1]);
747  /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
748  sphi = 2.0*KPI*plan->x[2*m];
749 
750  /* Initialize result for current node. */
751  plan->f[m] = 0.0;
752 
753  /* For n = -N,...,N, evaluate
754  * b_n := \sum_{k=|n|}^N a_k^n P_k^{|n|}(cos theta_m)
755  * using Clenshaw's algorithm.
756  */
757  for (n = -plan->N; n <= plan->N; n++)
758  {
759  /* Get pointer to Fourier coefficients vector for current order n. */
760  a = &(plan->f_hat_intern[NFSFT_INDEX(0,n,plan)]);
761 
762  /* Take absolute value of n. */
763  n_abs = abs(n);
764 
765  /* Get pointers to three-term recurrence coefficients arrays. */
766  alpha = &(wisdom.alpha[ROW(n_abs)]);
767  gamma = &(wisdom.gamma[ROW(n_abs)]);
768 
769  if (plan->N > 1024)
770  {
771  /* Clenshaw's algorithm */
772  long double _Complex it2 = a[plan->N];
773  long double _Complex it1 = a[plan->N-1];
774  for (k = plan->N; k > n_abs + 1; k--)
775  {
776  long double _Complex temp = a[k-2] + it2 * gamma[k];
777  it2 = it1 + it2 * alpha[k] * stheta;
778  it1 = temp;
779  }
780 
781  /* Compute final step if neccesary. */
782  if (n_abs < plan->N)
783  {
784  it2 = it1 + it2 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
785  }
786 
787  /* Compute final result by multiplying the fixed part
788  * gamma_|n| (1-cos^2(theta))^{|n|/2}
789  * for order n and the exponential part
790  * e^{i n phi}
791  * and add the result to f_m.
792  */
793  long double _Complex result = it2 * wisdom.gamma[ROW(n_abs)] *
794  powl(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
795 
796  plan->f[m] += result;
797  }
798  else
799  {
800  /* Clenshaw's algorithm */
801  double _Complex it2 = a[plan->N];
802  double _Complex it1 = a[plan->N-1];
803  for (k = plan->N; k > n_abs + 1; k--)
804  {
805  double _Complex temp = a[k-2] + it2 * gamma[k];
806  it2 = it1 + it2 * alpha[k] * stheta;
807  it1 = temp;
808  }
809 
810  /* Compute final step if neccesary. */
811  if (n_abs < plan->N)
812  {
813  it2 = it1 + it2 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
814  }
815 
816  /* Compute final result by multiplying the fixed part
817  * gamma_|n| (1-cos^2(theta))^{|n|/2}
818  * for order n and the exponential part
819  * e^{i n phi}
820  * and add the result to f_m.
821  */
822  plan->f[m] += it2 * wisdom.gamma[ROW(n_abs)] *
823  pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
824  }
825  }
826 
827  /* Write result f_m for current node to array f. */
828 // plan->f[m] = f_m;
829  }
830  }
831 }
832 
833 static void nfsft_set_f_hat_nan(nfsft_plan *plan)
834 {
835  int k, n;
836  double nan_value = nan("");
837  for (k = 0; k <= plan->N; k++)
838  for (n = -k; n <= k; n++)
839  plan->f_hat[NFSFT_INDEX(k,n,plan)] = nan_value;
840 }
841 
842 void nfsft_adjoint_direct(nfsft_plan *plan)
843 {
844  int m; /*< The node index */
845  int k; /*< The degree k */
846  int n; /*< The order n */
847  int n_abs; /*< The absolute value of the order n, ie n_abs = |n| */
848  double *alpha; /*< Pointer to current three-term recurrence
849  coefficient alpha_k^n for associated Legendre
850  functions P_k^n */
851  double *gamma; /*< Pointer to current three-term recurrence
852  coefficient beta_k^n for associated Legendre
853  functions P_k^n */
854 
855 #ifdef MEASURE_TIME
856  plan->MEASURE_TIME_t[0] = 0.0;
857  plan->MEASURE_TIME_t[1] = 0.0;
858  plan->MEASURE_TIME_t[2] = 0.0;
859 #endif
860 
861  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
862  {
863  nfsft_set_f_hat_nan(plan);
864  return;
865  }
866 
867  /* Initialise spherical Fourier coefficients array with zeros. */
868  memset(plan->f_hat,0U,plan->N_total*sizeof(double _Complex));
869 
870  /* Distinguish by bandwidth N. */
871  if (plan->N == 0)
872  {
873  /* N == 0 */
874 
875  /* Constant function */
876  for (m = 0; m < plan->M_total; m++)
877  {
878  plan->f_hat[NFSFT_INDEX(0,0,plan)] += plan->f[m];
879  }
880  }
881  else
882  {
883  /* N > 0 */
884 
885 #ifdef _OPENMP
886  /* Traverse all orders n. */
887  #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,k) schedule(dynamic)
888  for (n = -plan->N; n <= plan->N; n++)
889  {
890  /* Take absolute value of n. */
891  n_abs = abs(n);
892 
893  /* Get pointers to three-term recurrence coefficients arrays. */
894  alpha = &(wisdom.alpha[ROW(n_abs)]);
895  gamma = &(wisdom.gamma[ROW(n_abs)]);
896 
897  /* Traverse all nodes. */
898  for (m = 0; m < plan->M_total; m++)
899  {
900  /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
901  double stheta = cos(2.0*KPI*plan->x[2*m+1]);
902  /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
903  double sphi = 2.0*KPI*plan->x[2*m];
904 
905  /* Transposed Clenshaw algorithm */
906  if (plan->N > 1024)
907  {
908  /* Initial step */
909  long double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
910  powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
911  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
912  long double _Complex it2 = 0.0;
913 
914  if (n_abs < plan->N)
915  {
916  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
917  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
918  }
919 
920  /* Loop for transposed Clenshaw algorithm */
921  for (k = n_abs+2; k <= plan->N; k++)
922  {
923  long double _Complex temp = it2;
924  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
925  it1 = temp;
926  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
927  }
928  }
929  else
930  {
931  /* Initial step */
932  double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
933  pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
934  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
935  double _Complex it2 = 0.0;
936 
937  if (n_abs < plan->N)
938  {
939  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
940  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
941  }
942 
943  /* Loop for transposed Clenshaw algorithm */
944  for (k = n_abs+2; k <= plan->N; k++)
945  {
946  double _Complex temp = it2;
947  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
948  it1 = temp;
949  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
950  }
951  }
952  }
953  }
954 #else
955  /* Traverse all nodes. */
956  for (m = 0; m < plan->M_total; m++)
957  {
958  /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
959  double stheta = cos(2.0*KPI*plan->x[2*m+1]);
960  /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
961  double sphi = 2.0*KPI*plan->x[2*m];
962 
963  /* Traverse all orders n. */
964  for (n = -plan->N; n <= plan->N; n++)
965  {
966  /* Take absolute value of n. */
967  n_abs = abs(n);
968 
969  /* Get pointers to three-term recurrence coefficients arrays. */
970  alpha = &(wisdom.alpha[ROW(n_abs)]);
971  gamma = &(wisdom.gamma[ROW(n_abs)]);
972 
973  /* Transposed Clenshaw algorithm */
974  if (plan->N > 1024)
975  {
976  /* Initial step */
977  long double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
978  powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
979  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
980  long double _Complex it2 = 0.0;
981 
982  if (n_abs < plan->N)
983  {
984  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
985  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
986  }
987 
988  /* Loop for transposed Clenshaw algorithm */
989  for (k = n_abs+2; k <= plan->N; k++)
990  {
991  long double _Complex temp = it2;
992  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
993  it1 = temp;
994  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
995  }
996  }
997  else
998  {
999  /* Initial step */
1000  double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
1001  pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
1002  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
1003  double _Complex it2 = 0.0;
1004 
1005  if (n_abs < plan->N)
1006  {
1007  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
1008  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
1009  }
1010 
1011  /* Loop for transposed Clenshaw algorithm */
1012  for (k = n_abs+2; k <= plan->N; k++)
1013  {
1014  double _Complex temp = it2;
1015  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
1016  it1 = temp;
1017  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
1018  }
1019  }
1020  }
1021  }
1022 #endif
1023  }
1024 
1025  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1026  * multiply spherical Fourier coefficients with corresponding normalization
1027  * weight. */
1028  if (plan->flags & NFSFT_NORMALIZED)
1029  {
1030  /* Traverse Fourier coefficients array. */
1031 #ifdef _OPENMP
1032  #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1033 #endif
1034  for (k = 0; k <= plan->N; k++)
1035  {
1036  for (n = -k; n <= k; n++)
1037  {
1038  /* Multiply with normalization weight. */
1039  plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
1040  sqrt((2*k+1)/(4.0*KPI));
1041  }
1042  }
1043  }
1044 
1045  /* Set unused coefficients to zero. */
1046  if (plan->flags & NFSFT_ZERO_F_HAT)
1047  {
1048  for (n = -plan->N; n <= plan->N+1; n++)
1049  {
1050  memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1051  (plan->N+1+abs(n))*sizeof(double _Complex));
1052  }
1053  }
1054 }
1055 
1057 {
1058  int k; /*< The degree k */
1059  int n; /*< The order n */
1060 #ifdef MEASURE_TIME
1061  ticks t0, t1;
1062 #endif
1063  #ifdef DEBUG
1064  double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
1065  t_pre = 0.0;
1066  t_norm = 0.0;
1067  t_fpt = 0.0;
1068  t_c2e = 0.0;
1069  t_nfft = 0.0;
1070  #endif
1071 
1072 #ifdef MEASURE_TIME
1073  plan->MEASURE_TIME_t[0] = 0.0;
1074  plan->MEASURE_TIME_t[1] = 0.0;
1075  plan->MEASURE_TIME_t[2] = 0.0;
1076 #endif
1077 
1078  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
1079  {
1080  nfsft_set_f_nan(plan);
1081  return;
1082  }
1083 
1084  if (plan->flags & NFSFT_NO_FAST_ALGORITHM)
1085  {
1086  nfsft_set_f_nan(plan);
1087  return;
1088  }
1089 
1090  /* Check, if precomputation was done and that the bandwidth N is not too
1091  * big.
1092  */
1093  if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
1094  {
1095  nfsft_set_f_nan(plan);
1096  return;
1097  }
1098 
1099  /* Check, if slow transformation should be used due to small bandwidth. */
1100  if (plan->N < NFSFT_BREAK_EVEN)
1101  {
1102  /* Use NDSFT. */
1103  nfsft_trafo_direct(plan);
1104  }
1105 
1106  /* Check for correct value of the bandwidth N. */
1107  else if (plan->N <= wisdom.N_MAX)
1108  {
1109  /* Copy spherical Fourier coefficients, if necessary. */
1110  if (plan->flags & NFSFT_PRESERVE_F_HAT)
1111  {
1112  memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
1113  sizeof(double _Complex));
1114  }
1115  else
1116  {
1117  plan->f_hat_intern = plan->f_hat;
1118  }
1119 
1120  /* Propagate pointer values to the internal NFFT plan to assure
1121  * consistency. Pointers may have been modified externally.
1122  */
1123  plan->plan_nfft.x = plan->x;
1124  plan->plan_nfft.f = plan->f;
1125  plan->plan_nfft.f_hat = plan->f_hat_intern;
1126 
1127  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1128  * multiply spherical Fourier coefficients with corresponding normalization
1129  * weight. */
1130  if (plan->flags & NFSFT_NORMALIZED)
1131  {
1132  /* Traverse Fourier coefficients array. */
1133 #ifdef _OPENMP
1134  #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1135 #endif
1136  for (k = 0; k <= plan->N; k++)
1137  {
1138  for (n = -k; n <= k; n++)
1139  {
1140  /* Multiply with normalization weight. */
1141  plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
1142  sqrt((2*k+1)/(4.0*KPI));
1143  }
1144  }
1145  }
1146 
1147 #ifdef MEASURE_TIME
1148  t0 = getticks();
1149 #endif
1150  /* Check, which polynomial transform algorithm should be used. */
1151  if (plan->flags & NFSFT_USE_DPT)
1152  {
1153 #ifdef _OPENMP
1154  n = 0;
1155  fpt_trafo_direct(wisdom.set_threads[0],abs(n),
1156  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1157  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1158  plan->N,0U);
1159 
1160  int n_abs;
1161  #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1162  for (n_abs = 1; n_abs <= plan->N; n_abs++)
1163  {
1164  int threadid = omp_get_thread_num();
1165  n = -n_abs;
1166  fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1167  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1168  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1169  plan->N,0U);
1170  n = n_abs;
1171  fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1172  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1173  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1174  plan->N,0U);
1175  }
1176 #else
1177  /* Use direct discrete polynomial transform DPT. */
1178  for (n = -plan->N; n <= plan->N; n++)
1179  {
1180  //fprintf(stderr,"nfsft_trafo: n = %d\n",n);
1181  //fflush(stderr);
1182  fpt_trafo_direct(wisdom.set,abs(n),
1183  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1184  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1185  plan->N,0U);
1186  }
1187 #endif
1188  }
1189  else
1190  {
1191 #ifdef _OPENMP
1192  n = 0;
1193  fpt_trafo(wisdom.set_threads[0],abs(n),
1194  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1195  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1196  plan->N,0U);
1197 
1198  int n_abs;
1199  #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1200  for (n_abs = 1; n_abs <= plan->N; n_abs++)
1201  {
1202  int threadid = omp_get_thread_num();
1203  n = -n_abs;
1204  fpt_trafo(wisdom.set_threads[threadid],abs(n),
1205  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1206  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1207  plan->N,0U);
1208  n = n_abs;
1209  fpt_trafo(wisdom.set_threads[threadid],abs(n),
1210  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1211  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1212  plan->N,0U);
1213  }
1214 #else
1215  /* Use fast polynomial transform FPT. */
1216  for (n = -plan->N; n <= plan->N; n++)
1217  {
1218  //fprintf(stderr,"nfsft_trafo: n = %d\n",n);
1219  //fflush(stderr);
1220  fpt_trafo(wisdom.set,abs(n),
1221  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1222  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1223  plan->N,0U);
1224  }
1225 #endif
1226  }
1227 #ifdef MEASURE_TIME
1228  t1 = getticks();
1229  plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
1230 #endif
1231 
1232 #ifdef MEASURE_TIME
1233  t0 = getticks();
1234 #endif
1235  /* Convert Chebyshev coefficients to Fourier coefficients. */
1236  c2e(plan);
1237 #ifdef MEASURE_TIME
1238  t1 = getticks();
1239  plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
1240 #endif
1241 
1242 #ifdef MEASURE_TIME
1243  t0 = getticks();
1244 #endif
1245  /* Check, which nonequispaced discrete Fourier transform algorithm should
1246  * be used.
1247  */
1248  if (plan->flags & NFSFT_USE_NDFT)
1249  {
1250  /* Use NDFT. */
1251  nfft_trafo_direct(&plan->plan_nfft);
1252  }
1253  else
1254  {
1255  /* Use NFFT. */
1256  //fprintf(stderr,"nfsft_adjoint: nfft_trafo\n");
1257  nfft_trafo_2d(&plan->plan_nfft);
1258  }
1259 #ifdef MEASURE_TIME
1260  t1 = getticks();
1261  plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
1262 #endif
1263  }
1264 }
1265 
1267 {
1268  int k; /*< The degree k */
1269  int n; /*< The order n */
1270 #ifdef MEASURE_TIME
1271  ticks t0, t1;
1272 #endif
1273 
1274 #ifdef MEASURE_TIME
1275  plan->MEASURE_TIME_t[0] = 0.0;
1276  plan->MEASURE_TIME_t[1] = 0.0;
1277  plan->MEASURE_TIME_t[2] = 0.0;
1278 #endif
1279 
1280  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
1281  {
1282  nfsft_set_f_hat_nan(plan);
1283  return;
1284  }
1285 
1286  if (plan->flags & NFSFT_NO_FAST_ALGORITHM)
1287  {
1288  nfsft_set_f_hat_nan(plan);
1289  return;
1290  }
1291 
1292  /* Check, if precomputation was done and that the bandwidth N is not too
1293  * big.
1294  */
1295  if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
1296  {
1297  nfsft_set_f_hat_nan(plan);
1298  return;
1299  }
1300 
1301  /* Check, if slow transformation should be used due to small bandwidth. */
1302  if (plan->N < NFSFT_BREAK_EVEN)
1303  {
1304  /* Use adjoint NDSFT. */
1305  nfsft_adjoint_direct(plan);
1306  }
1307  /* Check for correct value of the bandwidth N. */
1308  else if (plan->N <= wisdom.N_MAX)
1309  {
1310  //fprintf(stderr,"nfsft_adjoint: Starting\n");
1311  //fflush(stderr);
1312  /* Propagate pointer values to the internal NFFT plan to assure
1313  * consistency. Pointers may have been modified externally.
1314  */
1315  plan->plan_nfft.x = plan->x;
1316  plan->plan_nfft.f = plan->f;
1317  plan->plan_nfft.f_hat = plan->f_hat;
1318 
1319 #ifdef MEASURE_TIME
1320  t0 = getticks();
1321 #endif
1322  /* Check, which adjoint nonequispaced discrete Fourier transform algorithm
1323  * should be used.
1324  */
1325  if (plan->flags & NFSFT_USE_NDFT)
1326  {
1327  //fprintf(stderr,"nfsft_adjoint: Executing nfft_adjoint_direct\n");
1328  //fflush(stderr);
1329  /* Use adjoint NDFT. */
1330  nfft_adjoint_direct(&plan->plan_nfft);
1331  }
1332  else
1333  {
1334  //fprintf(stderr,"nfsft_adjoint: Executing nfft_adjoint\n");
1335  //fflush(stderr);
1336  //fprintf(stderr,"nfsft_adjoint: nfft_adjoint\n");
1337  /* Use adjoint NFFT. */
1338  nfft_adjoint_2d(&plan->plan_nfft);
1339  }
1340 #ifdef MEASURE_TIME
1341  t1 = getticks();
1342  plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
1343 #endif
1344 
1345  //fprintf(stderr,"nfsft_adjoint: Executing c2e_transposed\n");
1346  //fflush(stderr);
1347 #ifdef MEASURE_TIME
1348  t0 = getticks();
1349 #endif
1350  /* Convert Fourier coefficients to Chebyshev coefficients. */
1351  c2e_transposed(plan);
1352 #ifdef MEASURE_TIME
1353  t1 = getticks();
1354  plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
1355 #endif
1356 
1357 #ifdef MEASURE_TIME
1358  t0 = getticks();
1359 #endif
1360  /* Check, which transposed polynomial transform algorithm should be used */
1361  if (plan->flags & NFSFT_USE_DPT)
1362  {
1363 #ifdef _OPENMP
1364  n = 0;
1365  fpt_transposed_direct(wisdom.set_threads[0],abs(n),
1366  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1367  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1368  plan->N,0U);
1369 
1370  int n_abs;
1371  #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1372  for (n_abs = 1; n_abs <= plan->N; n_abs++)
1373  {
1374  int threadid = omp_get_thread_num();
1375  n = -n_abs;
1376  fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1377  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1378  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1379  plan->N,0U);
1380  n = n_abs;
1381  fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1382  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1383  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1384  plan->N,0U);
1385  }
1386 #else
1387  /* Use transposed DPT. */
1388  for (n = -plan->N; n <= plan->N; n++)
1389  {
1390  //fprintf(stderr,"nfsft_adjoint: Executing dpt_transposed\n");
1391  //fflush(stderr);
1392  fpt_transposed_direct(wisdom.set,abs(n),
1393  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1394  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1395  plan->N,0U);
1396  }
1397 #endif
1398  }
1399  else
1400  {
1401 #ifdef _OPENMP
1402  n = 0;
1403  fpt_transposed(wisdom.set_threads[0],abs(n),
1404  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1405  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1406  plan->N,0U);
1407 
1408  int n_abs;
1409  #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1410  for (n_abs = 1; n_abs <= plan->N; n_abs++)
1411  {
1412  int threadid = omp_get_thread_num();
1413  n = -n_abs;
1414  fpt_transposed(wisdom.set_threads[threadid],abs(n),
1415  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1416  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1417  plan->N,0U);
1418  n = n_abs;
1419  fpt_transposed(wisdom.set_threads[threadid],abs(n),
1420  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1421  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1422  plan->N,0U);
1423  }
1424 #else
1425  //fprintf(stderr,"nfsft_adjoint: fpt_transposed\n");
1426  /* Use transposed FPT. */
1427  for (n = -plan->N; n <= plan->N; n++)
1428  {
1429  //fprintf(stderr,"nfsft_adjoint: Executing fpt_transposed\n");
1430  //fflush(stderr);
1431  fpt_transposed(wisdom.set,abs(n),
1432  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1433  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1434  plan->N,0U);
1435  }
1436 #endif
1437  }
1438 #ifdef MEASURE_TIME
1439  t1 = getticks();
1440  plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
1441 #endif
1442 
1443  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1444  * multiply spherical Fourier coefficients with corresponding normalization
1445  * weight. */
1446  if (plan->flags & NFSFT_NORMALIZED)
1447  {
1448  //fprintf(stderr,"nfsft_adjoint: Normalizing\n");
1449  //fflush(stderr);
1450  /* Traverse Fourier coefficients array. */
1451 #ifdef _OPENMP
1452  #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1453 #endif
1454  for (k = 0; k <= plan->N; k++)
1455  {
1456  for (n = -k; n <= k; n++)
1457  {
1458  /* Multiply with normalization weight. */
1459  plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
1460  sqrt((2*k+1)/(4.0*KPI));
1461  }
1462  }
1463  }
1464 
1465  /* Set unused coefficients to zero. */
1466  if (plan->flags & NFSFT_ZERO_F_HAT)
1467  {
1468  //fprintf(stderr,"nfsft_adjoint: Setting to zero\n");
1469  //fflush(stderr);
1470  for (n = -plan->N; n <= plan->N+1; n++)
1471  {
1472  memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1473  (plan->N+1+abs(n))*sizeof(double _Complex));
1474  }
1475  }
1476  //fprintf(stderr,"nfsft_adjoint: Finished\n");
1477  //fflush(stderr);
1478  }
1479 }
1480 
1481 void nfsft_precompute_x(nfsft_plan *plan)
1482 {
1483  if (plan->flags & NFSFT_NO_FAST_ALGORITHM)
1484  return;
1485 
1486  /* Pass angle array to NFFT plan. */
1487  plan->plan_nfft.x = plan->x;
1488 
1489  /* Precompute. */
1490  if (plan->plan_nfft.flags & PRE_ONE_PSI)
1492 }
1493 /* \} */
void nfsft_init_advanced(nfsft_plan *plan, int N, int M, unsigned int flags)
Definition: nfsft.c:267
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials to coefficients m...
Definition: nfsft.c:115
double * gamma
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
#define NFSFT_MALLOC_F_HAT
Definition: nfft3.h:579
double * x
the nodes for ,
Definition: nfft3.h:574
#define FPT_PERSISTENT_DATA
If set, TODO complete comment.
Definition: nfft3.h:632
fftw_complex * f_hat
Fourier coefficients.
Definition: nfft3.h:192
#define NFSFT_NORMALIZED
Definition: nfft3.h:575
#define NFSFT_MALLOC_X
Definition: nfft3.h:578
void nfsft_trafo(nfsft_plan *plan)
Definition: nfsft.c:1056
#define NFSFT_PRESERVE_F_HAT
Definition: nfft3.h:581
Wisdom structure.
void nfsft_adjoint(nfsft_plan *plan)
Definition: nfsft.c:1266
fftw_complex * f_hat
Fourier coefficients.
Definition: nfft3.h:574
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
int N
the bandwidth
Definition: nfft3.h:574
void fpt_transposed(fpt_set set, const int m, double _Complex *x, double _Complex *y, const int k_end, const unsigned int flags)
Definition: fpt.c:1740
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
Definition: nfsft.c:364
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
Definition: nfft3.h:574
fpt_set set
Structure for discrete polynomial transform (DPT)
#define NFSFT_BREAK_EVEN
The break-even bandwidth .
Definition: nfsft.c:79
#define NFSFT_NO_FAST_ALGORITHM
Definition: nfft3.h:590
#define NFSFT_ZERO_F_HAT
Definition: nfft3.h:591
int N_MAX
Stores precomputation flags.
bool initialized
Indicates wether the structure has been initialized.
void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta, double *gam, int k_start, const double threshold)
Definition: fpt.c:1307
void(* mv_adjoint)(void *)
Adjoint transform.
Definition: nfft3.h:574
void alpha_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Definition: legendre.c:89
unsigned int flags
the planner flags
Definition: nfft3.h:574
void nfsft_init(nfsft_plan *plan, int N, int M)
Definition: nfsft.c:260
Holds data for a set of cascade summations.
Definition: fpt.h:65
fftw_complex * f
Samples.
Definition: nfft3.h:192
#define NFSFT_DEFAULT_NFFT_CUTOFF
The default NFFT cutoff parameter.
Definition: nfsft.c:65
void nfft_free(void *p)
#define FFTW_INIT
Definition: nfft3.h:203
nfft_plan plan_nfft
the internal NFFT plan
Definition: nfft3.h:574
NFFT_INT M_total
Total number of samples.
Definition: nfft3.h:574
double MEASURE_TIME_t[3]
Measured time for each step if MEASURE_TIME is set.
Definition: nfft3.h:574
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:57
#define PRE_PSI
Definition: nfft3.h:197
#define NFSFT_NO_DIRECT_ALGORITHM
Definition: nfft3.h:589
void * nfft_malloc(size_t n)
NFFT_INT N_total
Total number of Fourier coefficients.
Definition: nfft3.h:574
double * alpha
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
void nfft_finalize(nfft_plan *ths)
fftw_complex * f
Samples.
Definition: nfft3.h:574
void nfft_precompute_one_psi(nfft_plan *ths)
fpt_set fpt_init(const int M, const int t, const unsigned int flags)
Definition: fpt.c:795
#define NFSFT_MALLOC_F
Definition: nfft3.h:580
#define NFSFT_USE_NDFT
Definition: nfft3.h:576
void nfsft_finalize(nfsft_plan *plan)
Definition: nfsft.c:613
void beta_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Definition: legendre.c:98
double * beta
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
static void c2e_transposed(nfsft_plan *plan)
Transposed version of the function c2e.
Definition: nfsft.c:193
void gamma_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Definition: legendre.c:107
Header file for the nfft3 library.
double * x
Nodes in time/spatial domain, size is doubles.
Definition: nfft3.h:192
#define PRE_PHI_HUT
Definition: nfft3.h:193
#define FPT_AL_SYMMETRY
If set, TODO complete comment.
Definition: nfft3.h:637
static struct nfsft_wisdom wisdom
The global wisdom structure for precomputed data.
Definition: nfsft.c:90
#define PRE_ONE_PSI
Definition: nfft3.h:206
unsigned flags
Flags for precomputation, (de)allocation, and FFTW usage, default setting is PRE_PHI_HUT | PRE_PSI | ...
Definition: nfft3.h:192
fftw_complex * f_hat_intern
Internally used pointer to spherical Fourier coefficients.
Definition: nfft3.h:574
int T_MAX
The logarithm /f$t = N_{{max}}/f$ of the maximum bandwidth.
#define NFSFT_INDEX(k, n, plan)
Definition: nfft3.h:594
void(* mv_trafo)(void *)
Transform.
Definition: nfft3.h:574
void nfsft_forget(void)
Definition: nfsft.c:567
#define NFSFT_USE_DPT
Definition: nfft3.h:577