53 #include "../fpt/fpt.h" 65 #define NFSFT_DEFAULT_NFFT_CUTOFF 6 72 #define NFSFT_DEFAULT_THRESHOLD 1000 79 #define NFSFT_BREAK_EVEN 5 119 double _Complex last;
129 memset(plan->
f_hat_intern,0U,(2*plan->
N+2)*
sizeof(
double _Complex));
132 lowe = -plan->
N + (plan->
N%2);
136 for (n = lowe; n <= upe; n += 2)
142 for(k = 1; k <= plan->
N; k++)
153 low = -plan->
N + (1-plan->
N%2);
158 for (n = low; n <= up; n += 2)
170 *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
172 for (k = plan->
N-1; k > 0; k--)
175 *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
197 double _Complex last;
207 lowe = -plan->
N + (plan->
N%2);
211 for (n = lowe; n <= upe; n += 2)
217 for(k = 1; k <= plan->
N; k++)
225 low = -plan->
N + (1-plan->
N%2);
229 for (n = low; n <= up; n += 2)
235 for(k = 1; k <= plan->
N; k++)
247 for (k = 2; k < plan->
N; k++)
250 *xp = -0.25 * _Complex_I * (xp[1] - last);
254 *xp = 0.25 * _Complex_I * last;
275 void nfsft_init_guru(
nfsft_plan *plan,
int N,
int M,
unsigned int flags,
276 unsigned int nfft_flags,
int nfft_cutoff)
294 plan->
N_total = (2*plan->
N+2)*(2*plan->
N+2);
301 sizeof(
double _Complex));
308 sizeof(
double _Complex));
333 nfft_size[0] = 2*plan->
N+2;
334 nfft_size[1] = 2*plan->
N+2;
335 fftw_size[0] = 4*plan->
N;
336 fftw_size[1] = 4*plan->
N;
340 nfft_cutoff, nfft_flags,
341 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
365 unsigned int fpt_flags)
376 #pragma omp parallel default(shared) 378 int nthreads = omp_get_num_threads();
381 wisdom.nthreads = nthreads;
387 wisdom.flags = nfsft_flags;
391 X(next_power_of_2_exp_int)(N,&wisdom.
N_MAX,&wisdom.
T_MAX);
426 if (wisdom.
alpha != NULL)
429 #pragma omp parallel default(shared) private(n) 431 int nthreads = omp_get_num_threads();
432 int threadid = omp_get_thread_num();
435 wisdom.nthreads = nthreads;
449 wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
454 for (
int n = 0; n <= wisdom.
N_MAX; n++)
455 fpt_precompute_1(wisdom.set_threads[0],n,n);
459 #pragma omp for private(n) schedule(dynamic) 460 for (n = 0; n <= wisdom.
N_MAX; n++)
461 fpt_precompute_2(wisdom.set_threads[threadid],n,&wisdom.
alpha[ROW(n)],
462 &wisdom.
beta[ROW(n)], &wisdom.
gamma[ROW(n)],n,kappa);
469 for (n = 0; n <= wisdom.
N_MAX; n++)
475 &wisdom.
gamma[ROW(n)],n,kappa);
482 #pragma omp parallel default(shared) private(n) 485 int nthreads = omp_get_num_threads();
486 int threadid = omp_get_thread_num();
489 wisdom.nthreads = nthreads;
507 wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
512 for (
int n = 0; n <= wisdom.
N_MAX; n++)
513 fpt_precompute_1(wisdom.set_threads[0],n,n);
517 #pragma omp for private(n) schedule(dynamic) 518 for (n = 0; n <= wisdom.
N_MAX; n++)
520 alpha_al_row(alpha,wisdom.
N_MAX,n);
521 beta_al_row(beta,wisdom.
N_MAX,n);
522 gamma_al_row(gamma,wisdom.
N_MAX,n);
525 fpt_precompute_2(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
541 for (n = 0; n <= wisdom.
N_MAX; n++)
547 alpha_al_row(alpha,wisdom.
N_MAX,n);
548 beta_al_row(beta,wisdom.
N_MAX,n);
549 gamma_al_row(gamma,wisdom.
N_MAX,n);
599 for (k = 0; k < wisdom.nthreads; k++)
600 fpt_finalize(wisdom.set_threads[k]);
604 fpt_finalize(wisdom.
set);
656 double nan_value = nan(
"");
657 for (m = 0; m < plan->
M_total; m++)
658 plan->
f[m] = nan_value;
685 nfsft_set_f_nan(plan);
693 sizeof(
double _Complex));
707 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic) 709 for (k = 0; k <= plan->
N; k++)
711 for (n = -k; n <= k; n++)
715 sqrt((2*k+1)/(4.0*KPI));
726 for (m = 0; m < plan->
M_total; m++)
741 #pragma omp parallel for default(shared) private(m,stheta,sphi,n,a,n_abs,alpha,gamma,k) 743 for (m = 0; m < plan->
M_total; m++)
746 stheta = cos(2.0*KPI*plan->
x[2*m+1]);
748 sphi = 2.0*KPI*plan->
x[2*m];
757 for (n = -plan->
N; n <= plan->N; n++)
766 alpha = &(wisdom.
alpha[ROW(n_abs)]);
767 gamma = &(wisdom.
gamma[ROW(n_abs)]);
772 long double _Complex it2 = a[plan->
N];
773 long double _Complex it1 = a[plan->
N-1];
774 for (k = plan->
N; k > n_abs + 1; k--)
776 long double _Complex temp = a[k-2] + it2 * gamma[k];
777 it2 = it1 + it2 * alpha[k] * stheta;
784 it2 = it1 + it2 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
793 long double _Complex result = it2 * wisdom.
gamma[ROW(n_abs)] *
794 powl(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
796 plan->
f[m] += result;
801 double _Complex it2 = a[plan->
N];
802 double _Complex it1 = a[plan->
N-1];
803 for (k = plan->
N; k > n_abs + 1; k--)
805 double _Complex temp = a[k-2] + it2 * gamma[k];
806 it2 = it1 + it2 * alpha[k] * stheta;
813 it2 = it1 + it2 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
822 plan->
f[m] += it2 * wisdom.
gamma[ROW(n_abs)] *
823 pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
833 static void nfsft_set_f_hat_nan(
nfsft_plan *plan)
836 double nan_value = nan(
"");
837 for (k = 0; k <= plan->
N; k++)
838 for (n = -k; n <= k; n++)
863 nfsft_set_f_hat_nan(plan);
868 memset(plan->
f_hat,0U,plan->
N_total*
sizeof(
double _Complex));
876 for (m = 0; m < plan->
M_total; m++)
887 #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,k) schedule(dynamic) 888 for (n = -plan->
N; n <= plan->N; n++)
894 alpha = &(wisdom.
alpha[ROW(n_abs)]);
895 gamma = &(wisdom.
gamma[ROW(n_abs)]);
898 for (m = 0; m < plan->
M_total; m++)
901 double stheta = cos(2.0*KPI*plan->
x[2*m+1]);
903 double sphi = 2.0*KPI*plan->
x[2*m];
909 long double _Complex it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
910 powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
912 long double _Complex it2 = 0.0;
916 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
921 for (k = n_abs+2; k <= plan->
N; k++)
923 long double _Complex temp = it2;
924 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
932 double _Complex it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
933 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
935 double _Complex it2 = 0.0;
939 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
944 for (k = n_abs+2; k <= plan->
N; k++)
946 double _Complex temp = it2;
947 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
956 for (m = 0; m < plan->
M_total; m++)
959 double stheta = cos(2.0*KPI*plan->
x[2*m+1]);
961 double sphi = 2.0*KPI*plan->
x[2*m];
964 for (n = -plan->
N; n <= plan->N; n++)
970 alpha = &(wisdom.
alpha[ROW(n_abs)]);
971 gamma = &(wisdom.
gamma[ROW(n_abs)]);
977 long double _Complex it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
978 powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
980 long double _Complex it2 = 0.0;
984 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
989 for (k = n_abs+2; k <= plan->
N; k++)
991 long double _Complex temp = it2;
992 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
1000 double _Complex it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
1001 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
1003 double _Complex it2 = 0.0;
1005 if (n_abs < plan->N)
1007 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
1012 for (k = n_abs+2; k <= plan->
N; k++)
1014 double _Complex temp = it2;
1015 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
1032 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic) 1034 for (k = 0; k <= plan->
N; k++)
1036 for (n = -k; n <= k; n++)
1040 sqrt((2*k+1)/(4.0*KPI));
1048 for (n = -plan->
N; n <= plan->N+1; n++)
1051 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
1064 double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
1080 nfsft_set_f_nan(plan);
1086 nfsft_set_f_nan(plan);
1095 nfsft_set_f_nan(plan);
1103 nfsft_trafo_direct(plan);
1107 else if (plan->
N <= wisdom.
N_MAX)
1113 sizeof(
double _Complex));
1134 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic) 1136 for (k = 0; k <= plan->
N; k++)
1138 for (n = -k; n <= k; n++)
1142 sqrt((2*k+1)/(4.0*KPI));
1155 fpt_trafo_direct(wisdom.set_threads[0],abs(n),
1161 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic) 1162 for (n_abs = 1; n_abs <= plan->
N; n_abs++)
1164 int threadid = omp_get_thread_num();
1166 fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1171 fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1178 for (n = -plan->
N; n <= plan->N; n++)
1182 fpt_trafo_direct(wisdom.
set,abs(n),
1193 fpt_trafo(wisdom.set_threads[0],abs(n),
1199 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic) 1200 for (n_abs = 1; n_abs <= plan->
N; n_abs++)
1202 int threadid = omp_get_thread_num();
1204 fpt_trafo(wisdom.set_threads[threadid],abs(n),
1209 fpt_trafo(wisdom.set_threads[threadid],abs(n),
1216 for (n = -plan->
N; n <= plan->N; n++)
1220 fpt_trafo(wisdom.
set,abs(n),
1282 nfsft_set_f_hat_nan(plan);
1288 nfsft_set_f_hat_nan(plan);
1297 nfsft_set_f_hat_nan(plan);
1305 nfsft_adjoint_direct(plan);
1308 else if (plan->
N <= wisdom.
N_MAX)
1365 fpt_transposed_direct(wisdom.set_threads[0],abs(n),
1371 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic) 1372 for (n_abs = 1; n_abs <= plan->
N; n_abs++)
1374 int threadid = omp_get_thread_num();
1376 fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1381 fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1388 for (n = -plan->
N; n <= plan->N; n++)
1392 fpt_transposed_direct(wisdom.
set,abs(n),
1409 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic) 1410 for (n_abs = 1; n_abs <= plan->
N; n_abs++)
1412 int threadid = omp_get_thread_num();
1427 for (n = -plan->
N; n <= plan->N; n++)
1452 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic) 1454 for (k = 0; k <= plan->
N; k++)
1456 for (n = -k; n <= k; n++)
1460 sqrt((2*k+1)/(4.0*KPI));
1470 for (n = -plan->
N; n <= plan->N+1; n++)
1473 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
void nfsft_init_advanced(nfsft_plan *plan, int N, int M, unsigned int flags)
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials to coefficients m...
double * gamma
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
#define NFSFT_MALLOC_F_HAT
double * x
the nodes for ,
#define FPT_PERSISTENT_DATA
If set, TODO complete comment.
fftw_complex * f_hat
Fourier coefficients.
void nfsft_trafo(nfsft_plan *plan)
#define NFSFT_PRESERVE_F_HAT
void nfsft_adjoint(nfsft_plan *plan)
fftw_complex * f_hat
Fourier coefficients.
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
void fpt_transposed(fpt_set set, const int m, double _Complex *x, double _Complex *y, const int k_end, const unsigned int flags)
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
fpt_set set
Structure for discrete polynomial transform (DPT)
#define NFSFT_BREAK_EVEN
The break-even bandwidth .
#define NFSFT_NO_FAST_ALGORITHM
int N_MAX
Stores precomputation flags.
bool initialized
Indicates wether the structure has been initialized.
void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta, double *gam, int k_start, const double threshold)
void(* mv_adjoint)(void *)
Adjoint transform.
void alpha_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
unsigned int flags
the planner flags
void nfsft_init(nfsft_plan *plan, int N, int M)
Holds data for a set of cascade summations.
#define NFSFT_DEFAULT_NFFT_CUTOFF
The default NFFT cutoff parameter.
nfft_plan plan_nfft
the internal NFFT plan
NFFT_INT M_total
Total number of samples.
double MEASURE_TIME_t[3]
Measured time for each step if MEASURE_TIME is set.
#define X(name)
Include header for C99 complex datatype.
#define NFSFT_NO_DIRECT_ALGORITHM
void * nfft_malloc(size_t n)
NFFT_INT N_total
Total number of Fourier coefficients.
double * alpha
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
void nfft_finalize(nfft_plan *ths)
void nfft_precompute_one_psi(nfft_plan *ths)
fpt_set fpt_init(const int M, const int t, const unsigned int flags)
void nfsft_finalize(nfsft_plan *plan)
void beta_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
double * beta
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
static void c2e_transposed(nfsft_plan *plan)
Transposed version of the function c2e.
void gamma_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Header file for the nfft3 library.
double * x
Nodes in time/spatial domain, size is doubles.
#define FPT_AL_SYMMETRY
If set, TODO complete comment.
static struct nfsft_wisdom wisdom
The global wisdom structure for precomputed data.
unsigned flags
Flags for precomputation, (de)allocation, and FFTW usage, default setting is PRE_PHI_HUT | PRE_PSI | ...
fftw_complex * f_hat_intern
Internally used pointer to spherical Fourier coefficients.
int T_MAX
The logarithm /f$t = N_{{max}}/f$ of the maximum bandwidth.
#define NFSFT_INDEX(k, n, plan)
void(* mv_trafo)(void *)
Transform.