31 #include "../fpt/fpt.h" 37 #define DEFAULT_NFFT_CUTOFF 6 38 #define FPT_THRESHOLD 1000 40 #define NFSOFT_INDEX_TWO(m,n,l,B) ((B+1)*(B+1)+(B+1)*(B+1)*(m+B)-((m-1)*m*(2*m-1)+(B+1)*(B+2)*(2*B+3))/6)+(posN(n,m,B))+(l-MAX(ABS(m),ABS(n))) 42 static fpt_set* SO3_fpt_init(
int l,
unsigned int flags,
int kappa,
int nthreads);
43 static int posN(
int n,
int m,
int B);
52 unsigned int nfsoft_flags)
56 DEFAULT_NFFT_CUTOFF, FPT_THRESHOLD);
60 unsigned int nfsoft_flags,
unsigned int nfft_flags,
int nfft_cutoff,
67 unsigned int nfsoft_flags,
unsigned int nfft_flags,
int nfft_cutoff,
68 int fpt_kappa,
int nn_oversampled)
77 n[0] = nn_oversampled ;
78 n[1] = nn_oversampled ;
79 n[2] = nn_oversampled ;
81 nfft_init_guru(&plan->
p_nfft, 3, N, M, n, nfft_cutoff, nfft_flags,
82 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
91 plan->
flags = nfsoft_flags;
96 if (plan->
f_hat == NULL ) printf(
"Allocation failed!\n");
102 if (plan->
x == NULL ) printf(
"Allocation failed!\n");
107 if (plan->
f == NULL ) printf(
"Allocation failed!\n");
117 plan->
nthreads = Y(get_num_threads)();
123 static void c2e(
nfsoft_plan *my_plan,
int even, C* wig_coeffs,
int k,
int m)
131 C cheby[2*my_plan->
N_total + 2];
132 cheby[my_plan->
N_total+1] = wig_coeffs[0];
135 for (j=1;j<my_plan->
N_total+1;j++)
137 cheby[my_plan->
N_total+1+j]=0.5* wig_coeffs[j];
138 cheby[my_plan->
N_total+1-j]=0.5* wig_coeffs[j];
151 cheby[0]=(C) (-1.)/(2.*_Complex_I) * aux[1];
154 cheby[j]=(1./(2.*_Complex_I)*(aux[j+1]-aux[j-1]));
159 for (
int i = 1; i <= 2* my_plan ->
N_total + 2; i++)
168 static fpt_set* SO3_fpt_init(
int l,
unsigned int flags,
int kappa,
int nthreads)
171 int N, t, k_start, k, m;
181 t = (int) log2(
X(next_power_of_2)(N));
190 N =
X(next_power_of_2)(l);
196 unsigned int fptflags = 0U
216 set[0] =
fpt_init((2* N + 1) * (2* N + 1), t, fptflags);
217 for (
int i=1; i<nthreads; i++)
219 set[i] =
fpt_init((2* N + 1) * (2* N + 1), t, fptflags | FPT_NO_INIT_FPT_DATA);
220 set[i]->dpt =
set[0]->dpt;
225 for (k = -N; k <= N; k++)
226 for (m = -N; m <= N; m++)
229 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
231 fpt_precompute_1(
set[0], (k+N)*(2*N+1) + m+N,k_start);
233 #pragma omp parallel for default(shared) private(k,m,k_start) schedule(dynamic) num_threads(nthreads) 235 for (k = -N; k <= N; k++)
236 for (m = -N; m <= N; m++)
239 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
248 fpt_precompute_2(
set[omp_get_thread_num()], (k+N)*(2*N+1) + m+N, alpha, beta, gamma, k_start, kappa);
250 fpt_precompute(
set[0], (k+N)*(2*N+1) + m+N, alpha, beta, gamma, k_start, kappa);
257 static void SO3_fpt(C *coeffs,
fpt_set set,
int l,
int k,
int m,
unsigned int flags)
261 int k_start, k_end, j;
262 int function_values = 0;
276 N =
X(next_power_of_2)(l);
280 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
282 trafo_nr = (N + k) * (2* N + 1) + (m + N);
287 for (j = 0; j <= k_end; j++)
291 for (j = 0; j <= l - k_start; j++)
293 x[j + k_start] = coeffs[j];
295 for (j = l - k_start + 1; j <= k_end - k_start; j++)
297 x[j + k_start] = K(0.0);
303 if (flags & NFSOFT_USE_DPT)
305 fpt_trafo_direct(
set, trafo_nr, &x[k_start], y, k_end, 0U
310 fpt_trafo(
set, trafo_nr, &x[k_start], y, k_end, 0U
315 for (j = 0; j <= l; j++)
322 static void SO3_fpt_transposed(C *coeffs,
fpt_set set,
int l,
int k,
int m,
325 int N, k_start, k_end, j;
327 int function_values = 0;
342 N =
X(next_power_of_2)(l);
346 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
348 trafo_nr = (N + k) * (2* N + 1) + (m + N);
355 for (j = 0; j <= l; j++)
359 for (j = l + 1; j <= k_end; j++)
364 if (flags & NFSOFT_USE_DPT)
366 fpt_transposed_direct(
set, trafo_nr, &x[k_start], y, k_end, 0U
375 for (j = 0; j <= l; j++)
391 for (j = 0; j < M; j++)
393 plan3D->
p_nfft.
x[3* j ] = plan3D->
x[3* j + 2];
394 plan3D->
p_nfft.
x[3* j + 1] = plan3D->
x[3* j ];
395 plan3D->
p_nfft.
x[3* j + 2] = plan3D->
x[3* j + 1];
426 for (
int j = 0; j < M; j++)
427 plan3D->
f[j] = plan3D->
f_hat[0];
435 #pragma omp parallel
for default(shared) num_threads(plan3D->
nthreads)
437 for (
int k = -N; k <= N; k++)
439 C wig_coeffs[(
X(next_power_of_2)(N)+1)];
441 int threadid = omp_get_thread_num();
446 for (
int m = -N; m <= N; m++)
449 int max = (ABS(m) > ABS(k) ? ABS(m) : ABS(k));
451 int glo0 = NFSOFT_INDEX_TWO(k,m,max,N);
453 for (
int j = 0; j <= N - max; j++)
455 wig_coeffs[j] = plan3D->
f_hat[glo0 + j];
459 wig_coeffs[j] = wig_coeffs[j] * (1. / (2. * KPI))
460 * SQRT(0.5 * (2. * (max + j) + 1.));
465 if ((k < 0) && (k % 2))
467 wig_coeffs[j] = wig_coeffs[j] * (-1);
469 if ((m < 0) && (m % 2))
470 wig_coeffs[j] = wig_coeffs[j] * (-1);
473 wig_coeffs[j] = wig_coeffs[j] * (-1);
480 for (
int j = N - max + 1; j <
X(next_power_of_2)(N) + 1; j++)
485 c2e(plan3D, ABS((k + m) % 2), wig_coeffs, k, m);
492 nfft_trafo_direct(&(plan3D->
p_nfft));
500 for (
int j = 0; j < plan3D->
M_total; j++)
505 static void e2c(
nfsoft_plan *my_plan,
int even, C* wig_coeffs, C* cheby)
519 aux[0]= 1/(2*_Complex_I)*cheby[1];
523 aux[j]=1/(2*_Complex_I)*(cheby[j+1]-cheby[j-1]);
525 aux[N-1]=1/(2*_Complex_I)*(-cheby[j-1]);
534 wig_coeffs[0]=cheby[my_plan->
N_total+1];
536 for(j=1;j<=my_plan->
N_total;j++)
538 wig_coeffs[j]=0.5*(cheby[my_plan->
N_total+j+1]+cheby[my_plan->
N_total+1-j]);
556 for (
int j = 0; j < M; j++)
557 plan3D->
f_hat[0] += plan3D->
f[j];
562 for (
int j = 0; j < M; j++)
569 nfft_adjoint_direct(&(plan3D->
p_nfft));
579 #pragma omp parallel for default(shared) num_threads(plan3D->nthreads) 581 for (
int k = -N; k <= N; k++)
584 int threadid = omp_get_thread_num();
588 for (
int m = -N; m <= N; m++)
590 C wig_coeffs[(
X(next_power_of_2)(N)+1)];
591 C cheby[2*plan3D->
N_total + 2];
593 int max = (ABS(m) > ABS(k) ? ABS(m) : ABS(k));
595 for (
int i = 1; i < 2* plan3D ->
N_total + 3; i++)
604 e2c(plan3D, ABS((k + m) % 2), wig_coeffs, cheby);
612 int glo0 = NFSOFT_INDEX_TWO(k,m,0,N);
614 for (
int j = max; j <= N; j++)
618 if ((k < 0) && (k % 2))
620 wig_coeffs[j] = -wig_coeffs[j];
622 if ((m < 0) && (m % 2))
623 wig_coeffs[j] = -wig_coeffs[j];
626 wig_coeffs[j] = wig_coeffs[j] * (-1);
630 plan3D->
f_hat[glo0+j] = wig_coeffs[j];
634 plan3D->
f_hat[glo0+j] = plan3D->
f_hat[glo0+j] * (1 / (2. * KPI)) * SQRT(
635 0.5 * (2. * (j) + 1.));
650 for (
int i=0; i<plan->
nthreads; i++)
676 static int posN(
int n,
int m,
int B)
681 pos = posN(n - 1, m, B) + B + 1 - MAX(ABS(m), ABS(n - 1));
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials to coefficients m...
void nfsoft_init_advanced(nfsoft_plan *plan, int N, int M, unsigned int nfsoft_flags)
double * gamma
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
fftw_complex * f_hat
Fourier coefficients.
void nfsoft_init_guru_advanced(nfsoft_plan *plan, int B, int M, unsigned int nfsoft_flags, unsigned int nfft_flags, int nfft_cutoff, int fpt_kappa, int nn_oversampled)
#define NFSOFT_NO_STABILIZATION
fftw_complex * aux
deprecated variable
fftw_complex * f_hat
Fourier coefficients.
void nfsoft_init(nfsoft_plan *plan, int N, int M)
#define NFSOFT_NORMALIZED
void SO3_gamma_row(double *gamma, int N, int m, int n)
Compute three-term-recurrence coefficients of Wigner-d functions for all degrees ...
int nthreads
the number of threads
void fpt_transposed(fpt_set set, const int m, double _Complex *x, double _Complex *y, const int k_end, const unsigned int flags)
void nfft_precompute_lin_psi(nfft_plan *ths)
#define NFSOFT_INDEX(m, n, l, B)
fftw_complex * wig_coeffs
deprecated variable
void SO3_beta_row(double *beta, int N, int m, int n)
Compute three-term-recurrence coefficients of Wigner-d functions for all degrees ...
#define NFSOFT_MALLOC_F_HAT
NFFT_INT M_total
Total number of samples.
#define FPT_NO_STABILIZATION
If set, no stabilization will be used.
void nfft_adjoint(nfft_plan *ths)
void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta, double *gam, int k_start, const double threshold)
#define FPT_FUNCTION_VALUES
If set, the output are function values at Chebyshev nodes rather than Chebyshev coefficients.
Holds data for a set of cascade summations.
void nfsoft_precompute(nfsoft_plan *plan3D)
void(* mv_adjoint)(void *)
Adjoint transform.
void SO3_alpha_row(double *alpha, int N, int m, int n)
Compute three-term-recurrence coefficients of Wigner-d functions for all degrees ...
NFFT_INT N_total
Total number of Fourier coefficients.
fftw_complex * cheby
deprecated variable
NFFT_INT N_total
Total number of Fourier coefficients.
#define FPT_NO_FAST_ALGORITHM
If set, TODO complete comment.
NFFT_INT M_total
Total number of samples.
#define X(name)
Include header for C99 complex datatype.
void nfft_trafo(nfft_plan *ths)
void nfsoft_finalize(nfsoft_plan *plan)
void * nfft_malloc(size_t n)
double * alpha
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
void nfft_finalize(nfft_plan *ths)
void nfsoft_trafo(nfsoft_plan *plan3D)
void nfft_precompute_one_psi(nfft_plan *ths)
void nfsoft_init_guru(nfsoft_plan *plan, int B, int M, unsigned int nfsoft_flags, unsigned int nfft_flags, int nfft_cutoff, int fpt_kappa)
fpt_set fpt_init(const int M, const int t, const unsigned int flags)
void nfsoft_adjoint(nfsoft_plan *plan3D)
double * beta
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
Header file for functions related to Wigner-d/D functions.
void(* mv_trafo)(void *)
Transform.
Header file for the nfft3 library.
fpt_set * internal_fpt_set
the internal FPT plan
double * x
Nodes in time/spatial domain, size is doubles.
#define FPT_NO_DIRECT_ALGORITHM
If set, TODO complete comment.
nfft_plan p_nfft
the internal NFFT plan
unsigned int flags
the planner flags