-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSpectralModifier.impl.hpp
207 lines (165 loc) · 8.77 KB
/
SpectralModifier.impl.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#if !defined(SPECTRALMODIFIER_HPP)
#error "This file should only be included through SpectralModifier.hpp"
#endif
#ifndef SPECTRALMODIFIER_IMPL_HPP
#define SPECTRALMODIFIER_IMPL_HPP
#define PI (3.14159265358979323846264338327950288E0)
#define ALIGN 16 // no clue what this thing is
#include "SpectralModifier.hpp"
// Default Constructor
inline SpectralModifier::SpectralModifier ():
input(nullptr), geom(), verbose(), ba(nullptr), dmap(),
n_box_dim(), domain(), numb_boxes_dim(),
numb_boxes(), rank_mapping(), h()
{
}
// Constructor
inline SpectralModifier::SpectralModifier (MultiFab& input, Geometry& geom, int verbose):
input(&input), geom(geom), verbose(verbose), ba(&(input.boxArray())), dmap(input.DistributionMap()),
n_box_dim((*ba)[0].size()), domain(geom.Domain()), numb_boxes_dim(domain.length() / n_box_dim),
numb_boxes(numb_boxes_dim[0]*numb_boxes_dim[1]*numb_boxes_dim[2]), rank_mapping(), h(geom.CellSize(0))
{
if (input.nGrow() != 0)
amrex::Error("Current implementation requires that both input and output have no ghost cells");
// Ericka said fillGhosts could be useful to use to fill periodic Ghost cells once I have filled in with my noise.
if (numb_boxes != (*ba).size()) { // size of Box object = nx*ny*nz
amrex::Print() << ba <<std::endl;
amrex::Print() << numb_boxes << std::endl;
amrex::Error("numb_boxes not right");}
rank_mapping.resize(numb_boxes);
remap();
// amrex::Print() << "dx?"<< h << std::endl;
}
inline void SpectralModifier::remap()
{
//Building the FFT's new C-ordered rank mapping.
// AMREX has Fortran/ column-major ordering (first index, ie x, changing fastest in memory: arr[i_x][i_y][i_z] --> arr[i_x+ n_x*i_y+n_x*n_y*i_z])
// while dfft has C/row-major ordering (last index, ie x, changing fastest in memory: arr[i_x][i_y][i_z] --> arr[i_z+ n_z*i_y+n_z*n_y*i_x])
for (int ib = 0; ib < numb_boxes; ++ib) // For each subbox
{
// The smallEnd method provides the indices of the lower corner of the Box in each dimension.
int i = (*ba)[ib].smallEnd(0) / n_box_dim[0];
int j = (*ba)[ib].smallEnd(1) / n_box_dim[1];
int k = (*ba)[ib].smallEnd(2) / n_box_dim[2];
// This would be the "correct" local index if the data wasn't being transformed
// int local_index = k*nbx*nby + j*nbx + i;
int local_index = k*numb_boxes_dim[0]*numb_boxes_dim[1] + j*numb_boxes_dim[1] + i;
// This is what we pass to dfft to compensate for the Fortran ordering
// of amrex data in MultiFabs.
// int local_index = i*numb_boxes_dim[1]*numb_boxes_dim[2] + j*numb_boxes_dim[2] + k;
rank_mapping[local_index] = dmap[ib]; // copy each rank in the F-ordered Distribution Mapping to its C-order version.
if (verbose)
amrex::Print() << "LOADING RANK NUMBER " << dmap[ib] << " FOR GRID NUMBER " << ib
<< " WHICH IS LOCAL NUMBER " << local_index << std::endl;
}
}
inline MultiFab SpectralModifier::apply_func(double (*amp_func)(double))
{
MultiFab output((*input).boxArray(), (*input).DistributionMap(), (*input).nComp(), (*input).nGrow());
output.ParallelCopy((*input));
// Assume for now that nx = ny = nz
int Ndims[3] = {numb_boxes_dim[2], numb_boxes_dim[1], numb_boxes_dim[0]};
int n[3] = {domain.length(2), domain.length(1), domain.length(0)}; // C-order
// Hardware/Hybrid Cosmology Code (HACC) provides a functionality for performing Discrete Fast Fourier Transforms in AMReX (tailored for cosmological simulations.)
hacc::Distribution d(MPI_COMM_WORLD,n,Ndims,&rank_mapping[0]);
hacc::Dfft dfft(d); // creates the dfftiser
for (MFIter mfi((*input),false); mfi.isValid(); ++mfi) // iterating over the FArrayBox objects (subboxes = Box) that are stored locally on the current MPI rank.
{
int gid = mfi.index();
size_t local_size = dfft.local_size();
//
std::vector<complex_t, hacc::AlignedAllocator<complex_t, ALIGN> > a; //memory is allocated with a specific alignment
std::vector<complex_t, hacc::AlignedAllocator<complex_t, ALIGN> > b; // b is basically a buffer storage (ie intermediary things used in calculations)
a.resize(n_box_dim[0]*n_box_dim[1]*n_box_dim[2]);
b.resize(n_box_dim[0]*n_box_dim[1]*n_box_dim[2]);
dfft.makePlans(&a[0],&b[0],&a[0],&b[0]); // designs the fft to be optimal given inputs and outputs dims
// *******************************************
// Copy real data from input into real part of a -- no ghost cells and
// put into C++ ordering (not Fortran)
// *******************************************
complex_t zero(0.0, 0.0);
size_t local_indx = 0;
for(size_t k=0; k<(size_t)n_box_dim[2]; k++) {
for(size_t j=0; j<(size_t)n_box_dim[1]; j++) {
for(size_t i=0; i<(size_t)n_box_dim[0]; i++) {
complex_t temp((*input)[mfi].dataPtr()[local_indx],0.);
a[local_indx] = temp;
local_indx++;
}
}
}
dfft.forward(&a[0]); // surely this is done in parallel
// *******************************************
// Now apply amp_func to the coefficients of the transform
// *******************************************
// Warning: DFT such that kx,ky,kz range from 0 to N-1
local_indx = 0;
const int *self = dfft.self_kspace(); // rank location in k-space?
const int *local_ng = dfft.local_ng_kspace(); // local grid dimensions in k-space?
const int *global_ng = dfft.global_ng();
// global_i/j/k are the global indices for the k modes.
for(size_t i=0; i<(size_t)local_ng[0]; i++) {
size_t global_i = local_ng[0]*self[0] + i;
for(size_t j=0; j<(size_t)local_ng[1]; j++) {
size_t global_j = local_ng[1]*self[1] + j;
for(size_t k=0; k<(size_t)local_ng[2]; k++) {
size_t global_k = local_ng[2]*self[2] + k;
if (global_i == 0 && global_j == 0 & global_k == 0) {
a[local_indx] = 0;
} else {
// This is k but remember that gradients in discrete space are not that easy. It's the effective k (see Caravano21), not a problem if there is a cutoff to keep global_i,j,k<<global_ng
double fac = sqrt(-2. * (
(cos(2*PI*double(global_i)/double(global_ng[0])) - 1.) +
(cos(2*PI*double(global_j)/double(global_ng[1])) - 1.) +
(cos(2*PI*double(global_k)/double(global_ng[2])) - 1.) ));
// dimensionful effective k is fac/dx
a[local_indx] = a[local_indx] * std::abs(amp_func(fac/h)); // only keep the square root of the power spectrum.
}
local_indx++;
}
}
}
dfft.backward(&a[0]);
size_t global_size = dfft.global_size(); // is this Nx*Ny*Nz? yes
// double fac = pow(h,2) / global_size; // why is that how it was orignally normalised? because laplacian?
double fac = pow(sqrt(2*PI)/h,1.5)/sqrt(global_size);
local_indx = 0;
for(size_t k=0; k<(size_t)n_box_dim[2]; k++) {
for(size_t j=0; j<(size_t)n_box_dim[1]; j++) {
for(size_t i=0; i<(size_t)n_box_dim[0]; i++) {
output[mfi].dataPtr()[local_indx] = fac * std::real(a[local_indx]);
local_indx++;
}
}
}
// END of parallel
}
return output;
}
// Function to fill MultiFab with random noise
inline void SpectralModifier::FillInputWithRandomNoise(std::mt19937 gen) // could use a parallel for there no? no
{
double stddev = pow(double(n_box_dim[0]*n_box_dim[1]*n_box_dim[2]),-0.5); // Normalisation to get a rayleigh draw once in Fourier space.
std::normal_distribution<Real> dis(0.0, stddev);
for (MFIter mfi((*input)); mfi.isValid(); ++mfi)
{
const Box& box = mfi.validbox();
auto arr = ((*input)[mfi]).array();
for (int k = box.smallEnd(2); k <= box.bigEnd(2); ++k) {
for (int j = box.smallEnd(1); j <= box.bigEnd(1); ++j) {
for (int i = box.smallEnd(0); i <= box.bigEnd(0); ++i) {
arr(i,j,k) = dis(gen);
}
}
}
}
}
#endif /* SPECTRALMODIFIER_IMPL_HPP_ */
///////////////////////////////
// How do you do fft on small domains and then gather for bigger wavelengths??
// Well The F to C ordering conversion was not the end!!
// "SWFFT takes three-dimensional arrays of data distributed across block-structured grids,
// and redistributes the data into “pencil” grids in and then, belonging to different MPI processes.
// After each pencil conversion, a 1D FFT is performed on the data along the pencil direction using calls to the FFTW 3 library."
// See https://amrex-codes.github.io/amrex/docs_html/SWFFT.html
//////////////////////////////