TRUST 1.9.8
HPC thermohydraulic platform
Loading...
Searching...
No Matches
Redistribute_Field.cpp
1/****************************************************************************
2* Copyright (c) 2025, CEA
3* All rights reserved.
4*
5* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9*
10* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
11* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
12* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*
14*****************************************************************************/
15
16#include <Redistribute_Field.h>
17#include <Linear_algebra_tools.h>
18#include <Perf_counters.h>
19
21 const Domaine_IJK& output,
23{
24 // map vide => ok pour des maillages de tailles identiques
25 VECT(IntTab) map;
26 initialize(input, output, loc, map);
27}
28
30 const Domaine_IJK& output,
32 const VECT(IntTab) & redistribute_maps)
33{
34 IntTab map;
35 // Calcul des tableaux send_blocs
36 for (int dir = 0; dir < 3; dir++)
37 {
38 if (redistribute_maps.size() == 0)
39 {
40 map.resize(1,3);
41 map(0,0) = 0;
42 map(0,1) = 0;
43 map(0,2) = input.get_nb_items_global(loc, dir);
44 if (output.get_nb_items_global(loc, dir) != map(0,2))
45 {
46 Cerr << "Error in Redistribute_Field::initialize: you must provide a redistribute map" << finl;
48 }
49 }
50 else
51 {
52 map = redistribute_maps[dir];
53 }
54 compute_send_blocs(input, output, loc, dir, map, send_blocs_[dir]);
55 // inverse la 'map' pour calculer les recv_blocs
56 for (int i = 0; i < map.dimension(0); i++)
57 {
58 int x = map(i,0);
59 map(i,0) = map(i,1);
60 map(i,1) = x;
61 }
62 compute_send_blocs(output, input, loc, dir, map, recv_blocs_[dir]);
63 }
64
65 // Calcul du schema de communication (liste des processeurs voisins et tailles des donnees
66 const FixedVector<IntTab, 3>& send_blocs = send_blocs_;
67 const FixedVector<IntTab, 3>& recv_blocs = recv_blocs_;
69 schema.begin_init();
71 int receive_size_buffer_for_me = 0;
72
73 // Compute send blocs sizes and destination processors
74 Int3 ibloc;
75 for (ibloc[2] = 0; ibloc[2] < send_blocs[2].dimension(0); ibloc[2] ++)
76 for (ibloc[1] = 0; ibloc[1] < send_blocs[1].dimension(0); ibloc[1] ++)
77 for (ibloc[0] = 0; ibloc[0] < send_blocs[0].dimension(0); ibloc[0] ++)
78 {
79 const int dest_slice_i = send_blocs[0](ibloc[0], 1);
80 const int ni = send_blocs[0](ibloc[0], 2);
81 const int dest_slice_j = send_blocs[1](ibloc[1], 1);
82 const int nj = send_blocs[1](ibloc[1], 2);
83 const int dest_slice_k = send_blocs[2](ibloc[2], 1);
84 const int nk = send_blocs[2](ibloc[2], 2);
85
86 const int dest_pe = output.get_processor_by_ijk(dest_slice_i, dest_slice_j, dest_slice_k);
87 if (dest_pe != Process::me())
88 schema.add_send_area_template<double>(dest_pe, ni*nj*nk);
89 else
90 size_buffer_for_me_ += ni*nj*nk;
91 }
92
93 // Compute recv bloc sizes and source processors
94 for (ibloc[2] = 0; ibloc[2] < recv_blocs[2].dimension(0); ibloc[2] ++)
95 for (ibloc[1] = 0; ibloc[1] < recv_blocs[1].dimension(0); ibloc[1] ++)
96 for (ibloc[0] = 0; ibloc[0] < recv_blocs[0].dimension(0); ibloc[0] ++)
97 {
98 const int src_slice_i = recv_blocs[0](ibloc[0], 1);
99 const int ni = recv_blocs[0](ibloc[0], 2);
100 const int src_slice_j = recv_blocs[1](ibloc[1], 1);
101 const int nj = recv_blocs[1](ibloc[1], 2);
102 const int src_slice_k = recv_blocs[2](ibloc[2], 1);
103 const int nk = recv_blocs[2](ibloc[2], 2);
104
105 const int src_pe = input.get_processor_by_ijk(src_slice_i, src_slice_j, src_slice_k);
106 if (src_pe != Process::me())
107 schema.add_recv_area_template<double>(src_pe, ni*nj*nk);
108 else
109 receive_size_buffer_for_me += ni*nj*nk;
110 }
111
112 if (size_buffer_for_me_ != receive_size_buffer_for_me)
113 {
114 Cerr << "Internal error in Redistribute_Field::initialize (size_buffer_for_me)" << finl;
116 }
117 schema.end_init();
118}
119
120// Calcule l'intersection des segments [s1, s1 + n1 - 1] et [s2, s2 + n2 - 1],
121// stocke le resultat dans s2 et n2,
122// si l'origine s2 est modifiee, met a jour s3 comme
123// nouveau s3 = ancien s3 + nouveau s2 - ancien s2
124void Redistribute_Field::intersect(const int s1, const int n1, int& s2, int& n2, int& s3)
125{
126 // end of segment (plus one)
127 int e1 = s1 + n1;
128 int e2 = s2 + n2;
129 int old_s2 = s2;
130 int zero = 0;
131 s2 = std::max(s1, s2);
132 e2 = std::min(e1, e2);
133 n2 = std::max(e2 - s2, zero);
134 s3 += s2 - old_s2;
135}
136
137// Calcule comment redistribuer les donnees locales de mon processeur d'un champ input
138// dans un champ output (calcul des donnees a envoyer aux autres processeurs)
139// pour la direction "dir".
140// global_index_mapping: tableau a trois colonnes.
141// colonne 1: index global du premier indice de segment a copier dans input
142// colonne 2: index global du debut du segment destination (dans output)
143// colonne 3: nombre d'elements du segment a copier
144// Si on veut redistribuer un champ simple, il suffit de passer un tableau global_index_mapping
145// avec une ligne et trois colonnes: { 0, 0, n } ou n est le nombre d'elements global du champ
146// Si on veut redistribuer en periodisant (construction du champ etendu pour le front-tracking)
147// on passe un tableau a trois lignes:
148// ligne 0, la zone centrale: { 0, extend_size, ni } (ni est la taille du champ non etendu)
149// ligne 1, la zone de gauche: { ni-extend_size, 0, extend_size }
150// ligne 2, la zone de droite: { 0, ni + extend_size, extend_size }
151//
152// On remplit le tableau send_blocs: liste de segments de donnees a envoyer aux autres processeurs
153// (colonne 0: indice local du premier element a envoyer,
154// colonne 1: numero de la tranche destination,
155// colonne 2: nombre d'elements consecutifs a envoyer)
157 const Domaine_IJK& output,
158 const Domaine_IJK::Localisation localisation,
159 const int dir,
160 const IntTab& global_index_mapping,
161 IntTab& send_blocs)
162{
163 // Distribution du champ output sur les differentes tranches de processeurs:
164 ArrOfInt output_slice_offsets;
165 ArrOfInt output_slice_size;
166 output.get_slice_offsets(dir, output_slice_offsets);
167 output.get_slice_size(dir, localisation, output_slice_size);
168
169 // Quelle partie du champ input est-ce que j'ai sur mon processeur:
170 const int input_slice_start = input.get_offset_local(dir);
171 const int input_slice_size = input.get_nb_items_local(localisation, dir);
172 send_blocs.resize(0,3);
173 // Boucle sur les segments a copier (un par ligne du global_index_mapping)
174 const int n_segments = global_index_mapping.dimension(0);
175 for (int i_segment = 0; i_segment < n_segments; i_segment++)
176 {
177 int input_seg_start = global_index_mapping(i_segment, 0);
178 int output_seg_start = global_index_mapping(i_segment, 1);
179 int seg_length = global_index_mapping(i_segment, 2);
180
181 // Intersection du segment a copier avec les donnees locales du processeur:
182 intersect(input_slice_start, input_slice_size, input_seg_start, seg_length, output_seg_start);
183
184 // Boucle sur les tranches du champ output:
185 const int nb_output_slices = output_slice_offsets.size_array();
186 for (int oslice = 0; oslice < nb_output_slices; oslice++)
187 {
188 int input_start = input_seg_start;
189 int output_start = output_seg_start;
190 int n = seg_length;
191 // Cette tranche oslice a-t-elle une intersection avec le output_segment courant ?
192 intersect(output_slice_offsets[oslice], output_slice_size[oslice], output_start, n, input_start);
193 if (n > 0)
194 {
195 const int input_start_local = input_start - input_slice_start;
196 send_blocs.append_line(input_start_local, oslice, n);
197 }
198 }
199 }
200}
201
202void Redistribute_Field::redistribute_(const IJK_Field_double& input_field,
203 IJK_Field_double& output_field,
204 bool add)
205{
206 statistics().create_custom_counter("Redistribute",2,"IJK");
207 statistics().begin_count("Redistribute",statistics().get_last_opened_counter_level()+1);
208
209 // For each direction, intersections between the input and output field:
210 // Each line contain a bloc to copy from-to another processor
211 // Column 0: local index in first field of the first element of the bloc
212 // Column 1: slice number in the other field
213 // Column 2: number of elements to copy
214 const FixedVector<IntTab, 3>& send_blocs = send_blocs_;
215 const FixedVector<IntTab, 3>& recv_blocs = recv_blocs_;
217 schema.begin_comm();
218
219 ArrOfDouble buffer_for_me;
220 ArrOfDouble tmp;
221 buffer_for_me.resize_array(size_buffer_for_me_, RESIZE_OPTIONS::NOCOPY_NOINIT);
222 int index_buffer_for_me = 0;
223 // Prepare data to send:
224 Int3 ibloc;
225 for (ibloc[2] = 0; ibloc[2] < send_blocs[2].dimension(0); ibloc[2] ++)
226 for (ibloc[1] = 0; ibloc[1] < send_blocs[1].dimension(0); ibloc[1] ++)
227 for (ibloc[0] = 0; ibloc[0] < send_blocs[0].dimension(0); ibloc[0] ++)
228 {
229 const int input_i_start = send_blocs[0](ibloc[0], 0);
230 const int dest_slice_i = send_blocs[0](ibloc[0], 1);
231 const int ni = send_blocs[0](ibloc[0], 2);
232 const int input_j_start = send_blocs[1](ibloc[1], 0);
233 const int dest_slice_j = send_blocs[1](ibloc[1], 1);
234 const int nj = send_blocs[1](ibloc[1], 2);
235 const int input_k_start = send_blocs[2](ibloc[2], 0);
236 const int dest_slice_k = send_blocs[2](ibloc[2], 1);
237 const int nk = send_blocs[2](ibloc[2], 2);
238
239 const int dest_pe = output_field.get_domaine().get_processor_by_ijk(dest_slice_i, dest_slice_j, dest_slice_k);
240 // Si le processeur destination est moi meme, on prend un tableau local sans passer par MPI:
241 ArrOfDouble *buf_ptr;
242 if (dest_pe == Process::me())
243 {
244 // On fait pointer tmp sur une zone du buffer local de la bonne taille:
245 tmp.ref_array(buffer_for_me, index_buffer_for_me /* start index */, ni*nj*nk /* size */);
246 index_buffer_for_me += ni*nj*nk; // avance de la taille du bloc dans le buffer
247 buf_ptr = &tmp;
248 }
249 else
250 {
251 // Le buffer est dans le schema de communication:
252 buf_ptr = &schema.get_next_area_template<double>(dest_pe, ni*nj*nk);
253 }
254 ArrOfDouble& buf = *buf_ptr;
255
256 for (int k = 0; k < nk; k++)
257 {
258 for (int j = 0; j < nj; j++)
259 {
260 for (int i = 0; i < ni; i++)
261 {
262 buf[k*nj*ni + j*ni + i] = input_field(i + input_i_start, j + input_j_start, k + input_k_start);
263 }
264 }
265 }
266 }
267
268 // Exchange data
269 schema.exchange();
270 index_buffer_for_me = 0;
271 // Get data to receive:
272 for (ibloc[2] = 0; ibloc[2] < recv_blocs[2].dimension(0); ibloc[2] ++)
273 for (ibloc[1] = 0; ibloc[1] < recv_blocs[1].dimension(0); ibloc[1] ++)
274 for (ibloc[0] = 0; ibloc[0] < recv_blocs[0].dimension(0); ibloc[0] ++)
275 {
276 const int output_i_start= recv_blocs[0](ibloc[0], 0);
277 const int src_slice_i = recv_blocs[0](ibloc[0], 1);
278 const int ni = recv_blocs[0](ibloc[0], 2);
279 const int output_j_start= recv_blocs[1](ibloc[1], 0);
280 const int src_slice_j = recv_blocs[1](ibloc[1], 1);
281 const int nj = recv_blocs[1](ibloc[1], 2);
282 const int output_k_start= recv_blocs[2](ibloc[2], 0);
283 const int src_slice_k = recv_blocs[2](ibloc[2], 1);
284 const int nk = recv_blocs[2](ibloc[2], 2);
285
286 const int src_pe = input_field.get_domaine().get_processor_by_ijk(src_slice_i, src_slice_j, src_slice_k);
287 ArrOfDouble *buf_ptr;
288 if (src_pe == Process::me())
289 {
290 // On fait pointer tmp sur une zone du buffer local de la bonne taille:
291 tmp.ref_array(buffer_for_me, index_buffer_for_me /* start index */, ni*nj*nk /* size */);
292 index_buffer_for_me += ni*nj*nk; // avance de la taille du bloc dans le buffer
293 buf_ptr = &tmp;
294 }
295 else
296 {
297 // Le buffer est dans le schema de communication:
298 buf_ptr = &schema.get_next_area_template<double>(src_pe, ni*nj*nk);
299 }
300 ArrOfDouble& buf = *buf_ptr;
301 if (!add)
302 for (int k = 0; k < nk; k++)
303 {
304 for (int j = 0; j < nj; j++)
305 {
306 for (int i = 0; i < ni; i++)
307 {
308 output_field(i + output_i_start, j + output_j_start, k + output_k_start) = buf[k*nj*ni + j*ni + i];
309 }
310 }
311 }
312 else
313 for (int k = 0; k < nk; k++)
314 {
315 for (int j = 0; j < nj; j++)
316 {
317 for (int i = 0; i < ni; i++)
318 {
319 output_field(i + output_i_start, j + output_j_start, k + output_k_start) += buf[k*nj*ni + j*ni + i];
320 }
321 }
322 }
323 }
324 schema.end_comm();
325 statistics().end_count("Redistribute");
326
327}
328
329void Redistribute_Field::redistribute_(const IJK_Field_float& input_field,
330 IJK_Field_float& output_field,
331 bool add)
332{
333 Cerr << "Redistribute_Field::redistribute_(const IJK_Field_float & input_field,...) not implemented" << finl;
335}
This class encapsulates all the information related to the eulerian mesh for TrioIJK.
Definition Domaine_IJK.h:47
int get_offset_local(int direction) const
Returns the local offset in requested direction.
int get_processor_by_ijk(const FixedVector< int, 3 > &slice) const
Return the global index of the processor according to its position.
int get_nb_items_local(Localisation loc, int direction) const
Returns the number of local items (on this processor) for the given localisation in the requested dir...
void get_slice_offsets(int direction, ArrOfInt &tab) const
Returns the indices of the first cell in requested direction of every slices in this direction.
int get_nb_items_global(Localisation loc, int direction) const
Returns the number of local items (on this processor) for the given localisation in the requested dir...
void get_slice_size(int direction, Localisation loc, ArrOfInt &tab) const
Returns the number of items of given location (elements, nodes, faces...) for all slices in the reque...
Localisation
Localisation sub class.
Definition Domaine_IJK.h:53
const Domaine_IJK & get_domaine() const
static int me()
renvoie mon rang dans le groupe de communication courant.
Definition Process.cpp:125
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
Definition Process.cpp:455
Schema_Comm_Vecteurs schema_comm_
static void intersect(const int s1, const int n1, int &s2, int &n2, int &s3)
void redistribute_(const IJK_Field_double &input_field, IJK_Field_double &output_field, bool add)
void compute_send_blocs(const Domaine_IJK &input, const Domaine_IJK &output, const Domaine_IJK::Localisation localisation, const int dir, const IntTab &global_index_mapping, IntTab &send_blocs)
FixedVector< IntTab, 3 > recv_blocs_
FixedVector< IntTab, 3 > send_blocs_
void initialize(const Domaine_IJK &input, const Domaine_IJK &output, const Domaine_IJK::Localisation loc)
void add_send_area_template(int pe, int size)
TRUSTArray< _TYPE_ > & get_next_area_template(int pe, int array_size)
void add_recv_area_template(int pe, int size)
void end_init()
Une fois les donnees a echanger declarees avec add_send/recv_area_.
void begin_init()
Reinitialise les tailles de buffers.
void exchange(IsExchangeBlocking exchange_type=IsExchangeBlocking::DefaultBlocking, const std::string kernel_name="noname")
void begin_comm(bool bufferOnDevice=false)
Commence un nouvel echange de donnees (les tailles de buffers doivent avoir ete initialisees avec beg...
_SIZE_ size_array() const
virtual void ref_array(TRUSTArray &, _SIZE_ start=0, _SIZE_ sz=-1)
void resize_array(_SIZE_ new_size, RESIZE_OPTIONS opt=RESIZE_OPTIONS::COPY_INIT)
void resize(_SIZE_ n, RESIZE_OPTIONS opt=RESIZE_OPTIONS::COPY_INIT)
Definition TRUSTTab.tpp:469
void append_line(_TYPE_)
Definition TRUSTTab.tpp:213
_SIZE_ dimension(int d) const
Definition TRUSTTab.tpp:133
_SIZE_ size() const
Definition TRUSTVect.tpp:45