TRUST 1.9.8
HPC thermohydraulic platform
Loading...
Searching...
No Matches
TRUSTArray.cpp
1/****************************************************************************
2* Copyright (c) 2025, CEA
3* All rights reserved.
4*
5* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9*
10* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
11* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
12* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*
14*****************************************************************************/
15
16#include <arch.h>
17#include <TRUSTArray.h>
18#include <string.h>
19#ifdef TRUST_USE_GPU
20#include <DeviceMemory.h>
21#endif
22
23#ifndef LATATOOLS
24#include <Perf_counters.h>
25#endif
26
27// TRUSTArray kernels for device moved in .cpp file to avoid multiple definition during link
// Writes the array to the output stream 'os': first its size, then (when non-empty) its values.
// NOTE(review): the signature line (listing line 29) is missing from this listing; the use of 'os'
// and the 'return os' suggest this is TRUSTArray::printOn(Sortie& os) const -- confirm.
28template <typename _TYPE_, typename _SIZE_>
30{
31#ifndef LATATOOLS
// ensureDataOnHost() is called before the raw host pointer is read (presumably to sync back a device copy).
32 this->ensureDataOnHost();
33 _SIZE_ sz = size_array();
34 os << sz << finl;
35 if (sz > 0)
36 {
37 const _TYPE_* v = span_.data();
// Both the element count and the third argument of put() are 'sz'.
38 os.put(v,sz,sz);
39 }
40#endif
// When LATATOOLS is defined nothing is written and 'os' is returned as is.
41 return os;
42}
43
// Reads the array from the input stream 'is': first a size, then (when size > 0) that many values.
// A negative size is reported on Cerr.
// NOTE(review): the signature line (listing line 45) is missing from this listing; the error message
// below names TRUSTArray::readOn -- confirm.
44template <typename _TYPE_, typename _SIZE_>
46{
47#ifndef LATATOOLS
48 _SIZE_ sz;
49 is >> sz;
50 if (sz >= 0)
51 {
52 // Call the precondition-free method on the derived type (since readOn is virtual, the other properties will be initialized correctly)
53 resize_array_(sz);
54 if (sz > 0)
55 {
56 _TYPE_* v = span_.data();
57 is.get(v,sz);
58 }
59 }
60 else
61 {
62 Cerr << "Error in TRUSTArray:readOn : size = " << sz << finl;
// NOTE(review): listing line 63 is missing here; presumably an exit call follows the message -- confirm.
64 }
65#endif
// When LATATOOLS is defined nothing is read and 'is' is returned as is.
66 return is;
67}
68
69
70/** Protected method for resize. Used by derived classes.
71 * Same as resize_array() with fewer checks.
72 *
73 * This is also where we deal with the STORAGE::TEMP_STORAGE capability, i.e. the Trav arrays.
74 * Their memory is taken from a shared pool (TRUSTTravPool). This kind of array should never be
75 * used in 64bits, since Trav are meaningful when inside the timestepping (so the 32bit world after the
76 * Scatter instruction).
 *
 * @param new_size requested number of elements, must be >= 0 (checked by assert).
 * @param opt when equal to RESIZE_OPTIONS::COPY_INIT, newly exposed elements are set to 0;
 *            otherwise they are left as they are.
77 */
78template <typename _TYPE_, typename _SIZE_>
79void TRUSTArray<_TYPE_, _SIZE_>::resize_array_(_SIZE_ new_size, RESIZE_OPTIONS opt)
80{
81 assert(new_size >= 0);
82
83 if (mem_ == nullptr)
84 {
85 if (!span_.empty()) // ref_data! We may pass here if just changing the shape of a tab
86 {
87 assert(size_array() == new_size);
88 return; // Nothing to do ...
89 }
90 // We avoid allocating for empty arrays ... those are typically situations where we will resize (with a non
91 // null size) just after, so the real allocation will be made at that point.
92 if(new_size == 0) return;
93
94 // First allocation - memory space should really be malloc'd:
// NOTE(review): new_size is narrowed to int for the Trav pool here without the "No 64b Trav" assert
// that guards the resize path further down -- confirm this is intended.
95 if(storage_type_ == STORAGE::TEMP_STORAGE)
96 mem_ = TRUSTTravPool<_TYPE_>::GetFreeBlock((int)new_size);
97 else
98 mem_ = std::make_shared<Vector_>(Vector_(new_size));
99
// The span covers the whole freshly obtained block.
100 span_ = Span_(*mem_);
101
102 // We should never have to worry about device allocation here:
103 if (isAllocatedOnDevice(mem_->data()))
104 data_location_ = std::make_shared<DataLocation>(DataLocation::Device);
105 else
106 data_location_ = std::make_shared<DataLocation>(DataLocation::HostOnly);
107
108 if(opt == RESIZE_OPTIONS::COPY_INIT)
109 operator=((_TYPE_)0); // To initialize on device or host
110 //std::fill(mem_->begin(), mem_->end(), (_TYPE_) 0);
111 }
112 else
113 {
114 // Array is already allocated, we want to resize:
115 // array must not be shared! (also checked in resize_array()) ... but, still, we allow passing here (i.e. no assert)
116 // only if we keep the same size_array(). This is for example invoked by TRUSTTab when just changing the overall shape of
117 // the array without modifying the total number of elems ...
118 _SIZE_ sz_arr = size_array();
119 if(new_size != sz_arr) // Yes, we compare to the span's size
120 {
121 assert(ref_count() == 1); // from here on, we *really* should not be shared
122
123 if (storage_type_ == STORAGE::TEMP_STORAGE)
124 {
125 // No 64b Trav:
126 assert( (std::is_same<trustIdType, int>::value || !std::is_same<_SIZE_, trustIdType>::value) );
127
128 // Resize of a Trav: if the underlying mem_ is already big enough, just update the span, and possibly fill with 0
129 // else, really increase memory allocation using the TRUSTTravPool.
130 _SIZE_ mem_sz = (_SIZE_)mem_->size();
131 if (new_size <= mem_sz)
132 {
133 // Cheat, simply update the span (up or down)
134 span_ = Span_(span_.begin(), span_.begin()+new_size);
135 // Possibly set to 0 extended part:
136 if (new_size > sz_arr && opt == RESIZE_OPTIONS::COPY_INIT)
137 {
// The fill below runs on the host pointer, hence the ensureDataOnHost() call first.
138 ensureDataOnHost();
139 std::fill(span_.begin() + sz_arr, span_.end(), (_TYPE_) 0);
140 }
141 }
142 else // Real size increase of the underlying std::vector
143 {
144 // ResizeBlock
145 mem_ = TRUSTTravPool<_TYPE_>::ResizeBlock(mem_, (int)new_size);
146 span_ = Span_(*mem_);
147 if (opt == RESIZE_OPTIONS::COPY_INIT)
148 {
149 ensureDataOnHost();
150 std::fill(span_.begin() + sz_arr, span_.end(), (_TYPE_) 0);
151 }
152 }
153 }
154 else // Normal (non Trav) arrays
155 {
156#ifndef LATATOOLS
157 bool onDevice = isAllocatedOnDevice(*this);
158 if (onDevice)
159 {
160 // ToDo Kokkos: resize on device is not optimal for the moment as it makes 2 copy D2H and H2D
161 copyFromDevice(*this); // Force a copy to the host
162 _TYPE_ * prev_ad = span_.data(); // before resize!
163 deleteOnDevice(prev_ad, sz_arr); // Delete current block
164 set_data_location(DataLocation::HostOnly);
165 }
166#endif
167 mem_->resize(new_size);
168 span_ = Span_(*mem_);
169 // Possibly set to 0 extended part, since we have a custom Vector allocator not doing it by default (TVAlloc):
170 if (new_size > sz_arr && opt == RESIZE_OPTIONS::COPY_INIT)
171 std::fill(span_.begin()+sz_arr, span_.end(), (_TYPE_) 0);
172#ifndef LATATOOLS
173 if (onDevice)
174 {
175 // Re-allocate and copy on device:
176 mapToDevice(*this);
177 }
178#endif
179 }
180 }
181 }
182}
183
184/** Copies the elements source[first_element_source + i] into (*this)[first_element_dest + i] for 0 <= i < nb_elements.
185* The other elements of (*this) are left unchanged.
186
187* @param source the array to copy from, must be a different object than *this (checked by assert).
188* @param nb_elements number of elements to copy, nb_elements >= -1. Any negative value means "copy the whole source array".
189* @param first_element_dest index of the first written element in *this, must be >= 0.
190* @param first_element_source index of the first read element in source, must be >= 0.
191* @return *this
192* @note Range consistency (source range within source, destination range within *this) is only checked by assert.
193*/
194template <typename _TYPE_, typename _SIZE_>
195TRUSTArray<_TYPE_, _SIZE_>& TRUSTArray<_TYPE_, _SIZE_>::inject_array(const TRUSTArray& source, _SIZE_ nb_elements, _SIZE_ first_element_dest, _SIZE_ first_element_source)
196{
197 assert(&source != this && nb_elements >= -1);
198 assert(first_element_dest >= 0 && first_element_source >= 0);
199
200 if (nb_elements < 0) nb_elements = source.size_array();
201
202 assert(first_element_source + nb_elements <= source.size_array());
203 assert(first_element_dest + nb_elements <= size_array());
204
205 if (nb_elements > 0)
206 {
// The result of checkDataOnDevice(source) selects between the Kokkos kernel and the host memcpy below.
207 bool kernelOnDevice = checkDataOnDevice(source);
208#ifndef LATATOOLS
// The GPU timer is only used when statistics().get_use_gpu() is true and more than 100 elements are copied.
209 if (statistics().get_use_gpu() && nb_elements>100) start_gpu_timer(__KERNEL_NAME__);
210#endif
211 if (kernelOnDevice)
212 {
213#ifndef LATATOOLS
214 const auto addr_source = source.view_ro<1>();
215 auto addr_dest = view_rw<1>();
216 Kokkos::parallel_for(__KERNEL_NAME__, nb_elements, KOKKOS_LAMBDA(const _SIZE_ i) { addr_dest[first_element_dest+i] = addr_source[first_element_source+i]; });
217#endif
218 }
219 else
// NOTE(review): listing line 220 (presumably the opening brace of this else branch) is missing from this listing.
221 // PL: memcpy is used because it is REALLY faster (10% faster on RNR_G20)
222 const _TYPE_ * addr_source = source.span_.data() + first_element_source;
223 _TYPE_ * addr_dest = span_.data() + first_element_dest;
224 memcpy(addr_dest, addr_source, nb_elements * sizeof(_TYPE_));
225#ifdef TRUST_USE_GPU
// NOTE(review): listing line 226 is missing here; presumably a condition guarding the warning below -- confirm.
227 Cerr << "[Host] Filling a large TRUSTArray (" << nb_elements << " items) which is slow during a GPU run! Set a breakpoint to fix." << finl;
228#endif
229 }
230#ifndef LATATOOLS
231 if (statistics().get_use_gpu() && nb_elements>100) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
232#endif
233 }
234 return *this;
235}
236
237template<typename _TYPE_, typename _SIZE_>
238template<typename _TAB_>
239void TRUSTArray<_TYPE_, _SIZE_>::ref_conv_helper_(_TAB_& out) const
240{
241 out.detach_array();
242 // Same as 'attach_array()', but since we are crossing templates parameters, we can not call it directly:
243 out.mem_ = mem_;
244 out.span_ = span_;
245 out.data_location_ = data_location_;
246 out.storage_type_ = storage_type_;
247}
248
249/*! Conversion methods - from a small array (_SIZE_=int) of TID (_TYPE_=trustIdType), return a big one (_SIZE_=trustIdType).
250 * No data copied! This behaves somewhat like a ref_array. Used in LATA stuff notably. Not implemented for _TYPE_=double or float
251 * (because never needed).
 *
 * NOTE(review): the signature line (listing line 254) is missing from this listing; the comment above
 * indicates a ref_as_big() specialization -- confirm the exact template arguments.
252 */
253template<>
255{
// Delegates to the cross-template helper, which is given 'out' as its only argument.
256 ref_conv_helper_(out);
257}
258
// Generic fallback: asserts in debug and then calls Process::exit() with the message below.
// NOTE(review): the signature line (listing line 260) is missing from this listing; the message
// below names TRUSTArray<>::ref_as_big() -- confirm.
259template<typename _TYPE_, typename _SIZE_>
261{
262 // Should not be used for anything else than the specialisations listed above.
263 assert(false);
264 Process::exit("TRUSTArray<>::ref_as_big() should not be used with those current template types.");
265}
266
267/*! Conversion methods - from a big array (_SIZE_=trustIdType), return a small one (_SIZE_=int).
268 * Overflow is detected in debug if array is too big to be fit into _SIZE_=int.
269 * No data copied! This behaves somewhat like a ref_array. Used in LATA stuff and FT notably.
 *
 * NOTE(review): the signature line (listing line 272) is missing from this listing; the comment above
 * indicates a ref_as_small() specialization -- confirm the exact template arguments.
270 */
271template<>
273{
274 // Check size fits in 32bits:
// The comparison is strict, so a size equal to the int maximum is rejected too.
275 assert(size_array() < std::numeric_limits<int>::max());
276 ref_conv_helper_(out);
277}
278
// Second specialization with the same body as the one just above: debug-only size check, then
// the cross-template helper is called on 'out'.
// NOTE(review): the signature line (listing line 280) is missing from this listing; presumably
// another ref_as_small() specialization -- confirm the exact template arguments.
279template<>
281{
282 // Check size fits in 32bits:
283 assert(size_array() < std::numeric_limits<int>::max());
284 ref_conv_helper_(out);
285}
286
287template<typename _TYPE_, typename _SIZE_>
289{
290 // Should no be used for anything else than specialisations listed above.
291 assert(false);
292 Process::exit("TRUSTArray<>::ref_as_big() should not be used with those current template types.");
293}
294
295/*! Conversion from a BigArrOfTID to an ArrOfInt. Careful, it always does a copy! It is your responsibility
296 * to invoke it only when necessary (typically you should avoid this when trustIdType == int ...)
 *
 * Size and value ranges are only checked by assert.
 * NOTE(review): the signature line (listing line 299) is missing from this listing -- confirm the exact
 * template arguments of this specialization.
297 */
298template<>
300{
301 // Not too big?
302 assert(size_array() < std::numeric_limits<int>::max());
303 int sz_int = (int)size_array(); // we may cast!
304 out.resize_array_(sz_int); // the one with '_' skipping the checks, so we can be called from Tab too
305 if (sz_int)
306 {
307 // All values within int range?
// Both comparisons are strict, so the int minimum and maximum themselves are rejected.
308 assert(( *std::min_element(span_.begin(), span_.end()) > std::numeric_limits<int>::min() ));
309 assert(( *std::max_element(span_.begin(), span_.end()) < std::numeric_limits<int>::max() ));
310 }
311 // Yes, copy:
// Executed for empty arrays too, where the range is empty.
312 std::copy(span_.begin(), span_.end(), out.span_.begin());
313}
314
// Generic fallback: asserts in debug and then calls Process::exit() with the message below.
// NOTE(review): the signature line (listing line 316) is missing from this listing; the message
// below names TRUSTArray<>::from_tid_to_int() -- confirm.
315template<typename _TYPE_, typename _SIZE_>
317{
318 // Should not be used for anything else than the specialisations listed above.
319 assert(false);
320 Process::exit("TRUSTArray<>::from_tid_to_int() should not be used with those current template types.");
321}
322
323
324/** Fills the array with the value x given as parameter (x is assigned to every entry of the array).
 *
 * The assignment runs either in a Kokkos kernel or in a plain host loop, depending on the result of
 * checkDataOnDevice(). Returns *this.
 * NOTE(review): the signature line (listing line 327) is missing from this listing; presumably
 * operator=(_TYPE_ x) -- confirm.
325 */
326template <typename _TYPE_, typename _SIZE_>
328{
329 const _SIZE_ size = size_array();
330 bool kernelOnDevice = checkDataOnDevice();
331#ifndef LATATOOLS
// The GPU timer is only used when statistics().get_use_gpu() is true and the array has more than 100 entries.
332 if (statistics().get_use_gpu() && size>100) start_gpu_timer(__KERNEL_NAME__);
333#endif
334 if (kernelOnDevice)
335 {
336#ifndef LATATOOLS
337 auto data = view_rw<1>();
// The index type is _SIZE_, like in the other kernels of this file, so it is not narrowed to int.
338 Kokkos::parallel_for(__KERNEL_NAME__, size, KOKKOS_LAMBDA(const _SIZE_ i) { data[i] = x; });
339#endif
340 }
341 else
342 {
343 _TYPE_ *data = span_.data();
344 for (_SIZE_ i = 0; i < size; i++) data[i] = x;
345 }
346#ifndef LATATOOLS
347 if (statistics().get_use_gpu() && size>100) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
348#endif
349 return *this;
350}
351
352/** Element-wise addition over all entries of the array: y must have the same size as *this (checked by assert).
 *
 * Returns *this.
 * NOTE(review): the signature line (listing line 355) is missing from this listing; presumably
 * operator+=(const TRUSTArray& y) -- confirm.
353 */
354template <typename _TYPE_, typename _SIZE_>
356{
357 assert(size_array()==y.size_array());
358 _SIZE_ size = size_array();
// The result of checkDataOnDevice(y) selects between the Kokkos kernel and the host loop below.
359 bool kernelOnDevice = checkDataOnDevice(y);
360#ifndef LATATOOLS
361 if (statistics().get_use_gpu() && size>100) start_gpu_timer(__KERNEL_NAME__);
362#endif
363 if (kernelOnDevice)
364 {
365#ifndef LATATOOLS
366 const auto dy = y.view_ro<1>();
367 auto dx = view_rw<1>();
368 Kokkos::parallel_for(__KERNEL_NAME__, size, KOKKOS_LAMBDA(const _SIZE_ i) { dx[i] += dy[i]; });
369#endif
370 }
371 else
372 {
373 const _TYPE_* dy = y.span_.data();
374 _TYPE_* dx = span_.data();
375 for (_SIZE_ i = 0; i < size; i++) dx[i] += dy[i];
376 }
377#ifndef LATATOOLS
378 if (statistics().get_use_gpu() && size>100) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
379#endif
380 return *this;
381}
382
383/** Adds the same value dy to every entry of the array.
 *
 * Returns *this.
 * NOTE(review): the signature line (listing line 386) is missing from this listing; presumably
 * operator+=(const _TYPE_ dy) -- confirm.
384 */
385template <typename _TYPE_, typename _SIZE_>
387{
388 _SIZE_ size = size_array();
// The result of checkDataOnDevice() selects between the Kokkos kernel and the host loop below.
389 bool kernelOnDevice = checkDataOnDevice();
390#ifndef LATATOOLS
391 if (statistics().get_use_gpu() && size>100) start_gpu_timer(__KERNEL_NAME__);
392#endif
393 if (kernelOnDevice)
394 {
395#ifndef LATATOOLS
396 auto data = view_rw<1>();
397 Kokkos::parallel_for(__KERNEL_NAME__, size, KOKKOS_LAMBDA(const _SIZE_ i) { data[i] += dy; });
398#endif
399 }
400 else
401 {
402 _TYPE_ *data = span_.data();
403 for(_SIZE_ i = 0; i < size; i++) data[i] += dy;
404 }
405#ifndef LATATOOLS
406 if (statistics().get_use_gpu() && size>100) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
407#endif
408 return *this;
409}
410
411/** Element-wise subtraction over all entries of the array: y must have the same size as *this (checked by assert).
 *
 * Returns *this.
 * NOTE(review): the signature line (listing line 414) is missing from this listing; presumably
 * operator-=(const TRUSTArray& y) -- confirm.
412 */
413template <typename _TYPE_, typename _SIZE_>
415{
416 assert(size_array() == y.size_array());
417 _SIZE_ size = size_array();
// The result of checkDataOnDevice(y) selects between the Kokkos kernel and the host loop below.
418 bool kernelOnDevice = checkDataOnDevice(y);
419#ifndef LATATOOLS
420 if (statistics().get_use_gpu() && size>100) start_gpu_timer(__KERNEL_NAME__);
421#endif
422 if (kernelOnDevice)
423 {
424#ifndef LATATOOLS
425 auto data = view_rw<1>();
426 const auto data_y = y.view_ro<1>();
427 Kokkos::parallel_for(__KERNEL_NAME__, size, KOKKOS_LAMBDA(const _SIZE_ i) { data[i] -= data_y[i]; });
428#endif
429 }
430 else
431 {
432 _TYPE_ * data = span_.data();
433 const _TYPE_ * data_y = y.span_.data();
434 for (_SIZE_ i = 0; i < size; i++) data[i] -= data_y[i];
435 }
436#ifndef LATATOOLS
437 if (statistics().get_use_gpu() && size>100) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
438#endif
439 return *this;
440}
441
442/** Subtracts the same value dy from every entry.
 *
 * Returns *this.
 * NOTE(review): the signature line (listing line 445) is missing from this listing; presumably
 * operator-=(const _TYPE_ dy) -- confirm.
443 */
444template <typename _TYPE_, typename _SIZE_>
446{
// Implemented by forwarding the negated value to operator+=.
447 operator+=(-dy);
448 return *this;
449}
450
451/** Multiplies every entry by dy.
 *
 * Returns *this.
 * NOTE(review): the signature line (listing line 454) is missing from this listing; presumably
 * operator*=(const _TYPE_ dy) -- confirm.
452 */
453template <typename _TYPE_, typename _SIZE_>
455{
456 _SIZE_ size = size_array();
// The result of checkDataOnDevice() selects between the Kokkos kernel and the host loop below.
457 bool kernelOnDevice = checkDataOnDevice();
458#ifndef LATATOOLS
459 if (statistics().get_use_gpu() && size>100) start_gpu_timer(__KERNEL_NAME__);
460#endif
461 if (kernelOnDevice)
462 {
463#ifndef LATATOOLS
464 auto data = view_rw<1>();
465 Kokkos::parallel_for(__KERNEL_NAME__, size, KOKKOS_LAMBDA(const _SIZE_ i) { data[i] *= dy; });
466#endif
467 }
468 else
469 {
470 _TYPE_ *data = span_.data();
471 for(_SIZE_ i=0; i < size; i++) data[i] *= dy;
472 }
473#ifndef LATATOOLS
474 if (statistics().get_use_gpu() && size>100) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
475#endif
476 return *this;
477}
478
479/** Divides every entry by dy (not for TRUSTArray<int>).
 *
 * Implemented as a multiplication by 1/dy. Returns *this.
 * NOTE(review): the signature line (listing line 482) is missing from this listing; presumably
 * operator/=(const _TYPE_ dy) -- confirm.
480 */
481template <typename _TYPE_, typename _SIZE_>
483{
// NOTE(review): this is a bare 'throw;' -- when no exception is being handled it ends in std::terminate()
// without any message. A plain 'if' (not 'if constexpr') is used, so the line below is still compiled
// for integral _TYPE_.
484 if (std::is_integral<_TYPE_>::value) throw; // division should not be called on integral types.
485 operator*=(1/dy);
486 return *this;
487}
488
489// Explicit instantiations, so that the template methods defined in this .cpp are generated
490template class TRUSTArray<double, int>;
491template class TRUSTArray<int, int>;
492template class TRUSTArray<float, int>;
493
// Additional instantiations involving trustIdType, only compiled when INT_is_64_ == 2.
// NOTE(review): listing lines 495 and 497 are missing from this listing; more instantiations may exist -- confirm.
494#if INT_is_64_ == 2
496template class TRUSTArray<int, trustIdType>;
498template class TRUSTArray<trustIdType, int>;
499template class TRUSTArray<float, trustIdType>;
500#endif
virtual int get(int *ob, std::streamsize n)
Definition Entree.cpp:222
friend class Entree
Definition Objet_U.h:76
friend class Sortie
Definition Objet_U.h:75
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
Definition Process.cpp:455
static int je_suis_maitre()
renvoie 1 si on est sur le processeur maitre du groupe courant (c'est a dire me() == 0),...
Definition Process.cpp:86
virtual int put(const unsigned *ob, std::streamsize n, std::streamsize nb_colonnes=1)
Definition Sortie.cpp:101
Represents an array of int/int64/double/... values.
Definition TRUSTArray.h:81
void from_tid_to_int(TRUSTArray< int, int > &out) const
void resize_array_(_SIZE_ n, RESIZE_OPTIONS opt=RESIZE_OPTIONS::COPY_INIT)
TRUSTArray & operator*=(const _TYPE_ dy)
_SIZE_ size_array() const
TRUSTArray & operator/=(const _TYPE_ dy)
TRUSTArray & inject_array(const TRUSTArray &source, _SIZE_ nb_elements=-1, _SIZE_ first_element_dest=0, _SIZE_ first_element_source=0)
void ref_as_big(TRUSTArray< _TYPE_, trustIdType > &out) const
TRUSTArray & operator+=(const TRUSTArray &y)
TRUSTArray & operator-=(const TRUSTArray &y)
void ref_as_small(TRUSTArray< _TYPE_, int > &out) const
std::vector< int, TVAlloc< int > > Vector_
Definition TRUSTArray.h:101
tcb::span< int > Span_
Definition TRUSTArray.h:102
TRUSTArray & operator=(const TRUSTArray &)
Entree & readOn(Entree &is) override
Lecture d'un Objet_U sur un flot d'entree Methode a surcharger.
friend class TRUSTArray
Definition TRUSTArray.h:108
Sortie & printOn(Sortie &os) const override
Ecriture de l'objet sur un flot de sortie Methode a surcharger.
static block_ptr_t GetFreeBlock(int sz)
static block_ptr_t ResizeBlock(block_ptr_t p, int new_sz)
static bool warning(trustIdType nb_items)