TRUST 1.9.8
HPC thermohydraulic platform
Loading...
Searching...
No Matches
MD_Vector_std_tools.cpp
1/****************************************************************************
2* Copyright (c) 2025, CEA
3* All rights reserved.
4*
5* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9*
10* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
11* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
12* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*
14*****************************************************************************/
15
16#include <MD_Vector_std.h>
17#include <Device.h>
18#include <sstream>
19
20#ifndef LATATOOLS
21#include <Perf_counters.h>
22#endif
23
24/**************************************************************************************/
25/* Warning! These kernels are critical for the performance of several TRUST applications!
26 * Do not change the implementation without running performance regression tests!
27 * You are warned.
28 **************************************************************************************/
29
30#ifndef LATATOOLS
31template<typename ExecSpace, typename _TYPE_, VECT_ITEMS_TYPE _ITEM_TYPE_>
32void vect_items_generic_kernel(int line_size, int idx, int idx_end_of_list, const Static_Int_Lists& list, TRUSTArray<_TYPE_>& vect, TRUSTArray<_TYPE_>& buffer)
33{
34 static constexpr bool IS_READ = (_ITEM_TYPE_ == VECT_ITEMS_TYPE::READ), IS_WRITE = (_ITEM_TYPE_ == VECT_ITEMS_TYPE::WRITE),
35 IS_ADD = (_ITEM_TYPE_ == VECT_ITEMS_TYPE::ADD), IS_MAX = (_ITEM_TYPE_ == VECT_ITEMS_TYPE::MAX);
36 static constexpr bool kernelOnDevice = !std::is_same<ExecSpace, Kokkos::DefaultHostExecutionSpace>::value;
37
38 const int bloc_size = 1;
39 const int n = line_size * bloc_size;
40 Kokkos::RangePolicy<ExecSpace> policy(idx, idx_end_of_list);
41 auto items_to_process_view = list.get_data().template view_ro<1, ExecSpace>().data();
42 if (IS_READ)
43 {
44 auto buffer_view = buffer.template view_wo<1, ExecSpace>().data();
45 auto vect_view = vect.template view_ro<1, ExecSpace>().data();
46 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
47 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
48 const int item)
49 {
50 // Indice de l'item geometrique a copier (ou du premier item du bloc)
51 int premier_item_bloc = items_to_process_view[item];
52 // Adresse des elements a copier dans le vecteur
53 for (int j = 0; j < n; j++)
54 {
55 int ii = (item - idx) * n + j;
56 int jj = premier_item_bloc * line_size + j;
57 buffer_view[ii] = vect_view[jj];
58 }
59 });
60 }
61 else
62 {
63 auto buffer_view = buffer.template view_ro<1, ExecSpace>().data();
64 auto vect_view = vect.template view_rw<1, ExecSpace>().data();
65 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
66 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
67 const int item)
68 {
69 // Indice de l'item geometrique a copier (ou du premier item du bloc)
70 int premier_item_bloc = items_to_process_view[item];
71 // Adresse des elements a copier dans le vecteur
72 for (int j = 0; j < n; j++)
73 {
74 int ii = (item - idx) * n + j;
75 int jj = premier_item_bloc * line_size + j;
76 if (IS_WRITE) vect_view[jj] = buffer_view[ii];
77 else if (IS_ADD) vect_view[jj] += buffer_view[ii];
78 else if (IS_MAX)
79 {
80 _TYPE_ dest = vect_view[jj];
81 _TYPE_ src = buffer_view[ii];
82 vect_view[jj] = (dest > src) ? dest : src;
83 }
84 }
85 });
86 }
87 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
88}
89#endif
90
91template<typename _TYPE_, VECT_ITEMS_TYPE _ITEM_TYPE_>
92void vect_items_generic(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<_TYPE_>& vect, Schema_Comm_Vecteurs& buffers)
93{
94 assert(line_size > 0);
95 const ArrOfInt& index = list.get_index();
96 const int nb_voisins = list.get_nb_lists();
97 for (int i_voisin = 0; i_voisin < nb_voisins; i_voisin++)
98 {
99 // Indice dans list.get_data() de la fin de la liste d'items/blocs pour ce voisin:
100 const int idx = index[i_voisin];
101 const int idx_end_of_list = index[i_voisin + 1];
102 // Nombre d'elements de tableau a envoyer/recevoir de ce voisin
103 const int nb_elems = (idx_end_of_list - idx) * line_size;
104 if (nb_elems>0)
105 {
106 TRUSTArray<_TYPE_>& buffer = buffers.get_next_area_template<_TYPE_>(voisins[i_voisin], nb_elems);
107 assert(nb_elems == buffer.size_array());
108 assert(idx_end_of_list <= list.get_data().size_array());
109 bool kernelOnDevice = vect.checkDataOnDevice();
110 if (kernelOnDevice)
111 vect_items_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _ITEM_TYPE_>(line_size, idx, idx_end_of_list, list, vect, buffer);
112 else
113 vect_items_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _ITEM_TYPE_>(line_size, idx, idx_end_of_list, list, vect, buffer);
114 }
115 }
116}
117template void vect_items_generic<double, VECT_ITEMS_TYPE::READ>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<double>& vect, Schema_Comm_Vecteurs& buffers);
118template void vect_items_generic<double, VECT_ITEMS_TYPE::WRITE>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<double>& vect, Schema_Comm_Vecteurs& buffers);
119template void vect_items_generic<double, VECT_ITEMS_TYPE::ADD>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<double>& vect, Schema_Comm_Vecteurs& buffers);
120template void vect_items_generic<double, VECT_ITEMS_TYPE::MAX>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<double>& vect, Schema_Comm_Vecteurs& buffers);
121template void vect_items_generic<float, VECT_ITEMS_TYPE::READ>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<float>& vect, Schema_Comm_Vecteurs& buffers);
122template void vect_items_generic<float, VECT_ITEMS_TYPE::WRITE>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<float>& vect, Schema_Comm_Vecteurs& buffers);
123template void vect_items_generic<float, VECT_ITEMS_TYPE::ADD>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<float>& vect, Schema_Comm_Vecteurs& buffers);
124template void vect_items_generic<float, VECT_ITEMS_TYPE::MAX>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<float>& vect, Schema_Comm_Vecteurs& buffers);
125template void vect_items_generic<int, VECT_ITEMS_TYPE::READ>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<int>& vect, Schema_Comm_Vecteurs& buffers);
126template void vect_items_generic<int, VECT_ITEMS_TYPE::WRITE>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<int>& vect, Schema_Comm_Vecteurs& buffers);
127template void vect_items_generic<int, VECT_ITEMS_TYPE::ADD>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<int>& vect, Schema_Comm_Vecteurs& buffers);
128template void vect_items_generic<int, VECT_ITEMS_TYPE::MAX>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<int>& vect, Schema_Comm_Vecteurs& buffers);
129#if INT_is_64_ == 2
130template void vect_items_generic<trustIdType, VECT_ITEMS_TYPE::READ>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<trustIdType>& vect, Schema_Comm_Vecteurs& buffers);
131template void vect_items_generic<trustIdType, VECT_ITEMS_TYPE::WRITE>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<trustIdType>& vect, Schema_Comm_Vecteurs& buffers);
132template void vect_items_generic<trustIdType, VECT_ITEMS_TYPE::ADD>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<trustIdType>& vect, Schema_Comm_Vecteurs& buffers);
133template void vect_items_generic<trustIdType, VECT_ITEMS_TYPE::MAX>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, TRUSTArray<trustIdType>& vect, Schema_Comm_Vecteurs& buffers);
134#endif
135
136
137#ifndef LATATOOLS
138template<typename ExecSpace, typename _TYPE_, VECT_BLOCS_TYPE _ITEM_TYPE_>
139void vect_blocs_generic_kernel(int line_size, int idx, int idx_end_of_list, const Static_Int_Lists& list, TRUSTArray<_TYPE_>& vect, TRUSTArray<_TYPE_>& buffer)
140{
141 static constexpr bool IS_READ = (_ITEM_TYPE_ == VECT_BLOCS_TYPE::READ), IS_WRITE = (_ITEM_TYPE_ == VECT_BLOCS_TYPE::WRITE), IS_ADD = (_ITEM_TYPE_ == VECT_BLOCS_TYPE::ADD);
142 static constexpr bool kernelOnDevice = !std::is_same<ExecSpace, Kokkos::DefaultHostExecutionSpace>::value;
143
144 const TRUSTArray<int>& items_to_process = list.get_data();
145 int ii_base = 0;
146 for (int item = idx; item < idx_end_of_list; item += 2)
147 {
148 // Indice de l'item geometrique a copier (ou du premier item du bloc)
149 int premier_item_bloc = items_to_process[item];
150 // For blocs, the array contains begin_bloc, end_bloc, begin_bloc, end_bloc...
151 const int dernier_item_bloc = items_to_process[item + 1];
152 const int bloc_size = dernier_item_bloc - premier_item_bloc;
153 // Adresse des elements a copier dans le vecteur
154#ifndef TRUST_USE_GPU
155 assert(premier_item_bloc >= 0 && bloc_size > 0 &&
156 (premier_item_bloc + bloc_size) * line_size <= vect.size_array());
157#endif
158 const int n = line_size * bloc_size;
159 Kokkos::RangePolicy<ExecSpace> policy(0, n);
160 if (IS_READ)
161 {
162 auto buffer_view = buffer.template view_wo<1, ExecSpace>().data();
163 auto vect_view = vect.template view_ro<1, ExecSpace>().data();
164 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
165 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
166 const int j)
167 {
168 int ii = ii_base * line_size + j;
169 int jj = premier_item_bloc * line_size + j;
170 buffer_view[ii] = vect_view[jj];
171 });
172 }
173 else
174 {
175 auto buffer_view = buffer.template view_ro<1, ExecSpace>().data();
176 auto vect_view = vect.template view_rw<1, ExecSpace>().data();
177 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
178 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
179 const int j)
180 {
181 int ii = ii_base * line_size + j;
182 int jj = premier_item_bloc * line_size + j;
183 if (IS_WRITE) vect_view[jj] = buffer_view[ii];
184 else if (IS_ADD) vect_view[jj] += buffer_view[ii];
185 });
186 }
187 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
188 ii_base += bloc_size;
189 }
190}
191#endif
192
193template<typename _TYPE_, VECT_BLOCS_TYPE _ITEM_TYPE_>
194void vect_blocs_generic(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<_TYPE_>& vect, Schema_Comm_Vecteurs& buffers)
195{
196 assert(line_size > 0);
197 const ArrOfInt& index = list.get_index();
198 const int nb_voisins = list.get_nb_lists();
199 for (int i_voisin = 0; i_voisin < nb_voisins; i_voisin++)
200 {
201 // Nombre d'elements de tableau a envoyer/recevoir de ce voisin
202 const int nb_elems = nb_items_par_voisin[i_voisin] * line_size;
203 if (nb_elems > 0)
204 {
205 // Indice dans list.get_data() de la fin de la liste d'items/blocs pour ce voisin:
206 const int idx = index[i_voisin];
207 const int idx_end_of_list = index[i_voisin + 1];
208 TRUSTArray<_TYPE_>& buffer = buffers.get_next_area_template<_TYPE_>(voisins[i_voisin], nb_elems);
209 assert(nb_elems == buffer.size_array());
210 assert(idx_end_of_list <= list.get_data().size_array());
211 bool kernelOnDevice = vect.checkDataOnDevice();
212 if (kernelOnDevice)
213 vect_blocs_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _ITEM_TYPE_>(line_size, idx, idx_end_of_list, list, vect, buffer);
214 else
215 vect_blocs_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _ITEM_TYPE_>(line_size, idx, idx_end_of_list, list, vect, buffer);
216 }
217 }
218}
219
220template void vect_blocs_generic<double, VECT_BLOCS_TYPE::READ>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<double>& vect, Schema_Comm_Vecteurs& buffers);
221template void vect_blocs_generic<double, VECT_BLOCS_TYPE::WRITE>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<double>& vect, Schema_Comm_Vecteurs& buffers);
222template void vect_blocs_generic<double, VECT_BLOCS_TYPE::ADD>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<double>& vect, Schema_Comm_Vecteurs& buffers);
223template void vect_blocs_generic<float, VECT_BLOCS_TYPE::READ>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<float>& vect, Schema_Comm_Vecteurs& buffers);
224template void vect_blocs_generic<float, VECT_BLOCS_TYPE::WRITE>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<float>& vect, Schema_Comm_Vecteurs& buffers);
225template void vect_blocs_generic<float, VECT_BLOCS_TYPE::ADD>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<float>& vect, Schema_Comm_Vecteurs& buffers);
226template void vect_blocs_generic<int, VECT_BLOCS_TYPE::READ>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<int>& vect, Schema_Comm_Vecteurs& buffers);
227template void vect_blocs_generic<int, VECT_BLOCS_TYPE::WRITE>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<int>& vect, Schema_Comm_Vecteurs& buffers);
228template void vect_blocs_generic<int, VECT_BLOCS_TYPE::ADD>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<int>& vect, Schema_Comm_Vecteurs& buffers);
229#if INT_is_64_ == 2
230template void vect_blocs_generic<trustIdType, VECT_BLOCS_TYPE::READ>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<trustIdType>& vect, Schema_Comm_Vecteurs& buffers);
231template void vect_blocs_generic<trustIdType, VECT_BLOCS_TYPE::WRITE>(const int line_size, const ArrOfInt& voisins, const Static_Int_Lists& list, const ArrOfInt& nb_items_par_voisin, TRUSTArray<trustIdType>& vect, Schema_Comm_Vecteurs& buffers);
232#endif
TRUSTArray< _TYPE_ > & get_next_area_template(int pe, int array_size)
const ArrOfInt_t & get_index() const
int_t get_nb_lists() const
renvoie le nombre de listes stockees
const ArrOfInt_t & get_data() const
Represents an array of int/int64/double/... values.
Definition TRUSTArray.h:81
_SIZE_ size_array() const
_TYPE_ * data()