16#include <MD_Vector_std.h>
21#include <Perf_counters.h>
// Kernel helper templated on the Kokkos execution space, the element type and the operation.
// For each list position `item` in [idx, idx_end_of_list) it moves n = line_size * bloc_size
// values between `vect` and `buffer`:
//   - first variant: buffer[ii] = vect[jj] (buffer opened with view_wo, vect with view_ro);
//   - second variant: vect[jj] is overwritten (IS_WRITE), incremented (IS_ADD), or replaced
//     by the larger of vect[jj] and buffer[ii].
// NOTE(review): the branch selecting between the two variants is presumably `IS_READ`, and
// the last case presumably `IS_MAX` — confirm against the surrounding conditions.
31template<
typename ExecSpace,
typename _TYPE_, VECT_ITEMS_TYPE _ITEM_TYPE_>
// Compile-time flags derived from _ITEM_TYPE_, one per operation.
34 static constexpr bool IS_READ = (_ITEM_TYPE_ == VECT_ITEMS_TYPE::READ), IS_WRITE = (_ITEM_TYPE_ == VECT_ITEMS_TYPE::WRITE),
35 IS_ADD = (_ITEM_TYPE_ == VECT_ITEMS_TYPE::ADD), IS_MAX = (_ITEM_TYPE_ == VECT_ITEMS_TYPE::MAX);
// True for every execution space other than Kokkos::DefaultHostExecutionSpace.
36 static constexpr bool kernelOnDevice = !std::is_same<ExecSpace, Kokkos::DefaultHostExecutionSpace>::value;
// bloc_size is fixed to 1 here, so n equals line_size.
38 const int bloc_size = 1;
39 const int n = line_size * bloc_size;
// One parallel iteration per list position in [idx, idx_end_of_list).
40 Kokkos::RangePolicy<ExecSpace> policy(idx, idx_end_of_list);
// Item indices to process, taken from the list's data array.
41 auto items_to_process_view = list.
get_data().template view_ro<1, ExecSpace>().
data();
// First variant: buffer is the destination (view_wo), vect is the source (view_ro).
44 auto buffer_view = buffer.template view_wo<1, ExecSpace>().
data();
45 auto vect_view = vect.template view_ro<1, ExecSpace>().
data();
// The GPU timer is started only when statistics().get_use_gpu() is true.
46 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
47 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
// Item designated by this list entry; multiplied by line_size below to address vect.
51 int premier_item_bloc = items_to_process_view[item];
53 for (
int j = 0; j < n; j++)
// ii: position in buffer, counted from the start of this range (item - idx).
// jj: position in vect of component j of the listed item.
55 int ii = (item - idx) * n + j;
56 int jj = premier_item_bloc * line_size + j;
57 buffer_view[ii] = vect_view[jj];
// Second variant: buffer is the source (view_ro), vect is updated in place (view_rw).
63 auto buffer_view = buffer.template view_ro<1, ExecSpace>().
data();
64 auto vect_view = vect.template view_rw<1, ExecSpace>().
data();
65 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
66 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
70 int premier_item_bloc = items_to_process_view[item];
72 for (
int j = 0; j < n; j++)
// Same ii/jj addressing as in the first variant.
74 int ii = (item - idx) * n + j;
75 int jj = premier_item_bloc * line_size + j;
// WRITE overwrites vect with the buffer value; ADD accumulates it.
76 if (IS_WRITE) vect_view[jj] = buffer_view[ii];
77 else if (IS_ADD) vect_view[jj] += buffer_view[ii];
// Otherwise (presumably the MAX case — confirm): keep the larger of the two values.
80 _TYPE_ dest = vect_view[jj];
81 _TYPE_ src = buffer_view[ii];
82 vect_view[jj] = (dest > src) ? dest : src;
// The timer is stopped under the same get_use_gpu() condition; kernelOnDevice is passed along.
87 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
// Driver templated on the element type and the operation. For each neighbour i_voisin it
// reads the list range [index[i_voisin], index[i_voisin + 1]) and runs
// vect_items_generic_kernel on it, instantiated either for Kokkos::DefaultExecutionSpace or
// for Kokkos::DefaultHostExecutionSpace.
// NOTE(review): the condition choosing between the two execution spaces is not part of the
// lines documented here — confirm which criterion selects the device path.
91template<
typename _TYPE_, VECT_ITEMS_TYPE _ITEM_TYPE_>
// line_size must be strictly positive (checked in debug builds only).
94 assert(line_size > 0);
97 for (
int i_voisin = 0; i_voisin < nb_voisins; i_voisin++)
// Bounds of this neighbour's entries in the list.
100 const int idx = index[i_voisin];
101 const int idx_end_of_list = index[i_voisin + 1];
// Number of scalar values for this neighbour: one line of line_size values per list entry.
103 const int nb_elems = (idx_end_of_list - idx) * line_size;
// Same kernel, device execution space versus host execution space.
111 vect_items_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _ITEM_TYPE_>(line_size, idx, idx_end_of_list, list, vect, buffer);
113 vect_items_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _ITEM_TYPE_>(line_size, idx, idx_end_of_list, list, vect, buffer);
// Explicit instantiations of vect_items_generic: element types double, float and int,
// each combined with the READ, WRITE, ADD and MAX operations (12 instantiations).
117template void vect_items_generic<double, VECT_ITEMS_TYPE::READ>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<double>& vect,
Schema_Comm_Vecteurs& buffers);
118template void vect_items_generic<double, VECT_ITEMS_TYPE::WRITE>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<double>& vect,
Schema_Comm_Vecteurs& buffers);
119template void vect_items_generic<double, VECT_ITEMS_TYPE::ADD>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<double>& vect,
Schema_Comm_Vecteurs& buffers);
120template void vect_items_generic<double, VECT_ITEMS_TYPE::MAX>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<double>& vect,
Schema_Comm_Vecteurs& buffers);
// float variants.
121template void vect_items_generic<float, VECT_ITEMS_TYPE::READ>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<float>& vect,
Schema_Comm_Vecteurs& buffers);
122template void vect_items_generic<float, VECT_ITEMS_TYPE::WRITE>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<float>& vect,
Schema_Comm_Vecteurs& buffers);
123template void vect_items_generic<float, VECT_ITEMS_TYPE::ADD>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<float>& vect,
Schema_Comm_Vecteurs& buffers);
124template void vect_items_generic<float, VECT_ITEMS_TYPE::MAX>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<float>& vect,
Schema_Comm_Vecteurs& buffers);
// int variants.
125template void vect_items_generic<int, VECT_ITEMS_TYPE::READ>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<int>& vect,
Schema_Comm_Vecteurs& buffers);
126template void vect_items_generic<int, VECT_ITEMS_TYPE::WRITE>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<int>& vect,
Schema_Comm_Vecteurs& buffers);
127template void vect_items_generic<int, VECT_ITEMS_TYPE::ADD>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<int>& vect,
Schema_Comm_Vecteurs& buffers);
128template void vect_items_generic<int, VECT_ITEMS_TYPE::MAX>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
TRUSTArray<int>& vect,
Schema_Comm_Vecteurs& buffers);
// Kernel helper for contiguous blocks, templated on the Kokkos execution space, the element
// type and the operation (READ, WRITE or ADD — no MAX flag is defined for blocks).
// List entries in [idx, idx_end_of_list) are consumed two at a time as (first, end) pairs;
// each pair describes a block of bloc_size = end - first items, i.e. line_size * bloc_size
// consecutive values of `vect`, copied to or combined from `buffer`.
// NOTE(review): the branch selecting between the copy-to-buffer variant and the
// update-vect variant is presumably `IS_READ` — confirm against the surrounding conditions.
138template<
typename ExecSpace,
typename _TYPE_, VECT_BLOCS_TYPE _ITEM_TYPE_>
// Compile-time flags derived from _ITEM_TYPE_, one per operation.
141 static constexpr bool IS_READ = (_ITEM_TYPE_ == VECT_BLOCS_TYPE::READ), IS_WRITE = (_ITEM_TYPE_ == VECT_BLOCS_TYPE::WRITE), IS_ADD = (_ITEM_TYPE_ == VECT_BLOCS_TYPE::ADD);
// True for every execution space other than Kokkos::DefaultHostExecutionSpace.
142 static constexpr bool kernelOnDevice = !std::is_same<ExecSpace, Kokkos::DefaultHostExecutionSpace>::value;
// Step of 2: each block is described by two consecutive list entries.
146 for (
int item = idx; item < idx_end_of_list; item += 2)
149 int premier_item_bloc = items_to_process[item];
// The second entry is used as an exclusive end: the block size is end - first.
151 const int dernier_item_bloc = items_to_process[item + 1];
152 const int bloc_size = dernier_item_bloc - premier_item_bloc;
// Debug-only check: the block is non-empty, starts at a non-negative item and fits in vect.
155 assert(premier_item_bloc >= 0 && bloc_size > 0 &&
156 (premier_item_bloc + bloc_size) * line_size <= vect.
size_array());
// One parallel iteration per scalar value of the block.
158 const int n = line_size * bloc_size;
159 Kokkos::RangePolicy<ExecSpace> policy(0, n);
// First variant: buffer is the destination (view_wo), vect is the source (view_ro).
162 auto buffer_view = buffer.template view_wo<1, ExecSpace>().
data();
163 auto vect_view = vect.template view_ro<1, ExecSpace>().
data();
// The GPU timer is started only when statistics().get_use_gpu() is true.
164 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
165 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
// ii: position in buffer, offset by the items already handled (ii_base).
// jj: position in vect, offset by the first item of the block.
// NOTE(review): `j` is presumably the parallel_for index in [0, n) — confirm.
168 int ii = ii_base * line_size + j;
169 int jj = premier_item_bloc * line_size + j;
170 buffer_view[ii] = vect_view[jj];
// Second variant: buffer is the source (view_ro), vect is updated in place (view_rw).
175 auto buffer_view = buffer.template view_ro<1, ExecSpace>().
data();
176 auto vect_view = vect.template view_rw<1, ExecSpace>().
data();
177 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
178 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
181 int ii = ii_base * line_size + j;
182 int jj = premier_item_bloc * line_size + j;
// WRITE overwrites vect with the buffer value; ADD accumulates it.
183 if (IS_WRITE) vect_view[jj] = buffer_view[ii];
184 else if (IS_ADD) vect_view[jj] += buffer_view[ii];
// The timer is stopped under the same get_use_gpu() condition; kernelOnDevice is passed along.
187 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, kernelOnDevice);
// Advance the buffer offset by the number of items just processed.
188 ii_base += bloc_size;
// Driver for block lists, templated on the element type and the operation. For each
// neighbour it reads the list range [index[i_voisin], index[i_voisin + 1]) and runs
// vect_blocs_generic_kernel on it, instantiated either for Kokkos::DefaultExecutionSpace or
// for Kokkos::DefaultHostExecutionSpace.
//
// line_size           number of values per item; must be > 0 (debug assert).
// voisins             neighbour array (NOTE(review): presumably the source of nb_voisins and
//                     of the per-neighbour buffer lookup — confirm).
// list                lists whose index array (get_index()) delimits each neighbour's entries.
// nb_items_par_voisin item count per neighbour; multiplied by line_size to get nb_elems.
// vect                array handed to the kernel together with the per-neighbour buffer.
// buffers             communication buffers (NOTE(review): presumably where `buffer` is
//                     obtained for each neighbour — confirm).
//
// NOTE(review): the condition choosing between the two execution spaces is not part of the
// lines documented here — confirm which criterion selects the device path.
193template<
typename _TYPE_, VECT_BLOCS_TYPE _ITEM_TYPE_>
194void vect_blocs_generic(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<_TYPE_>& vect,
Schema_Comm_Vecteurs& buffers)
196 assert(line_size > 0);
// index[i] .. index[i + 1] delimits the list entries of neighbour i.
197 const ArrOfInt& index = list.
get_index();
199 for (
int i_voisin = 0; i_voisin < nb_voisins; i_voisin++)
// Number of scalar values exchanged with this neighbour.
202 const int nb_elems = nb_items_par_voisin[i_voisin] * line_size;
206 const int idx = index[i_voisin];
207 const int idx_end_of_list = index[i_voisin + 1];
// Same kernel, device execution space versus host execution space.
213 vect_blocs_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _ITEM_TYPE_>(line_size, idx, idx_end_of_list, list, vect, buffer);
215 vect_blocs_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _ITEM_TYPE_>(line_size, idx, idx_end_of_list, list, vect, buffer);
// Explicit instantiations of vect_blocs_generic: double, float and int with READ, WRITE and
// ADD, followed by trustIdType with READ and WRITE only (no trustIdType/ADD instantiation
// appears in this group).
220template void vect_blocs_generic<double, VECT_BLOCS_TYPE::READ>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<double>& vect,
Schema_Comm_Vecteurs& buffers);
221template void vect_blocs_generic<double, VECT_BLOCS_TYPE::WRITE>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<double>& vect,
Schema_Comm_Vecteurs& buffers);
222template void vect_blocs_generic<double, VECT_BLOCS_TYPE::ADD>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<double>& vect,
Schema_Comm_Vecteurs& buffers);
// float variants.
223template void vect_blocs_generic<float, VECT_BLOCS_TYPE::READ>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<float>& vect,
Schema_Comm_Vecteurs& buffers);
224template void vect_blocs_generic<float, VECT_BLOCS_TYPE::WRITE>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<float>& vect,
Schema_Comm_Vecteurs& buffers);
225template void vect_blocs_generic<float, VECT_BLOCS_TYPE::ADD>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<float>& vect,
Schema_Comm_Vecteurs& buffers);
// int variants.
226template void vect_blocs_generic<int, VECT_BLOCS_TYPE::READ>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<int>& vect,
Schema_Comm_Vecteurs& buffers);
227template void vect_blocs_generic<int, VECT_BLOCS_TYPE::WRITE>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<int>& vect,
Schema_Comm_Vecteurs& buffers);
228template void vect_blocs_generic<int, VECT_BLOCS_TYPE::ADD>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<int>& vect,
Schema_Comm_Vecteurs& buffers);
// trustIdType variants (READ and WRITE).
230template void vect_blocs_generic<trustIdType, VECT_BLOCS_TYPE::READ>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<trustIdType>& vect,
Schema_Comm_Vecteurs& buffers);
231template void vect_blocs_generic<trustIdType, VECT_BLOCS_TYPE::WRITE>(
const int line_size,
const ArrOfInt& voisins,
const Static_Int_Lists& list,
const ArrOfInt& nb_items_par_voisin,
TRUSTArray<trustIdType>& vect,
Schema_Comm_Vecteurs& buffers);
TRUSTArray< _TYPE_ > & get_next_area_template(int pe, int array_size)
const ArrOfInt_t & get_index() const
int_t get_nb_lists() const
renvoie le nombre de listes stockees
const ArrOfInt_t & get_data() const
Represents an array of int/int64/double/... values.
_SIZE_ size_array() const