165 la_matrice.typer(
"Matrice_Bloc");
168 matrice.
get_bloc(0,0).typer(
"Matrice_Morse_Sym");
169 matrice.
get_bloc(0,1).typer(
"Matrice_Morse");
170 matrice.
get_bloc(1,0).typer(
"Matrice_Morse");
171 matrice.
get_bloc(1,1).typer(
"Matrice_Morse");
186 int ndeb = le_dom_VEF->premiere_face_int();
187 int nfin = le_dom_VEF->nb_faces_tot();
188 int nb_faces = le_dom_VEF->nb_faces();
191 ArrOfTID rang_voisinRR(n2);
192 ArrOfTID rang_voisinRV(n2);
193 ArrOfTID rang_voisinVV(n1-n2);
195 ArrOfInt rang_voisinRR(n2);
196 ArrOfInt rang_voisinRV(n2);
197 ArrOfInt rang_voisinVV(n1-n2);
203 CIntTabView face_voisins = le_dom.
face_voisins().view_ro();
204 CIntArrView ind_faces_virt_bord = le_dom_VEF->ind_faces_virt_bord().view_ro();
205 auto rang_voisinRR_v = rang_voisinRR.view_rw();
206 auto rang_voisinRV_v = rang_voisinRV.view_rw();
207 auto rang_voisinVV_v = rang_voisinVV.view_rw();
208 Kokkos::parallel_for(start_gpu_timer(__KERNEL_NAME__), range_1D(ndeb, nfin), KOKKOS_LAMBDA(
const int num_face)
210 int elem1 = face_voisins(num_face, 0);
211 int elem2 = face_voisins(num_face, 1);
212 const bool is_face_virt_bord = (num_face >= nb_faces) && (ind_faces_virt_bord(num_face - nb_faces) != -1);
213 if (!is_face_virt_bord && elem1 != -1 && elem2 != -1)
218 Kokkos::atomic_add(&rang_voisinRR_v(elem2), 1);
222 Kokkos::atomic_add(&rang_voisinRV_v(elem2), 1);
224 Kokkos::atomic_add(&rang_voisinVV_v(elem2 - n2), 1);
230 Kokkos::atomic_add(&rang_voisinRR_v(elem1), 1);
234 Kokkos::atomic_add(&rang_voisinRV_v(elem1), 1);
236 Kokkos::atomic_add(&rang_voisinVV_v(elem1 - n2), 1);
241 end_gpu_timer(__KERNEL_NAME__);
245 for (
int i=0; i<les_cl.size(); i++)
254 CIntArrView front_num_face = le_bord.
num_face().view_ro();
255 CIntArrView face_associee = la_cl_perio.
face_associee().view_ro();
256 Kokkos::parallel_for(start_gpu_timer(__KERNEL_NAME__), range_1D(0, nb_faces_bord_tot), KOKKOS_LAMBDA(
const int ind_face)
258 if (ind_face < face_associee(ind_face))
260 int num_face = front_num_face(ind_face);
261 int elem1 = face_voisins(num_face, 0);
262 int elem2 = face_voisins(num_face, 1);
263 if (elem1 != -1 && elem2 != -1)
268 Kokkos::atomic_add(&rang_voisinRR_v(elem2), 1);
272 Kokkos::atomic_add(&rang_voisinRV_v(elem2), 1);
274 Kokkos::atomic_add(&rang_voisinVV_v(elem2 - n2), 1);
280 Kokkos::atomic_add(&rang_voisinRR_v(elem1), 1);
284 Kokkos::atomic_add(&rang_voisinRV_v(elem1), 1);
286 Kokkos::atomic_add(&rang_voisinVV_v(elem1 - n2), 1);
292 end_gpu_timer(__KERNEL_NAME__);
306 auto tab1RR_v = tab1RR.view_rw();
307 auto tab1RV_v = tab1RV.view_rw();
308 auto tab1VV_v = tab1VV.view_rw();
309 using tab1_value_t =
typename decltype(tab1RR_v)::value_type;
310 Kokkos::parallel_scan(start_gpu_timer(__KERNEL_NAME__), range_1D(0, n2), KOKKOS_LAMBDA(
const int i, tab1_value_t& update,
const bool final)
312 update += rang_voisinRR_v(i);
313 if (
final) tab1RR_v(i+1) = update + 1;
315 end_gpu_timer(__KERNEL_NAME__);
316 Kokkos::parallel_scan(start_gpu_timer(__KERNEL_NAME__), range_1D(0, n2), KOKKOS_LAMBDA(
const int i, tab1_value_t& update,
const bool final)
318 update += rang_voisinRV_v(i);
319 if (
final) tab1RV_v(i+1) = update + 1;
321 end_gpu_timer(__KERNEL_NAME__);
322 Kokkos::parallel_scan(start_gpu_timer(__KERNEL_NAME__), range_1D(0, n1-n2), KOKKOS_LAMBDA(
const int i, tab1_value_t& update,
const bool final)
324 update += rang_voisinVV_v(i);
325 if (
final) tab1VV_v(i+1) = update + 1;
327 end_gpu_timer(__KERNEL_NAME__);
332 auto tab2RR_v = tab2RR.view_rw();
333 Kokkos::parallel_for(start_gpu_timer(__KERNEL_NAME__), range_1D(0, n2), KOKKOS_LAMBDA(
const int i)
335 tab2RR_v(tab1RR_v(i) - 1) = i+1;
336 rang_voisinRR_v(i) = tab1RR_v(i);
337 rang_voisinRV_v(i) = tab1RV_v(i) - 1;
339 end_gpu_timer(__KERNEL_NAME__);
340 auto tab2VV_v = tab2VV.view_rw();
341 Kokkos::parallel_for(start_gpu_timer(__KERNEL_NAME__), range_1D(0, n1-n2), KOKKOS_LAMBDA(
const int i)
343 tab2VV_v(tab1VV_v(i) - 1) = i+1;
344 rang_voisinVV_v(i) = tab1VV_v(i);
346 end_gpu_timer(__KERNEL_NAME__);
354 auto tab2RV_v = tab2RV.view_rw();
358 CDoubleTabView face_normales = le_dom.
face_normales().view_ro();
359 CDoubleTabView inverse_quantitee_entrelacee_v = inverse_quantitee_entrelacee.
view_ro();
360 Kokkos::parallel_for(start_gpu_timer(__KERNEL_NAME__), range_1D(ndeb, nfin), KOKKOS_LAMBDA(
const int num_face)
362 int elem1 = face_voisins(num_face, 0);
363 int elem2 = face_voisins(num_face, 1);
364 const bool is_face_virt_bord = (num_face >= nb_faces) && (ind_faces_virt_bord(num_face - nb_faces) != -1);
365 if (!is_face_virt_bord && elem1 != -1 && elem2 != -1)
368 for (
int d = 0; d < dim; d++)
369 val += face_normales(num_face, d) * face_normales(num_face, d) * inverse_quantitee_entrelacee_v(num_face, d);
372 if (elem1 < n2) Kokkos::atomic_add(&coeffRR(tab1RR_v(elem1) - 1), val);
373 else Kokkos::atomic_add(&coeffVV(tab1VV_v(elem1 - n2) - 1), val);
374 if (elem2 < n2) Kokkos::atomic_add(&coeffRR(tab1RR_v(elem2) - 1), val);
375 else Kokkos::atomic_add(&coeffVV(tab1VV_v(elem2 - n2) - 1), val);
381 auto slot = Kokkos::atomic_fetch_add(&rang_voisinRR_v(elem2), 1);
382 tab2RR_v(slot) = elem1 + 1;
383 coeffRR(slot) -= val;
389 auto slot = Kokkos::atomic_fetch_add(&rang_voisinRV_v(elem2), 1);
390 tab2RV_v(slot) = (elem1 - n2) + 1;
391 coeffRV(slot) -= val;
395 auto slot = Kokkos::atomic_fetch_add(&rang_voisinVV_v(elem2 - n2), 1);
396 tab2VV_v(slot) = (elem1 - n2) + 1;
397 coeffVV(slot) -= val;
405 auto slot = Kokkos::atomic_fetch_add(&rang_voisinRR_v(elem1), 1);
406 tab2RR_v(slot) = elem2 + 1;
407 coeffRR(slot) -= val;
413 auto slot = Kokkos::atomic_fetch_add(&rang_voisinRV_v(elem1), 1);
414 tab2RV_v(slot) = (elem2 - n2) + 1;
415 coeffRV(slot) -= val;
419 auto slot = Kokkos::atomic_fetch_add(&rang_voisinVV_v(elem1 - n2), 1);
420 tab2VV_v(slot) = (elem2 - n2) + 1;
421 coeffVV(slot) -= val;
427 end_gpu_timer(__KERNEL_NAME__);
429 for (
int i=0; i<les_cl.size(); i++)
446 CIntArrView front_num_face = le_bord.
num_face().view_ro();
447 DoubleArrView coeff_pression =
static_cast<ArrOfDouble&
>(
les_coeff_pression).view_rw();
449 Kokkos::parallel_for(start_gpu_timer(__KERNEL_NAME__), range_1D(0, nb_faces_bord_tot), KOKKOS_LAMBDA(
const int ind_face)
451 int num_face = front_num_face(ind_face);
453 for (
int d = 0; d < dim; d++)
454 val += face_normales(num_face, d) * face_normales(num_face, d) * inverse_quantitee_entrelacee_v(num_face, d);
456 int elem = face_voisins(num_face, 0);
457 if (elem < n2) Kokkos::atomic_add(&coeffRR(tab1RR_v(elem) - 1), val);
458 else Kokkos::atomic_add(&coeffVV(tab1VV_v(elem - n2) - 1), val);
460 if (num_face < coeff_pression_size)
461 coeff_pression(num_face) = val;
463 end_gpu_timer(__KERNEL_NAME__);
465 else if (sub_type(
Periodique,la_cl.valeur()) )
468 CIntArrView front_num_face_coeff = le_bord.
num_face().view_ro();
469 CIntArrView face_associee_coeff = la_cl_perio.
face_associee().view_ro();
470 Kokkos::parallel_for(start_gpu_timer(__KERNEL_NAME__), range_1D(0, nb_faces_bord_tot), KOKKOS_LAMBDA(
const int ind_face)
472 if (ind_face < face_associee_coeff(ind_face))
474 int num_face = front_num_face_coeff(ind_face);
475 int elem1 = face_voisins(num_face, 0);
476 int elem2 = face_voisins(num_face, 1);
478 for (
int d = 0; d < dim; d++)
479 val += face_normales(num_face, d) * face_normales(num_face, d) * inverse_quantitee_entrelacee_v(num_face, d);
482 if (elem1 < n2) Kokkos::atomic_add(&coeffRR(tab1RR_v(elem1) - 1), val);
483 else Kokkos::atomic_add(&coeffVV(tab1VV_v(elem1 - n2) - 1), val);
484 if (elem2 < n2) Kokkos::atomic_add(&coeffRR(tab1RR_v(elem2) - 1), val);
485 else Kokkos::atomic_add(&coeffVV(tab1VV_v(elem2 - n2) - 1), val);
491 auto slot = Kokkos::atomic_fetch_add(&rang_voisinRR_v(elem2), 1);
492 tab2RR_v(slot) = elem1 + 1;
493 coeffRR(slot) -= val;
499 auto slot = Kokkos::atomic_fetch_add(&rang_voisinRV_v(elem2), 1);
500 tab2RV_v(slot) = elem1 - n2 + 1;
501 coeffRV(slot) -= val;
505 auto slot = Kokkos::atomic_fetch_add(&rang_voisinVV_v(elem2 - n2), 1);
506 tab2VV_v(slot) = elem1 - n2 + 1;
507 coeffVV(slot) -= val;
515 auto slot = Kokkos::atomic_fetch_add(&rang_voisinRR_v(elem1), 1);
516 tab2RR_v(slot) = elem2 + 1;
517 coeffRR(slot) -= val;
523 auto slot = Kokkos::atomic_fetch_add(&rang_voisinRV_v(elem1), 1);
524 tab2RV_v(slot) = elem2 - n2 + 1;
525 coeffRV(slot) -= val;
529 auto slot = Kokkos::atomic_fetch_add(&rang_voisinVV_v(elem1 - n2), 1);
530 tab2VV_v(slot) = elem2 - n2 + 1;
531 coeffVV(slot) -= val;
537 end_gpu_timer(__KERNEL_NAME__);