TRUST 1.9.8
HPC thermohydraulic platform
Loading...
Searching...
No Matches
Solv_Petsc.cpp
1/****************************************************************************
2* Copyright (c) 2026, CEA
3* All rights reserved.
4*
5* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9*
10* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
11* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
12* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*
14*****************************************************************************/
15
16#include <Solv_Petsc.h>
17#ifdef PETSCKSP_H
18#include <petscis.h>
19#include <petscdmshell.h>
20#include <petscsection.h>
21#ifdef PETSC_HAVE_HYPRE
22#include <HYPRE_config.h>
23#endif
24#include <cfenv>
25#include <tuple>
26#include <Matrice_Morse_Sym.h>
27#include <Matrice_Bloc_Sym.h>
28#include <Matrice_Bloc.h>
29#include <Process.h>
30#include <MD_Vector_tools.h>
31#include <PE_Groups.h>
32#include <Comm_Group_MPI.h>
33#include <map>
34#include <ctime>
35#include <EFichier.h>
36#include <sys/stat.h>
37#endif
38#include <Matrice_Petsc.h>
39#include <Motcle.h>
40#include <SChaine.h>
41#include <SFichier.h>
42#include <TRUSTTrav.h>
43#include <MD_Vector_composite.h>
44#include <vector>
45#include <functional>
46#include <Perf_counters.h>
47#include <chrono>
48
49Implemente_instanciable_sans_constructeur_ni_destructeur(Solv_Petsc,"Solv_Petsc",Solv_Externe);
50
51// XD petsc solveur_sys_base petsc NO_BRACE Solver via Petsc API
52// XD attr solveur solveur_petsc_deriv solveur REQ solver type and options
53
54// XD solveur_petsc_deriv objet_u solveur_petsc_deriv INHERITS_BRACE Additional information is available in the PETSC
55// XD_CONT documentation: https://petsc.org/release/manual/
56// XD attr seuil floattant seuil OPT corresponds to the iterative solver convergence value. The iterative solver
57// XD_CONT converges when the Euclidean residue standard ||Ax-B|| is less than seuil.
58// XD attr quiet rien quiet OPT is a keyword which is used to not displaying any outputs of the solver.
59// XD attr impr rien impr OPT used to request display of the Euclidean residue standard each time this iterates through
60// XD_CONT the conjugated gradient (display to the standard outlet).
61// XD attr rtol floattant rtol OPT not_set
62// XD attr atol floattant atol OPT not_set
63// XD attr save_matrix_mtx_format rien save_matrix_mtx_format OPT not_set
64
65// XD solveur_petsc_lu solveur_petsc_deriv lu INHERITS_BRACE Several solvers through PETSc API are available. NL2 TIPS:
66// XD_CONT NL2 NL2 NL2 A) Solver for symmetric linear systems (e.g: Pressure system from Navier-Stokes equations): NL2
67// XD_CONT -The CHOLESKY parallel solver is from MUMPS library. It offers better performance than all others solvers if
68// XD_CONT you have enough RAM for your calculation. A parallel calculation on a cluster with 4GBytes on each processor,
69// XD_CONT 40000 cells/processor seems the upper limit. Seems to be very slow to initialize above 500 cpus/cores. NL2
70// XD_CONT -When running a parallel calculation with a high number of cpus/cores (typically more than 500) where
71// XD_CONT preconditioner scalabilty is the key for CPU performance, consider BICGSTAB with BLOCK_JACOBI_ICC(1) as
72// XD_CONT preconditioner or if not converges, GCP with BLOCK_JACOBI_ICC(1) as preconditioner. NL2 -For other
73// XD_CONT situations, the first choice should be GCP/SSOR. In order to fine tune the solver choice, each one of the
74// XD_CONT previous list should be considered. Indeed, the CPU speed of a solver depends of a lot of parameters. You may
75// XD_CONT give a try to the OPTIMAL solver to help you to find the fastest solver on your study. NL2 NL2 B) Solver for
76// XD_CONT non symmetric linear systems (e.g.: Implicit schemes): NL2 The BICGSTAB/DIAG solver seems to offer the best
77// XD_CONT performances.
78
79// XD solveur_petsc_Cholesky_superlu solveur_petsc_deriv Cholesky_superlu INHERITS_BRACE Parallelized Cholesky from
80// XD_CONT SUPERLU_DIST library (less CPU and RAM, efficient than the previous one)
81
82// XD solveur_petsc_Cholesky_pastix solveur_petsc_deriv Cholesky_pastix INHERITS_BRACE Parallelized Cholesky from PASTIX
83// XD_CONT library.
84
85// XD solveur_petsc_Cholesky_umfpack solveur_petsc_deriv Cholesky_umfpack INHERITS_BRACE Sequential Cholesky from
86// XD_CONT UMFPACK library (seems fast).
87
88// XD solveur_petsc_Cholesky_out_of_core solveur_petsc_deriv Cholesky_out_of_core INHERITS_BRACE Same as the previous
89// XD_CONT one but with a written LU decomposition of disk (save RAM memory but add an extra CPU cost during Ax=B
90// XD_CONT solve).
91
92// XD solveur_petsc_cholesky_lapack solveur_petsc_deriv cholesky_lapack INHERITS_BRACE Sequential Cholesky via LAPACK
93// XD_CONT (cannot be used in parallel). Accepts factored_matrix to save/read/disk-cache the factorisation.
94// XD attr factored_matrix chaine(into=["save","read","disk"]) factored_matrix OPT Cache the LU factorisation: save
95// XD_CONT writes it after computing, read loads a precomputed one, disk reads if present and otherwise
96// XD_CONT computes-then-saves.
97
98// XD solveur_petsc_cholesky solveur_petsc_deriv cholesky INHERITS_BRACE Parallelized version of Cholesky from MUMPS
99// XD_CONT library. This solver accepts an option to select a different ordering than the automatic selected one by
100// XD_CONT MUMPS (and printed by using the impr option). The possible choices are Metis, Scotch, PT-Scotch or Parmetis.
101// XD_CONT The two last options can only be used during a parallel calculation, whereas the two first are available for
102// XD_CONT sequential or parallel calculations. It seems that the CPU cost of A=LU factorization but also of the
103// XD_CONT backward/forward elimination steps may sometimes be reduced by selecting a different ordering (Scotch seems
104// XD_CONT often the best for b/f elimination) than the default one. NL2 Notice that this solver requires a huge amont
105// XD_CONT of memory compared to iterative methods. To know how much RAM you will need by core, then use the impr option
106// XD_CONT to have detailled informations during the analysis phase and before the factorisation phase (in the following
107// XD_CONT output, you will learn that the largest memory is taken by the zeroth CPU with 108MB): NL2 Rank of proc
108// XD_CONT needing largest memory in IC facto : 0 NL2 Estimated corresponding MBYTES for IC facto : 108 NL2 Thanks to
109// XD_CONT the following graph, you read that in order to solve for instance a flow on a mesh with 2.6e6 cells, you will
110// XD_CONT need to run a parallel calculation on 32 CPUs if you have cluster nodes with only 4GB/core (6.2GB*0.42~2.6GB)
111// XD_CONT : NL2 \includeimage{{petscgraph.jpeg}}
112// XD attr save_matrice|save_matrix rien save_matrix OPT not_set
113// XD attr save_matrix_petsc_format rien save_matrix_petsc_format OPT not_set
114// XD attr reduce_ram rien reduce_ram OPT not_set
115// XD attr cli_quiet solveur_petsc_option_cli cli_quiet OPT not_set
116// XD attr cli solveur_petsc_option_cli cli OPT not_set
117
118// XD solveur_petsc_cholesky_mumps_blr solveur_petsc_deriv cholesky_mumps_blr INHERITS_BRACE BLR for (Block Low-Rank)
119// XD attr reduce_ram rien reduce_ram OPT not_set
120// XD attr dropping_parameter floattant dropping_parameter OPT not_set
121// XD attr cli solveur_petsc_option_cli cli OPT not_set
122
123// XD solveur_petsc_option_cli bloc_lecture nul INHERITS_BRACE solver
124
125// XD solveur_petsc_cli solveur_petsc_deriv cli NO_BRACE Command Line Interface. Should be used only by advanced users,
126// XD_CONT to access the whole solver/preconditioners from the PETSC API. To find all the available options, run your
127// XD_CONT calculation with the -ksp_view -help options: NL2 trust datafile [N] --ksp_view --help NL2 -pc_type
128// XD_CONT Preconditioner:(one of) none jacobi pbjacobi bjacobi sor lu shell mg eisenstat ilu icc cholesky asm ksp
129// XD_CONT composite redundant nn mat fieldsplit galerkin openmp spai hypre tfs (PCSetType) NL2 HYPRE preconditioner
130// XD_CONT options: NL2 -pc_hypre_type pilut (choose one of) pilut parasails boomeramg NL2 HYPRE ParaSails Options NL2
131// XD_CONT -pc_hypre_parasails_nlevels 1: Number of number of levels (None) NL2 -pc_hypre_parasails_thresh 0.1:
132// XD_CONT Threshold (None) NL2 -pc_hypre_parasails_filter 0.1: filter (None) NL2 -pc_hypre_parasails_loadbal 0: Load
133// XD_CONT balance (None) NL2 -pc_hypre_parasails_logging: FALSE Print info to screen (None) NL2
134// XD_CONT -pc_hypre_parasails_reuse: FALSE Reuse nonzero pattern in preconditioner (None) NL2 -pc_hypre_parasails_sym
135// XD_CONT nonsymmetric (choose one of) nonsymmetric SPD nonsymmetric,SPD NL2 NL2 Krylov Method (KSP) Options NL2
136// XD_CONT -ksp_type Krylov method:(one of) cg cgne stcg gltr richardson chebychev gmres tcqmr bcgs bcgsl cgs tfqmr cr
137// XD_CONT lsqr preonly qcg bicg fgmres minres symmlq lgmres lcd (KSPSetType) NL2 -ksp_max_it 10000: Maximum number of
138// XD_CONT iterations (KSPSetTolerances) NL2 -ksp_rtol 0: Relative decrease in residual norm (KSPSetTolerances) NL2
139// XD_CONT -ksp_atol 1e-12: Absolute value of residual norm (KSPSetTolerances) NL2 -ksp_divtol 10000: Residual norm
140// XD_CONT increase cause divergence (KSPSetTolerances) NL2 -ksp_converged_use_initial_residual_norm: Use initial
141// XD_CONT residual residual norm for computing relative convergence NL2 -ksp_monitor_singular_value stdout: Monitor
142// XD_CONT singular values (KSPMonitorSet) NL2 -ksp_monitor_short stdout: Monitor preconditioned residual norm with
143// XD_CONT fewer digits (KSPMonitorSet) NL2 -ksp_monitor_draw: Monitor graphically preconditioned residual norm
144// XD_CONT (KSPMonitorSet) NL2 -ksp_monitor_draw_true_residual: Monitor graphically true residual norm (KSPMonitorSet)
145// XD_CONT NL2 NL2 Example to use the multigrid method as a solver, not only as a preconditioner: NL2 Solveur_pression
146// XD_CONT Petsc CLI {-ksp_type richardson -pc_type hypre -pc_hypre_type boomeramg -ksp_atol 1.e-7 }
147// XD attr seuil suppress_param seuil OPT corresponds to the iterative solver convergence value. The iterative solver
148// XD_CONT converges when the Euclidean residue standard is less than seuil.
149// XD attr quiet suppress_param quiet OPT is a keyword which is used to not displaying any outputs of the solver.
150// XD attr impr suppress_param impr OPT impress or not
151// XD attr rtol suppress_param rtol OPT not_set
152// XD attr atol suppress_param atol OPT not_set
153// XD attr save_matrix_mtx_format suppress_param save_matrix_mtx_format OPT not_set
154// XD attr cli_bloc bloc_lecture cli_bloc REQ bloc
155
156// XD solveur_petsc_cli_quiet solveur_petsc_deriv cli_quiet NO_BRACE solver
157// XD attr seuil suppress_param seuil OPT corresponds to the iterative solver convergence value. The iterative solver
158// XD_CONT converges when the Euclidean residue standard is less than seuil.
159// XD attr quiet suppress_param quiet OPT is a keyword which is used to not displaying any outputs of the solver.
160// XD attr impr suppress_param impr OPT impress or not
161// XD attr rtol suppress_param rtol OPT not_set
162// XD attr atol suppress_param atol OPT not_set
163// XD attr save_matrix_mtx_format suppress_param save_matrix_mtx_format OPT not_set
164// XD attr cli_quiet_bloc bloc_lecture cli_quiet_bloc REQ bloc
165
166// XD solveur_petsc_IBICGSTAB solveur_petsc_deriv IBICGSTAB INHERITS_BRACE Improved version of previous one for massive
167// XD_CONT parallel computations (only a single global reduction operation instead of the usual 3 or 4).
168// XD attr precond preconditionneur_petsc_deriv precond OPT not_set
169
170// XD solveur_petsc_BICGSTAB solveur_petsc_deriv BICGSTAB INHERITS_BRACE Stabilized Bi-Conjugate Gradient
171// XD attr precond preconditionneur_petsc_deriv precond OPT not_set
172
173// XD solveur_petsc_gmres solveur_petsc_deriv gmres INHERITS_BRACE Generalized Minimal Residual
174// XD attr precond preconditionneur_petsc_deriv precond OPT not_set
175// XD attr reuse_preconditioner_nb_it_max entier reuse_preconditioner_nb_it_max OPT not_set
176// XD attr save_matrix_petsc_format rien save_matrix_petsc_format OPT not_set
177// XD attr nb_it_max entier nb_it_max OPT In order to specify a given number of iterations instead of a condition on the
178// XD_CONT residue with the keyword seuil. May be useful when defining a PETSc solver for the implicit time scheme where
179// XD_CONT convergence is very fast: 5 or less iterations seems enough.
180
181// XD solveur_petsc_gcp solveur_petsc_deriv gcp INHERITS_BRACE Preconditioned Conjugate Gradient
182// XD attr precond preconditionneur_petsc_deriv precond OPT preconditioner
183// XD attr precond_nul rien precond_nul OPT No preconditioner used, equivalent to precond null { }
184// XD attr rtol floattant rtol OPT not_set
185// XD attr reuse_preconditioner_nb_it_max entier reuse_preconditioner_nb_it_max OPT not_set
186// XD attr cli solveur_petsc_option_cli cli OPT not_set
187// XD attr reorder_matrix entier reorder_matrix OPT not_set
188// XD attr read_matrix rien read_matrix OPT save_matrix|read_matrix are the keywords to save|read into a file the
189// XD_CONT constant matrix A of the linear system Ax=B solved (eg: matrix from the pressure linear system for an
190// XD_CONT incompressible flow). It is useful when you want to minimize the MPI communications on massive parallel
191// XD_CONT calculation. Indeed, in VEF discretization, the overlapping width (generaly 2, specified with the
192// XD_CONT largeur_joint option in the partition keyword partition) can be reduced to 1, once the matrix has been
193// XD_CONT properly assembled and saved. The cost of the MPI communications in TRUST itself (not in PETSc) will be
194// XD_CONT reduced with length messages divided by 2. So the strategy is: NL2 I) Partition your VEF mesh with a
195// XD_CONT largeur_joint value of 2 NL2 II) Run your parallel calculation on 0 time step, to build and save the matrix
196// XD_CONT with the save_matrix option. A file named Matrix_NBROWS_rows_NCPUS_cpus.petsc will be saved to the disk
197// XD_CONT (where NBROWS is the number of rows of the matrix and NCPUS the number of CPUs used). NL2 III) Partition your
198// XD_CONT VEF mesh with a largeur_joint value of 1 NL2 IV) Run your parallel calculation completly now and substitute
199// XD_CONT the save_matrix option by the read_matrix option. Some interesting gains have been noticed when the cost of
200// XD_CONT linear system solve with PETSc is small compared to all the other operations.
201// XD attr save_matrice|save_matrix rien save_matrix OPT see read_matrix
202// XD attr petsc_decide entier petsc_decide OPT not_set
203// XD attr pcshell chaine pcshell OPT not_set
204// XD attr aij rien aij OPT not_set
205
206// XD solveur_petsc_PIPECG solveur_petsc_deriv PIPECG INHERITS_BRACE Pipelined Conjugate Gradient (possible reduced CPU
207// XD_CONT cost during massive parallel calculation due to a single non-blocking reduction per iteration, if TRUST is
208// XD_CONT built with a MPI-3 implementation)... no example in TRUST
209
210
211// XD preconditionneur_petsc_deriv objet_u preconditionneur_petsc_deriv INHERITS_BRACE Preconditioners available with
212// XD_CONT petsc solvers
213
214// XD preconditionneur_petsc_diag preconditionneur_petsc_deriv diag INHERITS_BRACE Diagonal (Jacobi) preconditioner.
215
216// XD preconditionneur_petsc_c_amg preconditionneur_petsc_deriv c-amg INHERITS_BRACE preconditionner
217
218// XD preconditionneur_petsc_sa_amg preconditionneur_petsc_deriv sa-amg INHERITS_BRACE preconditionner
219
220// XD preconditionneur_petsc_BLOCK_JACOBI_ICC preconditionneur_petsc_deriv BLOCK_JACOBI_ICC INHERITS_BRACE Incomplete
221// XD_CONT Cholesky factorization for symmetric matrix with the PETSc implementation.
222// XD attr level entier level OPT factorization level (default value, 1). In parallel, the factorization is done by
223// XD_CONT block (one per processor by default).
224// XD attr ordering chaine(into=["natural","rcm"]) ordering OPT The ordering of the local matrix is natural by default,
225// XD_CONT but rcm ordering, which reduces the bandwith of the local matrix, may interestingly improves the quality of
226// XD_CONT the decomposition and reduces the number of iterations.
227
228// XD preconditionneur_petsc_boomeramg preconditionneur_petsc_deriv boomeramg INHERITS_BRACE Multigrid preconditioner
229// XD_CONT (no option is available yet, look at CLI command and Petsc documentation to try other options).
230
231// XD preconditionneur_petsc_null preconditionneur_petsc_deriv null INHERITS_BRACE No preconditioner used
232
233// XD preconditionneur_petsc_lu preconditionneur_petsc_deriv lu INHERITS_BRACE preconditionner
234
235// XD preconditionneur_petsc_jacobi preconditionneur_petsc_deriv jacobi INHERITS_BRACE preconditionner
236
237// XD preconditionneur_petsc_EISENTAT preconditionneur_petsc_deriv EISENTAT INHERITS_BRACE SSOR version with Eisenstat
238// XD_CONT trick which reduces the number of computations and thus CPU cost...
239// XD attr omega floattant omega OPT relaxation factor
240
241// XD preconditionneur_petsc_ssor preconditionneur_petsc_deriv ssor INHERITS_BRACE Symmetric Successive Over Relaxation
242// XD_CONT algorithm.
243// XD attr omega floattant omega OPT relaxation factor (default value, 1.5)
244
245// XD preconditionneur_petsc_block_jacobi_ilu preconditionneur_petsc_deriv block_jacobi_ilu INHERITS_BRACE
246// XD_CONT preconditionner
247// XD attr level entier level OPT not_set
248
249// XD preconditionneur_petsc_spai preconditionneur_petsc_deriv spai INHERITS_BRACE Spai Approximate Inverse algorithm
250// XD_CONT from Parasails Hypre library.
251// XD attr level entier level OPT first parameter
252// XD attr epsilon floattant epsilon OPT second parameter
253
254// XD preconditionneur_petsc_pilut preconditionneur_petsc_deriv pilut INHERITS_BRACE Dual Threashold Incomplete LU
255// XD_CONT factorization.
256// XD attr level entier level OPT factorization level
257// XD attr epsilon floattant epsilon OPT drop tolerance
258
259// XD preconditionneur_petsc_ilu_mumps preconditionneur_petsc_deriv ilu_mumps INHERITS_BRACE Incomplete LU factorization
260// XD_CONT with Block Low Ranking from the MUMPS library. Mapped at runtime onto Petsc's cholesky pc with
261// XD_CONT mat_mumps_icntl_35=1 (BLR enabled).
262// XD attr epsilon floattant epsilon OPT BLR dropping parameter (passed through as mat_mumps_cntl_7).
263
264
265
266// printOn
268{
269 s << chaine_lue_;
270 return s;
271}
272
273// readOn
275{
276 lecture(is);
277 return is;
278}
279
280void check_not_defined(option o)
281{
282 if (o.defined)
283 {
284 Cerr << "Error! Option " << o.name << " should not be defined with the preconditioner of this solver." << finl;
285 Cerr << "Change your data file." << finl;
287 }
288}
289
290//check to see if a string is a number
291#ifdef PETSCKSP_H
292static bool is_number(const std::string& s)
293{
294 std::string::const_iterator it = s.begin();
295 while (it != s.end() && std::isdigit(*it)) ++it;
296 return !s.empty() && it == s.end();
297}
298#endif
299
300bool gmres_right_unpreconditionned=true;
301// Lecture et creation du solveur
303{
304 if (amgx_ || gpu_ || std::getenv("TRUST_PETSC_VERBOSE"))
305 verbose = true;
306#ifdef PETSCKSP_H
307 if(!std::is_same<PetscInt, trustIdType>::value)
308 Process::exit("Type mismatch!!! PetscInt and trustIdType should be equal!!! PETSc not compiled in 64b??");
309
310 Motcle accolade_ouverte("{");
311 Motcle accolade_fermee("}");
312 Nom pc("");
313 Nom motlu;
314 Nom ksp;
315 lecture(entree);
317 is >> ksp; // On lit le solveur en premier puis les options du solveur: PETSC ksp { ... }
318 is >> motlu; // On lit l'accolade
319 if (motlu != accolade_ouverte)
320 {
321 Cerr << "Error while reading the parameters of PETSc solver: " << ksp << " { ... }" << finl;
322 Cerr << "We expected " << accolade_ouverte << " instead of " << motlu << finl;
323 exit();
324 }
325 // Verification si Petsc est bien initialise (permet d'eviter un crash en sequentiel sur les machines batch)
326 PetscBool isInitialized;
327 PetscInitialized(&isInitialized);
328 if (!isInitialized)
329 {
330 Cerr << "On this queuing system cluster, you need to use mpirun even on sequential mode" << finl;
331 Cerr << "(mpirun -np 1 ...) with a PETSc solver or the calculation will crash. " << finl;
332 Cerr << "You can use the trust script as a workaround:" << finl;
333 Cerr << "trust " << nom_du_cas() << finl;
334 exit();
335 }
336
337 // Creation du solveur et association avec le preconditionneur
338 if (option_prefix_=="??") // Prefix non fixe
339 {
341 option_prefix_="";
342 if (numero_solveur > 1)
343 {
344 // On cree un prefix pour les options si plus d'un solveur pour les differencier. Exemple:
345 // premier solveur -ksp_type ... -pc_type ...
346 // deuxieme solveur -solver2_ksp_type ... -solver2_pc-type ...
347 // troisieme solveur -solveur3_ksp_type ...
348 option_prefix_ += "solver";
349 option_prefix_ += (Nom) numero_solveur;
350 option_prefix_ += "_";
351 }
352 }
353
354 /************************/
355 /* Set PETSC_COMM_WORLD */
356 /************************/
357 // Recuperation du communicateur du groupe courant
358#ifdef MPI_
360 PETSC_COMM_WORLD = ref_cast(Comm_Group_MPI,PE_Groups::current_group()).get_mpi_comm();
361#endif
362
363 KSPCreate(PETSC_COMM_WORLD, &SolveurPetsc_);
364 KSPGetPC(SolveurPetsc_, &PreconditionneurPetsc_);
365
366 // Add options if PETSc solver is used:
368 {
369 // _petsc.TU is only printed if one group calculation (e.g. Execute_parallel failed)
370 Nom petsc_TU(":");
371 petsc_TU+=nom_du_cas();
372 petsc_TU+="_petsc.TU";
373#ifdef TRUST_USE_GPU
374 //if (instance==1) PetscLogGpuTime(); // Slow down calculation ! Use -log_view_gpu_time
375#endif
376#ifndef TRUST_USE_CUDA
377 // Unexplained segfault when build with nvcc, we disable:
378 add_option("log_view",petsc_TU); // Monitor performances at the end of the calculation
379 PetscLogDefaultBegin(); // Necessary cause if not Event logs not printed in petsc_TU file ... I don't know why...
380#endif
381 }
382#ifdef NDEBUG
383 // PETSc 3.14 active par defaut les exceptions, on desactive en production ?
384 // PetscSetFPTrap(PETSC_FP_TRAP_OFF);
385 // Utiliser -fp_trap 0 a l'execution plutot: Segfault vu sur petsc gmres { precond diag ... }
386#endif
387 //add_option("on_error_abort",""); // ne marche pas semble t'il
388 // On doit pouvoir lire des mots cles de base (GCP, GMRES, CHOLESKY)
389 // mais egalement pouvoir appeler les options Petsc avec une chaine { -ksp_type cg -pc_type sor ... }
390 // Les options non reconnues doivent arreter le code
391 // Reprendre le formalisme de GCP { precond ssor { omega val } seuil val }
392 Motcles les_solveurs(21);
393 {
394 les_solveurs[0] = "CLI";
395 les_solveurs[1] = "GCP";
396 les_solveurs[2] = "GMRES";
397 les_solveurs[3] = "CHOLESKY|MUMPS";
398 les_solveurs[4] = "CHOLESKY_OUT_OF_CORE|MUMPS_OUT_OF_CORE";
399 les_solveurs[5] = "BICGSTAB";
400 les_solveurs[6] = "IBICGSTAB";
401 les_solveurs[7] = "CHOLESKY_SUPERLU|LU_SUPERLU";
402 les_solveurs[8] = "PGMRES";
403 les_solveurs[9] = "LU";
404 les_solveurs[10] = "PIPECG";
405 les_solveurs[11] = "CHOLESKY_LAPACK";
406 les_solveurs[12] = "CHOLESKY_UMFPACK";
407 les_solveurs[13] = "CHOLESKY_PASTIX";
408 les_solveurs[14] = "CLI_VERBOSE";
409 les_solveurs[15] = "CLI_QUIET";
410 les_solveurs[16] = "CHOLESKY_MUMPS_BLR|MUMPS_BLR";
411 les_solveurs[17] = "CHOLESKY_CHOLMOD";
412 les_solveurs[18] = "PIPECG2";
413 les_solveurs[19] = "FGMRES";
414 les_solveurs[20] = "LU_STRUMPACK";
415 }
416 int solver_supported_on_gpu_by_petsc=0;
417 int solver_supported_on_gpu_by_amgx=0;
418 amgx_options_="";
419 int rang=les_solveurs.search(ksp);
420 nommer(les_solveurs[rang]);
421 switch(rang)
422 {
423 case 0:
424 case 14:
425 case 15:
426 {
427 if (rang == 15) fixer_limpr(-1); // Quiet
428 else fixer_limpr(1); // On imprime le residu
429 solver_supported_on_gpu_by_petsc=1; // Not really, reserved to expert...
430 solver_supported_on_gpu_by_amgx=1; // Not really, reserved to expert...
431 if (limpr() >= 0) Cerr << "Reading of the " << (amgx_ ? "AmgX" : "Petsc") << " commands:" << finl;
432 Nom valeur;
433 is >> motlu;
434 if (amgx_)
435 {
436 while (motlu!=accolade_fermee)
437 {
438 // -config file.json
439 if (motlu == "-file")
440 {
441 is >> motlu;
443 {
444 Cerr << "Reading AmgX config file " << motlu << " :" << finl;
445 EFichier config_amgx(motlu);
446 std::string line;
447 while (!config_amgx.eof())
448 {
449 std::getline(config_amgx.get_ifstream(), line);
450 if (line.find("#") && line.find("config_version"))
451 {
452 Cerr << line << finl;
453 amgx_options_+=line;
454 amgx_options_+="\n";
455 }
456 }
457 }
458 }
459 else
460 {
461 amgx_options_ += motlu;
462 amgx_options_ += "\n";
463 }
464 is >> motlu;
465 }
466 }
467 else
468 while (motlu!=accolade_fermee)
469 {
470 is >> valeur;
471 // "-option val" ou "-option" ?
472 if (valeur.debute_par("-") || valeur==accolade_fermee)
473 {
474 add_option(motlu.suffix("-"), "", 1);
475 motlu = valeur;
476 }
477 else
478 {
479 if (motlu == "-ksp_type" && valeur=="preonly") solveur_direct_=cli; // Activate direct solveur if using -ksp_preonly ...
480 if (motlu == "-ksp_max_it") ignore_nb_it_max_ = 1; //pour un comportement similaire a l'option nb_it_max
481 add_option(motlu.suffix("-"), valeur, 1);
482 is >> motlu;
483 }
484 }
485 // Pour faciliter le debugage:
486 if (rang == 14) // Verbose
487 {
488 add_option("ksp_view", "");
489 add_option("options_view", "");
490 add_option("options_left", "");
491 }
492 if (!amgx_)
493 {
494 // Changement dans PETSc 3.21: plus de preconditioneur par defaut
495 // On met ILU(0) comme auparavant pour ne pas changer tous les jeux de donnees qui ont: "petsc cli { }"
496 Nom current_pc;
497 Nom option="-";
498 option+=option_prefix_;
499 option+="pc_type";
500 if (!has_option(option, current_pc))
501 {
502 if (Process::nproc()>1)
503 {
504 add_option("pc_type", "bjacobi");
505 add_option("sub_pc_type", "ilu");
506 }
507 else
508 {
509 add_option("pc_type", "ilu");
510 }
511 }
512 }
513 break;
514 }
515 case 1:
516 {
517 KSPSetType(SolveurPetsc_, KSPCG);
518 // Residu=||Ax-b|| comme dans TRUST pour GCP sinon on ne peut comparer les convergences
519 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
520 // Merge the two inner products needed in CG into a single MPI_Allreduce() call:
521 // Gain interessant a partir de 4000 coeurs
522 if (Process::nproc()>=4000)
523 {
524 //add_option("ksp_cg_single_reduction",""); Pour Petsc < 3.3, la fonction KSPCGUseSingleReduction n'etait pas disponible
525 KSPCGUseSingleReduction(SolveurPetsc_,(PetscBool)1);
526 }
527 // But It requires two extra work vectors than the conventional implementation in PETSc.
528 solver_supported_on_gpu_by_petsc=1;
529 solver_supported_on_gpu_by_amgx=1;
530 add_amgx_option("solver(s)","PCG"); // CG avec preconditionnement
531 // PCGF : Flexible CG avec preconditionnement
532 break;
533 }
534 case 10:
535 {
536 KSPSetType(SolveurPetsc_, KSPPIPECG);
537 // Residu=||Ax-b|| comme dans TRUST pour GCP sinon on ne peut comparer les convergences
538 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
539 break;
540 }
541 case 18:
542 {
543 KSPSetType(SolveurPetsc_, KSPPIPECG2);
544 // Residu=||Ax-b|| comme dans TRUST pour GCP sinon on ne peut comparer les convergences
545 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
546 break;
547 }
548 case 2:
549 {
550 KSPSetType(SolveurPetsc_, KSPGMRES);
551 // Le preconditionnement a droite permet que le residu utilise pour la convergence
552 // soit le residu reel ||Ax-b|| et non le residu preconditionne pour certains solveurs
553 // avec un preconditionnement a gauche (ex: GMRES). Ainsi, on peut comparer strictement
554 // les performances des solveurs (TRUST ou PETSC) entre eux
555 if (gmres_right_unpreconditionned)
556 {
557 KSPSetPCSide(SolveurPetsc_, PC_RIGHT);
558 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
559 }
560 solver_supported_on_gpu_by_petsc=1;
561 solver_supported_on_gpu_by_amgx=1;
562 if (amgx_)
563 {
564 add_amgx_option("solver(s)","GMRES"); // GMRES
565 Process::exit("Gmres solver on GPU with AmgX fails to return a valid solution. Try GCP, BiCGSTAB or FGMRES solvers.");
566 }
567 break;
568 }
569 case 19:
570 {
571 KSPSetType(SolveurPetsc_, KSPFGMRES);
572 KSPSetPCSide(SolveurPetsc_, PC_RIGHT);
573 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
574 solver_supported_on_gpu_by_petsc=1;
575 solver_supported_on_gpu_by_amgx=1;
576 add_amgx_option("solver(s)","FGMRES"); // FGMRES
577 break;
578 }
579 case 8:
580 {
581 KSPSetType(SolveurPetsc_, KSPPGMRES);
582 // PGMRES ne peut etre que preconditionne a gauche (CAx=Cb)
583 // et on ne peut avoir que le residu preconditionne (||CAx-Cb||)
584 // -> on ne peut comparer la convergence avec le GMRES...
585 KSPSetPCSide(SolveurPetsc_, PC_LEFT);
586 // KSPSetNormType(SolveurPetsc, KSP_NORM_UNPRECONDITIONED);
587 solver_supported_on_gpu_by_petsc=1;
588 break;
589 }
590 case 3:
591 case 4:
592 case 9:
593 case 16:
594 {
595 // Si MUMPS est present, on le prend par defaut (solveur_direct_=1) sinon SuperLU (solveur_direct_=2):
596#ifdef PETSC_HAVE_MUMPS
597 solveur_direct_ = mumps;
598 // Option out_of_core
599 if (rang == 4) add_option("mat_mumps_icntl_22", "1");
600
601 // Option BLR
602 if (rang == 16)
603 {
604 Cerr
605 << "Activating BLR factorization. For more info, see http://mumps.enseeiht.fr/doc/userguide_5.1.2.pdf (page 18, 51, 52)."
606 << finl;
607 add_option("mat_mumps_icntl_35", "1");
608 }
609#else
610 solveur_direct_=superlu_dist;
611#endif
612 KSPSetType(SolveurPetsc_, KSPPREONLY);
613 break;
614 }
615 case 20:
616 {
617 // Strumpack faster than MUMPS on CPU sometimes (LU and solve, ex: JEL_bous)
618 solveur_direct_ = strumpack;
619 // ToDo add BLR option
620 KSPSetType(SolveurPetsc_, KSPPREONLY);
621 solver_supported_on_gpu_by_petsc=1;
622 if (gpu_)
623 {
624 // Triangular solve by default on GPU but limited to 1 MPI rank
625 add_option("mat_strumpack_gpu", "1");
626 add_option("mat_strumpack_metis_nodeNDP", "1"); // See https://github.com/pghysels/STRUMPACK/issues/127
627 }
628 break;
629 }
630 case 5:
631 {
632 KSPSetType(SolveurPetsc_, KSPBCGS);
633 solver_supported_on_gpu_by_petsc=1;
634 solver_supported_on_gpu_by_amgx=1;
635 // BICGSTAB // BICGSTAB sans preconditionnement
636 add_amgx_option("solver(s)","PBICGSTAB"); // BICGSTAB avec precondtionnement
637 break;
638 }
639 case 6:
640 {
641 KSPSetType(SolveurPetsc_, KSPIBCGS); // 1 point de synchro au lieu de 3 pour KSPBCGS
642 // Pour optimiser encore les comms, voir:
643 // http://www.mcs.anl.gov/petsc/petsc-as/snapshots/petsc-3.0.0/docs/manualpages/KSP/KSPIBCGS.html
644 KSPSetLagNorm(SolveurPetsc_, PETSC_TRUE);
645 break;
646 }
647 case 7:
648 {
649 solveur_direct_=superlu_dist;
650 // SuperLU_dist, parallel but not faster than MUMPS
651 KSPSetType(SolveurPetsc_, KSPPREONLY);
652 // ToDo: Update again SuperLU_dist cause GPU Triangular solver available for L (soon U)
653 // But slower on GPU than Strumpack during factorization...
654 solver_supported_on_gpu_by_petsc=1;
655 //if (gpu_) add_option("XXX", "1");
656 break;
657 }
658 case 11:
659 {
660 if (Process::is_parallel()) Process::exit("Cholesky_lapack can't be used for parallel calculation.");
661 solveur_direct_=petsc;
662 // Lapack, old and slow (non pas vrai sur petites matrices d'ordre 100 - 10000 !)
663 add_option("pc_factor_nonzeros_along_diagonal", ""); // Moins robuste que MUMPS pour un pivot nul donc on reordonne pour eviter
664 KSPSetType(SolveurPetsc_, KSPPREONLY);
665 break;
666 }
667 case 12:
668 {
669 if (Process::is_parallel()) Process::exit("Cholesky_umfpack can't be used for parallel calculation.");
670 solveur_direct_=umfpack;
671 // Umfpack, sequential only but fast...
672 KSPSetType(SolveurPetsc_, KSPPREONLY);
673 //more robustness
674 add_option("mat_umfpack_pivot_tolerance","1.0");
675 break;
676 }
677 case 13:
678 {
679 solveur_direct_=pastix;
680 // Pastix supports sbaij but seems slow...
681 KSPSetType(SolveurPetsc_, KSPPREONLY);
682 break;
683 }
684 case 17:
685 {
686 if (Process::is_parallel()) Process::exit("Cholesky_cholmod can't be used for parallel calculation.");
687 solveur_direct_=cholmod;
688 // Cholmod Cholesky (pas LU), sequentiel, supporte multi-GPU
690 {
691 Cerr << ksp << " is only supported for symmetric linear system." << finl;
693 }
694 solver_supported_on_gpu_by_petsc=1;
695 KSPSetType(SolveurPetsc_, KSPPREONLY);
696 break;
697 }
698 default:
699 {
700 Cerr << ksp << " : solver not officially recognized by TRUST among those possible for the moment:" << finl;
701 Cerr << les_solveurs << finl;
702 Cerr << "You can try to access directly to Petsc solvers with the command line:" << finl;
703 Cerr << "PETSC CLI { -ksp_type solver_name -pc_type preconditioning_name -ksp_atol threshold -ksp_monitor }" << finl;
704 Cerr << "See the reference manual for all Petsc options." << finl;
706 }
707 }
708
709 // On verifie que le solveur est supporte sur GPU:
710 if (gpu_)
711 {
712#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
713 Cerr << "GPU capabilities of PETSc will be used." << finl;
714#else
715 Cerr << "You can not use petsc_gpu keyword cause GPU" << finl;
716 Cerr << "capabilities will not work on your workstation with PETSc." << finl;
717 Cerr << "Check if you have a NVidia video card and its driver up to date." << finl;
718 Cerr << "Check petsc.log file under $TRUST_ROOT/lib/src/LIBPETSC for more details." << finl;
720#endif
721 if (solver_supported_on_gpu_by_petsc==0)
722 {
723 Cerr << les_solveurs[rang] << " is not supported yet by PETSc on GPU." << finl;
725 }
726 }
727 if (amgx_)
728 {
729#ifdef TRUST_USE_CUDA
730 Cerr << "GPU capabilities of AmgX will be used." << finl;
731#else
732 Cerr << "You can not use amgx keyword cause TRUST version is not build with CUDA support." << finl;
734#endif
735 if (solver_supported_on_gpu_by_amgx==0)
736 {
737 Cerr << les_solveurs[rang] << " is not supported yet by AmgX on GPU." << finl;
739 }
740 }
741
742 int convergence_with_seuil=0; // Keyword to check the convergence via seuil or nb_it_max
743 // On continue a lire
744 if (motlu==accolade_ouverte)
745 {
746 // Temporaire essayer de faire converger les noms de parametres des differentes solveurs (GCP, GMRES,...)
747 Motcles les_parametres_solveur(31);
748 {
749 les_parametres_solveur[0] = "impr";
750 les_parametres_solveur[1] = "seuil"; // Seuil absolu (atol)
751 les_parametres_solveur[2] = "precond";
752 les_parametres_solveur[3] = "precond_nul"; // To accept the TRUST syntax
753 les_parametres_solveur[4] = "nb_it_max";
754 les_parametres_solveur[5] = "save_matrix_petsc_format";
755 les_parametres_solveur[6] = "factored_matrix"; // Experimental
756 les_parametres_solveur[7] = "read_matrix";
757 les_parametres_solveur[8] = "controle_residu";
758 les_parametres_solveur[9] = "cli";
759 les_parametres_solveur[10] = "ordering";
760 les_parametres_solveur[11] = "petsc_decide"; // Experimental
761 les_parametres_solveur[12] = "aij";
762 les_parametres_solveur[13] = "nb_cpus";
763 les_parametres_solveur[14] = "divtol";
764 les_parametres_solveur[15] = "save_matrice|save_matrix";
765 les_parametres_solveur[16] = "quiet";
766 les_parametres_solveur[17] = "restart";
767 les_parametres_solveur[18] = "cli_verbose";
768 les_parametres_solveur[19] = "dropping_parameter";
769 les_parametres_solveur[20] = "rtol"; // Seuil relatif
770 les_parametres_solveur[21] = "atol"; // Seuil absolu <=> seuil
771 les_parametres_solveur[22] = "ignore_new_nonzero";
772 les_parametres_solveur[23] = "rebuild_matrix";
773 les_parametres_solveur[24] = "allow_realloc";
774 les_parametres_solveur[25] = "clean_matrix";
775 les_parametres_solveur[26] = "save_matrix_mtx_format";
776 les_parametres_solveur[27] = "reuse_preconditioner_nb_it_max";
777 les_parametres_solveur[28] = "reduce_ram";
778 les_parametres_solveur[29] = "reorder_matrix";
779 les_parametres_solveur[30] = "pcshell"; // user defined preconditionner
780 }
781 option_double omega("omega",amgx_ ? 0.9 : 1.5);
782 option_int level("level",1);
783 option_double epsilon("epsilon",0.1);
784 option_string ordering("ordering",(Nom)"");
785 controle_residu_=0;
786
787 is >> motlu;
788 while (motlu!=accolade_fermee)
789 {
790 switch(les_parametres_solveur.search(motlu))
791 {
792 case 16:
793 {
794 fixer_limpr(-1);
795 break;
796 }
797 case 0:
798 {
799 fixer_limpr(1);
800 // Si MUMPS on ajoute des impressions sur la decomposition
801 if (solveur_direct_==mumps)
802 add_option("mat_mumps_icntl_4","3");
803 else if (solveur_direct_==superlu_dist)
804 add_option("mat_superlu_dist_printstat","");
805 else if (solveur_direct_==petsc)
806 {}
807 else if (solveur_direct_==umfpack)
808 add_option("mat_umfpack_prl","2");
809 else if (solveur_direct_==pastix)
810 add_option("mat_pastix_verbose","2");
811 else if (solveur_direct_==cholmod)
812 add_option("mat_cholmod_print","3");
813 else if (solveur_direct_==strumpack)
814 add_option("mat_strumpack_verbose", "1");
815 else if (solveur_direct_)
816 {
817 Cerr << "impr not coded yet for this direct solver." << finl;
819 }
820 break;
821 }
822 case 1:
823 case 21:
824 {
825 if (solveur_direct_)
826 {
827 Cerr << "Definition of " << les_parametres_solveur[les_parametres_solveur.search(motlu)] << " is useless for a direct method." << finl;
828 Cerr << "Suppress the keyword." << finl;
829 exit();
830 }
831 is >> seuil_;
832 convergence_with_seuil=1;
833 add_amgx_option("s:convergence","ABSOLUTE");
834 add_amgx_option("s:tolerance",Nom(seuil_,"%e"));
835 break;
836 }
837 case 2:
838 {
839 is >> pc;
840 is >> motlu;
841 if (motlu != accolade_ouverte)
842 {
843 Cerr << "Error while reading the parameters of the PETSC preconditioner: precond " << pc << " { ... }" << finl;
844 Cerr << "We expected " << accolade_ouverte << " instead of " << motlu << finl;
845 exit();
846 }
847 is >> motlu;
848 while (motlu!=accolade_fermee)
849 {
850 Motcles les_parametres_precond(4);
851 {
852 les_parametres_precond[0] = omega.name;
853 les_parametres_precond[1] = level.name;
854 les_parametres_precond[2] = epsilon.name;
855 les_parametres_precond[3] = ordering.name;
856 }
857 double tmp_int;
858 double tmp_double;
859 Nom tmp_string;
860 switch(les_parametres_precond.search(motlu))
861 {
862 case 0:
863 {
864 is >> tmp_double;
865 omega.value()=tmp_double;
866 omega.defined=1;
867 break;
868 }
869 case 1:
870 {
871 is >> tmp_int ;
872 level.value()=(int)tmp_int;
873 level.defined=1;
874 add_amgx_option("p:ilu_sparsity_level",(Nom)level.value());
875 // Coloring level: 1 par defaut Doit valoir ilu_sparsity_level+1 pour MULTICOLOT_INU (voir AmgX reference guide)
876 add_amgx_option("p:coloring_level",(Nom)Nom(level.value()+1));
877 break;
878 }
879 case 2:
880 {
881 is >> tmp_double;
882 epsilon.value()=tmp_double;
883 epsilon.defined=1;
884 break;
885 }
886 case 3:
887 {
888 is >> tmp_string;
889 ordering.value()=tmp_string;
890 ordering.defined=1;
891 break;
892 }
893 default:
894 {
895 if (amgx_)
896 {
897 Cerr << "Reading option: " << motlu << finl;
898 add_amgx_option(motlu);
899 break;
900 }
901 else
902 {
903 Cerr << motlu
904 << " : unrecognized option among all of those possible on Petsc preconditioner:"
905 << finl;
906 Cerr << les_parametres_precond << finl;
908 }
909 }
910 }
911 is >> motlu;
912 }
913 break;
914 }
915 case 30:
916 {
917 pc="pcshell";
918 OWN_PTR(PCShell_base)& pcs = pc_user_.pc_shell;
919 is >> motlu;
920 pcs.typer(motlu);
921 is >> pcs.valeur();
922 break;
923 }
924 case 3:
925 {
926 pc="null";
927 break;
928 }
929 case 4:
930 {
931 is >> nb_it_max_;
932 convergence_with_nb_it_max_=1;
933 add_amgx_option("s:max_iters",Nom(nb_it_max_));
934 break;
935 }
936 case 15:
937 {
939 break;
940 }
941 case 5:
942 {
943 // on sauvegarde au format petsc
945 break;
946 }
947 case 26:
948 {
949 // on sauvegarde au format matrix market
951 break;
952 }
953 case 6:
954 {
956 {
957 Cerr << "factored_matrix option is not available for parallel calculation." << finl;
958 exit();
959 }
960 if (solveur_direct_!=petsc)
961 {
962 // Switch to PETSc Cholesky cause MUMPS or SUPERLU don't give access to LU ?
963 Cerr << "Only cholesky_lapack keyword may be used with the option factored_matrix." << finl;
964 exit();
965 }
966 is >> factored_matrix_;
967 break;
968 }
969 case 7:
970 {
971 set_read_matrix(true);
972 break;
973 }
974 case 8:
975 {
976 is >> controle_residu_;
977 break;
978 }
979 case 9:
980 {
981 is >> motlu; // On lit l'accolade
982 if (motlu != accolade_ouverte)
983 {
984 Cerr << "We expected " << accolade_ouverte << " instead of " << motlu << finl;
985 exit();
986 }
987 Cerr << "Reading of the Petsc commands:" << finl;
988 Nom valeur;
989 is >> motlu;
990 while (motlu!=accolade_fermee)
991 {
992 is >> valeur;
993 // "-option val" ou "-option" ?
994 // adding a test to support negative value
995 bool negative_value = is_number(valeur.getSuffix("-").getString());
996 if ((valeur.debute_par("-") && !negative_value) || valeur==accolade_fermee)
997 {
998 add_option(motlu.suffix("-"), "");
999 motlu = valeur;
1000 }
1001 else
1002 {
1003 add_option(motlu.suffix("-"), valeur);
1004 is >> motlu;
1005 }
1006 }
1007 if (limpr()>-1)
1008 fixer_limpr(1); // On imprime le residu si CLI
1009 // Pour faciliter le debugage:
1010 if (rang == 18) // Verbose
1011 {
1012
1013 add_option("ksp_view", "");
1014 add_option("options_view", "");
1015 add_option("options_left", "");
1016 }
1017 break;
1018 }
1019 case 10:
1020 {
1021 is >> motlu;
1022 // Si pas MUMPS on previent
1023 if (solveur_direct_!=mumps)
1024 {
1025 Cerr << "Ordering keyword for a solver is limited to Cholesky only." << finl;
1026 Process::exit();
1027 }
1028 Motcles mumps_ordering(6);
1029 {
1030 // NB: Il y'a d'autres ordering (voir doc MUMPS)
1031 mumps_ordering[0] = "amd";
1032 mumps_ordering[1] = "pt-scotch";
1033 mumps_ordering[2] = "parmetis";
1034 mumps_ordering[3] = "scotch";
1035 mumps_ordering[4] = "pord";
1036 mumps_ordering[5] = "metis";
1037 }
1038 int rang_mumps=mumps_ordering.search(motlu);
1039 // MUMPS fait un choix automatique par defaut (selon type et taille de la matrice, et nombre de processeurs) mais a savoir que:
1040 // Sur le cas Cx et PAR_Cx 4 cores, Scotch en sequentiel et PT-Scotch en parallele sont les meilleurs choix
1041 // Sur les gros cas, parfois Metis plus rapide pour A=LU et Scotch pour x=A-1B...
1042 if (rang_mumps==-1)
1043 {
1044 Cerr << motlu << " : unrecognized ordering from those available for the MUMPS solver Cholesky:" << finl;
1045 Cerr << mumps_ordering << finl;
1046 Process::exit();
1047 }
1048 else if (rang_mumps==1 || rang_mumps==2)
1049 {
1051 {
1052 Cerr << "You can't use the parallel ordering " << motlu << " during a sequential calculation." << finl;
1053 Process::exit();
1054 }
1055 add_option("mat_mumps_icntl_28","2"); // Parallel analysis
1056 add_option("mat_mumps_icntl_29",(Nom)rang_mumps); // Parallel ordering
1057 }
1058 else
1059 {
1060 add_option("mat_mumps_icntl_28","1"); // Sequential analysis
1061 add_option("mat_mumps_icntl_7",(Nom)rang_mumps); // Sequential ordering
1062 }
1063 break;
1064 }
1065 case 11:
1066 {
1067 is >> petsc_decide_;
1068 different_partition_ = petsc_decide_; // If Petsc decides the matrix partition, the partition is often different than the TRUST partition
1069 break;
1070 }
1071 case 12:
1072 {
1073 mataij_=1;
1074 break;
1075 }
1076 case 13:
1077 {
1078 is >> motlu;
1079 is >> petsc_nb_cpus_;
1080 different_partition_ = 1; // If user decides a different number of CPUs to solve PETSc matrix, the matrix partition will be different than the TRUST partition
1081 if (motlu=="first")
1082 petsc_cpus_selection_=1;
1083 else if (motlu=="every")
1084 petsc_cpus_selection_=2;
1085 else
1086 {
1087 Cerr << "We should read the option first or every after the keyword nb_cpus." << finl;
1088 Cerr << "Or we read: " << motlu << finl;
1089 exit();
1090 }
1091 if (petsc_nb_cpus_<1 || petsc_nb_cpus_>Process::nproc())
1092 {
1093 Cerr << "Incorrect number of CPUs selected for solving the PETSc matrix: " << petsc_nb_cpus_ << finl;
1094 exit();
1095 }
1096 break;
1097 }
1098 case 14:
1099 {
1100 is >> divtol_; // See http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/KSP/KSPSetTolerances.html
1101 break;
1102 }
1103 case 17:
1104 {
1105 KSPType type_ksp_method;
1106 KSPGetType(SolveurPetsc_, &type_ksp_method);
1107 if ((Nom)type_ksp_method != Nom("gmres") && ((Nom)type_ksp_method != Nom("fgmres")))
1108 {
1109 Cerr << "restart option is available only with [f]gmres methods" << finl;
1110 exit();
1111 }
1112 int restart_gmres;
1113 is >> restart_gmres;
1114 KSPGMRESSetRestart(SolveurPetsc_,restart_gmres);
1115 break;
1116 }
1117 case 19:
1118 {
1119 // Si pas MUMPS on previent
1120 if (solveur_direct_!=mumps)
1121 {
1122 Cerr << les_parametres_solveur[rang] << " keyword for a solver is limited to " << les_solveurs[14] << " only." << finl;
1123 Process::exit();
1124 }
1125 double dropping_parameter;
1126 is >> dropping_parameter;
1127 add_option("mat_mumps_cntl_7",dropping_parameter); // Dropping parameter
1128 break;
1129 }
1130 case 20:
1131 {
1132 if (solveur_direct_)
1133 {
1134 Cerr << "Definition of " << les_parametres_solveur[les_parametres_solveur.search(motlu)] << " is useless for a direct method." << finl;
1135 Cerr << "Suppress the keyword." << finl;
1136 exit();
1137 }
1138 is >> seuil_relatif_;
1139 convergence_with_seuil=1;
1140 add_amgx_option("s:convergence","RELATIVE_INI_CORE");
1141 add_amgx_option("s:tolerance",Nom(seuil_relatif_,"%e"));
1142 break;
1143 }
1144 case 22:
1145 int flag;
1146 is >> flag;
1147 ignore_new_nonzero_ = (bool)flag;
1148 break;
1149 case 23:
1150 is >> flag;
1151 rebuild_matrix_ = (bool)flag;
1152 break;
1153 case 24:
1154 is >> flag;
1155 allow_realloc_ = (bool)flag;
1156 break;
1157 case 25:
1158 is >> flag;
1159 mat_ignore_zero_entries_ = (bool)flag;
1160 break;
1161 case 27:
1164 break;
1165 case 28:
1166 reduce_ram_ = true;
1167 break;
1168 case 29:
1169 is >> flag;
1170 reorder_matrix_ = (bool)flag;
1171 break;
1172 default:
1173 {
1174 if (amgx_)
1175 {
1176 Cerr << "Reading option: " << motlu << finl;
1177 add_amgx_option(motlu);
1178 }
1179 else
1180 {
1181 Cerr << motlu << " : unrecognized option from those available in the Petsc solver:" << finl;
1182 Cerr << les_parametres_solveur << finl;
1183 Process::exit();
1184 }
1185 }
1186 }
1187 is >> motlu;
1188 }
1189 // Some checks
1190 if (petsc_decide_ && petsc_cpus_selection_)
1191 {
1192 Cerr << "You can't use petsc_decide and nb_cpus option together." << finl;
1193 exit();
1194 }
1195
1196 int pc_supported_on_gpu_by_petsc=0;
1197 int pc_supported_on_gpu_by_amgx=0;
1198 Motcles les_precond(18);
1199 {
1200 les_precond[0] = "NULL"; // Pas de preconditionnement
1201 les_precond[1] = "ILU"; // Incomplete LU
1202 les_precond[2] = "SSOR"; // Symetric Successive Over Relaxation
1203 les_precond[3] = "EISENSTAT"; // Symetric Successive Over Relaxation avec Eiseinstat trick
1204 les_precond[4] = "SPAI"; // Sparse Approximate Inverse
1205 les_precond[5] = "PILUT"; // Dual-threshold incomplete LU factorisation
1206 les_precond[6] = "DIAG|JACOBI"; // Diagonal (Jacobi) precondtioner
1207 les_precond[7] = "BOOMERAMG"; // Multigrid preconditioner
1208 les_precond[8] = "BLOCK_JACOBI_ICC"; // Block Jacobi ICC preconditioner (code dans PETSc, optimise)
1209 les_precond[9] = "BLOCK_JACOBI_ILU"; // Block Jacobi ILU preconditioner (code dans PETSc, optimise)
1210 les_precond[10] = "C-AMG"; // Classical AMG
1211 les_precond[11] = "SA-AMG"; // Smooth Aggregated AMG
1212 les_precond[12] = "GS"; // Gauss-Seidel
1213 les_precond[13] = "PCSHELL"; // user defined preconditionner
1214 les_precond[14] = "LU|MUMPS"; // MUMPS LU
1215 les_precond[15] = "UA-AMG"; // Unsmoothed Aggregated AMG
1216 les_precond[16] = "ILU_MUMPS"; // Incomplete ILU with a Block Low Ranking from MUMPS
1217 les_precond[17] = "ILU_STRUMPACK"; // Incomplete ILU with a Block Low Ranking from STRUMPACK
1218 }
1219
1220 if (pc!="")
1221 {
1222 // On empeche le choix d'un preconditionneur avec une methode directe
1223 // puisque celle ci EST le preconditionneur KSPREONLY
1224 if (solveur_direct_)
1225 {
1226 Cerr << "Using precond keyword with a direct method like Cholesky is useless" << finl;
1227 Cerr << "because for PETSc the LU factorization is used as a preconditioner." << finl;
1228 exit();
1229 }
1230 // Option du preconditionneur
1231 rang = les_precond.search(pc);
1232 switch(rang)
1233 {
1234 case 0:
1235 {
1236 PCSetType(PreconditionneurPetsc_, PCNONE);
1237 pc_supported_on_gpu_by_petsc=1;
1238 pc_supported_on_gpu_by_amgx=1;
1239 add_amgx_option("s:preconditioner(p)","NOSOLVER");
1240 check_not_defined(omega);
1241 check_not_defined(level);
1242 check_not_defined(epsilon);
1243 check_not_defined(ordering);
1244 break;
1245 }
1246 case 1:
1247 {
1248 //Cout << "See http://www.ncsa.uiuc.edu/UserInfo/Resources/Software/Math/HYPRE/docs-1.6.0/HYPRE_usr_manual/node33.html" << finl;
1249 //Cout << "to have some advices on the incomplete LU factorisation level: ILU(level)" << finl;
1250 // On n'attaque pas le ILU de Petsc qui n'est pas parallele
1251 // On prend celui de Hypre (Euclid=PILU(k)) en passant par les commandes en ligne
1252 // car peu de parametres peuvent etre fixes sinon
1253 //PCSetType(PreconditionneurPetsc_, PCHYPRE);
1254 //PCHYPRESetType(PreconditionneurPetsc_, "euclid");
1255 //add_option("pc_hypre_euclid_levels",(Nom)level.value());
1256 //check_not_defined(omega);
1257 //check_not_defined(epsilon);
1258 //check_not_defined(ordering);
1259 //
1260 // CHANGES in the PETSc 3.6 version: Removed -pc_hypre_type euclid due to bit-rot
1261 pc_supported_on_gpu_by_amgx=1;
1262 if (amgx_)
1263 add_amgx_option("s:preconditioner(p)","MULTICOLOR_DILU");
1264 else if (gpu_)
1265 {
1266 add_option("pc_type","ilu");
1267 add_option("pc_factor_mat_solver_type","cusparse");
1268 add_option("pc_factor_levels",(Nom)level.value());
1269 }
1270 else
1271 {
1272 Cerr << "Error: CHANGES in the PETSc 3.6 version: Removed -pc_hypre_type euclid due to bit-rot."
1273 << finl;
1274 Cerr << "So the ILU { level k } preconditioner no longer available. " << finl;
1275 Cerr << "Change your data file." << finl;
1276 Process::exit();
1277 }
1278 break;
1279 }
1280 case 2:
1281 {
1282 PCSetType(PreconditionneurPetsc_, PCSOR);
1283 if (amgx_) Process::exit("SSOR is not available on GPU, try GS (Gauss Seidel)");
1284 if (omega.value()>=1. && omega.value()<=2.)
1285 {
1286 PCSORSetOmega(PreconditionneurPetsc_, omega.value());
1287 }
1288 else
1289 {
1290 Cerr << "omega value for SSOR should be between 1 and 2" << finl;
1291 exit();
1292 }
1293 pc_supported_on_gpu_by_amgx=0;
1294 check_not_defined(level);
1295 check_not_defined(epsilon);
1296 check_not_defined(ordering);
1297 break;
1298 }
1299 case 3:
1300 {
1301 PCSetType(PreconditionneurPetsc_, PCEISENSTAT);
1302 if (omega.value()>=1. && omega.value()<=2.)
1303 {
1304 PCEisenstatSetOmega(PreconditionneurPetsc_, omega.value());
1305 }
1306 else
1307 {
1308 Cerr << "omega value for EISENSTAT should be between 1 and 2" << finl;
1309 exit();
1310 }
1311 check_not_defined(level);
1312 check_not_defined(epsilon);
1313 check_not_defined(ordering);
1314 break;
1315 }
1316 case 4:
1317 {
1318 PCSetType(PreconditionneurPetsc_, PCHYPRE);
1319 PCHYPRESetType(PreconditionneurPetsc_, "parasails");
1320 add_option("pc_hypre_parasails_nlevels",(Nom)level.value()); // Higher values of level [>=0] leads to more accurate, but more expensive preconditioners (default 1)
1321 add_option("pc_hypre_parasails_thresh",(Nom)epsilon.value()); // Lower values of eps [0-1] leads to more accurate, but more expensive preconditioners (default 0.1)
1322 //add_option("pc_hypre_parasails_sym","SPD"); // Matrice symetrique definie positive. PL: comment cela a pu marcher avant ? La matrice n'est pas toujours symetrique.
1323 check_not_defined(omega);
1324 check_not_defined(ordering);
1325 KSPType type_ksp;
1326 KSPGetType(SolveurPetsc_, &type_ksp);
1327 Process::exit("SPAI preconditioner is not supported anymore.");
1328 break;
1329 }
1330 case 5:
1331 {
1332 PCSetType(PreconditionneurPetsc_, PCHYPRE);
1333 PCHYPRESetType(PreconditionneurPetsc_, "pilut");
1334 add_option("pc_hypre_pilut_factorrowsize",(Nom)level.value()); // Maximum nonzeros retained in each row of L and U (default 20)
1335 add_option("pc_hypre_pilut_tol",(Nom)epsilon.value()); // Values below the value are dropped in L and U (default 1.e-7)
1336 check_not_defined(omega);
1337 check_not_defined(ordering);
1338 //
1339 // ERROR VALGRIND
1340 //Cerr << "Error VALGRIND with PETSc Dual Threashold Incomplete LU factorization." << finl;
1341 //Cerr << "So the PILUT { level k epsilon thresh } preconditioner no longer available. " << finl;
1342 //Cerr << "Change your data file." << finl;
1343 //Process::exit();
1344 break;
1345 }
1346 case 6:
1347 {
1348 PCSetType(PreconditionneurPetsc_, PCJACOBI);
1349 pc_supported_on_gpu_by_petsc=1;
1350 pc_supported_on_gpu_by_amgx=1;
1351 if (amgx_)
1352 {
1353 add_amgx_option("s:preconditioner(p)","BLOCK_JACOBI");
1354 Process::exit("Diagonal preconditioner on GPU with AmgX is slow to converge. Try GS (Gauss-Seidel) preconditioner.");
1355 }
1356 check_not_defined(omega);
1357 check_not_defined(level);
1358 check_not_defined(epsilon);
1359 check_not_defined(ordering);
1360 break;
1361 }
1362 case 9: // ilu
1363 case 8: // icc
1364 {
1365 if (rang==9) preconditionnement_non_symetrique_ = 1;
1366 pc_supported_on_gpu_by_amgx=1;
1367 pc_supported_on_gpu_by_petsc=1;
1368 if (amgx_)
1369 add_amgx_option("s:preconditioner(p)","MULTICOLOR_DILU"); // MULTICOLOR_ILU plante...
1370 else
1371 {
1372 add_option("sub_pc_type",rang==8 ? "icc" : "ilu");
1373 add_option("sub_pc_factor_levels",(Nom)level.value());
1374 // On fixe le precondtionnement non symetrique pour appliquer eventuellement un ordering autre que celui par defaut (natural)
1375 // Un ordering rcm peut ameliorer par exemple la convergence
1376 // Voir le remplissage de la matrice avec -mat_view_draw -draw_pause -1
1377 if (ordering.value()!="")
1378 {
1379 add_option("sub_pc_factor_mat_ordering_type",ordering.value());
1380 // Le preconditionnement natural (defaut) ne necessite pas une matrice de preconditionnement symetrique, les autres si:
1381 preconditionnement_non_symetrique_=1;
1382 }
1383
1384 }
1385 PCSetType(PreconditionneurPetsc_, PCBJACOBI);
1386 check_not_defined(omega);
1387 check_not_defined(epsilon);
1388 break;
1389 }
1390 case 14:
1391 {
1392 // LU|MUMPS { ordering XXX }
1393 //preconditionnement_non_symetrique_ = 1;
1394 add_option("sub_pc_type", "lu");
1395 add_option("sub_pc_factor_mat_solver_type","mumps");
1396 if(limpr()) add_option("mat_mumps_icntl_4","3");
1397 if (ordering.value()!="")
1398 add_option("sub_pc_factor_mat_ordering_type",ordering.value());
1399 PCSetType(PreconditionneurPetsc_, PCBJACOBI);
1400 check_not_defined(omega);
1401 check_not_defined(epsilon);
1402 break;
1403 }
1404 case 7:
1405 {
1406#ifdef HYPRE_USING_GPU
1407 // GPU build of Hypre provides only gpu preconditionner now. No runtime switch to CPU or GPU versions yet...
1408 gpu_ = 1;
1409#endif
1410 PCSetType(PreconditionneurPetsc_, PCHYPRE);
1411 PCHYPRESetType(PreconditionneurPetsc_, "boomeramg"); // Classical C-AMG
1412 pc_supported_on_gpu_by_petsc=1;
1413 // Changement pc_hypre_boomeramg_relax_type_all pour PETSc 3.10, la matrice de
1414 // preconditionnement etant seqaij, symetric-SOR/jacobi (defaut) provoque KSP_DIVERGED_INDEFINITE_PC
1415 // Voir: https://lists.mcs.anl.gov/mailman/htdig/petsc-users/2012-December/015922.html
1416 if (!gpu_) add_option("pc_hypre_boomeramg_relax_type_all", "Jacobi");
1417 // Voir https://mooseframework.inl.gov/releases/moose/2021-05-18/application_development/hypre.html
1418 //if (dimension==3) Cerr << "Warning, on massive parallel calculation for best performance, consider playing with -pc_hypre_boomeramg_strong_threshold 0.7 or 0.8 or 0.9" << finl;
1419 if (dimension==3) add_option("pc_hypre_boomeramg_strong_threshold", "0.7");
1420 if (limpr()) add_option("pc_hypre_boomeramg_print_statistics","1");
1421 check_not_defined(omega);
1422 check_not_defined(level);
1423 check_not_defined(epsilon);
1424 check_not_defined(ordering);
1425 // Page 16: https://prace-ri.eu/wp-content/uploads/WP294-PETSc4FOAM-A-Library-to-plug-in-PETSc-into-the-OpenFOAM-Framework.pdf
1426 /*
1427 add_option("pc_hypre_boomeramg_max_iter","1");
1428 add_option("pc_hypre_boomeramg_strong_threshold","0.7");
1429 add_option("pc_hypre_boomeramg_grid_sweeps_up","1");
1430 add_option("pc_hypre_boomeramg_grid_sweeps_down","1");
1431 add_option("pc_hypre_boomeramg_agg_nl","2");
1432 add_option("pc_hypre_boomeramg_agg_num_paths","1");
1433 add_option("pc_hypre_boomeramg_max_levels","25");
1434 add_option("pc_hypre_boomeramg_coarsen_type","HMIS");
1435 add_option("pc_hypre_boomeramg_interp_type","ext+i");
1436 add_option("pc_hypre_boomeramg_P_max","2");
1437 add_option("pc_hypre_boomeramg_truncfactor","0.2");*/
1438 break;
1439 }
1440 case 10: // Classical AMG
1441 case 11: // Smooth Aggregated AMG
1442 case 15: // Unsmoothed Aggregated AMG
1443 {
1444 pc_supported_on_gpu_by_amgx=1;
1445 pc_supported_on_gpu_by_petsc=1;
1446 preconditionnement_non_symetrique_ = 1;
1447 if (amgx_)
1448 {
1449 add_amgx_option("s:preconditioner(p)","AMG");
1450 add_amgx_option("s:use_scalar_norm","1");
1451 add_amgx_option("p:error_scaling","0");
1452 add_amgx_option("p:print_grid_stats","1");
1453 add_amgx_option("p:max_iters","1");
1454 add_amgx_option("p:cycle","V");
1455 add_amgx_option("p:min_coarse_rows","2");
1456 add_amgx_option("p:max_levels","100");
1457 add_amgx_option("p:smoother(smoother)","BLOCK_JACOBI");
1458 add_amgx_option("p:presweeps","1");
1459 add_amgx_option("p:postsweeps","1");
1460 add_amgx_option("p:coarsest_sweeps","1");
1461 add_amgx_option("p:coarse_solver","DENSE_LU_SOLVER");
1462 add_amgx_option("p:dense_lu_num_rows","2");
1463 if (rang==10) // C-AMG
1464 {
1465 add_amgx_option("p:algorithm","CLASSICAL","Best choice if you have enough memory and fine tune carefully p:selector and p:strength parameters.");
1466 //add_amgx_option("p:selector","PMIS","PMIS selector seems to take less memory, and much faster setup than HMIS."); // Ne permet pas autre chose que p:strength_threshold=0.25 sur le maillage 220e6 de DomainFlowLES !
1467 add_amgx_option("p:selector","HMIS","PMIS may offer faster setup but weaker (some issues for a high number of GPUs) than serial implementation!");
1468 add_amgx_option("p:interpolator","D2","Also available D1. D2 is considerably more expensive during both setup and solve phases, but convergence on difficult problems");
1469 Nom strength("AHAT");
1470 add_amgx_option("p:strength",strength,"Choose the strength of connection metric to use. Allowable options are AHAT and ALL");
1471 //if (strength=="AHAT") add_amgx_option("p:strength_threshold","0.25","All edges with strength below this threshold will be discarded. Higher: faster setup, lower memory but lower convergence");
1472 // Change 0.25 to 0.8 as Boomeramg: in all cases, less memory and faster times globally
1473 if (strength=="AHAT") add_amgx_option("p:strength_threshold","0.8","All edges with strength below this threshold will be discarded. Higher: faster setup, lower memory but lower convergence. You may try 0.25 for better convergence if enough memory.");
1474 }
1475 else if (rang==11) // SA-AMG
1476 {
1477 add_amgx_option("p:algorithm","AGGREGATION");
1478 add_amgx_option("p:selector","SIZE_2");
1479 add_amgx_option("p:max_matching_iterations","100000");
1480 add_amgx_option("p:max_unassigned_percentage","0.0");
1481 }
1482 else
1483 {
1484 Process::exit("Not supported for AmgX");
1485 }
1486 add_amgx_option("smoother:relaxation_factor","0.8");
1487 }
1488 else
1489 {
1490 // ToDo : trouver des parametres pour PETSc afin d'avoir une comparaison possible CPU vs GPU (meme its par exemple):
1491 add_option("pc_type","gamg");
1492 // Ajout pour retrouver la convergence de PETSc 3.14:
1493 //add_option("mg_levels_pc_type","sor");
1494 //add_option("pc_gamg_threshold","0.");
1495 if (rang==10) // C-AMG
1496 {
1497 // Convergence fortement degradee 3.14 -> 3.20 malgre les options precedentes...
1498 add_option("pc_gamg_type","classical");
1499 }
1500 else if (rang==11) // SA-AMG
1501 {
1502 add_option("pc_gamg_type","agg");
1503 add_option("pc_gamg_agg_nsmooths","1");
1504 // Non performances catastrophiques:
1505 //add_option("pc_gamg_threshold","0.7"); // Fix in parallel: Computed maximum singular value as zero
1506 //add_option("pc_gamg_aggressive_square_graph","1");
1507 }
1508 else if (rang==15) // UA-AMG
1509 {
1510 add_option("pc_gamg_type","agg");
1511 add_option("pc_gamg_agg_nsmooths","0");
1512 //add_option("pc_gamg_aggressive_square_graph","1");
1513 }
1514 else
1515 {
1516 Process::exit("Usupported precond for PETSc.");
1517 }
1518 }
1519 break;
1520 }
1521 case 12:
1522 {
1523 if (!amgx_) Process::exit("GS (Gauss-Seidel) not available yet.");
1524 if (omega.value()<0 || omega.value()>2)
1525 {
1526 Cerr << "Relaxation value omega for GS should be between 0 and 2" << finl;
1527 exit();
1528 }
1529 pc_supported_on_gpu_by_amgx=1;
1530 if (amgx_)
1531 {
1532 add_amgx_option("s:preconditioner(p)","GS"); // Non documente...
1533 //add_amgx_option("s:preconditioner(p)","MULTICOLOR_GS"); // MULTICOLOR_GS lent dans AmgX ?
1534 add_amgx_option("p:relaxation_factor", Nom(omega.value())); // Defaut 0.9
1535 if (matrice_symetrique_) add_amgx_option("p:symmetric_GS","1");
1536 }
1537 check_not_defined(level);
1538 check_not_defined(epsilon);
1539 check_not_defined(ordering);
1540 break;
1541 }
1542 case 13:
1543 {
1544
1545 PCSetType(PreconditionneurPetsc_, PCSHELL);
1546 //PetscNew(&pc_user_);
1547
1548 auto PCShellUserApply = [](PC pc_apply, Vec x, Vec y)
1549 {
1550 PCstruct *pcstruct;
1551
1552 PCShellGetContext(pc_apply,(void**) &pcstruct);
1553 OWN_PTR(PCShell_base)& pcs=pcstruct->pc_shell;
1554 return pcs->computePC_(pc_apply,x,y);
1555 };
1556
1557 auto PCShellUserPreSolve = [](PC pc_apply, KSP ksp_apply, Vec x, Vec y)
1558 {
1559 PCstruct *pcstruct;
1560
1561 PCShellGetContext(pc_apply,(void**) &pcstruct);
1562 OWN_PTR(PCShell_base)& pcs=pcstruct->pc_shell;
1563 return pcs->preSolve_(pc_apply, ksp_apply, x, y);
1564 };
1565
1566 auto PCShellUserPostSolve = [](PC pc_apply, KSP ksp_apply, Vec x, Vec y)
1567 {
1568 PCstruct *pcstruct;
1569
1570 PCShellGetContext(pc_apply,(void**) &pcstruct);
1571 OWN_PTR(PCShell_base)& pcs=pcstruct->pc_shell;
1572 return pcs->postSolve_(pc_apply, ksp_apply, x, y);
1573 };
1574
1575 auto PCShellUserDestroy = [](PC pc_apply)
1576 {
1577 PCstruct *pcstruct;
1578
1579 PCShellGetContext(pc_apply,(void**) &pcstruct);
1580 OWN_PTR(PCShell_base)& pcs=pcstruct->pc_shell;
1581 return pcs->destroyPC_(pc_apply);
1582 };
1583
1584 PCShellSetApply(PreconditionneurPetsc_, PCShellUserApply);
1585 PCShellSetContext(PreconditionneurPetsc_, &pc_user_);
1586 PCShellSetDestroy(PreconditionneurPetsc_, PCShellUserDestroy);
1587 PCShellSetPreSolve(PreconditionneurPetsc_, PCShellUserPreSolve); //to apply action on operators before the kspsolve
1588 PCShellSetPostSolve(PreconditionneurPetsc_, PCShellUserPostSolve); //to apply action on operators after the kspsolve
1589 break;
1590 }
1591 case 16:
1592 {
1593 // ILU_MUMPS:
1594 add_option("pc_type", "cholesky"); // Attention, si on met LU en sequentiel il n'utilise pas MUMPS...
1595 add_option("pc_factor_mat_solver_type", "mumps");
1596 add_option("mat_mumps_icntl_35", "1"); // BLR enabled
1597 //add_option("mat_mumps_icntl_36", "??"); controls the choice of BLR factorization variant
1598 //add_option("mat_mumps_icntl_38", "??"); sets the estimated compression rate of LU factors with BLR
1599 add_option("mat_mumps_cntl_7", (double)epsilon.value()); // Droping parameter
1600 //if (limpr()) add_option("mat_mumps_icntl_4","3"); // verobose
1601 check_not_defined(level);
1602 break;
1603 }
1604 case 17:
1605 {
1606 // ILU_STRUMPACK:
1607 add_option("pc_type", "ilu");
1608 add_option("mat_type", "aij");
1609 add_option("pc_factor_mat_solver_type", "strumpack");
1610 add_option("mat_strumpack_compression","BLR"); // Type of rank-structured compression in sparse LU factors (choose one of) NONE HSS BLR HODLR BLR_HODLR ZFP_BLR_HODLR LOSSLESS LOSSY
1611 add_option("mat_strumpack_compression_rel_tol", (double)epsilon.value()); // Compression parameter
1612 //add_option("mat_strumpack_compression_abs_tol", (Nom)epsilon.value()); // Compression parameter
1613 //add_option("mat_strumpack_compression_lossy_precision,"1-64"); // Precision when using lossy compression [1-64],
1614 //if (limpr()) add_option("mat_strumpack_verbose", "1"); // Provisoire
1615 check_not_defined(level);
1616 break;
1617 }
1618 default:
1619 {
1620 Cerr << pc << " : preconditioner not officially recognized by TRUST among those possible for the moment:" << finl;
1621 Cerr << les_precond << finl;
1622 Cerr << "You can try to access directly to Petsc preconditioners with the command line." << finl;
1623 Cerr << "See the reference manual of Petsc to do this." << finl;
1624 Process::exit();
1625 }
1626 }
1627 }
1628 else
1629 {
1630 if (!solveur_direct_)
1631 {
1632 Cerr << "You forgot to define a preconditioner with the keyword precond." << finl;
1633 Cerr << "If you don't want a preconditioner, add for the solver definition:" << finl;
1634 Cerr << "precond null" << finl;
1635 Process::exit();
1636 }
1637 else
1638 {
1639 // Pour un solveur direct le preconditionner EST le solveur:
1640 pc_supported_on_gpu_by_petsc = solver_supported_on_gpu_by_petsc;
1641 pc_supported_on_gpu_by_amgx = solver_supported_on_gpu_by_amgx;
1642 }
1643 }
1644 // On verifie que les preconditionneurs sont supportes sur GPU:
1645 if (gpu_ && pc_supported_on_gpu_by_petsc==0)
1646 {
1647 Cerr << les_precond[rang] << " is not supported yet by PETSc on GPU." << finl;
1648 Process::exit();
1649 }
1650 if (amgx_ && pc_supported_on_gpu_by_amgx==0)
1651 {
1652 Cerr << les_precond[rang] << " is not supported yet by AmgX on GPU." << finl;
1653 Process::exit();
1654 }
1655 }
1656
1657 // On fixe des parametres du solveur et du preconditionneur selon que l'on ait un solveur direct ou iteratif
1658 // KSPSetInitialGuessNonzero : Resout Ax=B en supposant x nul ou non
1659 // KSPSetTolerances : Pour fixer les criteres de convergence du solveur iteratif
1660 if (solveur_direct_)
1661 {
1662 KSPSetInitialGuessNonzero(SolveurPetsc_, PETSC_FALSE);
1663 PCSetType(PreconditionneurPetsc_, PCLU);
1664 }
1665 else
1666 {
1667 KSPSetInitialGuessNonzero(SolveurPetsc_, PETSC_TRUE);
1668 if (convergence_with_nb_it_max_)
1669 {
1670 if (convergence_with_seuil)
1671 {
1672 Cerr << "You can only define solver convergence either by seuil or by nb_it_max." << finl;
1673 Cerr << "So suppress seuil keyword or nb_it_max keyword." << finl;
1674 exit();
1675 }
1676 // Convergence is defined with nb_it_max, the norm is checked after nb_it_max iterations:
1677 seuil_ = DMAXFLOAT;
1678 add_option("ksp_check_norm_iteration",(Nom)(nb_it_max_-1));
1679 nb_it_max_ = NB_IT_MAX_DEFINED;
1680 }
1681 // Convergence si residu(it) < MAX (seuil_relatif_ * residu(0), seuil_);
1682 if (seuil_==0 && seuil_relatif_==_RTOL_MIN_)
1683 {
1684 seuil_=1.e-12; // Si aucun seuil defini, on prend un seuil absolu de 1.e-12 (comme avant)
1685 }
1686 if (seuil_relatif_<_RTOL_MIN_)
1687 Process::exit("Fix rtol cause it is too low !");
1688 KSPSetTolerances(SolveurPetsc_, seuil_relatif_, seuil_, (divtol_==0 ? PETSC_DEFAULT : divtol_), nb_it_max_);
1689 }
1690 // Change le calcul du test de convergence relative (||Ax-b||/||Ax(0)-b|| au lieu de ||Ax-b||/||b||)
1691 // Peu utilisee dans TRUST car on utilise la convergence sur la norme absolue
1692 // Mais cela corrige une erreur KSP_DIVERGED_DTOL quand ||Ax-b||/||b||>10000=div_tol par defaut dans PETSc (rencontree sur Etude REV_4)
1693 //add_option(sys, "ksp_converged_use_initial_residual_norm",1); // Before PETSc 3.5
1694 KSPConvergedDefaultSetUIRNorm(SolveurPetsc_); // After PETSc 3.5, a function is available
1695
1696 // Surcharge eventuelle par la ligne de commande
1697 KSPSetOptionsPrefix(SolveurPetsc_, option_prefix_);
1698 KSPSetFromOptions(SolveurPetsc_);
1699 PCSetOptionsPrefix(PreconditionneurPetsc_, option_prefix_);
1700 PCSetFromOptions(PreconditionneurPetsc_);
1701
1702 // Setting the names:
1703 KSPType type_ksp;
1704 KSPGetType(SolveurPetsc_, &type_ksp);
1705 type_ksp_=(Nom)type_ksp;
1706
1707 PCType type_pc;
1708 PCGetType(PreconditionneurPetsc_, &type_pc);
1709 if (type_pc) type_pc_=(Nom)type_pc;
1710
1711 // Pas de version CPU de Hypre si PETSc active le support GPU:
1712#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1713 if (type_pc_=="hypre") gpu_ = true;
1714#endif
1715
1716 // Creation du fichier de config .amgx (NB: les objets PETSc sont crees mais ne seront pas utilises)
1718 {
1719 SFichier s(config());
1720 // Syntax: See https://github.com/NVIDIA/AMGX/raw/master/doc/AMGX_Reference.pdf
1721 s << "# AmgX config file" << finl << "config_version=2" << finl;
1722 add_amgx_option("s:print_config", limpr() ? "1" : "0");
1723 add_amgx_option("s:print_solve_stats", limpr() ? "1" : "0");
1724 add_amgx_option("s:obtain_timings", limpr() ? "1" : "0");
1725 add_amgx_option("s:store_res_history","1");
1726 add_amgx_option("s:monitor_residual","1");
1727 add_amgx_option("s:max_iters","10000"); // 100 par defaut trop bas...
1728 //if (Process::nproc()<=4)
1729 // add_amgx_option("determinism_flag","1", "15% slower but enabled for NR tests");
1730#ifdef MPIX_CUDA_AWARE_SUPPORT
1731 if (getenv("AMGX_USE_MPI_GPU_AWARE")) add_amgx_option("communicator","MPI_DIRECT","Enable GPU direct with MPI Cuda-Aware. No gain for the moment.");
1732#endif
1733 s << amgx_options_;
1734 Cerr << "Writing the AmgX config file: " << config() << finl;
1735 }
1736#else
1737 Cerr << "Error, the code is not built with PETSc support." << finl;
1738 Cerr << "Contact TRUST support." << finl;
1739 Process::exit();
1740#endif
1741
1742}
1744{
1745 Nom str(Objet_U::nom_du_cas());
1746#ifdef PETSCKSP_H
1747 str+=option_prefix_.prefix("_");
1748#else
1749 Process::exit("ToDo fix Solv_Petsc::config(): build config filename with numero_solve in the constructor!");
1750#endif
1751 str+=amgx_ ? ".amgx" : ".petsc";
1752 return str;
1753}
1754
1757#ifdef PETSCKSP_H
1758PetscLogStage Solv_Petsc::Create_Stage_=1;
1759PetscLogStage Solv_Petsc::KSPSolve_Stage_=2;
1760
1761// Sortie Maple d'une matrice morse
1762void sortie_maple(Sortie& s, const Matrice_Morse& M)
1763{
1764 s.precision(30);
1765 s<<"B:=matrix([";
1766 int M_nb_lignes=M.nb_lignes();
1767 int M_nb_colonnes=M.nb_colonnes();
1768 for (int i=0; i<M_nb_lignes; i++)
1769 {
1770 s<<"[";
1771 for ( int j=0 ; j<M_nb_colonnes; j++ )
1772 {
1773 s<<M(i,j);
1774 if (j!=(M_nb_colonnes-1)) s<<",";
1775 }
1776 s<<"]";
1777 if (i!=(M_nb_lignes-1)) s<<",";
1778 }
1779 s<<"]):"<<finl;
1780}
1781
1782// Save Matrix and RHS in a .petsc or .mtx (matrix market) file:
1783void Solv_Petsc::SaveObjectsToFile(const DoubleVect& secmem, DoubleVect& solution)
1784{
1785 Perf_counters::time_point start = statistics().start_clock();
1786 if (save_matrix()==2)
1787 {
1788 MatInfo Info;
1789 MatGetInfo(MatricePetsc_,MAT_GLOBAL_SUM,&Info);
1790 auto nnz = (trustIdType)Info.nz_allocated;
1791
1792 Nom filename("Matrix_");
1793 filename+=(Nom)nb_rows_tot_;
1794 filename+="_rows_";
1795 filename+=(Nom)nproc();
1796 filename+="_cpus.petsc";
1797
1798 Cerr << "Writing the global PETSc matrix (" << nb_rows_tot_<< " rows and " << nnz << " nnz) in the binary file " << filename << finl;
1799 if (Process::nproc()>32) Cerr << "It may take some minutes..." << finl;
1800 PetscViewer viewer;
1801 PetscViewerCreate(PETSC_COMM_WORLD, &viewer);
1802// HDF5 issue: the file is empty, don't know why
1803// So trying to use MPIIO to speedup the write/load...
1804//#ifdef PETSC_HAVE_HDF5
1805// PetscViewerSetType(viewer, PETSCVIEWERHDF5);
1806//#else
1807 PetscViewerSetType(viewer, PETSCVIEWERBINARY);
1808 PetscViewerBinarySetUseMPIIO(viewer, PETSC_TRUE);
1809//#endif
1810 PetscViewerFileSetMode(viewer, FILE_MODE_WRITE);
1811 PetscViewerFileSetName(viewer, filename);
1812 statistics().begin_count(STD_COUNTERS::backup_file,statistics().get_last_opened_counter_level()+1);
1813 auto bytes = 8 * nnz + 4 * nnz + 4 * nb_rows_tot_;
1814 MatView(MatricePetsc_, viewer);
1815 Cerr << "[IO] " << statistics().get_time_since_last_open(STD_COUNTERS::backup_file) << " s to write matrix file." << finl;
1816 statistics().end_count(STD_COUNTERS::backup_file, 1, static_cast<int>(bytes));
1817 // Save also the RHS if on the host:
1818 if (SecondMembrePetsc_!=nullptr)
1819 {
1820 Cerr << "Writing also the RHS in the file " << filename << finl;
1821 VecView(SecondMembrePetsc_, viewer);
1822 }
1823 else
1824 Process::exit("You can't export anymore matrix&RHS at PETSc format with AmgX solver. Switch to PETSc solver instead.");
1825 PetscViewerDestroy(&viewer);
1826
1827 // ASCII output for small matrix(debugging)
1828 if (nb_rows_tot_<20)
1829 {
1830 Cerr << "The global PETSc matrix is small so we also print it:" << finl;
1831 MatView(MatricePetsc_, PETSC_VIEWER_STDOUT_WORLD);
1832 }
1833 }
1834 else if (save_matrix()==3)
1835 {
1836 // Format matrix market ToDo : method
1837 if (Process::is_parallel()) Process::exit("Error, matrix market format is not available yet in parallel.");
1838 Nom filename(Objet_U::nom_du_cas());
1839 filename += "_matrix";
1840 filename += (Nom) instance;
1841 filename += ".mtx";
1842 SFichier mtx(filename);
1843 mtx.precision(14);
1844 mtx.setf(ios::scientific);
1845 PetscInt rows;
1846 const PetscInt *ia, *ja;
1847 PetscBool done;
1848 MatGetRowIJ(MatricePetsc_, 0, PETSC_FALSE, PETSC_FALSE, &rows, &ia, &ja, &done);
1849 if (!done) Process::exit("Error in MatGetRowIJ");
1850 PetscScalar *v;
1851 MatType mat_type;
1852 MatGetType(MatricePetsc_, &mat_type);
1853 Nom type;
1854 if (strcmp(mat_type, MATSEQAIJ) == 0)
1855 {
1856 type = "general";
1857 MatSeqAIJGetArray(MatricePetsc_, &v);
1858 }
1859 else if (strcmp(mat_type, MATSEQSBAIJ) == 0)
1860 {
1861 type = "symmetric";
1862 MatSeqSBAIJGetArray(MatricePetsc_, &v);
1863 }
1864 else Process::exit("Matrix type not supported.");
1865 mtx << "%%MatrixMarket matrix coordinate real " << type << finl;
1866 Cerr << "Matrix (" << (int)rows << " lines) written into file: " << filename << finl;
1867 mtx << "%%matrix" << finl;
1868 mtx << (int)rows << " " << (int)rows << " " << (int)ia[rows] << finl;
1869 for (int row=0; row<rows; row++)
1870 for (PetscInt j=ia[row]; j<ia[row+1]; j++)
1871 mtx << row+1 << " " << (int)ja[j]+1 << " " << v[j] << finl;
1872 // Sauve b au format matrix market
1873 filename = Objet_U::nom_du_cas();
1874 filename += "_rhs";
1875 filename += (Nom) instance;
1876 filename += ".mtx";
1877 SFichier rhs_mtx(filename);
1878 rhs_mtx.precision(14);
1879 rhs_mtx.setf(ios::scientific);
1880 rhs_mtx << "%%MatrixMarket matrix array real general" << finl;
1881 rhs_mtx << secmem.size_array() << " " << secmem.line_size() << finl;
1882 for (int row=0; row<rows; row++)
1883 rhs_mtx << secmem(row) << finl;
1884 // Provisoire: sauve un vector Petsc au format ASCII pour le RHS
1885 if (SecondMembrePetsc_!=nullptr)
1886 {
1887 PetscViewer viewer;
1888 Nom rhs_filename(Objet_U::nom_du_cas());
1889 rhs_filename += "_rhs";
1890 rhs_filename += (Nom) instance;
1891 rhs_filename += ".petsc";
1892 PetscViewerASCIIOpen(PETSC_COMM_WORLD,rhs_filename,&viewer);
1893 PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB);
1894 VecView(SecondMembrePetsc_, viewer);
1895 Cerr << "Save RHS into " << rhs_filename << finl;
1896 PetscViewerDestroy(&viewer);
1897 }
1898 }
1899 if (verbose) Cout << "[Petsc] Time to write matrix: \t" << statistics().compute_time(start) << finl;
1900}
1901
1902// Read a PETSc matrix in a file and
1903// returns the local number of rows
1904void Solv_Petsc::RestoreMatrixFromFile()
1905{
1906 Nom filename("Matrix_");
1907 filename+=(Nom)nb_rows_tot_;
1908 filename+="_rows_";
1909 filename+=(Nom)nproc();
1910 filename+="_cpus.petsc";
1911 add_option("viewer_binary_skip_info",""); // Skip reading .info file to avoid -vecload_block_size unused option
1912
1913 MatCreate(PETSC_COMM_WORLD,&MatricePetsc_);
1914 if (petsc_decide_)
1915 MatSetSizes(MatricePetsc_, PETSC_DECIDE, PETSC_DECIDE, nb_rows_tot_, nb_rows_tot_);
1916 else if (petsc_cpus_selection_)
1917 {
1918 Cerr << "Reading a PETSc matrix with a different number of CPUs is not implemented yet." << finl;
1919 Cerr << "Contact TRUST support." << finl;
1920 exit();
1921 }
1922 else
1923 MatSetSizes(MatricePetsc_, nb_rows_, nb_rows_, PETSC_DECIDE, PETSC_DECIDE);
1924
1925 Cerr << "Reading the global PETSc matrix in the binary file " << filename << finl;
1926 if (Process::nproc()>32) Cerr << "It may take some minutes..." << finl;
1927 PetscViewer viewer;
1928 PetscViewerCreate(PETSC_COMM_WORLD, &viewer);
1929//#ifdef PETSC_HAVE_HDF5
1930// PetscViewerSetType(viewer, PETSCVIEWERHDF5);
1931//#else
1932 PetscViewerSetType(viewer, PETSCVIEWERBINARY);
1933 PetscViewerBinarySetUseMPIIO(viewer, PETSC_TRUE);
1934//#endif
1935 PetscViewerFileSetMode(viewer, FILE_MODE_READ);
1936 PetscViewerFileSetName(viewer, filename);
1937 statistics().begin_count(STD_COUNTERS::backup_file,statistics().get_last_opened_counter_level()+1);
1938 MatLoad(MatricePetsc_, viewer);
1939 MatInfo Info;
1940 MatGetInfo(MatricePetsc_,MAT_GLOBAL_SUM,&Info);
1941 auto nnz = (trustIdType)Info.nz_allocated;
1942 auto bytes = 8 * nnz + 4 * nnz + 4 * nb_rows_tot_;
1943 Cerr << "[IO] " << statistics().get_time_since_last_open(STD_COUNTERS::backup_file) << " s to read matrix file." << finl;
1944 statistics().end_count(STD_COUNTERS::backup_file, 1, static_cast<int>(bytes));
1945
1946 PetscViewerDestroy(&viewer);
1948 {
1949 Cerr << "Reading a non symmetric PETSc matrix is not supported yet in TRUST." << finl;
1950 exit();
1951 }
1952 // Conversion AIJ to SBAIJ:
1953 MatSetOption(MatricePetsc_, MAT_SYMMETRIC, PETSC_TRUE);
1954#ifdef PETSC_HAVE_CUDA
1955 if (gpu_)
1956 MatConvert(MatricePetsc_, MATAIJCUSPARSE, MAT_INPLACE_MATRIX, &MatricePetsc_);
1957 else
1958#endif
1959#ifdef PETSC_HAVE_HIP
1960 if (gpu_)
1961 MatConvert(MatricePetsc_, MATAIJHIPSPARSE, MAT_INPLACE_MATRIX, &MatricePetsc_);
1962 else
1963#endif
1964 MatConvert(MatricePetsc_, MATSBAIJ, MAT_INPLACE_MATRIX, &MatricePetsc_);
1965
1966 PetscInt nb_rows_tot,nb_cols_tot;
1967 MatGetSize(MatricePetsc_,&nb_rows_tot,&nb_cols_tot);
1968 Cerr << "The matrix read has " << (int)nb_rows_tot << " rows." << finl;
1969 PetscInt nb_local_rows, nb_local_cols;
1970 MatGetLocalSize(MatricePetsc_, &nb_local_rows, &nb_local_cols);
1971 if (nb_local_rows != nb_items_to_keep_)
1972 {
1973 Cerr << "The matrix read has " << (int)nb_local_rows << " local columns whereas" << finl;
1974 Cerr << "the RHS/Solution vectors have a size of " << nb_items_to_keep_ << "." << finl;
1975 Cerr << "Check your data file or the file containing the PETSc matrix." << finl;
1976 exit();
1977 }
1978}
1979
1980// SV
1981// Since PETSc 3.10 The option ksp_view is not taken into account for (at least) instance = 2
1982// I don't understand why ??!!
1983// So I introduce this fix : if the option ksp_view is in PETSc Options then we call the KSPView function
1984bool Solv_Petsc::enable_ksp_view()
1985{
1986 Nom option="-ksp_view";
1987 Nom empty=" ";
1988 char *option_value = strdup( empty );
1989 PetscBool enable; // enable this option ?
1990 PetscOptionsGetString( PETSC_NULLPTR, PETSC_NULLPTR, option, option_value, empty.longueur( ), &enable );
1991 //Nom actual_value( option_value );
1992 free( option_value );
1993 return enable ;
1994}
1995
1996int Solv_Petsc::add_option(const Nom& astring, const double& value, int cli)
1997{
1998 char nom_value[80];
1999 snprintf(nom_value,80, "%e",value);
2000 return add_option(astring, (Nom)nom_value, cli);
2001}
2002
2003void Solv_Petsc::add_amgx_option(const Nom& key, const Nom& value, const std::string& comment)
2004{
2005 if (amgx_ && !amgx_options_.contient(key))
2006 {
2007 if (comment!="") amgx_options_+="# "+comment+":\n";
2008 amgx_options_+=key+"="+value+"\n";
2009 }
2010}
2011
2012void Solv_Petsc::add_amgx_option(const Nom& key_value)
2013{
2014 if (amgx_)
2015 {
2016 std::string key = key_value.getString().substr(0, key_value.getString().find('='));
2017 if (!amgx_options_.contient(key)) amgx_options_ += key_value + "\n";
2018 }
2019}
2020
2021bool Solv_Petsc::has_option(const Nom& option, Nom& current_value)
2022{
2023 PetscBool flg;
2024 Nom vide=" ";
2025 char* tmp=strdup(vide);
2026 PetscOptionsGetString(PETSC_NULLPTR,PETSC_NULLPTR,option,tmp,vide.longueur(),&flg);
2027 current_value = tmp;
2028 free(tmp);
2029 return current_value!=vide;
2030}
2031
2032int Solv_Petsc::add_option(const Nom& astring, const Nom& value, int cli)
2033{
2034 Nom option="-";
2035 // Ajout du prefix si l'option concerne KSP, PC, Mat ou Vec:
2036 if (astring.debute_par("ksp_") ||
2037 astring.debute_par("sub_ksp_") ||
2038 astring.debute_par("pc_") ||
2039 astring.debute_par("sub_pc_") ||
2040 astring.debute_par("mat_") ||
2041 astring.debute_par("vec_") ||
2042 astring.debute_par("mg_") ||
2043 cli)
2044 option+=option_prefix_;
2045
2046 option+=astring;
2047 // Attention il ne retourne pas de code d'erreur si l'option est mal orthographiee!!
2048 // Il ne dit pas non plus qu'elle est unused avec -options_left
2049 // Nouveau 1.6.3 pour la ligne de commande reste prioritaire, on ne change une option
2050 // que si elle n'a pas deja ete specifiee...
2051 Nom current_value;
2052 if (has_option(option, current_value))
2053 {
2054 if (limpr() >= 0) Cerr << "Option Petsc: " << option << " " << value << " not taken cause " << option << " already defined to " << current_value << finl;
2055 return 0;
2056 }
2057 else
2058 {
2059 if (value=="")
2060 {
2061 PetscOptionsSetValue(PETSC_NULLPTR, option, PETSC_NULLPTR);
2062 if (limpr() >= 0) Cerr << "Option Petsc: " << option << finl;
2063 }
2064 else
2065 {
2066 PetscOptionsSetValue(PETSC_NULLPTR, option, value);
2067 if (limpr() >= 0) Cerr << "Option Petsc: " << option << " " << value << finl;
2068 }
2069 return 1;
2070 }
2071}
2072
2073#ifndef PETSC_HAVE_HYPRE
2074// Pour que le code puisse compiler/tourner si on prend PETSc sans aucun autre package externe:
2075PetscErrorCode PCHYPRESetType(PC,const char[])
2076{
2077 Cerr << "HYPRE preconditioners are not available in this TRUST version." << finl;
2078 Cerr << "May be, HYPRE library has been deactivated during the PETSc build process." << finl;
2079 Process::exit();
2080 return 0;
2081}
2082#endif
2083
2084
2085
2086// Routine de monitoring appele par Petsc
2087PetscErrorCode MyKSPMonitor(KSP SolveurPetsc, PetscInt it, PetscReal residu, void *dummy)
2088{
2089 if (it==0)
2090 Cout << "Norm of the residue: " << residu << " (1)";
2091 else
2092 Cout << residu << " ";
2093 if ((it % 15) == 0) Cout << finl ;
2094 return 0;
2095}
2096#endif
2097
2098// Solve system
2099int Solv_Petsc::resoudre_systeme(const Matrice_Base& la_matrice, const DoubleVect& secmem, DoubleVect& solution)
2100{
2101
2102#ifdef PETSCKSP_H
2103 // Create solver now just before solve if not created:
2104 if (SolveurPetsc_==nullptr) create_solver();
2105 std::fenv_t fenv;
2106 std::feholdexcept(&fenv);
2107 // Si on utilise un solver petsc on le signale pour les stats finales
2108 statistics().begin_count(STD_COUNTERS::petsc_solver,statistics().get_last_opened_counter_level()+1);
2109 statistics().end_count(STD_COUNTERS::petsc_solver);
2110 Perf_counters::time_point start = statistics().start_clock();
2111 // Attention, bug apres PETSc 3.14 le logging avec PetscLogStage est tres cher pour MatSetValues (appel MPI meme en sequentiel!). Vu sur Flica5 avec appel frequents a Update_matrix
2112 bool log_Create_Stage = false; // ToDO mettre un test plus intelligent selon taille du cas ou si parallele ?
2113 if (log_Create_Stage) PetscLogStagePush(Create_Stage_);
2114 if (nouvelle_matrice())
2115 {
2116 // Changement de la taille de matrice, on detruit les objets dont la taille change:
2117 int hasChanged = mp_max((int)(secmem_sz_!=secmem.size_array()));
2118 if (MatricePetsc_!=nullptr && hasChanged != 0)
2119 {
2120 // Destruction de la matrice de preconditionnement:
2121 KSPSetOperators(SolveurPetsc_, MatricePetsc_, PETSC_NULLPTR);
2122 // Destruction des vecteurs
2123 VecDestroy(&SecondMembrePetsc_);
2124 SecondMembrePetsc_ = nullptr;
2125 VecDestroy(&SolutionPetsc_);
2126 SolutionPetsc_ = nullptr;
2127 if (LocalSolutionPetsc_!=nullptr)
2128 {
2129 VecDestroy(&LocalSolutionPetsc_);
2130 LocalSolutionPetsc_ = nullptr;
2131 VecScatterDestroy(&VecScatter_);
2132 }
2133 // Destruction matrice
2134 MatDestroy(&MatricePetsc_);
2135 MatricePetsc_ = nullptr;
2136 // Destruction DM
2137 if (dm_!=nullptr)
2138 DMDestroy(&dm_);
2139 }
2140
2141 matrice_symetrique_ = true; // On suppose que la matrice est symetrique
2142
2143 // Construction de la numerotation globale:
2144 if (MatricePetsc_==nullptr)
2145 construit_renum(secmem);
2146
2147 // Matrice morse intermedaire de conversion
2148 Matrice_Morse matrice_morse_intermediaire;
2149 if (read_matrix())
2150 {
2151 // Read the PETSc matrix
2152 RestoreMatrixFromFile();
2153 }
2154 else if (sub_type(Matrice_Petsc, la_matrice))
2155 {
2156 // Matrice deja au format Petsc
2157 MatricePetsc_ = ref_cast(Matrice_Petsc, la_matrice).getMat();
2158 set_read_matrix(true); // flag reutilise comme si on avait lu la matrice
2159 }
2160 else
2161 construit_matrice_morse_intermediaire(la_matrice, matrice_morse_intermediaire);
2162 if (verbose) Cout << "[Petsc] Time to convert matrix: \t" << statistics().compute_time(start) << finl;
2163
2164 // Verification stockage de la matrice
2165 check_aij(matrice_morse_intermediaire);
2166
2167 bool la_matrice_est_morse_non_symetrique =
2168 sub_type(Matrice_Morse, la_matrice) && !sub_type(Matrice_Morse_Sym, la_matrice);
2169 const Matrice_Morse& matrice_morse = la_matrice_est_morse_non_symetrique ? ref_cast(Matrice_Morse, la_matrice)
2170 : matrice_morse_intermediaire;
2171
2172 // Detect if the stencil state:
2173 if (MatricePetsc_ == nullptr || rebuild_matrix_ || read_matrix())
2174 nouveau_stencil_ = true;
2175 else
2176 nouveau_stencil_ = detect_new_stencil(matrice_morse);
2177
2178 // Build x and b if necessary
2179 Create_vectors(secmem);
2180 // Creation de Champs (fields) pour pouvoir utiliser des preconditionneurs PCFIELDSPLIT
2181 Create_DM(secmem);
2182
2183 // Construit ou update la matrice
2184 if (nouveau_stencil_)
2185 Create_objects(matrice_morse, secmem.line_size());
2186 else
2187 Update_matrix(MatricePetsc_, matrice_morse);
2188
2189 /* reglage de BlockSize avec le line_size() du second membre */
2190 if (limpr() == 1)
2191 {
2192 MatInfo info;
2193 MatGetInfo(MatricePetsc_, MAT_GLOBAL_SUM, &info);
2194 PetscInt nnz = (PetscInt)info.nz_used;
2195 Cout << "Order of the PETSc matrix : " << nb_rows_tot_ << " (~ "
2196 << (petsc_cpus_selection_ ? (int) (nb_rows_tot_ / petsc_nb_cpus_) : nb_rows_)
2197 << " unknowns per PETSc process ) " << (nouveau_stencil_ ? "New stencil." : "Same stencil.") << " nnz= " << nnz << finl;
2198 }
2199 }
2200 // Update PETSc Vec (vectors) for RHS and solution
2201 Update_vectors(secmem, solution);
2202
2203 // Save the matrix and the RHS if the matrix has changed...
2204 if (nouvelle_matrice() && save_matrix()) SaveObjectsToFile(secmem, solution);
2205 if (log_Create_Stage) PetscLogStagePop();
2206 //////////////////////////
2207 // Solve the linear system
2208 //////////////////////////
2209 int size_residu = nb_it_max_ + 1;
2210 //DoubleTrav residu(size_residu); // bad_alloc sur gros cas, curie pourquoi ?
2211 ArrOfDouble residu(size_residu);
2212 int nbiter = solve(residu);
2213 nb_it_previous_ = nbiter;
2214 if (limpr()>-1)
2215 {
2216 double residu_relatif=(residu[0]>0?residu[nbiter]/residu[0]:residu[nbiter]);
2217 Cout << finl << "Final residue: " << residu[nbiter] << " ( " << residu_relatif << " )"<<finl;
2218 }
2219 Update_solution(solution);
2220 solution.echange_espace_virtuel();
2222 // Calcul et verification du vrai residu sur matrice:
2223 bool check_residual = controle_residu_;
2224#ifndef NDEBUG
2225 if (amgx_ || gpu_)
2226 if (getenv("TRUST_CLOCK_ON")==nullptr)
2227 {
2228 Cerr << "Warning checking residual. D2H and H2D copies are possible..." << finl;
2229 check_residual = true; // En debug uniquement car verification faite sur CPU ce qui est dommage en prod...
2230 }
2231#endif
2232 if (check_residual && !MatricePetsc_ /* Matrice_Petsc::ajouter_multvect() not implemented bouh */)
2233 {
2234 DoubleVect test(secmem);
2235 test*=-1;
2236 la_matrice.ajouter_multvect(solution,test);
2237 double vrai_residu = mp_norme_vect(test);
2238 if (verbose) Cout << "||Ax-b||=" << vrai_residu << finl;
2239 // Verification de la solution sur la matrice initiale
2240 if (nbiter>0 && Process::je_suis_maitre())
2241 {
2242 double precision_machine=1.e-12;
2243 if (residu[0]>0 && residu[nbiter]/residu[0]>precision_machine && vrai_residu>10*residu[nbiter])
2244 {
2245 Cerr << "Error, computed solution x is false !" << finl;
2246 Cerr << "True residual (" << vrai_residu << ") is much higher than convergence residual (" << residu[nbiter] << ") !" << finl;
2247 Process::exit();
2248 }
2249 }
2250 }
2251 if (solution.isDataOnDevice() && !amgx_) mapToDevice(solution);
2252 std::fesetenv(&fenv);
2253 return nbiter;
2254#else
2255 return -1;
2256#endif
2257}
2258
2259#ifdef PETSCKSP_H
2260#include <signal.h>
2261
2262// Function to handle signals
2263void handleSignal(int signum)
2264{
2265 if (signum==8)
2266 {
2267 Cerr << "SIGFPE from PETSc KSPSolve() !" << finl;
2268 throw signum;
2269 }
2270 else if (signum==11)
2271 {
2272 Cerr << "SIGSEGV from PETSc KSPSolve() !" << finl;
2273 throw signum;
2274 }
2275 else
2276 {
2277 fprintf(stderr, "Signal %d received from PETSc. Exiting...\n", signum);
2278 Process::exit();
2279 }
2280}
2281// Function where signal handlers are set up
2282void setupSignalHandlers(bool on)
2283{
2284 // Configure the sigaction structure for SIGFPE
2285 struct sigaction sa_fpe;
2286 sa_fpe.sa_handler = on ? handleSignal : SIG_DFL;
2287 sigemptyset(&sa_fpe.sa_mask);
2288 sa_fpe.sa_flags = 0;
2289
2290 // Install the signal handler for SIGFPE
2291 if (sigaction(SIGFPE, &sa_fpe, nullptr) == -1)
2292 {
2293 fprintf(stderr, "Error installing signal handler for SIGFPE.\n");
2294 Process::exit(EXIT_FAILURE);
2295 }
2296
2297 // Configure the sigaction structure for SIGSEGV
2298 struct sigaction sa_segv;
2299 sa_segv.sa_handler = on ? handleSignal : SIG_DFL;
2300 sigemptyset(&sa_segv.sa_mask);
2301 sa_segv.sa_flags = 0;
2302
2303 // Install the signal handler for SIGSEGV
2304 if (sigaction(SIGSEGV, &sa_segv, nullptr) == -1)
2305 {
2306 fprintf(stderr, "Error installing signal handler for SIGSEGV.\n");
2307 Process::exit(EXIT_FAILURE);
2308 }
2309}
2310
2311int Solv_Petsc::solve(ArrOfDouble& residu)
2312{
2313 PetscLogStagePush(KSPSolve_Stage_);
2314 Perf_counters::time_point start = statistics().start_clock();
2315 // Affichage par MyKSPMonitor
2316 if (!solveur_direct_)
2317 {
2318 if (limpr() == 1)
2319 {
2320 KSPMonitorSet(SolveurPetsc_, MyKSPMonitor, PETSC_NULLPTR, PETSC_NULLPTR);
2321 }
2322 else
2323 KSPMonitorCancel(SolveurPetsc_);
2324 }
2325 // Historique du residu
2326 KSPSetResidualHistory(SolveurPetsc_, residu.addr(), residu.size_array(), PETSC_TRUE);
2327 // Ksp_view
2328 if (enable_ksp_view())
2329 KSPView(SolveurPetsc_, PETSC_VIEWER_STDOUT_WORLD);
2330 // Keep precond ?
2332 {
2333 //set_reuse_preconditioner(false); // Par defaut, precond est refait
2334 PetscBool flg;
2335 PetscOptionsHasName(PETSC_NULLPTR,option_prefix_,"-ksp_reuse_preconditioner",&flg);
2336 if (flg)
2339 {
2340 bool reuse_precond = (nb_it_previous_ <= reuse_preconditioner_nb_it_max_);
2341 if (!reuse_precond) Cout << "Matrix preconditioner is recomputed cause previous iterations number>" << reuse_preconditioner_nb_it_max_ << "..." << finl;
2342 set_reuse_preconditioner(reuse_precond ? true : false);
2343 }
2344 if (reuse_preconditioner()) Cout << "Matrix has changed but reusing previous preconditioner..." << finl;
2345 if (type_pc_ == "shell" && !reuse_preconditioner())
2346 {
2347 OWN_PTR(PCShell_base)& pcs=pc_user_.pc_shell;
2348 pcs->setUpPC_(PreconditionneurPetsc_, SolveurPetsc_, MatricePetsc_, SecondMembrePetsc_);
2349 }
2350 else
2351 KSPSetReusePreconditioner(SolveurPetsc_, (PetscBool) reuse_preconditioner()); // Default PETSC_FALSE
2352 }
2353 // Solve
2354 setupSignalHandlers(true);
2355 if (gpu_)
2356 statistics().begin_count(STD_COUNTERS::gpu_library,statistics().get_last_opened_counter_level()+1);
2357 KSPSolve(SolveurPetsc_, SecondMembrePetsc_, SolutionPetsc_);
2358 if (gpu_)
2359 statistics().end_count(STD_COUNTERS::gpu_library);
2360 setupSignalHandlers(false);
2361 // Analyse de la convergence par Petsc
2362 KSPConvergedReason Reason;
2363 KSPGetConvergedReason(SolveurPetsc_, &Reason);
2364 if (Reason==KSP_DIVERGED_ITS && (convergence_with_nb_it_max_ || ignore_nb_it_max_)) Reason = KSP_CONVERGED_ITS;
2365 if (Reason<0)
2366 {
2367 Cerr << "No convergence on the resolution with the Petsc solver." << finl;
2368 Cerr << "Reason given by Petsc: ";
2369 if (Reason==KSP_DIVERGED_NULL) Cerr << "KSP_DIVERGED_NULL" << finl;
2370 else if (Reason==KSP_DIVERGED_DTOL) Cerr << "KSP_DIVERGED_DTOL" << finl;
2371 else if (Reason==KSP_DIVERGED_BREAKDOWN) Cerr << "KSP_DIVERGED_BREAKDOWN" << finl;
2372 else if (Reason==KSP_DIVERGED_BREAKDOWN_BICG) Cerr << "KSP_DIVERGED_BREAKDOWN_BICG" << finl;
2373 else if (Reason==KSP_DIVERGED_NONSYMMETRIC) Cerr << "KSP_DIVERGED_NONSYMMETRIC" << finl;
2374 else if (Reason==KSP_DIVERGED_INDEFINITE_PC) Cerr << "KSP_DIVERGED_INDEFINITE_PC" << finl;
2375 else if (Reason==KSP_DIVERGED_NANORINF) Cerr << "KSP_DIVERGED_NANORINF" << finl;
2376 else if (Reason==KSP_DIVERGED_INDEFINITE_MAT) Cerr << "KSP_DIVERGED_INDEFINITE_MAT" << finl;
2377 else if (Reason==KSP_DIVERGED_PC_FAILED) Cerr << "KSP_DIVERGED_PC_FAILED" << finl;
2378 else if (Reason==KSP_DIVERGED_ITS)
2379 {
2380 Cerr << "KSP_DIVERGED_ITS" << finl;
2381 Cerr << "That means the solver didn't converge within the maximal iterations number." << finl;
2382 Cerr << "You can change the maximal number of iterations with the -ksp_max_it option." << finl;
2383#ifdef MPIX_CUDA_AWARE_SUPPORT
2384 // Probleme vu avec GPU direct si >= 4 GPUs et preconditinneurs C-AMG ou BOOMERAMG
2385 // OK pour SA-AMG et Jacobi
2386 // Il faudrait faire un reproducer a soumettre a PETSc...
2387 Cerr << "It seems there is a convergence issue (bug?) with MPI GPU Aware library with PETSc CG and some preconditioners." << finl;
2388 Cerr << "Try using BICGSTAB instead of GCP to bypass the issue." << finl;
2389 Process::exit();
2390#endif
2391 }
2392 else Cerr << (int)Reason << finl;
2393 throw Reason;
2394 }
2395 if (Reason<0 && !return_on_error_) exit();
2396 PetscInt nbiter=-1;
2397 KSPGetIterationNumber(SolveurPetsc_, &nbiter);
2398 int nbit = (int)nbiter; // always an int actually
2399 if (limpr()>-1)
2400 {
2401 // MyKSPMonitor ne marche pas pour certains solveurs (residu(0) n'est pas calcule):
2402 if (solveur_direct_ || type_ksp_ == KSPIBCGS)
2403 {
2404 // Calcul de residu(0)=||B||
2405 VecNorm(SecondMembrePetsc_, NORM_2, &residu[0]);
2406 // On l'affiche pour les solveurs directs (pour les autres TRUST s'en occupe):
2407 if (solveur_direct_) MyKSPMonitor(SolveurPetsc_, 0, residu[0], 0);
2408 }
2409 // Idem: l'historique du residu est mal evalue pour certains solveurs:
2410 // donc on le calcul a la derniere iteration:
2411 if (residu[0] > 0 && (solveur_direct_ || type_ksp_ == KSPIBCGS))
2412 {
2413 // Calcul de residu(nbiter)=||Ax-B||
2414 VecScale(SecondMembrePetsc_, -1);
2415 MatMultAdd(MatricePetsc_, SolutionPetsc_, SecondMembrePetsc_, SecondMembrePetsc_);
2416 VecNorm(SecondMembrePetsc_, NORM_2, &residu[nbit]);
2417 }
2418 }
2419 if (verbose) Cout << finl << "[Petsc] Time to solve system: \t" << statistics().compute_time(start) << finl;
2420 PetscLogStagePop();
2421 return Reason < 0 ? (int)Reason : nbit;
2422}
2423#endif
2424
2425#ifdef PETSCKSP_H
2426void Solv_Petsc::Update_vectors(const DoubleVect& secmem, DoubleVect& solution)
2427{
2428 // Assemblage du second membre et de la solution
2429 Perf_counters::time_point start = statistics().start_clock();
2430 bool DataOnDevice = solution.checkDataOnDevice(secmem);
2431 if (gpu_ && DataOnDevice && !isViennaCLVector()) // The 2 arrays are up to date on the device and the solver is a GPU one (fastest strategy)
2432 {
2433 // We update PETSc vectors with the arrays on device:
2435 if (isKokkosVector())
2436 {
2437#ifdef PETSC_HAVE_KOKKOS
2438 VecKokkosPlaceArray(SecondMembrePetsc_, addrOnDevice(rhs_));
2439 VecKokkosPlaceArray(SolutionPetsc_, addrOnDevice(lhs_));
2440#else
2441 Process::exit("PETSc not built with Kokkos-kernels!");
2442#endif
2443 }
2444 else
2445 {
2446#ifdef PETSC_HAVE_CUDA
2447 VecCUDAPlaceArray(SecondMembrePetsc_, addrOnDevice(rhs_));
2448 VecCUDAPlaceArray(SolutionPetsc_, addrOnDevice(lhs_));
2449#endif
2450#ifdef PETSC_HAVE_HIP
2451 VecHIPPlaceArray(SecondMembrePetsc_, addrOnDevice(rhs_));
2452 VecHIPPlaceArray(SolutionPetsc_, addrOnDevice(lhs_));
2453#endif
2454 }
2455 if (reorder_matrix_) Process::exit("reorder_matrix option is not supported yet on GPU");
2456 if (different_partition_) Process::exit("different_partition option is not supported yet on GPU");
2457 }
2458 else
2459 {
2460 // ToDo OpenMP afficher un warning pour dire d'utiliser un solveur GPU si solution est sur le GPU
2461 secmem.ensureDataOnHost();
2462 solution.ensureDataOnHost();
2463 PetscInt size=ix.size_array();
2464 if (gpu_)
2465 statistics().begin_count(STD_COUNTERS::gpu_copytodevice,statistics().get_last_opened_counter_level()+1);
2466 VecSetOption(SecondMembrePetsc_, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE);
2467 VecSetValues(SecondMembrePetsc_, size, ix.addr(), secmem.addr(), INSERT_VALUES);
2468 VecSetOption(SolutionPetsc_, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE);
2469 VecSetValues(SolutionPetsc_, size, ix.addr(), solution.addr(), INSERT_VALUES);
2470 VecAssemblyBegin(SecondMembrePetsc_);
2471 VecAssemblyEnd(SecondMembrePetsc_);
2472 VecAssemblyBegin(SolutionPetsc_);
2473 VecAssemblyEnd(SolutionPetsc_);
2474 if (gpu_)
2475 statistics().end_count(STD_COUNTERS::gpu_copytodevice);
2476 if (reorder_matrix_)
2477 {
2478 VecPermute(SecondMembrePetsc_, colperm, PETSC_FALSE);
2479 VecPermute(SolutionPetsc_, colperm, PETSC_FALSE);
2480 }
2481 }
2482 if (verbose) Cout << finl << "[Petsc] Time to update vectors: \t" << statistics().compute_time(start) << finl;
2483
2484 // VecView(SecondMembrePetsc_,PETSC_VIEWER_STDOUT_WORLD);
2485 // VecView(SolutionPetsc_,PETSC_VIEWER_STDOUT_WORLD);
2486}
2487
2488bool Solv_Petsc::isKokkosVector()
2489{
2490 VecType type;
2491 VecGetType(SecondMembrePetsc_, &type);
2492 return strcmp(type, VECSEQKOKKOS)==0 || strcmp(type, VECMPIKOKKOS)==0;
2493}
2494
2495bool Solv_Petsc::isViennaCLVector()
2496{
2497 VecType type;
2498 VecGetType(SecondMembrePetsc_, &type);
2499 return strcmp(type, VECSEQVIENNACL)==0 || strcmp(type, VECMPIVIENNACL)==0;
2500}
2501
2502void Solv_Petsc::Update_solution(DoubleVect& solution)
2503{
2504 // Recuperation de la solution
2505 Perf_counters::time_point start = statistics().start_clock();
2506 bool DataOnDevice = solution.checkDataOnDevice();
2507 if (gpu_ && DataOnDevice && !isViennaCLVector()) // solution is on the device to SolutionPetsc_ -> solution update without copy
2508 {
2510 if (isKokkosVector())
2511 {
2512#ifdef PETSC_HAVE_KOKKOS
2513 VecKokkosResetArray(SecondMembrePetsc_);
2514 VecKokkosResetArray(SolutionPetsc_);
2515#else
2516 Process::exit("PETSc not built with Kokkos-kernels!");
2517#endif
2518 }
2519 else
2520 {
2521#ifdef PETSC_HAVE_CUDA
2522 VecCUDAResetArray(SecondMembrePetsc_);
2523 VecCUDAResetArray(SolutionPetsc_);
2524#endif
2525#ifdef PETSC_HAVE_HIP
2526 VecHIPResetArray(SecondMembrePetsc_);
2527 VecHIPResetArray(SolutionPetsc_);
2528#endif
2529 }
2530 }
2531 else
2532 {
2533 int size=ix.size_array();
2534 if (reorder_matrix_)
2535 VecPermute(SolutionPetsc_, rowperm, PETSC_TRUE);
2536 // ToDo un seul VecGetValues comme VecSetValues
2537 if (different_partition_)
2538 {
2539 // TRUST and PETSc have different partitions, a local vector LocalSolutionPetsc_ is gathered from the global vector SolutionPetsc_ :
2540 VecScatterBegin(VecScatter_, SolutionPetsc_, LocalSolutionPetsc_, INSERT_VALUES, SCATTER_FORWARD);
2541 VecScatterEnd (VecScatter_, SolutionPetsc_, LocalSolutionPetsc_, INSERT_VALUES, SCATTER_FORWARD);
2542 // Use the local vector to get the solution:
2543 PetscInt colonne_locale=0;
2544 for (int i=0; i<size; i++)
2545 if (items_to_keep_[i])
2546 {
2547 VecGetValues(LocalSolutionPetsc_, 1, &colonne_locale, &solution(i));
2548 colonne_locale++;
2549 }
2550 assert(nb_rows_==colonne_locale);
2551 }
2552 else
2553 {
2554 // TRUST and PETSc has same partition, local solution can be accessed from the global vector:
2555 if (gpu_)
2556 statistics().begin_count(STD_COUNTERS::gpu_copyfromdevice,statistics().get_last_opened_counter_level()+1);
2557 VecGetValues(SolutionPetsc_, size, ix.addr(), solution.addr());
2558 if (gpu_)
2559 statistics().end_count(STD_COUNTERS::gpu_copyfromdevice);
2560 }
2561 }
2562 if (verbose) Cout << finl << "[Petsc] Time to update solution: \t" << statistics().compute_time(start) << finl;
2563}
2564
2565void Solv_Petsc::check_aij(const Matrice_Morse& matrice)
2566{
2567 /*******************/
2568 /* Setting mataij_ */
2569 /*******************/
2570 // Matrice non symetrique, on utilise le format aij et non sbaij:
2571 if (!matrice_symetrique_) mataij_=1;
2572
2573 // Matrice reordonee necessite le format aij
2574 if (reorder_matrix_) mataij_=1;
2575
2576 // Je n'arrive pas a faire marcher le stockage symetrique avec le preconditionneur PCEISENSTAT
2577 // qui est interessant car necessite 2 fois moins d'operations que le SSOR
2578 if (type_pc_==PCEISENSTAT) mataij_=1;
2579
2580 // Reading a Matrix with Hypre (ToDo test if mataij=1 for Hypre is not better, cause here 2 matrix seqsbaij and seqaij)
2581 if (read_matrix() && type_pc_==PCHYPRE) mataij_=1;
2582
2583 // Dans le cas de SUPERLU_DIST pour Cholesky, je n'arrive pas a faire marcher le stockage
2584 // symetrique donc l'utilisation de SUPERLU_DIST n'est pas encore optimale en RAM...
2585 if (solveur_direct_==superlu_dist) mataij_=1;
2586 // IDEM pour UMFPACK qui ne supporte que le format AIJ:
2587 if (solveur_direct_==umfpack) mataij_=1;
2588 // IDEM pour UMFPACK qui ne supporte que le format AIJ:
2589 if (solveur_direct_==strumpack) mataij_=1;
2590
2591 // Dans le cas GPU, seul le format AIJ est supporte pour le moment:
2592 if (gpu_ || amgx_) mataij_=1;
2593
2594#ifdef PETSC_HAVE_OPENMP
2595 // Dans le cas d'OpenMP, seul le format aij est multithreade:
2596 // PL (01/2021): plus vrai
2597 // mataij_=1;
2598#endif
2599
2600 // Dans le cas de save_matrix_ en parallele
2601 // Sinon, cela bloque avec sbaij:
2602 if (save_matrix()==1 && Process::is_parallel()) mataij_=1;
2603
2604 // Error in PETSc when read/save the factored matrix if matrix is sbaij
2605 // so aij is selected instead:
2606 if (factored_matrix_!="") mataij_=1;
2607
2608 if (!read_matrix())
2609 {
2610 // Ajout d'un test de verification de la symetrie supposee de la matrice PETSc
2611 // Ce test a permis de trouver un defaut de parallelisme sur le remplissage
2612 // de la matrice en pression lors de l'introduction de l'option volume etendu
2613 bool check_matrice_symetrique_ = matrice_symetrique_;
2614 // Check cancelled for:
2615#ifdef PETSC_HAVE_CUDA
2616 if (!amgx_) check_matrice_symetrique_=false; // Bug with CUDA ?
2617#endif
2618#ifdef NDEBUG
2619 check_matrice_symetrique_=false; // Not done in production
2620#endif
2621 if (mataij_ == 0)
2622 {
2623 /***************************************/
2624 /* Test de verification de la symetrie */
2625 /***************************************/
2626 if (check_matrice_symetrique_)
2627 {
2628 Mat MatricePetscComplete;
2629 // On construit une matrice PETSc complete sans hypothese sur la symetrie
2630 Create_MatricePetsc(MatricePetscComplete, 1, matrice);
2631 Mat MatricePetsc;
2632 Create_MatricePetsc(MatricePetsc, mataij_, matrice);
2633 PetscBool matrices_identiques;
2634 // On teste l'egalite des 2 matrices en faisant n produits matrice-vecteur
2635 int n = 10;
2636 MatMultAddEqual(MatricePetsc, MatricePetscComplete, n, &matrices_identiques);
2637 if (!matrices_identiques)
2638 {
2639 Cerr << "Error: matrix PETSc are different according to the symmetric storage or not." << finl;
2640 if (Process::is_parallel()) Cerr << "Check if the matrix is correct in parallel." << finl;
2641 Cerr << "Contact TRUST support team." << finl;
2642 if (nb_rows_ < 10)
2643 {
2644 MatView(MatricePetsc, PETSC_VIEWER_STDOUT_WORLD);
2645 MatView(MatricePetscComplete, PETSC_VIEWER_STDOUT_WORLD);
2646 exit();
2647 }
2648 }
2649 MatDestroy(&MatricePetsc);
2650 MatDestroy(&MatricePetscComplete);
2651 }
2652 }
2653 }
2654}
2655// Creation des objets PETSc
2656void Solv_Petsc::Create_objects(const Matrice_Morse& mat, int blocksize)
2657{
2658 // Remplissage d'une matrice de preconditionnement non symetrique
2659 Mat MatricePrecondionnementPetsc;
2660 /* Semble plus vrai pour spai dans Petsc 3.10.0:
2661 if (matrice_symetrique_ && (type_pc_=="hypre" || type_pc_=="spai")) */
2662 if (matrice_symetrique_ && type_pc_ == "hypre")
2663 preconditionnement_non_symetrique_ = 1;
2664 if (mataij_==1) preconditionnement_non_symetrique_ = 0;
2665
2666
2667
2668 if (preconditionnement_non_symetrique_)
2669 Create_MatricePetsc(MatricePrecondionnementPetsc, 1, mat);
2670
2671 // Creation de la matrice Petsc si necessaire
2672 if (!read_matrix())
2673 {
2674 if (MatricePetsc_!=nullptr) MatDestroy(&MatricePetsc_);
2675 Create_MatricePetsc(MatricePetsc_, mataij_, mat);
2676 }
2677 MatSetBlockSize(MatricePetsc_, blocksize);
2678 /* Seems petsc_decide=1 have no interest. On PETSC_GCP with n=2 (20000cell/n), the ratio is 99%-101% and petsc_decide is slower
2679 Even with n=9, ratio is 97%-103%, and petsc_decide is slower by 10%. Better load balance but increased MPI cost and lower convergence...
2680 Hope it will be better with GPU
2681 if (!petsc_decide_)
2682 {
2683 // Try to detect the possible gain with petsc_decide_
2684 int min_nb_rows = mp_min(nb_rows_);
2685 int max_nb_rows = mp_max(nb_rows_);
2686 int new_nb_rows = PETSC_DECIDE;
2687 PetscSplitOwnership(PETSC_COMM_WORLD, &new_nb_rows, &nb_rows_tot_);
2688 int min_new_nb_rows = mp_min(new_nb_rows);
2689 int max_new_nb_rows = mp_max(new_nb_rows);
2690 Cerr << min_nb_rows << " " << max_nb_rows << " " << min_new_nb_rows << " " << max_new_nb_rows << finl;
2691 }
2692 */
2693 /*****************************************************************************/
2694 /* Changement du preconditionneur pour profiter de la symetrie de la matrice */
2695 /*****************************************************************************/
2697 {
2698 MatSetOption(MatricePetsc_, MAT_SYMMETRIC, PETSC_TRUE); // ToDo: ajout option spd pour MAT_SPD ?
2699 if (type_pc_ == PCLU)
2700 {
2701 // PCCHOLESKY is only supported for sbaij format or since PETSc 3.9.2, SUPERLU, CHOLMOD
2702 if (mataij_ == 0 || solveur_direct_ == superlu_dist || solveur_direct_ == cholmod)
2703 PCSetType(PreconditionneurPetsc_, PCCHOLESKY); // Precond PCLU -> PCCHOLESKY
2704 }
2705 else if (type_pc_ == PCSOR)
2706 PCSORSetSymmetric(PreconditionneurPetsc_, SOR_LOCAL_SYMMETRIC_SWEEP); // Precond SOR -> SSOR
2707 }
2708
2709 /*******************************************/
2710 /* Choix du package pour le solveur direct */
2711 /*******************************************/
2712 static int message_affi = limpr() >= 0;
2713 if (solveur_direct_ == mumps)
2714 {
2715 // Message pour prevenir
2716 if (message_affi)
2717 {
2718 Cout << "The LU decomposition of a matrix with ";
2719 Cout << "Cholesky from MUMPS may take several minutes, please wait..." << finl;
2720 Cout
2721 << "If the decomposition fails/crashes cause a lack of memory, then increase the number of CPUs for your calculation"
2722 << finl;
2723 Cout
2724 << "or add reduce_ram option (syntax: cholesky { reduce_ram }) to suppress preventive memory increase (INCTL(14))"
2725 << finl;
2726 Cout
2727 << "or use Cholesky_out_of_core keyword to write the decomposition on the disk, thus saving memory but with an extra CPU cost during solve."
2728 << finl;
2729 Cout
2730 << "To see the RAM required by the decomposition in the .out file, add impr option to the solver: petsc cholesky { impr }"
2731 << finl;
2732 Cout
2733 << "If an error INFOG(1)=-8|-9|-11|-17|-20 is returned, you can try to increase the ICNTL(14) parameter of MUMPS by using the -mat_mumps_icntl_14 command line option."
2734 << finl;
2735 message_affi = 0;
2736 }
2737 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERMUMPS);
2738 if (!reduce_ram_)
2739 {
2740 // Securite pour MUMPS avec augmentation de la memoire:
2741 // Par defaut le cas PAR_Canal_incline_VEF plante sur 4 processeurs si -mat_mumps_icntl_14 inferieur a 35...
2742 // On revient a 75 car parfois VEF_258 plante... C'est pas clair au niveau memoire...
2743 // Passage a Petsc 3.3 necessite d'augmenter a plus de 75 car sinon Aero_192 crashe...
2744 // A 90, le cas les_Re180Pr071_T0Q_jdd2 plante sur forchat (32bits)
2745 // On differencie sequentiel (peu de memoire, mais estimation juste)
2746 // et le calcul parallele (voir peut etre une separation entre plus et moins de 16 processeurs...)
2747 // Peut etre equiper le script trust d'une detection des erreurs INFO(1)=-9 ...
2748 // On passe de 35 a 40 pour faire passer le cas cavite_entrainee_2D_jdd2 (suite passage a MUMPS 5.2.0)
2750 add_option("mat_mumps_icntl_14", "40");
2751 else
2752 add_option("mat_mumps_icntl_14", "90");
2753 }
2754 }
2755 else if (solveur_direct_ == superlu_dist)
2756 {
2757 if (message_affi)
2758 Cout << "Cholesky from SUPERLU_DIST may take several minutes, please wait..." << finl;
2759 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERSUPERLU_DIST);
2760 }
2761 else if (solveur_direct_ == petsc)
2762 {
2763 if (message_affi)
2764 Cout << "Cholesky from PETSc may take several minutes, please wait...";
2765 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERPETSC);
2766 }
2767 else if (solveur_direct_ == umfpack)
2768 {
2769 if (message_affi)
2770 Cout << "Cholesky from UMFPACK may take several minutes, please wait...";
2771 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERUMFPACK);
2772 }
2773 else if (solveur_direct_ == pastix)
2774 {
2775 if (message_affi)
2776 Cout << "Cholesky from Pastix may take several minutes, please wait...";
2777 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERPASTIX);
2778 }
2779 else if (solveur_direct_ == cholmod)
2780 {
2781 if (message_affi)
2782 Cout << "Cholesky from Cholmod may take several minutes, please wait...";
2783 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERCHOLMOD);
2784 }
2785 else if (solveur_direct_ == strumpack)
2786 {
2787 if (message_affi)
2788 Cout << "Cholesky from Strumpack may take several minutes, please wait...";
2789 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERSTRUMPACK);
2790 }
2791 else if (solveur_direct_ == cli)
2792 {
2793 if (message_affi)
2794 Cout << "LU factorization may take several minutes, please wait...";
2795 }
2796 else if (solveur_direct_)
2797 {
2798 Cerr << "PCFactorSetMatSolverType not called for direct solver, solveur_direct_=" << solveur_direct_ << finl;
2799 Cerr << "Contact TRUST support." << finl;
2800 exit();
2801 }
2802
2803 if ((solveur_direct_ > mumps) && (message_affi))
2804 {
2805 Cout << " OK " << finl;
2806 message_affi = 0;
2807 }
2808
2809 /****************************************/
2810 /* Association de la matrice au solveur */
2811 /****************************************/
2812 if (preconditionnement_non_symetrique_)
2813 {
2814 KSPSetOperators(SolveurPetsc_, MatricePetsc_, MatricePrecondionnementPetsc);
2815 MatDestroy(&MatricePrecondionnementPetsc);
2816 }
2817 else
2818 {
2819 KSPSetOperators(SolveurPetsc_, MatricePetsc_, MatricePetsc_);
2820 }
2821 /************************************/
2822 /* Factored matrix if direct solver */
2823 /************************************/
2824 if (solveur_direct_)
2825 {
2826 // Syntax: factored_matrix save|read|disk
2827 // disk means read the factored_matrix or compute it if not found then save it to disk
2828 Nom filename(Objet_U::nom_du_cas());
2829 filename += "_Factored_Matrix.petsc";
2830 if (factored_matrix_ == "read" || factored_matrix_ == "disk")
2831 {
2832 int filename_found = 0;
2833 // Advice: stat file from master and broadcast
2835 {
2836 // struct stat f {}; pb sur gcc 4.8.5
2837 struct stat f;
2838 if (!stat(filename, &f)) filename_found = 1;
2839 }
2840 envoyer_broadcast(filename_found, 0);
2841 if (filename_found)
2842 {
2843 // File found, we read it:
2844 factored_matrix_ = "read";
2845 }
2846 else
2847 {
2848 // File not found, two cases:
2849 Cerr << "File " << filename << " not found";
2850 if (factored_matrix_ == "read")
2851 {
2852 Cerr << "!" << finl;
2853 exit();
2854 }
2855 else
2856 {
2857 Cerr << "." << finl;
2858 Cerr << "So we will compute the factored matrix and we will save it to disk." << finl;
2859 factored_matrix_ = "save";
2860 }
2861 }
2862 }
2863 if (factored_matrix_ == "read")
2864 {
2865 PetscViewer viewer;
2866 PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &viewer);
2867 PCFactorSetUpMatSolverType(PreconditionneurPetsc_);
2868 Mat FactoredMatrix;
2869 PCFactorGetMatrix(PreconditionneurPetsc_, &FactoredMatrix);
2870 // MatCreate(PETSC_COMM_WORLD,&FactoredMatrix);
2871 // MatSetSizes(FactoredMatrix, nb_rows_, nb_rows_, PETSC_DECIDE, PETSC_DECIDE);
2872 // MatSetType(FactoredMatrix,MATSEQAIJ);
2873 Cerr << "Reading the factored matrix in the file " << filename << finl;
2874 MatLoad(FactoredMatrix, viewer);
2875 PetscViewerDestroy(&viewer);
2876 }
2877 else if (factored_matrix_ == "save")
2878 {
2879 Mat FactoredMatrix;
2880 // Compute the factored matrix:
2881 PCFactorSetUpMatSolverType(PreconditionneurPetsc_);
2882 PCSetUp(PreconditionneurPetsc_);
2883 // Get the factored matrix:
2884 PCFactorGetMatrix(PreconditionneurPetsc_, &FactoredMatrix);
2885 //NO MatConvert(FactoredMatrix,MATSEQDENSE,MAT_INITIAL_MATRIX,&FactoredMatrix);
2886 // MatScalar *a;
2887 // MatDenseGetArray(FactoredMatrix,&a);
2888 if (nb_rows_ < 20)
2889 {
2890 Cerr << "A=" << finl;
2891 MatView(MatricePetsc_, PETSC_VIEWER_STDOUT_WORLD);
2892 Cerr << "LU=" << finl;
2893 MatView(FactoredMatrix, PETSC_VIEWER_STDOUT_WORLD);
2894 }
2895 // Save to disk:
2896 Cerr << "Writing the factored matrix in the file " << filename << finl;
2897 Cerr << "Not implemented yet." << finl;
2898 exit();
2899 PetscViewer viewer;
2900 PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_WRITE, &viewer);
2901 MatView(FactoredMatrix, viewer);
2902 PetscViewerDestroy(&viewer);
2903
2904 // PetscViewer viewer2;
2905 // PetscViewerBinaryOpen(PETSC_COMM_WORLD,filename,FILE_MODE_READ,&viewer2);
2906 // MatLoad(FactoredMatrix, viewer2);
2907 // PetscViewerDestroy(&viewer2);
2908 }
2909 else if (factored_matrix_ != "")
2910 {
2911 Cerr << "Unknown option for factored_matrix option: " << factored_matrix_ << finl;
2912 Cerr << "Options are: read|save|disk" << finl;
2913 exit();
2914 }
2915 }
2916
2917 /*************************************/
2918 /* Mise en place du preconditionneur */
2919 /*************************************/
2920 Perf_counters::time_point start = statistics().start_clock();
2921 KSPSetUp(SolveurPetsc_);
2922 PCSetUpOnBlocks(PreconditionneurPetsc_); // Sets up the preconditioner for each block in the block Jacobi, overlapping Schwarz, and fieldsplit methods.
2923 // IF gamg preconditioner, print grid stats has there is no specific option:
2924 if (limpr()>0 && chaine_lue_.contient("gamg_")) KSPView(SolveurPetsc_, PETSC_VIEWER_STDOUT_WORLD);
2925 if (verbose) Cout << "[Petsc] Time to setup solver: \t" << statistics().compute_time(start) << finl;
2926}
2927
2928void Solv_Petsc::Create_vectors(const DoubleVect& b)
2929{
2930 if (SecondMembrePetsc_!=nullptr) return; // Deja construit
2931
2932 if (gpu_)
2933 {
2934 // For GPU solvers, allocate 2 arrays on device to avoid 2 H2D and 1 D2H copy later during each solve.
2936 }
2937 // Build x
2938 VecCreate(PETSC_COMM_WORLD,&SecondMembrePetsc_);
2939 // Set sizes:
2940 if (petsc_decide_)
2941 VecSetSizes(SecondMembrePetsc_, PETSC_DECIDE, nb_rows_tot_);
2942 else if (petsc_cpus_selection_)
2943 {
2944 PetscInt nb_rows_petsc = compute_nb_rows_petsc(nb_rows_tot_);
2945 VecSetSizes(SecondMembrePetsc_, nb_rows_petsc, PETSC_DECIDE);
2946 }
2947 else
2948 VecSetSizes(SecondMembrePetsc_, nb_rows_, PETSC_DECIDE);
2949
2950 // Set type:
2951 VecType vtype = VECSTANDARD;
2952#ifdef TRUST_USE_GPU
2953 if (gpu_)
2954 {
2955 if (getenv("PETSC_USE_KOKKOS")!=nullptr)
2956 vtype = VECKOKKOS;
2957 else
2958#ifdef PETSC_HAVE_CUDA
2959 vtype = VECCUDA;
2960#endif
2961#ifdef PETSC_HAVE_HIP
2962 vtype = VECHIP;
2963#endif
2964 }
2965#endif
2966 VecSetType(SecondMembrePetsc_, vtype);
2967 VecSetOptionsPrefix(SecondMembrePetsc_, option_prefix_);
2968 VecSetFromOptions(SecondMembrePetsc_);
2969 // Build b
2970 VecDuplicate(SecondMembrePetsc_,&SolutionPetsc_);
2971 // Initialize x to avoid a crash on GPU later with VecSetValues... (bug PETSc?)
2972 // if (gpu_) VecSet(SolutionPetsc_,0.0);
2973
2974 // Only in the case where TRUST and PETSc partitions are not the same
2975 // VecGetValues can only get values on the same processor, so need to gather values from
2976 // global vector SolutionPetsc_ to a local vector LocalSolutionPetsc_ before using VecGetValues
2977 // It will add an extra MPI cost with this operation.
2978 if (different_partition_)
2979 {
2980 // Create the local vector of length nb_rows_:
2981 VecCreateSeq(PETSC_COMM_SELF, nb_rows_, &LocalSolutionPetsc_);
2982 // Create the Scatter context to gather from the global solution to the local solution
2983 ArrOfPetscInt from(nb_rows_);
2984 for (int i=0; i<nb_rows_; i++)
2985 from[i]=decalage_local_global_+i; // Global indices in SolutionPetsc_
2986 IS fromis;
2987 ISCreateGeneral(PETSC_COMM_WORLD, from.size_array(), from.addr(), PETSC_COPY_VALUES, &fromis);
2988 VecScatterCreate(SolutionPetsc_, fromis, LocalSolutionPetsc_, PETSC_NULLPTR, &VecScatter_);
2989 ISDestroy(&fromis);
2990 // Will permit later with VecScatterBegin/VecScatterEnd something like:
2991 // LocalSolutionPetsc_[tois[i]]=SolutionPetsc_[fromis[i]]
2992 }
2993}
2994
2995void Solv_Petsc::Create_DM(const DoubleVect& b)
2996{
2997 if (dm_!=nullptr) return; // Deja construit
2998 /* creation de champs Petsc si des MD_Vector_Composite sont trouves dans b, avec recursion! */
2999 if (sub_type(MD_Vector_composite, b.get_md_vector().valeur()))
3000 {
3001 std::map<std::string, std::vector<PetscInt>> champ;
3002 //liste (MD_Vector_composite, offset de ses elements, multiplicateur (nb d'items du tableau par item du MD_Vector) prefixe des noms de ses champs)
3003 std::vector<std::tuple<const MD_Vector_composite *, int, int, std::string>>
3004 mdc_list =
3005 {
3006 std::make_tuple(&ref_cast(MD_Vector_composite, b.get_md_vector().valeur()), 0, b.line_size(),
3007 std::string("b"))
3008 };
3009 while (mdc_list.size()) //remplissage recursif de champs_ -> (nom du champ, indices)
3010 {
3011 const MD_Vector_composite& mdc = *std::get<0>(mdc_list.back());
3012 int idx = std::get<1>(mdc_list.back()), mult = std::get<2>(mdc_list.back()), un = 1;
3013 std::string prefix = std::get<3>(mdc_list.back());
3014 mdc_list.pop_back();
3015 for (int i = 0; i < mdc.nb_parts(); i++)
3016 {
3017 const MD_Vector_base& mdb = mdc.get_desc_part(i).valeur();
3018 int mult2 = mult * std::max(mdc.get_shape(i), un), nb_seq = mdb.nb_items_seq_local() * mult2;
3019 if (sub_type(MD_Vector_composite, mdb)) //un autre MD_Vector_Composite! on le met dans la liste
3020 mdc_list.push_back(std::make_tuple(&ref_cast(MD_Vector_composite, mdb), idx, mult2,
3021 prefix + std::to_string((long long) i)));
3022 else
3023 {
3024 std::vector<PetscInt> indices;
3025 for (int j = 0; j < nb_seq; j++) indices.push_back(decalage_local_global_ + idx + j);
3026 if (mdc.get_name(i)!="")
3027 champ[mdc.get_name(i).getString()] = indices;
3028 else
3029 champ[prefix + std::to_string((long long) i)] = indices;
3030 }
3031 idx += nb_seq; //mise a jour du decalage (idx)
3032 }
3033 }
3034
3035 /* PetscSection : indique a quel champ appartient chaque variable */
3036 PetscSection sec;
3037 PetscSectionCreate(PETSC_COMM_WORLD, &sec);
3038 PetscSectionSetNumFields(sec, (int)champ.size());
3039 PetscSectionSetChart(sec, decalage_local_global_,
3041 int idx = 0;
3042 for (auto &&kv : champ)
3043 {
3044 PetscSectionSetFieldName(sec, idx, kv.first.c_str());
3045 if (limpr() >= 0) Cerr << "Field " << kv.first << " available for PCFieldsplit." << finl;
3046 for (int j = 0; j < (int) kv.second.size(); j++)
3047 PetscSectionSetDof(sec, kv.second[j], 1), PetscSectionSetFieldDof(sec, kv.second[j], idx, 1);
3048 idx++;
3049 }
3050 PetscSectionSetUp(sec);
3051
3052 /* DMShell : un objet encapsulant la section */
3053 DMShellCreate(PETSC_COMM_WORLD, &dm_);
3054 DMSetLocalSection(dm_, sec);
3055 DMSetUp(dm_);
3056 PetscSectionDestroy(&sec);
3057 }
3058 if (sub_type(MD_Vector_composite, b.get_md_vector().valeur())) PCSetDM(PreconditionneurPetsc_, dm_);
3059}
3060
3061PetscInt Solv_Petsc::compute_nb_rows_petsc(PetscInt nb_rows_tot)
3062{
3063 // Case the user specifies a number of CPUs:
3064 PetscInt nb_rows_petsc = nb_rows_tot / petsc_nb_cpus_;
3065 // Process 0 takes the possible rows in excess:
3066 if (je_suis_maitre()) nb_rows_petsc = nb_rows_tot - (petsc_nb_cpus_ - 1) * nb_rows_petsc;
3067 //
3068 if (petsc_cpus_selection_==1)
3069 {
3070 // First nb_cpus_ CPUs only so:
3071 if (Process::me() >= petsc_nb_cpus_) nb_rows_petsc = 0;
3072 }
3073 else if (petsc_cpus_selection_==2)
3074 {
3075 // Every nb_cpus CPUs only so:
3076 if (Process::me() % petsc_nb_cpus_ != 0) nb_rows_petsc = 0;
3077 }
3078 else
3079 {
3080 Cerr << "Error: petsc_cpus_selection_=" << petsc_cpus_selection_ << finl;
3081 exit();
3082 }
3083 return nb_rows_petsc;
3084}
3085
3086// Creation d'une matrice Petsc depuis une matrice Matrice_Morse
3087void Solv_Petsc::Create_MatricePetsc(Mat& MatricePetsc, int mataij, const Matrice_Morse& mat_morse)
3088{
3089 Perf_counters::time_point start = statistics().start_clock();
3090 // Recuperation des donnees
3091 bool journal = nb_rows_tot_ < 20 ? true : false;
3092 journal = false;
3093 assert(!sub_type(Matrice_Morse_Sym, mat_morse));
3094 if (journal) // Impressions provisoires
3095 {
3096 Journal() << "mat=" << finl;
3097 mat_morse.imprimer_formatte(Journal());
3098 Journal() << "renum_=" << finl;
3099 renum_.ecrit(Journal());
3100 Journal() << "items_to_keep_=" << finl;
3101 Journal() << items_to_keep_ << finl;
3102 }
3103
3104 /////////////////////////////////////
3105 // On cree et dimensionne la matrice
3106 /////////////////////////////////////
3107 // Based on src/ksp/ksp/examples/tutorials/ex2.c
3108 MatCreate(PETSC_COMM_WORLD, &MatricePetsc);
3109 if (petsc_decide_)
3110 MatSetSizes(MatricePetsc, PETSC_DECIDE, PETSC_DECIDE, nb_rows_tot_, nb_rows_tot_);
3111 else if (petsc_cpus_selection_)
3112 {
3113 PetscInt nb_rows_petsc = compute_nb_rows_petsc(nb_rows_tot_);
3114 Journal() << "Process " << Process::me() << " has " << nb_rows_petsc << " rows of the matrix PETSc." << finl;
3115 MatSetSizes(MatricePetsc, nb_rows_petsc, nb_rows_petsc, PETSC_DECIDE, PETSC_DECIDE);
3116 }
3117 else // Normal use: partition of PETSc matrix is dicted by TRUST matrix:
3118 MatSetSizes(MatricePetsc, nb_rows_, nb_rows_, PETSC_DECIDE, PETSC_DECIDE);
3119
3120 /************************/
3121 /* Typage de la matrice */
3122 /************************/
3123 if (mataij == 0)
3124 {
3125 // On utilise SBAIJ pour une matrice symetrique (plus rapide que AIJ)
3126 MatSetType(MatricePetsc, MATSBAIJ);
3127 }
3128 else
3129 {
3130 // On utilise AIJ car je n'arrive pas a faire marcher avec BAIJ
3131 MatType mtype = MATAIJ;
3132#ifdef TRUST_USE_GPU
3133 if (gpu_)
3134 {
3135 if (getenv("PETSC_USE_KOKKOS")!=nullptr)
3136 mtype = MATAIJKOKKOS;
3137 else
3138#ifdef PETSC_HAVE_CUDA
3139 mtype = MATAIJCUSPARSE;
3140#endif
3141#ifdef PETSC_HAVE_HIP
3142 mtype = MATAIJHIPSPARSE;
3143#endif
3144 }
3145#endif
3146 MatSetType(MatricePetsc, mtype);
3147 }
3148 // Surcharge eventuelle par ligne de commande avec -mat_type:
3149 // Example: now possible to change aijcusparse to aijviennacl via CLI
3150 MatSetOptionsPrefix(MatricePetsc, option_prefix_);
3151 MatSetFromOptions(MatricePetsc);
3152 // If -mat_type aij, update mataij flag:
3153 MatType mat_type;
3154 MatGetType(MatricePetsc, &mat_type);
3155 if (strcmp(mat_type, MATSEQAIJ) == 0 || strcmp(mat_type, MATMPIAIJ) == 0) mataij = 1;
3156
3157 /********************************************/
3158 /* Preallocation de la taille de la matrice */
3159 /********************************************/
3160 // Use fast PETSc matrix assembly on the device only if TRUST matrix is on the device AND we use a PETSc GPU solver:
3161 bool use_coo = mat_morse.get_coeff().isDataOnDevice() && gpu_;
3162 if (use_coo)
3163 {
3164 if (verbose) Cout << "[Petsc] Using COO to preallocate the matrix on the device." << finl;
3165 // We preallocate on host (should be done once during the first time-step)
3166 // Is it possible to preallocate on device ? ToDo: view on ArrOfTID
3167 // MatSetPreallocationCOOLocal seems to fail (several test cases crash in //)
3168 const auto& tab1 = mat_morse.get_tab1();
3169 const auto& tab2 = mat_morse.get_tab2();
3170 const int n = tab1.size_array() - 1;
3171 const ArrOfInt& tab_indice = indice_coeff_to_keep(mat_morse);
3172 PetscInt nnz = tab_indice.size_array();
3173 // COO format:
3174 PetscInt* coo_i;
3175 PetscInt* coo_j;
3176 PetscMalloc2(nnz, &coo_i, nnz, &coo_j);
3177 ArrOfTID& renum_array = renum_; // tableau vu comme lineaire
3178 int ligne_locale = 0;
3179 nnz = 0;
3180 for (int i = 0; i < n; i++)
3181 {
3182 if (items_to_keep_[i])
3183 {
3184 const auto k0 = tab1[i] - 1;
3185 const auto k1 = tab1[i + 1] - 1;
3186 for (auto k = k0; k < k1; k++)
3187 {
3188 const int colonne_locale = tab2[k] - 1;
3189 const PetscInt ligne_globale = ligne_locale + decalage_local_global_;
3190 const PetscInt colonne_globale = renum_array[colonne_locale];
3191 coo_i[nnz] = ligne_globale;
3192 coo_j[nnz] = colonne_globale;
3193 nnz++;
3194 }
3195 ligne_locale++;
3196 }
3197 }
3198 MatSetPreallocationCOO(MatricePetsc, nnz, coo_i, coo_j);
3199 PetscFree2(coo_i, coo_j);
3200 }
3201 else
3202 {
3203 ArrOfPetscInt nnz(nb_rows_);
3204 ArrOfPetscInt d_nnz(nb_rows_);
3205 ArrOfPetscInt o_nnz(nb_rows_);
3206 ArrOfTID& renum_array = renum_; // tableau vu comme lineaire
3207 const PetscInt premiere_colonne_globale = decalage_local_global_;
3208 const PetscInt derniere_colonne_globale = nb_rows_ + decalage_local_global_;
3209 const auto& tab1 = mat_morse.get_tab1();
3210 const auto& tab2 = mat_morse.get_tab2();
3211 int cpt = 0;
3212 const int n = tab1.size_array() - 1;
3213 for (int i = 0; i < n; i++)
3214 {
3215 if (items_to_keep_[i])
3216 {
3217 const auto k0 = tab1[i] - 1;
3218 const auto k1 = tab1[i + 1] - 1;
3219 nnz[cpt] = k1 - k0; // Nombre d'elements non nuls sur la ligne i
3220 for (auto k = k0; k < k1; k++)
3221 {
3222 const int colonne_locale = tab2[k] - 1;
3223 const PetscInt colonne_globale = renum_array[colonne_locale];
3224 if (colonne_globale >= premiere_colonne_globale && colonne_globale < derniere_colonne_globale)
3225 d_nnz[cpt]++;
3226 else
3227 o_nnz[cpt]++;
3228 }
3229 cpt++;
3230 }
3231 }
3232 if (journal)
3233 {
3234 Journal() << "nnz=" << nnz << finl;
3235 Journal() << "d_nnz=" << d_nnz << finl;
3236 Journal() << "o_nnz=" << o_nnz << finl;
3237 }
3238 // TRES important pour la vitesse de construction de la matrice
3239 if (mataij == 0)
3240 {
3241 if (different_partition_)
3242 {
3243 // If partition of TRUST and PETSc differs, difficult to preallocate the matrix finely so:
3244 // ToDo, try to optimize:
3245 int nz = Process::mp_max((nnz.size_array() == 0 ? 0 : (int) max_array(
3246 nnz))); // max_array always an int: max numb of zeros on a line
3247 MatSeqSBAIJSetPreallocation(MatricePetsc, block_size_, nz, PETSC_NULLPTR);
3248 MatMPISBAIJSetPreallocation(MatricePetsc, block_size_, nz, PETSC_NULLPTR, nz, PETSC_NULLPTR);
3249 }
3250 else
3251 {
3252 MatSeqSBAIJSetPreallocation(MatricePetsc, block_size_, PETSC_DEFAULT, nnz.addr());
3253 // Test on nb_rows==0 is to avoid PAR_docond_anisoproc hangs
3254 MatMPISBAIJSetPreallocation(MatricePetsc, block_size_, (nb_rows_ == 0 ? 0 : PETSC_DEFAULT), d_nnz.addr(),
3255 (nb_rows_ == 0 ? 0 : PETSC_DEFAULT), o_nnz.addr());
3256 }
3257 }
3258 else
3259 {
3260 if (different_partition_)
3261 {
3262 // If partition of TRUST and PETSc differs, difficult to preallocate the matrix finely so:
3263 // ToDo, try to optimize:
3264 int nz = Process::mp_max((nnz.size_array() == 0 ? 0 : (int) max_array(
3265 nnz))); // max_array always an int: max numb of zeros on a line
3266 MatSeqAIJSetPreallocation(MatricePetsc, nz, PETSC_NULLPTR);
3267 MatMPIAIJSetPreallocation(MatricePetsc, nz, PETSC_NULLPTR, nz, PETSC_NULLPTR);
3268 }
3269 else
3270 {
3271 MatSeqAIJSetPreallocation(MatricePetsc, PETSC_DEFAULT, nnz.addr());
3272 // Test on nb_rows==0 is to avoid PAR_docond_anisoproc hangs
3273 MatMPIAIJSetPreallocation(MatricePetsc, (nb_rows_ == 0 ? 0 : PETSC_DEFAULT), d_nnz.addr(),
3274 (nb_rows_ == 0 ? 0 : PETSC_DEFAULT), o_nnz.addr());
3275 }
3276 }
3277 }
3278 // ToDo: nettoyer la matrice TRUST en amont... Car le nnz des matrices peut varier (ex: implicite, Hyd_Cx_impl ou PolyMAC_HFV)
3279 // et si on supprime les zeros de la matrice, lors d'un update on peut avoir une allocation -> erreur
3280 if (mataij_)
3281 {
3282 if (!mat_ignore_zero_entries_ || mat_morse.constant_stencil())
3283 MatSetOption(MatricePetsc, MAT_IGNORE_ZERO_ENTRIES, PETSC_FALSE); // Stocke les zeros st stencil constant
3284 else
3285 {
3286 MatSetOption(MatricePetsc, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE); // Ne stocke pas les zeros
3287 if (verbose)
3288 {
3289 ArrOfDouble nonzeros(2); // Pas ArrOfInt car nonzeros peut depasser 2^32 facilement - on n'a pas besoin d'un compte exact
3290 nonzeros[0] = 0;
3291 nonzeros[1] = (double)mat_morse.nb_coeff();
3292 for (int i = 0; i < nonzeros[1]; i++)
3293 if (mat_morse.get_coeff()(i) != 0)
3294 nonzeros[0] += 1;
3295 mp_sum_for_each_item(nonzeros);
3296 if (nonzeros[1] > 0)
3297 {
3298 double ratio = 1 - (double) nonzeros[0] / (double) nonzeros[1];
3299 if (ratio > 0.2)
3300 Cout << "Warning! Trust matrix contains a lot of useless stored zeros: " << (int) (ratio * 100)
3301 << "% (" << nonzeros[1] - nonzeros[0] << "/" << nonzeros[1] << ")" << finl;
3302 }
3303 int zero_discarded = (int) (std::lrint(nonzeros[1] - nonzeros[0]));
3304 if (zero_discarded)
3305 Cout << "[Petsc] Discarding " << zero_discarded
3306 << " zeros from TRUST matrix into the PETSc matrix ..." << finl;
3307 }
3308 }
3309 }
3310 // Genere une erreur (ou pas) si une case de la matrice est remplie sans allocation auparavant:
3311 MatSetOption(MatricePetsc, MAT_NEW_NONZERO_ALLOCATION_ERR, allow_realloc_ ? PETSC_FALSE : PETSC_TRUE);
3312
3313 // Hash table (Faster MatAssembly after the first one)
3315 MatSetOption(MatricePetsc, MAT_USE_HASH_TABLE, PETSC_TRUE);
3316 if (verbose) Cout << "[Petsc] Time to create the matrix: \t" << statistics().compute_time(start) << finl;
3317
3318 // Fill the matrix
3319 Solv_Petsc::Update_matrix(MatricePetsc, mat_morse);
3320
3321 // Reorder the matrix
3322 if (reorder_matrix_)
3323 {
3324 Mat Aperm;
3325 MatOrderingType ordering = MATORDERINGRCM;
3326 MatGetOrdering(MatricePetsc, ordering, &rowperm, &colperm);
3327 ISInvertPermutation(rowperm, PETSC_DECIDE, &inv_rowperm);
3328 ISInvertPermutation(colperm, PETSC_DECIDE, &inv_colperm);
3329 MatPermute(MatricePetsc, rowperm, colperm, &Aperm);
3330 MatDestroy(&MatricePetsc);
3331 MatricePetsc = Aperm;
3332 }
3333}
3334
3335void Solv_Petsc::Update_matrix(Mat& MatricePetsc, const Matrice_Morse& mat_morse)
3336{
3337 Perf_counters::time_point start = statistics().start_clock();
3338 bool journal = nb_rows_tot_ < 20 ? true : false;
3339 journal = false;
3340
3341 /*****************************/
3342 /* Remplissage de la matrice */
3343 /*****************************/
3344 // Use fast PETSc matrix assembly on the device only if TRUST matrix is on the device AND we use a PETSc GPU solver:
3345 bool use_coo = mat_morse.get_coeff().isDataOnDevice() && gpu_;
3346 if (use_coo)
3347 {
3348 if (verbose) Cout << "[Petsc] Using COO to fill the matrix on the device." << finl;
3349 const ArrOfInt& tab_indice = indice_coeff_to_keep(mat_morse);
3350 const auto& tab_coeff = mat_morse.get_coeff();
3351 int nnz = tab_indice.size_array();
3352 DoubleTrav tab_v(nnz);
3353 CDoubleArrView coeff = tab_coeff.view_ro();
3354 CIntArrView indice = tab_indice.view_ro();
3355 DoubleArrView v = static_cast<ArrOfDouble&>(tab_v).view_wo();
3356 Kokkos::parallel_for(start_gpu_timer(__KERNEL_NAME__), nnz, KOKKOS_LAMBDA(const int i)
3357 {
3358 v[i] = coeff[indice[i]];
3359 });
3360 end_gpu_timer(__KERNEL_NAME__);
3361 MatSetValuesCOO(MatricePetsc, v.data(), INSERT_VALUES);
3362 }
3363 else
3364 {
3365 // ligne par ligne avec un tableau coeff et tab2 qui contiennent
3366 // les coefficients et les colonnes globales pour chaque ligne
3367 // On dimensionne ces tableaux a la taille la plus grande possible
3368 // ToDo : recalcul de nnz utile ?
3369 ArrOfInt nnz(nb_rows_);
3370 nnz = 0;
3371 ArrOfTID& renum_array = renum_; // tab seen as a flat array (can't use ArrOfPetscInt& because of C++ ref cast...)
3372 const auto& tab1 = mat_morse.get_tab1();
3373 const auto& tab2 = mat_morse.get_tab2();
3374 int cpt = 0;
3375 for (int i = 0; i < tab1.size_array() - 1; i++)
3376 if (items_to_keep_[i])
3377 {
3378 nnz[cpt] = (int)(tab1[i + 1] - tab1[i]); // Nombre d'elements non nuls sur la ligne i
3379 cpt++;
3380 }
3381 // Test sur nb_rows si nul (cas proc vide) car sinon max_array plante:
3382 int size = (nb_rows_ == 0 ? 0 : max_array(nnz));
3383 ArrOfDouble coeff_tmp(size);
3384 ArrOfPetscInt tab2_tmp(size);
3385 const auto& coeff = mat_morse.get_coeff();
3386 cpt = 0;
3387 const int n = tab1.size_array() - 1;
3388 for (int i = 0; i < n; i++)
3389 {
3390 if (items_to_keep_[i])
3391 {
3392 PetscInt ligne_globale = cpt + decalage_local_global_;
3393 int ncol = 0;
3394 const auto k0 = tab1[i] - 1;
3395 const auto k1 = tab1[i + 1] - 1;
3396 for (auto k = k0; k < k1; k++)
3397 {
3398 if (coeff[k] == 0 and reorder_matrix_ ) continue;
3399 coeff_tmp[ncol] = coeff[k];
3400 tab2_tmp[ncol] = renum_array[tab2[k] - 1];
3401 ncol++;
3402 }
3403// assert(ncol == nnz[cpt]);
3404 if (journal)
3405 {
3406 Journal() << (int) ligne_globale << " ";
3407 for (int j = 0; j < ncol; j++) Journal() << coeff_tmp[j] << " ";
3408 Journal() << finl;
3409 }
3410 try
3411 {
3412 MatSetValues(MatricePetsc, 1, &ligne_globale, ncol, tab2_tmp.addr(), coeff_tmp.addr(),
3413 INSERT_VALUES);
3414 }
3415 catch (...)
3416 {
3417 // ToDo: changer car PETSc est en C: pas d'exception lancee
3418 Cerr << "We detect that the PETSc matrix coefficients are changed without pre-allocation." << finl;
3419 Cerr << "Try one of the following option:" << finl;
3420 Cerr << "- Rebuild the matrix each time instead of updating the coefficients (slower)." << finl;
3421 Cerr << "enable_allocation : Enable re-allocation of coefficients (slow)." << finl;
3422 Cerr << "- Discard new coefficients (risk!)" << finl;
3423 Cerr << "Try the two options and select the costly one." << finl;
3424 Process::exit();
3425 }
3426 cpt++;
3427 }
3428 }
3429 /****************************/
3430 /* Assemblage de la matrice */
3431 /****************************/
3432 MatAssemblyBegin(MatricePetsc, MAT_FINAL_ASSEMBLY);
3433 MatAssemblyEnd(MatricePetsc, MAT_FINAL_ASSEMBLY);
3434 }
3435
3436 if (!nouveau_stencil_ && reorder_matrix_)
3437 {
3438 Mat Aperm;
3439
3440 MatPermute(MatricePetsc, rowperm, colperm, &Aperm);
3441 MatDestroy(&MatricePetsc);
3442 MatricePetsc = Aperm;
3443 }
3444
3445#ifndef NDEBUG
3446 if (mataij_)
3447 {
3448 // Verifie la non symetrie de la matrice (au moins une fois)
3449 PetscBool IsSymmetric;
3450 MatIsSymmetric(MatricePetsc, 0.0, &IsSymmetric);
3451 if (IsSymmetric && limpr() >= 0 && !amgx_) Cerr << "Warning: The PETSc matrix is aij but is symmetric. May be use sbaij ?" << finl;
3452 }
3453#endif
3454 // Ignore les coefficients ajoutes:
3455 if (!nouveau_stencil_ && ignore_new_nonzero_)
3456 MatSetOption(MatricePetsc, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE);
3457
3458 // Recuperation de la memoire max
3459 /*
3460 if (limpr()==1)
3461 {
3462 MatInfo info;
3463 MatGetInfo(MatricePetsc,MAT_GLOBAL_MAX,&info);
3464 Cerr << "Max memory used by matrix on a MPI rank: " << (int)(info.memory/1024/1024) << " MB" << finl;
3465 }*/
3466 if (verbose) Cout << "[Petsc] Time to fill the matrix: \t" << statistics().compute_time(start) << finl;
3467}
3468
3469bool Solv_Petsc::detect_new_stencil(const Matrice_Morse& mat_morse)
3470{
3471 if (reorder_matrix_)
3472 {
3473 Mat Aperm;
3474
3475 MatPermute(MatricePetsc_, inv_rowperm, inv_colperm, &Aperm);
3476 MatDestroy(&MatricePetsc_);
3477 MatricePetsc_ = Aperm;
3478 }
3479
3480 // If stencil is set constant for matrix, we leave
3481 if (mat_morse.constant_stencil())
3482 return false;
3483
3484 // Est ce un nouveau stencil ?
3485 Perf_counters::time_point start = statistics().start_clock();
3486 int new_stencil=0;
3487 if (!mataij_)
3488 new_stencil = 1; // Don't how to check the stencil with symmetric ?
3489 else
3490 {
3491 PetscBool done;
3492 Mat localA;
3493 PetscInt nRowsLocal;
3494 const PetscInt *colIndices = nullptr, *rowOffsets = nullptr;
3495 if (Process::is_sequential()) // sequential AIJ
3496 {
3497 // Make localA point to the same memory space as A does
3498 localA = MatricePetsc_;
3499 }
3500 else
3501 {
3502 // Get local matrix from redistributed matrix
3503 MatMPIAIJGetLocalMat(MatricePetsc_, MAT_INITIAL_MATRIX, &localA);
3504 }
3505 MatGetRowIJ(localA, 0, PETSC_FALSE, PETSC_FALSE, &nRowsLocal, &rowOffsets, &colIndices, &done);
3506
3507 const auto& tab1 = mat_morse.get_tab1();
3508 const auto& tab2 = mat_morse.get_tab2();
3509 const auto& coeff = mat_morse.get_coeff();
3510 const ArrOfTID& renum_array = renum_;
3511 int RowLocal = 0;
3512 //Journal() << "Provisoire: nb_rows_=" << nb_rows_ << " nb_rows_tot_=" << nb_rows_tot_ << finl;
3513 const int n = tab1.size_array() - 1;
3514 for (int i = 0; i < n; i++)
3515 {
3516 if (items_to_keep_[i])
3517 {
3518 int nnz_row = 0;
3519 const auto k0 = tab1[i] - 1;
3520 const auto k1 = tab1[i + 1] - 1;
3522 {
3523 for (auto k = k0; k < k1; k++)
3524 if (coeff[k] != 0) nnz_row++;
3525 }
3526 else
3527 nnz_row += (int)(k1 - k0);
3528 const PetscInt kk0 = rowOffsets[RowLocal];
3529 const PetscInt kk1 = rowOffsets[RowLocal + 1];
3530 if (nnz_row != (int)(kk1 - kk0))
3531 {
3532 //Journal() << "Provisoire: Number of non-zero will change from " << rowOffsets[RowLocal + 1] - rowOffsets[RowLocal] << " to " << nnz_row << " on row " << RowLocal << finl;
3533 new_stencil = 1;
3534 break;
3535 }
3536 else
3537 {
3538 for (auto k = k0; k < k1; k++)
3539 {
3540 if (coeff[k] != 0)
3541 {
3542 bool found = false;
3543 const PetscInt col = renum_array[tab2[k] - 1];
3544 const PetscInt RowGlobal = decalage_local_global_+RowLocal;
3545 for (PetscInt kk = kk0; kk < kk1; kk++)
3546 {
3547 if (colIndices[kk] == col)
3548 {
3549 found = true;
3550 break;
3551 }
3552 }
3553 if (!found)
3554 {
3555 Journal() << "Provisoire: mat_morse(" << RowGlobal << "," << col << ")!=0 new " << finl;
3556 new_stencil = 1;
3557 break;
3558 }
3559 }
3560 }
3561 }
3562 RowLocal++;
3563 }
3564 }
3565 MatRestoreRowIJ(localA, 0, PETSC_FALSE, PETSC_FALSE, &nRowsLocal, &rowOffsets, &colIndices, &done);
3566 if (Process::is_parallel()) MatDestroy(&localA);
3567 new_stencil = mp_max(new_stencil);
3568 }
3569 if (verbose) Cout << "[Petsc] Time to check stencil: \t" << statistics().compute_time(start) << finl;
3570 return new_stencil;
3571}
3572#endif
: Classe Comm_Group_MPI, derivee de la classe abstraite Comm_Group.
Une entree dont la source est une chaine de caracteres.
Definition EChaine.h:31
Fichier en lecture Cette classe est a la classe C++ ifstream ce que la classe Entree est a la.
Definition EFichier.h:29
Class defining operators and methods for all reading operation in an input flow (file,...
Definition Entree.h:42
virtual int nb_items_seq_local() const
Nom get_name(int i) const
int get_shape(int i) const
const MD_Vector & get_desc_part(int i) const
const MD_Vector_base & valeur() const
Definition MD_Vector.h:77
Classe Matrice_Base Classe de base de la hierarchie des matrices.
virtual DoubleVect & ajouter_multvect(const DoubleVect &x, DoubleVect &r) const
Operation de multiplication-accumulation (saxpy) matrice vecteur.
Classe Matrice_Morse_Sym Represente une matrice M (creuse) symetrique stockee au format Morse.
Classe Matrice_Morse Represente une matrice M (creuse), non necessairement carree.
auto nb_coeff() const
const auto & get_tab2() const
Sortie & imprimer_formatte(Sortie &s) const override
bool & constant_stencil() const
const auto & get_tab1() const
int nb_colonnes() const override
Return local number of columns (=size on the current proc).
const auto & get_coeff() const
int nb_lignes() const override
Return local number of lines (=size on the current proc).
Une chaine de caractere (Nom) en majuscules.
Definition Motcle.h:26
Un tableau d'objets de la classe Motcle.
Definition Motcle.h:63
int search(const Motcle &t) const
Definition Motcle.cpp:321
class Nom Une chaine de caractere pour nommer les objets de TRUST
Definition Nom.h:31
virtual int debute_par(const char *const n) const
Definition Nom.cpp:319
int longueur() const
Renvoie le nombre de caracteres de la chaine du Nom y compris le caractere zero de fin de chaine.
Definition Nom.cpp:191
Nom & prefix(const char *const)
Definition Nom.cpp:329
const std::string & getString() const
Definition Nom.h:92
friend class Entree
Definition Objet_U.h:76
static const Type_info * info()
Donne des informations sur le type de l'Objet_U.
Definition Objet_U.cpp:136
static bool disable_TU
Flag to disable or not the writing of the .TU files.
Definition Objet_U.h:125
static int dimension
Definition Objet_U.h:99
virtual Entree & readOn(Entree &)
Lecture d'un Objet_U sur un flot d'entree Methode a surcharger.
Definition Objet_U.cpp:293
static const Nom & nom_du_cas()
Renvoie une reference constante vers le nom du cas.
Definition Objet_U.cpp:146
virtual Sortie & printOn(Sortie &) const
Ecriture de l'objet sur un flot de sortie Methode a surcharger.
Definition Objet_U.cpp:282
static const int & get_nb_groups()
static const Comm_Group & current_group()
renvoie une reference au groupe de processeurs actif courant
Definition PE_Groups.h:65
std::chrono::time_point< clock > time_point
void begin_count(const STD_COUNTERS &std_cnt, int counter_lvl=-100000)
double get_time_since_last_open(const STD_COUNTERS &name)
Give as a double the time (in second) elapsed in the operation tracked by the standard counter call n...
double compute_time(time_point start)
return time since start in seconds
time_point start_clock()
Start a clock, return a time_point, not a double.
void end_count(const std::string &custom_count_name, int count_increment=1, long int quantity_increment=0)
End the count of a counter and update the counter values.
static double mp_max(double)
Definition Process.cpp:376
static void mp_sum_for_each_item(TRUSTArray< _TYPE_ > &x, int n=-1)
Definition Process.cpp:193
static bool is_parallel()
Definition Process.cpp:110
static Sortie & Journal(int message_level=0)
Renvoie un objet statique de type Sortie qui sert de journal d'evenements.
Definition Process.cpp:588
static int nproc()
renvoie le nombre de processeurs dans le groupe courant Voir Comm_Group::nproc() et PE_Groups::curren...
Definition Process.cpp:104
static int me()
renvoie mon rang dans le groupe de communication courant.
Definition Process.cpp:125
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
Definition Process.cpp:455
static int je_suis_maitre()
renvoie 1 si on est sur le processeur maitre du groupe courant (c'est a dire me() == 0),...
Definition Process.cpp:86
static bool is_sequential()
Definition Process.cpp:115
Cette classe est a la classe C++ ofstream ce que la classe Sortie est a la classe C++ ostream Elle re...
Definition SFichier.h:27
int matrice_symetrique_
ArrOfDouble lhs_
public_for_cuda void Update_lhs_rhs(const DoubleVect &b, DoubleVect &x)
ArrOfDouble rhs_
void Create_lhs_rhs_onDevice()
const ArrOfInt & indice_coeff_to_keep(const Matrice_Morse &)
void construit_matrice_morse_intermediaire(const Matrice_Base &, Matrice_Morse &)
void Update_solution(DoubleVect &x)
bool ignore_new_nonzero_
Definition Solv_Petsc.h:207
bool mat_ignore_zero_entries_
Definition Solv_Petsc.h:210
bool allow_realloc_
Definition Solv_Petsc.h:209
static public_for_cuda int instance
Definition Solv_Petsc.h:132
bool reduce_ram_
Definition Solv_Petsc.h:211
int reuse_preconditioner_nb_it_max_
Definition Solv_Petsc.h:216
bool rebuild_matrix_
Definition Solv_Petsc.h:208
int solveur_direct_
Definition Solv_Petsc.h:201
void create_solver()
Definition Solv_Petsc.h:56
static int numero_solveur
Definition Solv_Petsc.h:133
int resoudre_systeme(const Matrice_Base &, const DoubleVect &, DoubleVect &) override
bool reorder_matrix_
Definition Solv_Petsc.h:213
const Nom config()
int nb_it_previous_
Definition Solv_Petsc.h:215
ArrOfBit items_to_keep_
Definition Solv_tools.h:31
int secmem_sz_
Definition Solv_tools.h:37
int nb_items_to_keep_
Definition Solv_tools.h:33
ArrOfTID ix
Definition Solv_tools.h:32
trustIdType nb_rows_tot_
Definition Solv_tools.h:35
TIDTab renum_
Definition Solv_tools.h:29
trustIdType decalage_local_global_
Definition Solv_tools.h:36
void construit_renum(const DoubleVect &)
bool read_matrix() const
void set_reuse_preconditioner(bool flag)
const Nom & get_chaine_lue() const
bool reuse_preconditioner()
bool nouvelle_matrice() const
int limpr() const
void fixer_limpr(int l)
void nommer(const Nom &nom) override
Donne un nom a l'Objet_U Methode virtuelle a surcharger.
void set_save_matrix(int flag)
void set_read_matrix(bool flag)
void fixer_nouvelle_matrice(bool i)
void lecture(Entree &)
int save_matrix() const
Classe de base des flux de sortie.
Definition Sortie.h:52
virtual void precision(int)
Definition Sortie.cpp:40
_SIZE_ size_array() const
_TYPE_ * addr()
bool isDataOnDevice() const
int line_size() const
Definition TRUSTVect.tpp:67
virtual const MD_Vector & get_md_vector() const
Definition TRUSTVect.h:123
virtual void echange_espace_virtuel(IsExchangeBlocking exchange_type=IsExchangeBlocking::DefaultBlocking, const std::string kernel_name="noname")
double & value()
Definition Solv_Petsc.h:408
int & value()
Definition Solv_Petsc.h:392
Nom & value()
Definition Solv_Petsc.h:376
int defined
Definition Solv_Petsc.h:362
Nom name
Definition Solv_Petsc.h:363