TRUST 1.9.8
HPC thermohydraulic platform
Loading...
Searching...
No Matches
Solv_AMG.cpp
1/****************************************************************************
2* Copyright (c) 2026, CEA
3* All rights reserved.
4*
5* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9*
10* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
11* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
12* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*
14*****************************************************************************/
15
16#include <Solv_AMG.h>
17#include <EChaine.h>
18#include <Motcle.h>
19#include <Solv_AMGX.h>
20#include <Solv_Petsc_GPU.h>
21#ifdef TRUST_USE_ROCM
22#include <rocm-core/rocm_version.h>
23#endif
24#include <comm_incl.h> // Mandatory to have MPIX_CUDA_AWARE_SUPPORT defined or not
25#include <MD_Vector_composite.h>
26
27Implemente_instanciable(Solv_AMG,"Solv_AMG",SolveurSys_base);
28// XD amg solveur_sys_base amg NO_BRACE Wrapper for AMG preconditioner-based solver which switch for the best one on
29// XD_CONT CPU/GPU Nvidia/GPU AMD
30// XD attr solveur chaine solveur REQ not_set
31// XD attr option_solveur bloc_lecture option_solveur REQ not_set
32
33// printOn
35{
36 s << chaine_lue_;
37 return s;
38}
39
40/**
41 * @brief Reads the configuration for the AMG solver from the input stream.
42 *
43 * This function parses the input stream to configure the AMG solver parameters,
44 * including the relative tolerance (RTOL), absolute tolerance (ATOL), and whether
45 * to print additional information (IMPR). It supports different solver libraries
46 * based on the available hardware (CPU or GPU).
47 *
48 * The expected input format is:
49 * amg solver { rtol value [impr] }
50 *
51 * @param is The input stream from which to read the solver configuration.
52 * @return The input stream after reading the configuration.
53 *
54 * @throws Process::exit if the input syntax is incorrect or if an unsupported
55 * library is specified.
56 */
58{
59 // amg GCP|BISGTSTAB|GMRES { atol|rtol doublee [st double] [impr] }
60 is >> solver_;
61 if ((Motcle)solver_!="GCP")
62 {
63 Cerr << solver_ << " not supported yet for AMG !" << finl;
65 }
66 Motcle motcle;
67 is >> motcle;
68 while (motcle != "}")
69 {
70 if (motcle=="{") {}
71 else if (motcle=="RTOL") is >> rtol_;
72 else if (motcle=="ATOL") is >> atol_;
73 else if (motcle=="ST") is >> st_;
74 else if (motcle=="IMPR") impr_ = true;
75 else if (motcle=="READ_MATRIX") set_read_matrix(true);
76 else if (motcle=="SAVE_MATRIX_PETSC_FORMAT") set_save_matrix(2);
77 else if (motcle=="SEUIL") Process::exit("Use atol 'absolute tolerance' instead of seuil.");
78 else
79 {
80 options_+=" ";
81 options_+=motcle;
82 Cerr << motcle << " is not an option of AMG solver." << finl;
84 }
85 is >> motcle;
86 }
87 if (atol_<0 && rtol_<0) Process::exit("atol or rtol should be defined in AMG solver.");
88 return is;
89}
90
91void Solv_AMG::create_block_amg(int n, Nom precond)
92{
93 if (getenv("TRUST_AMG")!=nullptr) precond = getenv("TRUST_AMG");
94 // ToDo: not efficient on P0P1Pa (n==3)
95 chaine_lue_="cli { -ksp_type ";
96 chaine_lue_+=petsc_cg_issue_ ? "gmres" : "cg"; // Switch CG to GMRES for more robustness (BiCGstab is slower than GMRES 2xSPMV vs 1)
97 chaine_lue_+=rtol_>0 ? Nom(rtol_, " -ksp_rtol %e") : "";
98 chaine_lue_+=atol_>0 ? Nom(atol_, " -ksp_atol %e") : "";
99 chaine_lue_+=" -ksp_norm_type UNPRECONDITIONED \
100-pc_type fieldsplit \
101-pc_fieldsplit_type additive";
102 // Gamg is using MPI GPU-Aware but less robust than Boomeramg
103 // Il faut -pc_gamg_agg_nsmooths 0 (defaut 1) si crash mais plus lent
104 // Ajouter sur Nvidia -mat aijkokkos
105 if (precond=="gamg")
106 {
107 Cerr << "If Gamg setup crashes during MatProductSymbolic_SeqAIJCUSPARSE_SeqAIJCUSPARSE, it is related to not enough RAM device." << finl;
108 Cerr << "Use more GPUs, or try slower options: -fieldsplit_P0_pc_gamg_agg_nsmooths 0 -fieldsplit_P1_pc_gamg_agg_nsmooths 0" << finl;
109 chaine_lue_+=" -info :pc -fieldsplit_P0_ksp_type preonly \
110-fieldsplit_P0_pc_type gamg \
111-fieldsplit_P0_pc_gamg_threshold 0.01 \
112-fieldsplit_P0_pc_gamg_square_graph 1 \
113-fieldsplit_P1_ksp_type preonly \
114-fieldsplit_P1_pc_type gamg \
115-fieldsplit_P1_pc_gamg_threshold 0.01 \
116-fieldsplit_P1_pc_gamg_square_graph 1";
117 if (n==3)
118 {
119 chaine_lue_+=" -fieldsplit_Pa_ksp_type preonly \
120-fieldsplit_Pa_ksp_type preonly \
121-fieldsplit_Pa_pc_type gamg \
122-fieldsplit_Pa_pc_gamg_threshold 0.01 \
123-fieldsplit_Pa_pc_gamg_square_graph 1";
124 }
125 // Use Kokkos backend (slower though) to avoid memory issue on Nvidia:
126 // src/mat/impls/aij/seq/seqcusparse/aijcusparse.cu:3269 cuda error 2 (cudaErrorMemoryAllocation) : out of memory
127#ifdef TRUST_USE_CUDA
128 chaine_lue_+=" -mat_type aijkokkos -vec_type kokkos";
129#endif
130 }
131 // Boomeramg do not exploit MPI GPU-Aware (issue reported to Hypre: https://github.com/hypre-space/hypre/issues/1354)
132 else if (precond=="boomeramg")
133 {
134 chaine_lue_+=" -fieldsplit_P0_ksp_type preonly \
135-fieldsplit_P0_pc_type hypre \
136-fieldsplit_P0_pc_hypre_type boomeramg \
137-fieldsplit_P0_pc_hypre_boomeramg_strong_threshold 0.1 \
138-fieldsplit_P0_pc_hypre_boomeramg_print_statistics 1 \
139-fieldsplit_P1_ksp_type preonly \
140-fieldsplit_P1_pc_type hypre \
141-fieldsplit_P1_pc_hypre_type boomeramg \
142-fieldsplit_P1_pc_hypre_boomeramg_strong_threshold 0.1 \
143-fieldsplit_P1_pc_hypre_boomeramg_print_statistics 1";
144 if (n==3)
145 {
146 chaine_lue_+=" -fieldsplit_Pa_ksp_type preonly \
147-fieldsplit_Pa_pc_type hypre \
148-fieldsplit_Pa_pc_hypre_type boomeramg \
149-fieldsplit_Pa_pc_hypre_boomeramg_strong_threshold 0.1 \
150-fieldsplit_Pa_pc_hypre_boomeramg_print_statistics 1";
151 }
152 // To avoid this issue on Nvidia: CUSPARSE ERROR (code = 11, insufficient resources) at csr_spgemm_device_cusparse.c:152
153#ifdef TRUST_USE_CUDA
154 if (n==2) chaine_lue_+=" -fieldsplit_P0_pc_mg_galerkin_mat_product_algorithm hypre";
155 if (n==2) chaine_lue_+=" -fieldsplit_P1_pc_mg_galerkin_mat_product_algorithm hypre";
156 if (n==3) chaine_lue_+=" -fieldsplit_Pa_pc_mg_galerkin_mat_product_algorithm hypre";
157#endif
158 }
159 else if (precond=="amgx")
160 {
161 Cerr << "Warning! PETSc with AmgX preconditioner was not tested yet for nnz>2^31 !" << finl;
162 chaine_lue_+=" -fieldsplit_P0_ksp_type preonly \
163-fieldsplit_P0_pc_type amgx \
164-fieldsplit_P0_pc_amgx_strength_threshold 0.1 \
165-fieldsplit_P0_pc_amgx_verbose 1 \
166-fieldsplit_P0_pc_amgx_print_grid_stats 1 \
167-fieldsplit_P1_ksp_type preonly \
168-fieldsplit_P1_pc_type amgx \
169-fieldsplit_P1_pc_amgx_strength_threshold 0.1 \
170-fieldsplit_P1_pc_amgx_verbose 1 \
171-fieldsplit_P1_pc_amgx_print_grid_stats 1";
172 if (n==3)
173 {
174 chaine_lue_+=" -fieldsplit_Pa_ksp_type preonly \
175-fieldsplit_Pa_pc_type amgx \
176-fieldsplit_Pa_pc_amgx_strength_threshold 0.5 \
177-fieldsplit_Pa_pc_amgx_verbose 1 \
178-fieldsplit_Pa_pc_amgx_print_grid_stats 1";
179 }
180 }
181 else
182 Process::exit("Error in Solv_AMG::create_block_amg");
183 chaine_lue_ +=" }";
184}
185
186Nom boomeramg(double st)
187{
188 Nom chaine(" { precond boomeramg { }");
189 if (st>=0)
190 {
191 chaine += " cli { -pc_hypre_boomeramg_strong_threshold";
192 chaine += Nom(st, "%e");
193 chaine += " }";
194 }
195 return chaine;
196}
197void Solv_AMG::create_amg()
198{
199 // We select the more efficient/robust one:
200 chaine_lue_ = solver_;
201#if defined(TRUST_USE_CUDA)
202 library_ = "petsc_gpu";
203 chaine_lue_ += boomeramg(st_); // Best GPU solver
204 // KSP divergence with cg+boomeramg/amgx on multi-node with MPI GPU Aware (seen also on Lumi) so we switch to gmres (bgcs slower) !
205 if (Process::nproc()>4) petsc_cg_issue_ = true;
206#if defined(MPIX_CUDA_AWARE_SUPPORT)
207 if (Process::nproc()>4)
208 {
209 library_ = "amgx";
210 chaine_lue_ = solver_;
211 chaine_lue_ += " { precond c-amg {";
212 if (st_>=0) chaine_lue_ += Nom(st_, " p:strength_threshold %e");
213 chaine_lue_ += " }";
214 }
215#endif
216#elif defined(TRUST_USE_ROCM)
217 library_ = "petsc_gpu";
218 const char* value = std::getenv("ROCM_ARCH");
219 if (value != nullptr && std::string(value) == "gfx1100")
220 {
221 if (st_>=0) Process::exit("st option not supported yet in Solv_AMG");
223 chaine_lue_ += " { precond ua-amg { }"; // Converge mais plus lent que sa-amg
224 else
225 chaine_lue_ += " { precond sa-amg { }"; // Crash en parallele
226 }
227 else
228 chaine_lue_ += boomeramg(st_); // Best GPU solver (// sa-amg is slow...)
229#else
230 library_ = "petsc";
231 chaine_lue_ += boomeramg(st_); // Best CPU solver
232#endif
233 chaine_lue_ += rtol_>0 ? Nom(rtol_, " rtol %e") : Nom(atol_, " atol %e");
234 if (impr_) chaine_lue_ += " impr";
235 if (options_!="") chaine_lue_ += options_;
236 chaine_lue_ += " }";
237}
238
239int Solv_AMG::resoudre_systeme(const Matrice_Base& mat, const DoubleVect& b, DoubleVect& x)
240{
241 // We don't create solver during readOn as usual but just before solve to get more infos about matrix/vectors to fine tune
242 if (!solveur_)
243 {
244 create_amg();
245 int nb_blocks = sub_type(MD_Vector_composite, b.get_md_vector().valeur()) ? ref_cast(MD_Vector_composite, b.get_md_vector().valeur()).nb_parts() : 1;
246 if (nb_blocks>1)
247 {
248 // Block matrix : we use PCFieldsplit (eg: VEF) for preconditioner
249 // Much better convergence for P0P1 for instance
250 Cerr << "Detecting " << nb_blocks << "x" << nb_blocks << " blocks into the matrix. Creating a specific block preconditioning:" << finl;
251 if (chaine_lue_.contient("gamg"))
252 create_block_amg(nb_blocks, "gamg");
253 else if (chaine_lue_.contient("boomeramg"))
254 create_block_amg(nb_blocks, "boomeramg");
255 else if (library_=="amgx")
256 {
257 library_ = "petsc_gpu";
258 create_block_amg(nb_blocks, "amgx");
259 }
260 }
261 Cerr << "====================================================================" << finl;
262 Cerr << "Creating AMG solver: " << library_ << " " << chaine_lue_ << finl;
263 Cerr << "====================================================================" << finl;
264 EChaine entree(chaine_lue_);
265 Nom nom_solveur("Solv_");
266 nom_solveur+=library_;
267 solveur_.typer(nom_solveur);
268 solveur_.nommer("solveur_pression");
269 if (library_=="amgx")
270 ref_cast(Solv_AMGX, solveur_.valeur()).create_solver(entree);
271 else if (library_=="petsc")
272 ref_cast(Solv_Petsc, solveur_.valeur()).create_solver(entree);
273 else if (library_=="petsc_gpu")
274 ref_cast(Solv_Petsc_GPU, solveur_.valeur()).create_solver(entree);
275 else
276 Process::exit("Unsupported case in Solv_AMG::readOn");
277 solveur_->set_save_matrix(save_matrix());
278 solveur_->set_read_matrix(read_matrix());
279 }
280 statistics().end_count(STD_COUNTERS::system_solver,-1,0);
281 int res = solveur_.resoudre_systeme(mat, b, x);
282 statistics().begin_count(STD_COUNTERS::system_solver,statistics().get_last_opened_counter_level()+1);
283 return res;
284}
Une entree dont la source est une chaine de caracteres.
Definition EChaine.h:31
Class defining operators and methods for all reading operation in an input flow (file,...
Definition Entree.h:42
Metadata for a distributed composite vector.
const MD_Vector_base & valeur() const
Definition MD_Vector.h:77
Classe Matrice_Base Classe de base de la hierarchie des matrices.
class Nom Une chaine de caractere pour nommer les objets de TRUST
Definition Nom.h:31
virtual void nommer(const Nom &)
Donne un nom a l'Objet_U Methode virtuelle a surcharger.
Definition Objet_U.cpp:329
virtual Entree & readOn(Entree &)
Lecture d'un Objet_U sur un flot d'entree Methode a surcharger.
Definition Objet_U.cpp:293
virtual Sortie & printOn(Sortie &) const
Ecriture de l'objet sur un flot de sortie Methode a surcharger.
Definition Objet_U.cpp:282
static bool is_parallel()
Definition Process.cpp:110
static int nproc()
renvoie le nombre de processeurs dans le groupe courant Voir Comm_Group::nproc() et PE_Groups::curren...
Definition Process.cpp:104
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
Definition Process.cpp:455
AMD solver wrapper to switch to the more robust/performant AMG preconditioner on CPU/GPU Nvidia/GPU A...
Definition Solv_AMG.h:28
virtual int resoudre_systeme(const Matrice_Base &mat, const DoubleVect &b, DoubleVect &x) override
Definition Solv_AMG.cpp:239
bool read_matrix() const
void set_save_matrix(int flag)
void set_read_matrix(bool flag)
int save_matrix() const
Classe de base des flux de sortie.
Definition Sortie.h:52
virtual const MD_Vector & get_md_vector() const
Definition TRUSTVect.h:123