en/master/Solv__AMG_8cpp_source.html

/****************************************************************************

* Copyright (c) 2026, CEA

* All rights reserved.

*

* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

*

* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*

*****************************************************************************/


#include <Solv_AMG.h>

#include <EChaine.h>

#include <Motcle.h>

#include <Solv_AMGX.h>

#include <Solv_Petsc_GPU.h>

#ifdef TRUST_USE_ROCM

#include <rocm-core/rocm_version.h>

#endif

#include <comm_incl.h> // Mandatory to have MPIX_CUDA_AWARE_SUPPORT defined or not

#include <MD_Vector_composite.h>


Implemente_instanciable(Solv_AMG,"Solv_AMG",SolveurSys_base);

// XD amg solveur_sys_base amg NO_BRACE Wrapper for AMG preconditioner-based solver which switches to the best one on

// XD_CONT CPU/GPU Nvidia/GPU AMD

// XD attr solveur chaine solveur REQ not_set

// XD attr option_solveur bloc_lecture option_solveur REQ not_set


/**
 * @brief Writes the solver description string (chaine_lue_) to an output stream.
 *
 * @param s Output stream that receives the string.
 * @return The stream passed in, so that calls can be chained.
 */
Sortie& Solv_AMG::printOn(Sortie& s ) const
{
  return s << chaine_lue_;
}


/**
 * @brief Reads the configuration of the AMG solver from the input stream.
 *
 * Expected input format:
 *   amg GCP { [rtol value] [atol value] [st value] [impr] [read_matrix] [save_matrix_petsc_format] }
 *
 * The first word is the solver keyword; only GCP is accepted for now.
 * Recognized keywords inside the braces:
 *   - RTOL value : stored in rtol_
 *   - ATOL value : stored in atol_
 *   - ST value   : stored in st_
 *   - IMPR       : sets impr_ to true
 *   - READ_MATRIX : calls set_read_matrix(true)
 *   - SAVE_MATRIX_PETSC_FORMAT : calls set_save_matrix(2)
 *   - SEUIL      : rejected, ATOL must be used instead
 * Any other keyword stops the run with an error message.
 *
 * This method only stores the parameters: the underlying solver is created
 * later, on the first call to resoudre_systeme().
 *
 * At least one of rtol/atol must be strictly positive, because the solver
 * strings built afterwards only emit a tolerance when it is > 0.
 *
 * @param is The input stream from which to read the solver configuration.
 * @return The input stream after reading the configuration.
 *
 * @note Errors are reported through Process::exit (unsupported solver keyword,
 * unknown option, use of SEUIL, or no strictly positive tolerance).
 */
Entree& Solv_AMG::readOn(Entree& is)
{
  // amg GCP|BICGSTAB|GMRES { atol|rtol double [st double] [impr] }
  // (only GCP is accepted at the moment, see the check below)
  is >> solver_;
  if ((Motcle)solver_!="GCP")
    {
      Cerr << solver_ << " not supported yet for AMG !" << finl;
      Process::exit();
    }
  Motcle motcle;
  is >> motcle;
  while (motcle != "}")
    {
      if (motcle=="{") {}
      else if (motcle=="RTOL") is >> rtol_;
      else if (motcle=="ATOL") is >> atol_;
      else if (motcle=="ST") is >> st_;
      else if (motcle=="IMPR") impr_ = true;
      else if (motcle=="READ_MATRIX") set_read_matrix(true);
      else if (motcle=="SAVE_MATRIX_PETSC_FORMAT") set_save_matrix(2);
      else if (motcle=="SEUIL") Process::exit("Use atol 'absolute tolerance' instead of seuil.");
      else
        {
          options_+=" ";
          options_+=motcle;
          Cerr << motcle << " is not an option of AMG solver." << finl;
          Process::exit();
        }
      is >> motcle;
    }
  // A tolerance is only used downstream when it is strictly positive (tests on
  // rtol_>0 / atol_>0 when building the solver string), so a zero value must be
  // rejected here as well as a negative one.
  if (atol_<=0 && rtol_<=0) Process::exit("atol or rtol should be defined in AMG solver.");
  return is;
}


void Solv_AMG::create_block_amg(int n, Nom precond)

{

  if (getenv("TRUST_AMG")!=nullptr) precond = getenv("TRUST_AMG");

  // ToDo: not efficient on P0P1Pa (n==3)

  chaine_lue_="cli { -ksp_type ";

  chaine_lue_+=petsc_cg_issue_ ? "gmres" : "cg"; // Switch CG to GMRES for more robustness (BiCGstab is slower than GMRES 2xSPMV vs 1)

  chaine_lue_+=rtol_>0 ? Nom(rtol_, " -ksp_rtol %e") : "";

  chaine_lue_+=atol_>0 ? Nom(atol_, " -ksp_atol %e") : "";

  chaine_lue_+=" -ksp_norm_type UNPRECONDITIONED \

-pc_type fieldsplit \

-pc_fieldsplit_type additive";

  // Gamg is using MPI GPU-Aware but less robust than Boomeramg

  // Il faut -pc_gamg_agg_nsmooths 0 (defaut 1) si crash mais plus lent

  // Ajouter sur Nvidia -mat aijkokkos

  if (precond=="gamg")

    {

      Cerr << "If Gamg setup crashes during MatProductSymbolic_SeqAIJCUSPARSE_SeqAIJCUSPARSE, it is related to not enough RAM device." << finl;

      Cerr << "Use more GPUs, or try slower options: -fieldsplit_P0_pc_gamg_agg_nsmooths 0 -fieldsplit_P1_pc_gamg_agg_nsmooths 0" << finl;

      chaine_lue_+=" -info :pc -fieldsplit_P0_ksp_type preonly \

-fieldsplit_P0_pc_type gamg \

-fieldsplit_P0_pc_gamg_threshold 0.01 \

-fieldsplit_P0_pc_gamg_square_graph 1 \

-fieldsplit_P1_ksp_type preonly \

-fieldsplit_P1_pc_type gamg \

-fieldsplit_P1_pc_gamg_threshold 0.01 \

-fieldsplit_P1_pc_gamg_square_graph 1";

      if (n==3)

        {

          chaine_lue_+=" -fieldsplit_Pa_ksp_type preonly \

-fieldsplit_Pa_ksp_type preonly \

-fieldsplit_Pa_pc_type gamg \

-fieldsplit_Pa_pc_gamg_threshold 0.01 \

-fieldsplit_Pa_pc_gamg_square_graph 1";

        }

      // Use Kokkos backend (slower though) to avoid memory issue on Nvidia:

      // src/mat/impls/aij/seq/seqcusparse/aijcusparse.cu:3269 cuda error 2 (cudaErrorMemoryAllocation) : out of memory

#ifdef TRUST_USE_CUDA

      chaine_lue_+=" -mat_type aijkokkos -vec_type kokkos";

#endif

    }

  // Boomeramg do not exploit MPI GPU-Aware (issue reported to Hypre: https://github.com/hypre-space/hypre/issues/1354)

  else if (precond=="boomeramg")

    {

      chaine_lue_+=" -fieldsplit_P0_ksp_type preonly \

-fieldsplit_P0_pc_type hypre \

-fieldsplit_P0_pc_hypre_type boomeramg \

-fieldsplit_P0_pc_hypre_boomeramg_strong_threshold 0.1 \

-fieldsplit_P0_pc_hypre_boomeramg_print_statistics 1 \

-fieldsplit_P1_ksp_type preonly \

-fieldsplit_P1_pc_type hypre \

-fieldsplit_P1_pc_hypre_type boomeramg \

-fieldsplit_P1_pc_hypre_boomeramg_strong_threshold 0.1 \

-fieldsplit_P1_pc_hypre_boomeramg_print_statistics 1";

      if (n==3)

        {

          chaine_lue_+=" -fieldsplit_Pa_ksp_type preonly \

-fieldsplit_Pa_pc_type hypre \

-fieldsplit_Pa_pc_hypre_type boomeramg \

-fieldsplit_Pa_pc_hypre_boomeramg_strong_threshold 0.1 \

-fieldsplit_Pa_pc_hypre_boomeramg_print_statistics 1";

        }

      // To avoid this issue on Nvidia:  CUSPARSE ERROR (code = 11, insufficient resources) at csr_spgemm_device_cusparse.c:152

#ifdef TRUST_USE_CUDA

      if (n==2) chaine_lue_+=" -fieldsplit_P0_pc_mg_galerkin_mat_product_algorithm hypre";

      if (n==2) chaine_lue_+=" -fieldsplit_P1_pc_mg_galerkin_mat_product_algorithm hypre";

      if (n==3) chaine_lue_+=" -fieldsplit_Pa_pc_mg_galerkin_mat_product_algorithm hypre";

#endif

    }

  else if (precond=="amgx")

    {

      Cerr << "Warning! PETSc with AmgX preconditioner was not tested yet for nnz>2^31 !" << finl;

      chaine_lue_+=" -fieldsplit_P0_ksp_type preonly \

-fieldsplit_P0_pc_type amgx \

-fieldsplit_P0_pc_amgx_strength_threshold 0.1 \

-fieldsplit_P0_pc_amgx_verbose 1 \

-fieldsplit_P0_pc_amgx_print_grid_stats 1 \

-fieldsplit_P1_ksp_type preonly \

-fieldsplit_P1_pc_type amgx \

-fieldsplit_P1_pc_amgx_strength_threshold 0.1 \

-fieldsplit_P1_pc_amgx_verbose 1 \

-fieldsplit_P1_pc_amgx_print_grid_stats 1";

      if (n==3)

        {

          chaine_lue_+=" -fieldsplit_Pa_ksp_type preonly \

-fieldsplit_Pa_pc_type amgx \

-fieldsplit_Pa_pc_amgx_strength_threshold 0.5 \

-fieldsplit_Pa_pc_amgx_verbose 1 \

-fieldsplit_Pa_pc_amgx_print_grid_stats 1";

        }

    }

  else

    Process::exit("Error in Solv_AMG::create_block_amg");

  chaine_lue_ +=" }";

}


/**
 * @brief Builds the beginning of a solver block that uses the BoomerAMG
 * preconditioner.
 *
 * The returned string starts with " { precond boomeramg { }" and is left
 * open: the caller appends the remaining keywords and the closing brace.
 * When st is not negative, a "cli { ... }" section setting
 * -pc_hypre_boomeramg_strong_threshold to st is appended.
 *
 * @param st Strong threshold; a negative value means "not set" and no cli
 *        section is generated.
 * @return The partial solver description.
 */
Nom boomeramg(double st)
{
  Nom chaine(" { precond boomeramg { }");
  if (st>=0)
    {
      chaine += " cli { -pc_hypre_boomeramg_strong_threshold";
      // The leading space of the format keeps the option name and its value
      // as two separate tokens on the command line.
      chaine += Nom(st, " %e");
      chaine += " }";
    }
  return chaine;
}

/**
 * @brief Selects the backend library (library_) and builds the solver
 * description (chaine_lue_) according to the build configuration.
 *
 * - CUDA build: petsc_gpu with BoomerAMG; above 4 processes petsc_cg_issue_
 *   is raised and, when MPIX_CUDA_AWARE_SUPPORT is defined, the amgx library
 *   with the c-amg preconditioner is used instead.
 * - ROCm build: petsc_gpu; on a gfx1100 architecture (ROCM_ARCH environment
 *   variable) ua-amg (parallel) or sa-amg (sequential) is used, BoomerAMG
 *   otherwise.
 * - Other builds: petsc with BoomerAMG.
 *
 * The tolerance (rtol_ if strictly positive, atol_ otherwise), the optional
 * impr keyword and options_ are then appended before closing the block.
 */
void Solv_AMG::create_amg()
{
  // The description always starts with the Krylov solver keyword; the most
  // efficient/robust preconditioner for the platform is appended below.
  chaine_lue_ = solver_;
#if defined(TRUST_USE_CUDA)
  const bool more_than_4_procs = Process::nproc()>4;
  library_ = "petsc_gpu";
  chaine_lue_ += boomeramg(st_); // Best GPU solver
  // KSP divergence with cg+boomeramg/amgx on multi-node with MPI GPU Aware (seen also on Lumi) so we switch to gmres (bgcs slower) !
  if (more_than_4_procs) petsc_cg_issue_ = true;
  // NOTE(review): Open MPI defines MPIX_CUDA_AWARE_SUPPORT to 0 or 1, whereas this
  // test only checks that the macro is defined - confirm this is intended.
#if defined(MPIX_CUDA_AWARE_SUPPORT)
  if (more_than_4_procs)
    {
      // Restart the description from scratch for the AmgX library
      library_ = "amgx";
      chaine_lue_ = solver_;
      chaine_lue_ += " { precond c-amg {";
      if (st_>=0) chaine_lue_ += Nom(st_, " p:strength_threshold %e");
      chaine_lue_ += " }";
    }
#endif
#elif defined(TRUST_USE_ROCM)
  const char* rocm_arch = std::getenv("ROCM_ARCH");
  const bool is_gfx1100 = rocm_arch != nullptr && std::string(rocm_arch) == "gfx1100";
  library_ = "petsc_gpu";
  if (!is_gfx1100)
    chaine_lue_ += boomeramg(st_); // Best GPU solver (// sa-amg is slow...)
  else
    {
      if (st_>=0) Process::exit("st option not supported yet in Solv_AMG");
      // ua-amg converges but is slower than sa-amg; sa-amg crashes in parallel
      chaine_lue_ += Process::is_parallel() ? " { precond ua-amg { }" : " { precond sa-amg { }";
    }
#else
  library_ = "petsc";
  chaine_lue_ += boomeramg(st_); // Best CPU solver
#endif
  // Relative tolerance has priority; absolute tolerance is the fallback
  if (rtol_>0)
    chaine_lue_ += Nom(rtol_, " rtol %e");
  else
    chaine_lue_ += Nom(atol_, " atol %e");
  if (impr_) chaine_lue_ += " impr";
  if (options_!="") chaine_lue_ += options_;
  chaine_lue_ += " }";
}


/**
 * @brief Solves the linear system mat * x = b with the underlying AMG solver,
 * creating that solver on the first call.
 *
 * On the first call the solver description is built with create_amg(). If the
 * right-hand side is described by a MD_Vector_composite with several parts,
 * the description is replaced by a block (fieldsplit) variant built with
 * create_block_amg(). The underlying solver is then typed from library_
 * ("Solv_" + library_), created from the description, and inherits the
 * save/read matrix flags of this object.
 *
 * @param mat System matrix.
 * @param b Right-hand side.
 * @param x Solution vector, passed to the underlying solver.
 * @return The value returned by the underlying solver's resoudre_systeme().
 */
int Solv_AMG::resoudre_systeme(const Matrice_Base& mat, const DoubleVect& b, DoubleVect& x)
{
  // We don't create solver during readOn as usual but just before solve to get more infos about matrix/vectors to fine tune
  if (!solveur_)
    {
      create_amg();
      const auto& md = b.get_md_vector().valeur();
      const int nb_blocks = sub_type(MD_Vector_composite, md) ? ref_cast(MD_Vector_composite, md).nb_parts() : 1;
      if (nb_blocks>1)
        {
          // Block matrix : we use PCFieldsplit (eg: VEF) for preconditioner
          // Much better convergence for P0P1 for instance
          Cerr << "Detecting " << nb_blocks << "x" << nb_blocks << " blocks into the matrix. Creating a specific block preconditioning:" << finl;
          if (chaine_lue_.contient("gamg"))
            create_block_amg(nb_blocks, "gamg");
          else if (chaine_lue_.contient("boomeramg"))
            create_block_amg(nb_blocks, "boomeramg");
          else if (library_=="amgx")
            {
              // The block variant of AmgX goes through PETSc on GPU
              library_ = "petsc_gpu";
              create_block_amg(nb_blocks, "amgx");
            }
          else
            Cerr << "No block preconditioning available for this configuration: the non-block AMG solver is kept." << finl;
        }
      Cerr << "====================================================================" << finl;
      Cerr << "Creating AMG solver: " << library_ << " " << chaine_lue_ << finl;
      Cerr << "====================================================================" << finl;
      EChaine entree(chaine_lue_);
      Nom nom_solveur("Solv_");
      nom_solveur+=library_;
      solveur_.typer(nom_solveur);
      solveur_.nommer("solveur_pression");
      if (library_=="amgx")
        ref_cast(Solv_AMGX, solveur_.valeur()).create_solver(entree);
      else if (library_=="petsc")
        ref_cast(Solv_Petsc, solveur_.valeur()).create_solver(entree);
      else if (library_=="petsc_gpu")
        ref_cast(Solv_Petsc_GPU, solveur_.valeur()).create_solver(entree);
      else
        Process::exit("Unsupported case in Solv_AMG::resoudre_systeme");
      solveur_->set_save_matrix(save_matrix());
      solveur_->set_read_matrix(read_matrix());
    }
  // The system_solver counter is closed before delegating and reopened afterwards
  // (presumably because the underlying solver manages this counter itself - to be confirmed)
  statistics().end_count(STD_COUNTERS::system_solver,-1,0);
  int res = solveur_.resoudre_systeme(mat, b, x);
  statistics().begin_count(STD_COUNTERS::system_solver,statistics().get_last_opened_counter_level()+1);
  return res;
}


EChaine
Une entree dont la source est une chaine de caracteres.
Definition EChaine.h:31

Entree
Class defining operators and methods for all reading operation in an input flow (file,...
Definition Entree.h:42

MD_Vector_composite
Metadata for a distributed composite vector.
Definition MD_Vector_composite.h:38

MD_Vector::valeur
const MD_Vector_base & valeur() const
Definition MD_Vector.h:77

Matrice_Base
Classe Matrice_Base Classe de base de la hierarchie des matrices.
Definition Matrice_Base.h:35

Nom
class Nom Une chaine de caractere pour nommer les objets de TRUST
Definition Nom.h:31

Objet_U::nommer
virtual void nommer(const Nom &)
Donne un nom a l'Objet_U Methode virtuelle a surcharger.
Definition Objet_U.cpp:329

Objet_U::readOn
virtual Entree & readOn(Entree &)
Lecture d'un Objet_U sur un flot d'entree Methode a surcharger.
Definition Objet_U.cpp:293

Objet_U::printOn
virtual Sortie & printOn(Sortie &) const
Ecriture de l'objet sur un flot de sortie Methode a surcharger.
Definition Objet_U.cpp:282

Process::is_parallel
static bool is_parallel()
Definition Process.cpp:110

Process::nproc
static int nproc()
renvoie le nombre de processeurs dans le groupe courant Voir Comm_Group::nproc() et PE_Groups::curren...
Definition Process.cpp:104

Process::exit
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
Definition Process.cpp:455

Solv_AMGX
Definition Solv_AMGX.h:27

Solv_AMG
AMG solver wrapper to switch to the more robust/performant AMG preconditioner on CPU/GPU Nvidia/GPU A...
Definition Solv_AMG.h:28

Solv_AMG::resoudre_systeme
virtual int resoudre_systeme(const Matrice_Base &mat, const DoubleVect &b, DoubleVect &x) override
Definition Solv_AMG.cpp:239

Solv_Petsc_GPU
Definition Solv_Petsc_GPU.h:22

Solv_Petsc
Definition Solv_Petsc.h:47

SolveurSys_base
Definition SolveurSys_base.h:25

SolveurSys_base::read_matrix
bool read_matrix() const
Definition SolveurSys_base.h:67

SolveurSys_base::set_save_matrix
void set_save_matrix(int flag)
Definition SolveurSys_base.h:70

SolveurSys_base::set_read_matrix
void set_read_matrix(bool flag)
Definition SolveurSys_base.h:68

SolveurSys_base::save_matrix
int save_matrix() const
Definition SolveurSys_base.h:69

SolveurSys_base::chaine_lue_
Nom chaine_lue_
Definition SolveurSys_base.h:84

Sortie
Classe de base des flux de sortie.
Definition Sortie.h:52

TRUSTVect::get_md_vector
virtual const MD_Vector & get_md_vector() const
Definition TRUSTVect.h:123