en/v1.9.8/Parallel__io__parameters_8cpp_source.html

/****************************************************************************

* Copyright (c) 2026, CEA

* All rights reserved.

*

* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

*

* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*

*****************************************************************************/


#include <Parallel_io_parameters.h>

#include <Param.h>

#include <IJK_Lata_writer.h>

#include <Interprete_bloc.h>

#include <Perf_counters.h>

#include <IJK_tools.h>


// Declares Parallel_io_parameters to the framework under the keyword "Parallel_io_parameters",
// with Interprete as base class (NOTE(review): exact expansion of the macro is not visible here).
Implemente_instanciable(Parallel_io_parameters, "Parallel_io_parameters", Interprete);


// XD Parallel_io_parameters interprete Parallel_io_parameters BRACE Object to handle parallel files in IJK

// XD_CONT discretization


// Maximum size, in bytes, of a block written at once (interpreter() stores megabytes * 1024 * 1024 here).
// The initial value 0 deactivates parallel write by default.

Size_t Parallel_io_parameters::max_block_size_ = (Size_t) 0; // 1024*1024*64 (64 MB) was the value considered reasonable for typical machines


// Requested number of writing processes. A value <= 0 means that get_nb_writing_processes()

// derives the number of writers from the number of compute processes.

int Parallel_io_parameters::nb_writing_processes_ = 0;


/*! @brief Writes the object on an output stream.
 *
 * Nothing specific to Parallel_io_parameters is written: the call is forwarded to Objet_U::printOn().
 *
 * @param os output stream
 * @return the stream received as argument
 */
Sortie& Parallel_io_parameters::printOn(Sortie& os) const
{
  // The settings of this class are static and are not serialized here.
  Objet_U::printOn(os);
  return os;
}


/*! @brief Reads the object from an input stream.
 *
 * Nothing is consumed here; the keywords of this object are parsed by interpreter().
 *
 * @param is input stream
 * @return the stream received as argument, untouched
 */
Entree& Parallel_io_parameters::readOn(Entree& is)
{
  return is;
}


/*! @brief Parses the keyword block of this interpreter and updates the static parallel I/O settings.
 *
 * Registered keywords: block_size_bytes, block_size_megabytes, writing_processes,
 * bench_ijk_splitting_write and bench_ijk_splitting_read. Before parsing, the block size (in MB)
 * and the writer count are initialised from the current static settings, and block_size_bytes
 * from -1, so a keyword that is not given leaves the corresponding setting as it was.
 *
 * After parsing:
 *  - a negative block_size_megabytes is reported on Cerr and Process::exit() is called;
 *  - a negative writing_processes is reset to 0 (automatic choice, see get_nb_writing_processes());
 *  - a non-negative block_size_bytes replaces the size computed from block_size_megabytes;
 *  - the write and/or read benchmark is run when the matching splitting name was provided.
 *
 * @param is input stream from which the brace-delimited parameter block is read
 * @return the stream received as argument
 */
Entree& Parallel_io_parameters::interpreter(Entree& is)
{
  int bs_bytes = -1;                      // negative: no block size in bytes requested
  int bs = (int) (max_block_size_ >> 20); // current block size converted from bytes to megabytes
  int n  = nb_writing_processes_;
  Nom ijk_name_write, ijk_name_read;

  Param param(que_suis_je());
  param.ajouter("block_size_bytes", &bs_bytes); // XD_ADD_P entier
  // XD_CONT File writes will be performed by chunks of this size (in bytes). This parameter will not be taken into
  // XD_CONT account if block_size_megabytes has been defined
  param.ajouter("block_size_megabytes", &bs); // XD_ADD_P entier
  // XD_CONT File writes will be performed by chunks of this size (in megabytes). The size should be a multiple of the
  // XD_CONT GPFS block size or lustre stripping size (typically several megabytes)
  param.ajouter("writing_processes", &n); // XD_ADD_P entier
  // XD_CONT This is the number of processes that will write concurrently to the file system (this must be set according
  // XD_CONT to the capacity of the filesystem, set to 1 on small computers, can be up to 64 or 128 on very large
  // XD_CONT systems).
  param.ajouter("bench_ijk_splitting_write", &ijk_name_write); // XD_ADD_P chaine
  // XD_CONT Name of the splitting object we want to use to run a parallel write bench (optional parameter)
  param.ajouter("bench_ijk_splitting_read", &ijk_name_read); // XD_ADD_P chaine
  // XD_CONT Name of the splitting object we want to use to run a parallel read bench (optional parameter)
  param.lire_avec_accolades(is);

  if (bs < 0)
    {
      Cerr << "Error in Parallel_io_parameters::interpreter: block_size is negative" << finl;
      Process::exit();
    }
  if (n < 0)
    n = 0;

  max_block_size_ = (long long) bs * 1024 * 1024;
  // NOTE(review): a non-negative block_size_bytes takes precedence over block_size_megabytes here,
  // whereas the keyword description of block_size_bytes states the opposite - confirm which is intended.
  if (bs_bytes >= 0)
    max_block_size_ = (long long) bs_bytes; // A buffer smaller than 1MB is useful only for debugging...

  nb_writing_processes_ = n;

  // Report the block size that is really in effect: the value in bytes when it overrides the
  // megabyte setting, the value in megabytes otherwise.
  Cerr << "Parallel_io_parameters: blocksize= ";
  if (bs_bytes >= 0)
    Cerr << bs_bytes << " bytes";
  else
    Cerr << bs << " MB";
  Cerr << ", nb writing processes= " << get_nb_writing_processes() << finl;

  // "??" is what this code treats as "name not provided" (presumably the default content of a Nom).
  if (ijk_name_write != "??")
    {
      run_bench_write(ijk_name_write);
    }
  if (ijk_name_read != "??")
    {
      run_bench_read(ijk_name_read);
    }
  return is;
}


/*! @brief Returns the number of processes that write concurrently to the file system.
 *
 * A strictly positive nb_writing_processes_ is returned as is. Otherwise the number is derived
 * from Process::nproc(): one writer per node, assuming 24 cores per node, capped at 16 and
 * raised to 1 when the division yields 0.
 *
 * @return the number of writing processes
 */
int Parallel_io_parameters::get_nb_writing_processes()
{
  // An explicit setting always wins over the automatic choice.
  if (nb_writing_processes_ > 0)
    return nb_writing_processes_;

  const int assumed_cores_per_node = 24;
  const int max_automatic_writers = 16;

  const int node_count = Process::nproc() / assumed_cores_per_node;
  if (node_count > max_automatic_writers)
    return max_automatic_writers;
  if (node_count == 0)
    return 1; // fewer processes than one assumed node: a single writer
  return node_count;
}


void Parallel_io_parameters::run_bench_write(const Nom& ijk_splitting_name)

{

  // Get the mesh:

  const Domaine_IJK& splitting = ref_cast(Domaine_IJK, Interprete_bloc::objet_global(ijk_splitting_name));

  // Build a velocity field and a scalar field:

  IJK_Field_double vx, vy, vz;

  vx.allocate(splitting, Domaine_IJK::FACES_I,0);

  vy.allocate(splitting, Domaine_IJK::FACES_J,0);

  vz.allocate(splitting, Domaine_IJK::FACES_K,0);


  set_field_data(vx,Nom("x*0.9+y*0.09*0.001+z*0.009"));

  set_field_data(vy,Nom("1.+x*0.9+y*0.09+z*0.009"));

  set_field_data(vz,Nom("-1.+x*0.9+y*0.09+z*0.009"));


  dumplata_header("test.lata", vx);

  dumplata_newtime("test.lata", 0.);


  statistics().set_nb_time_steps_elapsed(0);

  statistics().create_custom_counter("Parallel_io benchmark",1,"IJK");

  statistics().begin_count("Parallel_io benchmark",statistics().get_last_opened_counter_level()+1);

  dumplata_vector("test.lata", "VELOCITY", vx, vy, vz, 1);

  double t = statistics().get_time_since_last_open("Parallel_io benchmark");

  statistics().end_count("Parallel_io benchmark");

  double sz = (double) (splitting.get_nb_elem_tot(DIRECTION_I)+1)

              * (splitting.get_nb_elem_tot(DIRECTION_J)+1)

              * (splitting.get_nb_elem_tot(DIRECTION_K)+1)

              * 3 * sizeof(float);

  Nom bw = (t==0)? Nom("infty") : Nom(sz/1024/1024/1024/t);

  Cerr << "Parallel_io_parameters benchmark write: data_size= " << sz/1024/1024/1024

       << " GB. Time= " << t << " s. Bandwidth= "

       << bw << " GB/s.(x3)" << finl;

}


/*! @brief Largest absolute difference between two IJK fields.
 *
 * The loop bounds ni(), nj(), nk() are taken from the first field; the second one is read at the
 * same indices. The local maximum is then passed through Process::mp_max() (presumably a maximum
 * over all processes - confirm against Process).
 *
 * @param residu first field, which also provides the loop extents
 * @param x second field, compared entry by entry with the first one
 * @return the value returned by Process::mp_max() for the local maximum of |residu - x|
 */
double max_val_abs_ijk(const IJK_Field_double& residu,const IJK_Field_double& x)
{
  const int extent_i = residu.ni();
  const int extent_j = residu.nj();
  const int extent_k = residu.nk();

  double local_max = 0.;
  for (int kk = 0; kk < extent_k; kk++)
    for (int jj = 0; jj < extent_j; jj++)
      for (int ii = 0; ii < extent_i; ii++)
        {
          const double gap = fabs(residu(ii,jj,kk)-x(ii,jj,kk));
          local_max = std::fmax(gap, local_max);
        }

  return Process::mp_max(local_max);
}


/*! @brief Runs a read benchmark on "test.lata" and checks the values that were read.
 *
 * Three face fields are allocated on the given domain and pre-filled with 1e9, then the "VELOCITY"
 * vector of timestep 1 is read with lire_dans_lata(). Only that call is enclosed by the
 * "Parallel_io benchmark_read" counter. The data size, elapsed time and bandwidth are printed on
 * Cerr. Finally the fields are compared with the analytical expressions used by run_bench_write()
 * and the maximum absolute difference of each component is printed.
 *
 * @param ijk_splitting_name name of the Domaine_IJK object to fetch with Interprete_bloc::objet_global()
 */
void Parallel_io_parameters::run_bench_read(const Nom& ijk_splitting_name)
{
  // Get the mesh:
  const Domaine_IJK& splitting = ref_cast(Domaine_IJK, Interprete_bloc::objet_global(ijk_splitting_name));

  // Build the velocity field that receives the data (ghost size 0):
  IJK_Field_double vx, vy, vz;
  vx.allocate(splitting, Domaine_IJK::FACES_I,0);
  vy.allocate(splitting, Domaine_IJK::FACES_J,0);
  vz.allocate(splitting, Domaine_IJK::FACES_K,0);

  // Large sentinel value, so that entries left untouched by the read stand out in the check below.
  vx.data() = 1e9;
  vy.data() = 1e9;
  vz.data() = 1e9;

  statistics().set_nb_time_steps_elapsed(0);
  statistics().create_custom_counter("Parallel_io benchmark_read",1,"IJK");
  statistics().begin_count("Parallel_io benchmark_read",statistics().get_last_opened_counter_level()+1);
  lire_dans_lata("test.lata", 1 /* timestep */,
                 splitting.le_nom(),
                 "VELOCITY", vx, vy, vz);
  double t = statistics().get_time_since_last_open("Parallel_io benchmark_read");
  statistics().end_count("Parallel_io benchmark_read");

  // Size estimate: (cells + 1) values per direction, 3 components, sizeof(float) bytes per value.
  double sz = (double) (splitting.get_nb_elem_tot(DIRECTION_I)+1)
              * (splitting.get_nb_elem_tot(DIRECTION_J)+1)
              * (splitting.get_nb_elem_tot(DIRECTION_K)+1)
              * 3 * sizeof(float);
  // A zero elapsed time cannot be turned into a finite bandwidth.
  Nom bw = (t==0)? Nom("infty") : Nom(sz/1024/1024/1024/t);
  Cerr << "Parallel_io_parameters benchmark read: data_size= " << sz/1024/1024/1024
       << " GB. Time= " << t << " s. Bandwidth= " << bw << " GB/s.(x3)" << finl;

  // Check values against the expressions used by run_bench_write():
  IJK_Field_double vx2, vy2, vz2;
  vx2.allocate(splitting, Domaine_IJK::FACES_I,0);
  vy2.allocate(splitting, Domaine_IJK::FACES_J,0);
  vz2.allocate(splitting, Domaine_IJK::FACES_K,0);

  set_field_data(vx2,Nom("x*0.9+y*0.09*0.001+z*0.009"));
  set_field_data(vy2,Nom("1.+x*0.9+y*0.09+z*0.009"));
  set_field_data(vz2,Nom("-1.+x*0.9+y*0.09+z*0.009"));

  // max_val_abs_ijk() returns a maximum of absolute differences, not an L2 norm:
  // the messages are labelled accordingly.
  double delta;

  delta = max_val_abs_ijk(vx,vx2);
  Cerr << "Max abs difference on vx: " << delta << finl;
  delta = max_val_abs_ijk(vy,vy2);
  Cerr << "Max abs difference on vy: " << delta << finl;
  delta = max_val_abs_ijk(vz,vz2);
  Cerr << "Max abs difference on vz: " << delta << finl;
}


Domaine_IJK
This class encapsulates all the information related to the eulerian mesh for TrioIJK.
Definition Domaine_IJK.h:47

Domaine_IJK::get_nb_elem_tot
int get_nb_elem_tot(int direction) const
Returns the total (global) number of mesh cells in requested direction.
Definition Domaine_IJK.h:269

Domaine_IJK::FACES_J
@ FACES_J
Definition Domaine_IJK.h:53

Domaine_IJK::FACES_K
@ FACES_K
Definition Domaine_IJK.h:53

Domaine_IJK::FACES_I
@ FACES_I
Definition Domaine_IJK.h:53

Domaine_base::le_nom
const Nom & le_nom() const override
Donne le nom de l'Objet_U Methode a surcharger : renvoie "neant" dans cette implementation.
Definition Domaine_base.h:53

Entree
Class defining operators and methods for all reading operation in an input flow (file,...
Definition Entree.h:42

IJK_Field_local_template::ni
int ni() const
Definition IJK_Field_local_template.h:167

IJK_Field_local_template::nj
int nj() const
Definition IJK_Field_local_template.h:168

IJK_Field_local_template::data
_TYPE_ARRAY_ & data()
Definition IJK_Field_local_template.h:178

IJK_Field_local_template::nk
int nk() const
Definition IJK_Field_local_template.h:169

IJK_Field_template::allocate
void allocate(const Domaine_IJK &d, Domaine_IJK::Localisation l, int ghost_size, int additional_k_layers=0, int nb_compo=1, const Nom &name=Nom(), bool external_storage=false, int monofluide=0, double rov=0., double rol=0., int use_inv_rho_in_pressure_solver=0)
Definition IJK_Field_template.tpp:562

Interprete_bloc::objet_global
static Objet_U & objet_global(const Nom &nom)
cherche l'objet demande dans l'Interprete_bloc courant (Interprete_bloc::interprete_courant()) et dan...
Definition Interprete_bloc.cpp:262

Interprete
Classe de base des objets "interprete".
Definition Interprete.h:38

Nom
class Nom Une chaine de caractere pour nommer les objets de TRUST
Definition Nom.h:31

Objet_U::Entree
friend class Entree
Definition Objet_U.h:76

Objet_U::que_suis_je
const Nom & que_suis_je() const
renvoie la chaine identifiant la classe.
Definition Objet_U.cpp:104

Objet_U::readOn
virtual Entree & readOn(Entree &)
Lecture d'un Objet_U sur un flot d'entree Methode a surcharger.
Definition Objet_U.cpp:293

Objet_U::printOn
virtual Sortie & printOn(Sortie &) const
Ecriture de l'objet sur un flot de sortie Methode a surcharger.
Definition Objet_U.cpp:282

Parallel_io_parameters
Definition Parallel_io_parameters.h:22

Parallel_io_parameters::get_nb_writing_processes
static int get_nb_writing_processes()
Definition Parallel_io_parameters.cpp:99

Parallel_io_parameters::run_bench_write
static void run_bench_write(const Nom &ijk_splitting)
Definition Parallel_io_parameters.cpp:116

Parallel_io_parameters::run_bench_read
static void run_bench_read(const Nom &ijk_splitting)
Definition Parallel_io_parameters.cpp:170

Parallel_io_parameters::interpreter
Entree & interpreter(Entree &) override
Definition Parallel_io_parameters.cpp:47

Parallel_io_parameters::nb_writing_processes_
static int nb_writing_processes_
Definition Parallel_io_parameters.h:42

Parallel_io_parameters::max_block_size_
static Size_t max_block_size_
Definition Parallel_io_parameters.h:37

Param
Helper class to factorize the readOn method of Objet_U classes.
Definition Param.h:112

Param::ajouter
void ajouter(const char *keyword, const int *value, Param::Nature nat=Param::OPTIONAL)
Register an integer parameter.
Definition Param.cpp:364

Param::lire_avec_accolades
int lire_avec_accolades(Entree &is)
Alias of lire_avec_accolades_depuis.
Definition Param.h:577

Process::mp_max
static double mp_max(double)
Definition Process.cpp:376

Process::nproc
static int nproc()
renvoie le nombre de processeurs dans le groupe courant Voir Comm_Group::nproc() et PE_Groups::curren...
Definition Process.cpp:104

Process::exit
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
Definition Process.cpp:455

Sortie
Classe de base des flux de sortie.
Definition Sortie.h:52