16#include <EcritureLectureSpecial.h>
17#include <Entree_Fichier_base.h>
18#include <Entree_Fichier_base.h>
19#include <Discretisation_base.h>
20#include <EcrFicCollecteBin.h>
21#include <LecFicDiffuseBin.h>
22#include <communications.h>
23#include <FichierHDFPar.h>
24#include <Probleme_base.h>
25#include <Sortie_Nulle.h>
26#include <Save_Restart.h>
27#include <TRUST_2_PDI.h>
28#include <Ecrire_YAML.h>
31#include <Perf_counters.h>
32#define CHECK_ALLOCATE 0
/// Version number of the classic backup/resumption file format.
/// It is written right after the "format_sauvegarde:" tag of backup files and is
/// the upper bound accepted for the version found in a resumption file.
inline int version_format_sauvegarde()
{
  return 184;
}
/// Version number of the PDI checkpoint format.
/// It is handed to the PDI interface under the name "version" when a PDI backup is written.
inline int version_format_PDI()
{
  return 196;
}
49long int Save_Restart::File_size_=0;
50int Save_Restart::Bad_allocate_=1;
51int Save_Restart::Nb_pb_total_=0;
52int Save_Restart::Num_pb_=1;
// NOTE(review): fragment of a function whose header and several lines are not visible in this
// listing; presumably Save_Restart::allocate_file_size - confirm against the complete file.
// Tag shared by the envoyer/recevoir calls that exchange Bad_allocate_ below.
74 const int canal = 2007;
77 envoyer(Bad_allocate_,p,canal);
79 recevoir(Bad_allocate_,0,canal);
// The disk-space report that follows appears to be guarded by this test on the problem
// counters (the braces are not visible here - TODO confirm).
84 if (Num_pb_==Nb_pb_total_)
// Print the stored error text, then explain that an xyz backup was made.
89 Cerr <<
"***Error*** " << error_ << finl;
90 Cerr <<
"A xyz backup was made because you do not have enough disk space" << finl;
91 Cerr <<
"to continue the current calculation. Free up disk space and" << finl;
92 Cerr <<
"restart the calculation thanks to the backup just made." << finl;
// The file used for the allocation test gets the "_File_size" suffix.
115 Fichier_File_size+=
"_File_size";
116 const char *file = Fichier_File_size;
// The amount to reserve is size+size, i.e. twice `size`.
119 off_t taille = off_t(size+size);
// Open write-only, creating the file if needed with mode 0666.
121 int fichier = open(file, O_WRONLY | O_CREAT, 0666);
// Presumably the open() failure path: the errno text is appended to the error message.
127 error_+=strerror(errno);
// Reserve `taille` bytes starting at offset 0; a non-zero return value means failure.
133 if (posix_fallocate(fichier, 0, taille) != 0)
135 error_=
"Allocation of ";
// NOTE(review): POSIX specifies that posix_fallocate returns the error number directly and
// does not set errno, so strerror(errno) may not describe this failure - confirm and
// consider using the return value instead.
138 error_+=strerror(errno);
// NOTE(review): fragment; the enclosing function header is not visible in this listing.
// Measure the size of the ".xyz" file, add it to File_size_, then delete the file.
157 Nom nom_fich_xyz(
".xyz");
161 ifstream fichier(nom_fich_xyz);
// Move to the end of the stream so that tellg() reports the file length.
162 fichier.seekg(0, std::ios_base::end);
// NOTE(review): tellg() returns -1 when the stream is in a failed state (e.g. the file could
// not be opened), which would decrease File_size_ - confirm this cannot happen here.
163 File_size_ += fichier.tellg();
165 remove(nom_fich_xyz);
// NOTE(review): fragment; the enclosing function header is not visible in this listing.
// Call init_save_file() on every equation of the problem.
171 for(
int i=0; i<pb_base_->nombre_d_equations(); i++)
172 pb_base_->equation(i).init_save_file();
// Set the current, initial and previous times of the time scheme to last_time, and report
// on Cerr the time found in the backup file.
// last_time: time value to restart from.
// NOTE(review): some lines of this function (at least the braces) are not visible in this listing.
175void Save_Restart::setTinitFromLastTime(
double last_time)
178 pb_base_->schema_temps().set_temps_courant() = last_time;
// temps_init() above -DMAXFLOAT presumably means tinit was given in the data file; the
// message tells the user that this value is replaced by last_time.
// NOTE(review): the message contains the typo "accroding" (should read "according").
180 if (pb_base_->schema_temps().temps_init() > -DMAXFLOAT)
182 Cerr <<
"tinit was defined in .data file to " << pb_base_->schema_temps().temps_init() <<
". The value is fixed to " << last_time <<
" accroding to resume_last_time_option" << finl;
184 pb_base_->schema_temps().set_temps_init() = last_time;
185 pb_base_->schema_temps().set_temps_precedent() = last_time;
// Separator line, then the time that was found.
186 Cerr <<
"==================================================================================================" << finl;
187 Cerr <<
"In the backup file, we find the last time: " << last_time <<
" and read the fields." << finl;
// Compare restart_version_ (the format version of the resumption file) with
// version_format_sauvegarde() and report the outcome on Cerr.
// nomfic: name of the resumption file, used only in the messages.
// NOTE(review): the conditions guarding the first and last messages, and whatever follows the
// error messages, are not visible in this listing.
190void Save_Restart::checkVersion(
const Nom& nomfic)
194 Cerr <<
"The version of the format backup/resumption is not the same in the resumption files " << nomfic << finl;
// The file was written with a format newer than the one this code knows.
197 if (restart_version_ > version_format_sauvegarde())
199 Cerr <<
"The format " << restart_version_ <<
" of the resumption file " << nomfic <<
" is posterior" << finl;
200 Cerr <<
"to the format " << version_format_sauvegarde() <<
" recognized by this version of TRUST." << finl;
201 Cerr <<
"Please use a more recent version." << finl;
// Informational message giving the version that was read.
206 Cerr <<
"The version of the resumption format of file " << nomfic <<
" is " << restart_version_ << finl;
// Prepare a restart from a PDI checkpoint through TRUST_2_PDI::prepareRestart.
// resume_last_time: when non-zero, prepareRestart is called with 1 as last argument and the
//   resulting tinit is forwarded to setTinitFromLastTime; otherwise tinit is taken from the
//   time scheme's temps_init() and prepareRestart is called with 0.
// NOTE(review): the declaration of tinit and the branch separating the two calls are not
// visible in this listing.
209void Save_Restart::prepare_PDI_restart(
int resume_last_time)
212 TRUST_2_PDI pdi_interface;
214 int last_iteration = -1;
218 if (resume_last_time)
221 pdi_interface.
prepareRestart(restartComm_, last_iteration, tinit, 1 );
224 setTinitFromLastTime(tinit);
// Presumably the alternative branch: restart from the initial time of the time scheme.
229 tinit = pb_base_->schema_temps().temps_init();
230 pdi_interface.
prepareRestart(restartComm_, last_iteration, tinit, 0 );
// NOTE(review): fragment; the function header is not visible in this listing - presumably
// Save_Restart::sauver_xyz (confirm against the complete file).
// Start the backup_file counter one level below the last opened counter.
236 statistics().begin_count(STD_COUNTERS::backup_file,statistics().get_last_opened_counter_level()+1);
237 Nom nom_fich_xyz(
"");
// File name: the problem name followed by ".xyz" (what precedes it is not visible here).
242 nom_fich_xyz += pb_base_->
le_nom();
243 nom_fich_xyz +=
".xyz";
244 Cerr <<
"Creation of " << nom_fich_xyz <<
" (" <<
EcritureLectureSpecial::get_Output() <<
") for resumption of a calculation with a different number of processors." << finl;
// Alternative name made of the ".xyz" suffix only (the selecting condition is not visible here).
248 nom_fich_xyz =
".xyz";
252 ficsauv_->ouvrir(nom_fich_xyz);
// Header: the format tag followed by the format version.
255 ficsauv_.valeur() <<
"format_sauvegarde:" << finl << version_format_sauvegarde() << finl;
// Write the problem; the returned value is passed to end_count() below.
258 int bytes = pb_base_->sauvegarder(ficsauv_.valeur());
// End marker, then flush and sync the file.
262 ficsauv_.valeur() <<
Nom(
"fin");
263 (ficsauv_.valeur()).flush();
264 (ficsauv_.valeur()).syncfile();
// Report the time spent, then stop the counter.
266 Cout <<
"[IO] " << statistics().get_time_since_last_open(STD_COUNTERS::backup_file) <<
" s to write xyz file." << finl;
267 statistics().end_count(STD_COUNTERS::backup_file,1,bytes);
// Read from `is` the option block used with the pdi_expert format.
// Keywords visible here are "checkpoint_fname" and "yaml_fname" (compris[1] and compris[2]).
// restart_file_name is read from `is`; the file named by yaml_fname must be openable.
// motlu holds the keyword being examined and is quoted in the error messages.
// NOTE(review): several lines are not visible in this listing (including compris[0] and the
// code selecting an action from `ind`), so the keyword-to-action mapping must be confirmed.
271void Save_Restart::lire_pdi_sauvegarde_reprise(
Entree& is,
Motcle& motlu,
Nom& restart_file_name,
Nom& yaml_fname)
280 compris[1]=
"checkpoint_fname";
281 compris[2]=
"yaml_fname";
// Position of the keyword within the list of understood keywords.
286 ind = compris.rang(motlu);
288 is >> restart_file_name;
291 Cerr <<
"[Save_Restart] lire_pdi_sauvegarde_reprise :: You have provided your own yaml file to initialize PDI ! " << finl;
// The user-supplied yaml file must be openable.
296 if (!test.ouvrir(yaml_fname))
298 Cerr <<
"[Save_Restart] lire_pdi_sauvegarde_reprise :: Error! The provided file " << yaml_fname <<
" does not exist " << finl;
// Unknown keyword: print it together with the accepted ones.
304 Cerr <<
"[Save_Restart] lire_pdi_sauvegarde_reprise :: " << motlu <<
" is not understood. Keywords are:" << finl;
305 Cerr << compris << finl;
// An opening brace was expected.
312 Cerr <<
"[Save_Restart] lire_pdi_sauvegarde_reprise :: " << motlu <<
" is not understood. Expected { :" << finl;
// Read the resumption options from `is` and resume the calculation from the restart file.
// motlu: keyword that triggered the call; "resume_last_time" enables restarting from the last
//   time stored in the file. It is also reused to hold words read from the restart file.
// Format keywords accepted by the test below: formatte, binaire, xyz, single_hdf, pdi, pdi_expert.
// restart_done_ and restart_in_progress_ are set to true near the end.
// NOTE(review): many lines of this function are not visible in this listing.
321void Save_Restart::lire_reprise(
Entree& is,
Motcle& motlu)
// 1 when the keyword is "resume_last_time", 0 otherwise.
323 int resume_last_time = (motlu ==
"resume_last_time" ? 1 : 0);
// Reject any format keyword outside the supported list.
328 if ((format_rep !=
"formatte") && (format_rep !=
"binaire") && (format_rep !=
"xyz") && (format_rep !=
"single_hdf") && (format_rep !=
"pdi") && (format_rep !=
"pdi_expert"))
330 Cerr <<
"Restarting calculation... : keyword " << format_rep <<
" not understood. Waiting for:" << finl << motlu <<
" formatte|binaire|xyz|single_hdf|pdi|pdi_expert Filename" << finl;
// PolyMAC_MPFA discretizations are only accepted together with the binary format.
335 if (pb_base_->discretisation().is_PolyMAC_MPFA() && format_rep !=
"binaire")
337 Cerr <<
"Error in Save_Restart::" << __func__ <<
" !! " << finl;
338 Cerr <<
"Only the binary format is currently supported to resume a simulation with the discretization " << pb_base_->discretisation().que_suis_je() <<
" ! " << finl;
339 Cerr <<
"Please update your data file and use a .sauv file !" << finl;
// pdi_expert: file name and yaml name come from a dedicated option block.
345 if( format_rep ==
"pdi_expert" )
347 lire_pdi_sauvegarde_reprise(is, motlu, restart_filename_, nom_yaml);
// Otherwise (presumably) the restart file name is read directly.
351 is >> restart_filename_;
// PDI restart.
356 if(format_rep ==
"pdi")
358 std::string yaml_fname = nom_yaml.
getString();
// "??" presumably marks a yaml name not supplied by the user - TODO confirm; a default
// restart_<problem name>.yml is then used.
359 if(yaml_fname ==
"??")
361 Ecrire_YAML yaml_file;
362 yaml_file.
add_pb_base(pb_base_, restart_filename_);
363 yaml_fname =
"restart_" + pb_base_->le_nom().getString() +
".yml";
369 prepare_PDI_restart(resume_last_time);
372 pb_base_->reprendre(useless);
// single_hdf restart: deprecated, a warning banner is printed first.
376 else if(format_rep ==
"single_hdf")
379 Cerr <<
"==============================================================================" << finl;
380 Cerr <<
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << finl;
381 Cerr <<
"WARNING::you are using a deprecated backup file format. Please switch to PDI." << finl;
382 Cerr <<
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << finl;
383 Cerr <<
"==============================================================================" << finl;
// The restart file must be openable.
385 if (!test.ouvrir(restart_filename_))
387 Cerr <<
"Error! " << restart_filename_ <<
" file not found ! " << finl;
// Open the HDF file with the read-only flag set.
390 FichierHDFPar fic_hdf;
391 Entree_Brute input_data;
392 fic_hdf.
open(restart_filename_,
true);
// Take the last time stored in the data as the new initial time (the guarding condition is
// not visible here).
397 double last_time = -1;
398 last_time = get_last_time(input_data);
399 setTinitFromLastTime(last_time);
// The format tag must come first, followed by the version number.
404 if (motlu==
"format_sauvegarde:")
406 input_data >> restart_version_;
407 checkVersion(restart_filename_);
411 Cerr<<
"This .sauv file is too old and the format is not supported anymore."<<finl;
417 pb_base_->reprendre(input_data);
// Remaining formats: the reader type is chosen from the format keyword (the type used for
// "xyz" is not visible here).
421 OWN_PTR(Entree_Fichier_base) fic;
422 if (format_rep ==
"formatte")
423 fic.typer(
"LecFicDistribue");
424 else if (format_rep ==
"binaire")
425 fic.typer(
"LecFicDistribueBin");
426 else if (format_rep ==
"xyz")
431 fic->ouvrir(restart_filename_);
434 Cerr <<
"Error during the opening of the restart file : " << restart_filename_ << finl;
439 if (resume_last_time)
442 double last_time = -1.;
443 last_time = get_last_time(fic);
444 setTinitFromLastTime(last_time);
// The file is opened again here - presumably because get_last_time consumed it; TODO confirm.
447 fic->ouvrir(restart_filename_);
// First word of the file: expected to be the format tag.
458 fic.valeur() >> motlu;
459 if (motlu !=
"FORMAT_SAUVEGARDE:")
// No format tag: for xyz the file is reopened and version 151 is assumed.
461 if (format_rep ==
"xyz")
465 fic->ouvrir(restart_filename_);
466 restart_version_ = 151;
// Explanation printed when such a file cannot be resumed (presumably for the other
// formats - the branch structure is not visible here).
470 Cerr <<
"-------------------------------------------------------------------------------------" << finl;
471 Cerr <<
"The resumption file " << restart_filename_ <<
" can not be read by this version of TRUST" << finl;
472 Cerr <<
"which is a later version than 1.5. Indeed, the numbering of the faces have changed" << finl;
473 Cerr <<
"and it would produce an erroneous resumption. If you want to use this version," << finl;
474 Cerr <<
"you must do a resumption of the file .xyz saved during the previous calculation" << finl;
475 Cerr <<
"because this file is independent of the numbering of the faces." << finl;
476 Cerr <<
"The next backup will be made in a format compatible with the new" << finl;
477 Cerr <<
"numbering of the faces and you can then redo classical resumptions." << finl;
478 Cerr <<
"-------------------------------------------------------------------------------------" << finl;
// Format tag present: read the version and validate it.
484 fic.valeur() >> restart_version_;
485 checkVersion(restart_filename_);
// Record that a restart took place and is in progress.
492 restart_done_ =
true;
493 restart_in_progress_ =
true;
// Read the backup options from `is`: the checkpoint format and the checkpoint file name.
// motlu: keyword that triggered the call; "sauvegarde_simple" turns on simple_restart_.
// NOTE(review): several lines of this function are not visible in this listing.
499void Save_Restart::lire_sauvegarde(
Entree& is,
Motcle& motlu)
502 if (motlu ==
"sauvegarde_simple")
503 simple_restart_ =
true;
504 is >> checkpoint_format_;
// When the word read is not a known format keyword, it is taken as the file name and the
// format becomes "binaire".
505 if ((Motcle(checkpoint_format_) !=
"binaire") && (Motcle(checkpoint_format_) !=
"formatte") &&
506 (Motcle(checkpoint_format_) !=
"xyz") && (Motcle(checkpoint_format_) !=
"single_hdf") &&
507 (Motcle(checkpoint_format_) !=
"pdi") && (Motcle(checkpoint_format_) !=
"pdi_expert") )
509 checkpoint_filename_ = checkpoint_format_;
510 checkpoint_format_ =
"binaire";
// pdi_expert: read the dedicated option block, then handle the format as plain "pdi".
514 if( Motcle(checkpoint_format_) ==
"pdi_expert" )
516 lire_pdi_sauvegarde_reprise(is, motlu, checkpoint_filename_, yaml_fname_);
517 checkpoint_format_ =
"pdi";
// single_hdf is deprecated: print a warning banner.
521 if( Motcle(checkpoint_format_) ==
"single_hdf" )
523 Cerr <<
"==============================================================================" << finl;
524 Cerr <<
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << finl;
525 Cerr <<
"WARNING::you are using a deprecated backup file format. Please switch to PDI." << finl;
526 Cerr <<
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << finl;
527 Cerr <<
"==============================================================================" << finl;
529 is >> checkpoint_filename_;
// PolyMAC_MPFA discretizations: report it; the format is then set to "binaire"
// (presumably inside this branch - the braces are not visible here).
537 if (pb_base_->discretisation().is_PolyMAC_MPFA())
539 Cerr <<
"Problem " << pb_base_->le_nom() <<
" with the discretization "
540 << pb_base_->discretisation().que_suis_je() <<
" => EcritureLectureSpecial = 0 !" << finl;
543 checkpoint_format_ =
"binaire";
// Append "_<problem name>.sauv" to the file name (the condition for this is not visible here).
545 checkpoint_filename_ +=
"_";
546 checkpoint_filename_ += pb_base_->le_nom();
547 checkpoint_filename_ +=
".sauv";
// NOTE(review): fragment - the function header is not visible in this listing; the error message
// below names the function as Save_Restart::lire_sauvegarde_reprise.
// Dispatch on the keyword: resumption keywords go to lire_reprise, backup keywords to
// lire_sauvegarde.
551 if ((motlu ==
"reprise") || (motlu ==
"resume_last_time"))
552 lire_reprise(is, motlu);
553 else if (motlu ==
"sauvegarde" || motlu ==
"sauvegarde_simple")
554 lire_sauvegarde(is, motlu);
// "}" is tested next; the message below is printed when another word is found instead
// (presumably in the final else branch - not visible here).
555 else if (motlu ==
"}")
559 Cerr <<
"Error in Save_Restart::lire_sauvegarde_reprise" << finl;
560 Cerr <<
"We expected } instead of " << motlu <<
" to mark the end of the data set" << finl;
// The checkpoint format is set to "pdi" (the condition for this is not visible here).
569 checkpoint_format_ =
"pdi";
// Validate the checkpoint format against the supported keywords.
571 if ((
Motcle(checkpoint_format_) !=
"binaire") && (
Motcle(checkpoint_format_) !=
"formatte") &&
572 (
Motcle(checkpoint_format_) !=
"xyz") && (
Motcle(checkpoint_format_) !=
"pdi") &&
573 (
Motcle(checkpoint_format_) !=
"single_hdf"))
575 Cerr <<
"Error of backup format ! We expected formatte, binaire, xyz, or pdi/pdi_expert (which replace single_hdf). single_hdf format is still available but is deprecated, please use pdi instead." << finl;
// When temps_init() is at or below -DMAXFLOAT, the initial and current times are set to 0.
579 if (pb_base_->schema_temps().temps_init() <= -DMAXFLOAT)
581 pb_base_->schema_temps().set_temps_init() = 0;
582 pb_base_->schema_temps().set_temps_courant() = 0;
// NOTE(review): fragment - the function header is not visible in this listing; an error message
// further down names the function as Save_Restart::sauver(). Many lines are missing, so the
// nesting of the branches below must be confirmed against the complete file.
// stat() returns non-zero on failure, i.e. when information on `fichier` cannot be obtained.
592 if (stat(fichier, &f))
// Non-zero when the checkpoint format is "pdi".
602 int pdi_format =
Motcle(checkpoint_format_) ==
"pdi";
607 std::string yaml_fname = yaml_fname_.getString();
// "??" presumably marks a yaml name not supplied by the user - TODO confirm; a default
// save_<problem name>.yml is then used.
608 if(yaml_fname ==
"??")
611 yaml_fname =
"save_" + pb_base_->le_nom().getString() +
".yml";
612 yaml_file.
add_pb_base(pb_base_, checkpoint_filename_);
// Configuration data is produced only while config_file_created_ is false; the flag is
// set to true afterwards.
618 if(!config_file_created_)
621 IntTab nodeRanks(nb_proc);
623 envoyer_gather(nodeRanks, nodeRanks, 0);
// Hand nb_proc and nb_nodes to PDI and fire the matching events.
629 pdi_interface.
write(
"nb_proc", &nb_proc);
630 pdi_interface.
trigger(
"InitConfig");
633 pdi_interface.
write(
"nb_nodes", &nb_nodes);
634 pdi_interface.
trigger(
"WriteConfig");
636 pdi_interface.
trigger(
"WriteNodeRanks");
639 config_file_created_ =
true;
// Local int copy of simple_restart_ (presumably needed because this method is const and
// PDI takes a pointer - TODO confirm).
648 int non_const_sr = simple_restart_;
// Event name "init_<problem name>".
650 std::string
event =
"init_" + pb_base_->le_nom().getString();
// Export the PDI format version under the name "version".
655 int version = version_format_PDI();
656 pdi_interface.
write(
"version", &version);
657 ficsauv_created_ =
true;
// Other formats: taken when neither ficsauv_ nor osauv_hdf_ is set yet.
660 else if (!ficsauv_ && !osauv_hdf_)
// Formatted output: 64-bit mode off, scientific notation on.
663 if (
Motcle(checkpoint_format_) ==
"formatte")
665 ficsauv_.typer(
"EcrFicCollecte");
671 ficsauv_->set_64b(
false);
672 ficsauv_->ouvrir(checkpoint_filename_);
673 ficsauv_->setf(ios::scientific);
// Binary output.
675 else if (
Motcle(checkpoint_format_) ==
"binaire")
677 ficsauv_.typer(
"EcrFicCollecteBin");
678 ficsauv_->set_64b(
false);
679 ficsauv_->ouvrir(checkpoint_filename_);
// xyz output (the typing of ficsauv_ for this case is not visible here).
681 else if (
Motcle(checkpoint_format_) ==
"xyz")
684 ficsauv_->ouvrir(checkpoint_filename_);
686 else if (
Motcle(checkpoint_format_) ==
"single_hdf")
// Error text for an unsupported format (presumably the final else branch).
690 Cerr <<
"Error in Save_Restart::sauver() " << finl;
691 Cerr <<
"The format for the backup file must be either binary or formatted or pdi/pdi_expert (which replace single_hdf). single_hdf is still available but is deprecated, please use pdi instead." << finl;
692 Cerr <<
"But it is :" << checkpoint_format_ << finl;
// Header: the format tag followed by the format version, sent to the stream in use.
696 if (
Motcle(checkpoint_format_) ==
"xyz")
699 ficsauv_.valeur() <<
"format_sauvegarde:" << finl << version_format_sauvegarde() << finl;
701 else if ((
Motcle(checkpoint_format_) ==
"single_hdf"))
702 *osauv_hdf_ <<
"format_sauvegarde:" << finl << version_format_sauvegarde() << finl;
704 ficsauv_.valeur() <<
"format_sauvegarde:" << finl << version_format_sauvegarde() << finl;
// Save the problem to the stream `useless` (presumably the PDI case, where the data goes
// through PDI events - TODO confirm), then trigger the PDI events named after the problem.
718 bytes = pb_base_->sauvegarder(useless);
721 std::string f_event =
"local_backup_" + pb_base_->le_nom().getString();
722 pdi_interface.
trigger(f_event);
726 std::string s_event =
"global_backup_" + pb_base_->le_nom().getString();
727 pdi_interface.
trigger(s_event);
730 std::string t_event = pb_base_->le_nom().getString() +
"_get_types";
731 pdi_interface.
trigger(t_event);
// single_hdf saves to osauv_hdf_, the remaining case to ficsauv_.
735 else if (
Motcle(checkpoint_format_) ==
"single_hdf")
736 bytes = pb_base_->sauvegarder(*osauv_hdf_);
738 bytes = pb_base_->sauvegarder(ficsauv_.valeur());
// simple_restart_ set and format not pdi: write the "fin" end marker.
743 if (simple_restart_ && !pdi_format)
745 if (
Motcle(checkpoint_format_) ==
"xyz")
748 ficsauv_.valeur() <<
Nom(
"fin");
749 (ficsauv_.valeur()).flush();
750 (ficsauv_.valeur()).syncfile();
752 else if (
Motcle(checkpoint_format_) ==
"single_hdf")
754 *osauv_hdf_ <<
Nom(
"fin");
756 fic_hdf.
create(checkpoint_filename_);
764 ficsauv_.valeur() <<
Nom(
"fin");
765 (ficsauv_.valeur()).flush();
// simple_restart_ not set and an output stream exists: same end-marker sequence.
778 else if (!simple_restart_ && (ficsauv_ || osauv_hdf_) )
780 if (
Motcle(checkpoint_format_) ==
"xyz")
783 ficsauv_.valeur() <<
Nom(
"fin");
784 (ficsauv_.valeur()).flush();
785 (ficsauv_.valeur()).syncfile();
787 else if (
Motcle(checkpoint_format_) ==
"single_hdf")
789 *osauv_hdf_ <<
Nom(
"fin");
791 fic_hdf.
create(checkpoint_filename_);
799 ficsauv_.valeur() <<
Nom(
"fin");
800 (ficsauv_.valeur()).flush();
// For formats other than xyz, a message about the disabled .xyz backup follows (the
// condition that selects the message is not visible here).
807 if (
Motcle(checkpoint_format_) !=
"xyz")
812 Cerr <<
"As saving .xyz file disabled since 1.9.7, add into your datafile \"EcritureLectureSpecial 1\" to enable it again if wanted." << finl;
// NOTE(review): fragment; the enclosing function header is not visible in this listing.
// Call close_save_file() on every equation of the problem.
815 for(
int i=0; i<pb_base_->nombre_d_equations(); i++)
816 pb_base_->equation(i).close_save_file();
int get_node_id() const
Retrieve ID of my numa node.
int get_number_of_nodes() const
classe Ecrire_YAML Use this to generate a yaml file that will then be read by the PDI library (for ch...
void write_checkpoint_file(const std::string &yaml_fname)
Generate the YAML file that will be read for checkpoint.
void add_pb_base(const Probleme_base &pb_base, const Nom &full_file_name)
void write_restart_file(const std::string &yaml_fname)
Generate the YAML file that will be read for restart.
static Nom & get_Output()
Renvoie le mode d'ecriture utilise (pour pouvoir le modifier).
Class defining operators and methods for all reading operation in an input flow (file,...
Parallel collective version of FichierHDF, to be used for all concurrent reading/writing on HDF files...
virtual void create_and_fill_dataset_MW(Nom dataset_basename, Sortie_Brute &sortie)
virtual void read_dataset(Nom dataset_basename, int proc_rank, Entree_Brute &entree)
virtual void open(Nom filename, bool readOnly)
virtual void create(Nom filename)
Cette classe implemente les operateurs et les methodes virtuelles de la classe EFichier de la facon s...
Une chaine de caractere (Nom) en majuscules.
Un tableau d'objets de la classe Motcle.
class Nom Une chaine de caractere pour nommer les objets de TRUST
const std::string & getString() const
const Nom & le_nom() const override
Renvoie *this;.
static const Nom & nom_du_cas()
Renvoie une reference constante vers le nom du cas.
virtual int reprendre(Entree &)
Reprise d'un Objet_U sur un flot d'entree Methode a surcharger.
static const Comm_Group & get_node_group()
Renvoie une reference au groupe sur les noeuds.
classe Probleme_base C'est un Probleme_U qui n'est pas un couplage.
static double mp_min(double)
static double mp_max(double)
static int node_master()
renvoie 1 si on est sur le processeur maitre du noeud numa, 0 sinon.
static int nproc()
renvoie le nombre de processeurs dans le groupe courant Voir Comm_Group::nproc() et PE_Groups::curren...
static void barrier()
Synchronise tous les processeurs du groupe courant (attend que tous les processeurs soient arrives a ...
static bool force_single_file(const int ranks, const Nom &filename)
static int me()
renvoie mon rang dans le groupe de communication courant.
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
static int je_suis_maitre()
renvoie 1 si on est sur le processeur maitre du groupe courant (c'est a dire me() == 0),...
bool & reprise_effectuee()
int sauver() const
Ecriture sur fichier en vue d'une reprise (sauvegarde).
void sauver_xyz(int) const
void assoscier_pb_base(const Probleme_base &)
void lire_sauvegarde_reprise(Entree &is, Motcle &motlu)
int allocate_file_size(long int &size) const
Verifie que la place necessaire existe sur le disque dur.
This derived class of Sortie stacks whatever it receives in an internal binary buffer.
Classe derivee de Sortie qui ne sort les donnees nulle part (c'est une poubelle) Classe utilisee dans...
classe TRUST_2_PDI Encapsulation of PDI methods (library used for IO operations). See the website pdi...
static void set_PDI_restart(int r)
void stop_sharing_last_variable()
void write(const std::string &name, const void *data)
void prepareRestart(OWN_PTR(Comm_Group)&nodeGroup, int &last_iteration, double &tinit, int resume_last_time)
Generic method to prepare the restart of a computation.
static void init(std::string IO_config)
static void set_PDI_checkpoint(int c)
static int is_PDI_initialized()
void TRUST_start_sharing(const std::string &name, const void *data)
void trigger(const std::string &event)