17#include <Matrice_Morse.h>
19#include <communications.h>
20#include <Perf_counters.h>
// One-time initialization of the AmgX solver wrapper (SolveurAmgX_).
// Does nothing when amgx_initialized() already reports true; otherwise logs the
// configuration file name, initializes SolveurAmgX_ on PETSC_COMM_WORLD and prints
// the elapsed time.
43void Solv_AMGX::initialize()
// Guard: skip everything if AmgX has already been initialized.
45 if (amgx_initialized())
return;
// Mode string forwarded to SolveurAmgX_.initialize() below.
// NOTE(review): "dDDI" is presumably the AmgX mode (device / double / double / int) - confirm against the AmgX documentation.
46 Nom AmgXmode =
"dDDI";
53 Cerr <<
"Initializing Amgx and reading the " <<
config() <<
" file." << finl;
// The solver is initialized on PETSC_COMM_WORLD with the mode string and the config() string.
55 SolveurAmgX_.initialize(PETSC_COMM_WORLD, AmgXmode.
getString(),
config().getString());
// `start` is not declared in the lines visible in this excerpt.
56 Cout <<
"[AmgX] Time to initialize: " << statistics().
compute_time(start) << finl;
// Builds the PETSc matrix from mat_morse, extracts its CSR arrays with petscToCSR(),
// copies the coefficient values to a freshly allocated device buffer and hands the
// matrix to SolveurAmgX_.setA(). The copy + setup is timed under the
// STD_COUNTERS::gpu_copytodevice counter.
// Parameters:
//   mat_morse - source matrix passed to Create_MatricePetsc().
//   blocksize - not used in the lines visible in this excerpt.
62void Solv_AMGX::Create_objects(
const Matrice_Morse& mat_morse,
int blocksize)
// The condition guarding this error message is not visible in this excerpt.
67 Cerr <<
"Read_matrix not supported on GPU yet." << finl;
// Destroy a previously created PETSc matrix before rebuilding it.
71 if (MatricePetsc_ !=
nullptr) MatDestroy(&MatricePetsc_);
73 Create_MatricePetsc(MatricePetsc_, mataij_, mat_morse);
// Fills nRowsLocal, nRowsGlobal, nNz, rowOffsets, colIndices and values (see petscToCSR).
// NOTE(review): the PetscErrorCode returned by petscToCSR is not checked here.
75 petscToCSR(MatricePetsc_, SolutionPetsc_, SecondMembrePetsc_);
76 Cout <<
"[AmgX] Time to create CSR pointers: " << statistics().
compute_time(start) << finl;
// Open the copy-to-device counter one level below the last opened counter.
77 statistics().
begin_count(STD_COUNTERS::gpu_copytodevice,statistics().get_last_opened_counter_level()+1);
// Device buffer holding the nNz matrix coefficients.
// NOTE(review): cudaMalloc/cudaMemcpy return codes are not checked, and the release of
// values_device is not visible in this excerpt - verify it is freed after setA().
79 double* values_device;
80 cudaMalloc((
void**)&values_device, nNz *
sizeof(
double));
// Host -> device copy of the coefficient array.
81 cudaMemcpy(values_device, values, nNz *
sizeof(
double), cudaMemcpyHostToDevice);
// rowOffsets/colIndices are passed as obtained from petscToCSR; only the values are the device copy.
83 SolveurAmgX_.setA(nRowsGlobal, nRowsLocal, nNz, rowOffsets, colIndices, values_device,
nullptr);
85 Cout <<
"[AmgX] Time to set matrix (copy+setup) on GPU: " << statistics().
get_time_since_last_open(STD_COUNTERS::gpu_copytodevice) << finl;
// Quantity recorded with the counter: bytes of the index arrays plus bytes of the values.
// NOTE(review): the byte count is narrowed with static_cast<int>; this may overflow for
// very large matrices - confirm the expected parameter type of end_count().
86 statistics().
end_count(STD_COUNTERS::gpu_copytodevice, 1,
static_cast<int>(
sizeof(
int) * (nRowsLocal + nNz) +
sizeof(
double) * nNz));
// Takes the right-hand side vector b by const reference.
// The body of this method is not visible in this excerpt.
// NOTE(review): presumably creates the device-side lhs/rhs vectors - confirm against the full source.
89void Solv_AMGX::Create_vectors(
const DoubleVect& b)
// Takes the right-hand side (secmem, read-only) and the solution vector (solution, mutable).
// The body of this method is not visible in this excerpt.
// NOTE(review): presumably refreshes the lhs/rhs data used by solve() - confirm against the full source.
94void Solv_AMGX::Update_vectors(
const DoubleVect& secmem, DoubleVect& solution)
// Extracts CSR data from the PETSc matrix A into the members used by the AmgX calls:
// nRowsLocal, rowOffsets, colIndices (MatGetRowIJ), values (MatSeqAIJGetArray),
// nRowsGlobal (sum of nRowsLocal over PETSC_COMM_WORLD) and nNz.
// Parameters:
//   A         - PETSc matrix; its type must be MATSEQAIJ or MATMPIAIJ.
//   lhs_petsc - not used in the lines visible in this excerpt.
//   rhs_petsc - not used in the lines visible in this excerpt.
// Returns 0 through PetscFunctionReturn; an unsupported matrix type raises
// PETSC_ERR_ARG_WRONG through SETERRQ.
// NOTE(review): the checks of `ierr` after each call are not visible in this excerpt.
106PetscErrorCode Solv_AMGX::petscToCSR(Mat& A, Vec& lhs_petsc, Vec& rhs_petsc)
108 PetscFunctionBeginUser;
// `type` is declared on a line not visible here.
113 PetscErrorCode ierr = MatGetType(A, &type);
// Sequential AIJ matrix: the body of this branch is not visible in this excerpt.
117 if (std::strcmp(type, MATSEQAIJ) == 0)
// MPI AIJ matrix: drop any previous local matrix, then build a new local matrix from A.
122 else if (std::strcmp(type, MATMPIAIJ) == 0)
125 if (localA!=
nullptr) MatDestroy(&localA);
126 ierr = MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &localA);
// Any other matrix type is rejected.
131 SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG,
"Mat type %s is not supported!\n", type);
// Row count, row offsets and column indices of localA (`done` is declared on a line not visible here).
135 ierr = MatGetRowIJ(localA, 0, PETSC_FALSE, PETSC_FALSE, &nRowsLocal, &rowOffsets, &colIndices, &done);
// Pointer to the coefficient array of localA.
139 ierr = MatSeqAIJGetArray(localA, &values);
// Global row count = sum of the local row counts over all processes.
// NOTE(review): MPI_INT assumes nRowsLocal/nRowsGlobal are int-sized - confirm for 64-bit PetscInt builds.
151 ierr = MPI_Allreduce(&nRowsLocal, &nRowsGlobal, 1, MPI_INT, MPI_SUM, PETSC_COMM_WORLD);
// The last entry of the row-offset array is the number of stored non-zeros.
155 nNz = rowOffsets[nRowsLocal];
156 PetscFunctionReturn(0);
// Sends the current coefficient array (`values`, nNz entries, nRowsLocal rows) to the
// solver with SolveurAmgX_.updateA(), timed under STD_COUNTERS::gpu_copytodevice.
// Parameters:
//   MatricePetsc - not used in the lines visible in this excerpt.
//   mat_morse    - not used in the lines visible in this excerpt.
159void Solv_AMGX::Update_matrix(Mat& MatricePetsc,
const Matrice_Morse& mat_morse)
// Open the copy-to-device counter one level below the last opened counter.
162 statistics().
begin_count(STD_COUNTERS::gpu_copytodevice,statistics().get_last_opened_counter_level()+1);
163 SolveurAmgX_.updateA(nRowsLocal, nNz, values);
164 Cout <<
"[AmgX] Time to update matrix (copy+resetup) on GPU: " << statistics().
get_time_since_last_open(STD_COUNTERS::gpu_copytodevice) << finl;
// Quantity recorded with the counter: size in bytes of the coefficient array.
165 statistics().
end_count(STD_COUNTERS::gpu_copytodevice, 1,
sizeof(
double)*nNz);
// Compares the non-zero pattern of mat_morse with the CSR arrays currently held
// (rowOffsets / colIndices). While scanning, coefficients whose column is found in the
// stored row are written into `values`. new_stencil starts at 0 and is reduced with
// mp_max at the end; the lines that set it and the return statement are not visible
// in this excerpt.
// Parameter:
//   mat_morse - matrix whose tab1 / tab2 / coeff arrays are inspected.
169bool Solv_AMGX::detect_new_stencil(
const Matrice_Morse& mat_morse)
// `num_devices` is declared on a line not visible here; the test using it is also not visible.
172 cudaGetDeviceCount(&num_devices);
// NOTE(review): the message names "Solv_AMGX::check_stencil" while this function is
// detect_new_stencil - the text is a runtime string and is left unchanged here.
176 Cout <<
"[AmgX] In Solv_AMGX::check_stencil same_stencil=true cause bug in SolveurAmgX_::updateA on multi-GPU (ToDo: fix by switching to CSR interface)!" << finl;
// tab1 / tab2 / coeff are read with a "- 1" shift below, i.e. they hold 1-based indices.
182 const auto& tab1 = mat_morse.
get_tab1();
183 const auto& tab2 = mat_morse.
get_tab2();
184 const auto& coeff = mat_morse.
get_coeff();
// Renumbering array applied to the column indices read from tab2.
185 const auto& renum_array =
renum_;
186 int new_stencil = 0, RowLocal = 0;
// Loop over the rows of mat_morse (tab1 has one more entry than there are rows).
188 for (
int i = 0; i < tab1.size_array() - 1; i++)
// Count the non-zero coefficients of row i (`nnz_row` is declared on a line not visible here).
193 for (
auto k = tab1(i) - 1; k < tab1(i + 1) - 1; k++)
194 if (coeff(k) != 0) nnz_row++;
// Compare with the length of the stored CSR row.
195 if (nnz_row != rowOffsets[RowLocal + 1] - rowOffsets[RowLocal])
// "Provisoire" is French for "provisional".
197 Journal() <<
"Provisoire: Number of non-zero on GPU will change from " << rowOffsets[RowLocal + 1] - rowOffsets[RowLocal] <<
" to " << nnz_row <<
" on row " << RowLocal << finl;
// Second pass over row i: look up each coefficient in the stored CSR row.
203 for (
auto k = tab1(i) - 1; k < tab1(i + 1) - 1; k++)
// Renumbered column index of coefficient k.
208 auto col = renum_array[tab2(k) - 1];
// Linear search for `col` among the column indices of the stored row.
211 for (
auto kk = rowOffsets[RowLocal]; kk < rowOffsets[RowLocal + 1]; kk++)
213 if (colIndices[kk] == col)
// Column found: refresh the stored value in place.
215 values[kk] = coeff(k);
// The condition guarding this message is not visible; `RowGlobal` is declared on a line not visible here.
222 Journal() <<
"Provisoire: mat_morse(" << RowGlobal <<
"," << col <<
")!=0 new on GPU " << finl;
// Take the maximum of new_stencil through mp_max.
// NOTE(review): presumably a reduction across all processes - confirm.
232 new_stencil =
mp_max(new_stencil);
// `start` is not declared in the lines visible in this excerpt.
233 Cout <<
"[AmgX] Time to check stencil: " << statistics().
compute_time(start) << finl;
// Runs the AmgX solve on the device addresses of lhs_ and rhs_ for nRowsLocal rows,
// timed under STD_COUNTERS::gpu_library.
// Parameter:
//   residu - forwarded to nbiter(), which fills it with residuals.
// Returns the value returned by nbiter(residu).
238int Solv_AMGX::solve(ArrOfDouble& residu)
// NOTE(review): presumably flags lhs_ as computed on the device - semantics defined elsewhere, confirm.
241 computeOnTheDevice(
lhs_);
// Open the GPU-library counter one level below the last opened counter.
242 statistics().
begin_count(STD_COUNTERS::gpu_library,statistics().get_last_opened_counter_level()+1);
// seuil_ is passed as the last argument (presumably the convergence threshold - confirm).
244 SolveurAmgX_.solve(addrOnDevice(
lhs_), addrOnDevice(
rhs_),
static_cast<int>(nRowsLocal), seuil_);
245 statistics().
end_count(STD_COUNTERS::gpu_library);
// NOTE(review): this prints get_total_time(), whereas the other timing messages in this file use
// get_time_since_last_open() - confirm that the cumulative time is intended.
246 Cout <<
"[AmgX] Time to solve system on GPU: " << statistics().
get_total_time(STD_COUNTERS::gpu_library) << finl;
247 return nbiter(residu);
// Queries the solver for its iteration count and stores residuals in residu.
// Parameter:
//   residu - receives the residual of iteration 0 in residu(0) and, when at least one
//            iteration was done, the residual of iteration nbiter-1 in residu(nbiter).
// The local variable `nbiter` (same name as this method) and the return statement are
// declared on lines not visible in this excerpt.
// NOTE(review): residu must hold at least nbiter+1 entries - its sizing is not visible here.
250int Solv_AMGX::nbiter(ArrOfDouble& residu)
253 SolveurAmgX_.getIters(nbiter);
// NOTE(review): second argument is presumably the source process of the broadcast - confirm.
255 envoyer_broadcast(nbiter, 0);
// Residual of iteration 0.
258 SolveurAmgX_.getResidual(0, residu(0));
// Residual of the last iteration, only when at least one iteration was performed.
259 if (nbiter>0) SolveurAmgX_.getResidual(nbiter - 1, residu(nbiter));
Class defining the operators and methods for all read operations on an input stream (file,...
Classe Matrice_Morse Represente une matrice M (creuse), non necessairement carree.
const auto & get_tab2() const
const auto & get_tab1() const
const auto & get_coeff() const
const std::string & getString() const
virtual Entree & readOn(Entree &)
Lecture d'un Objet_U sur un flot d'entree Methode a surcharger.
virtual Sortie & printOn(Sortie &) const
Ecriture de l'objet sur un flot de sortie Methode a surcharger.
std::chrono::time_point< clock > time_point
void begin_count(const STD_COUNTERS &std_cnt, int counter_lvl=-100000)
double get_time_since_last_open(const STD_COUNTERS &name)
Gives, as a double, the time (in seconds) elapsed in the operation tracked by the standard counter call n...
double get_total_time(const STD_COUNTERS &name)
Gives, as a double, the total time (in seconds) elapsed in the operation tracked by the standard counter ...
double compute_time(time_point start)
Returns the time elapsed since start, in seconds.
time_point start_clock()
Start a clock, return a time_point, not a double.
void end_count(const std::string &custom_count_name, int count_increment=1, long int quantity_increment=0)
End the count of a counter and update the counter values.
static double mp_max(double)
static Sortie & Journal(int message_level=0)
Renvoie un objet statique de type Sortie qui sert de journal d'evenements.
static void abort()
Routine de sortie de Trio-U sur une erreur abort().
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
public_for_cuda void Update_lhs_rhs(const DoubleVect &b, DoubleVect &x)
void Create_lhs_rhs_onDevice()
void Update_solution(DoubleVect &x)
Classe de base des flux de sortie.