TRUST 1.9.8
HPC thermohydraulic platform
Loading...
Searching...
No Matches
Solv_cuDSS.cpp
1/****************************************************************************
2* Copyright (c) 2026, CEA
3* All rights reserved.
4*
5* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9*
10* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
11* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
12* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*
14*****************************************************************************/
15
16#include <Solv_cuDSS.h>
17#include <TRUSTVect.h>
18#include <EChaine.h>
19#include <Motcle.h>
20#include <SFichier.h>
21#include <Comm_Group_MPI.h>
22#include <MD_Vector_std.h>
23#include <MD_Vector_composite.h>
24#include <Device.h>
25#include <Perf_counters.h>
26#include <Array_tools.h>
27#include <TRUSTTrav.h>
28
29#define CUDSS_CALL_AND_CHECK(call, status, msg) \
30 do { \
31 status = call; \
32 if (status != CUDSS_STATUS_SUCCESS) { \
33 printf("Example FAILED: CUDSS call ended unsuccessfully with status = %d, details: " #msg "\n", status); \
34 Process::exit(); \
35 } \
36 } while(0);
37
38Implemente_instanciable_sans_constructeur_ni_destructeur(Solv_cuDSS, "Solv_cuDSS", Solv_Externe);
39// XD cuDSS petsc cuDSS NO_BRACE Solver via cuDSS API
40// XD attr solveur chaine solveur REQ not_set
41// XD attr option_solveur bloc_lecture option_solveur REQ not_set
42
43// printOn
45{
46 s << chaine_lue_;
47 return s;
48}
49
50// Read parameters
52{
53#ifdef cuDSS_
54 Cerr <<"[cuDSS] reading jdd "<< finl;
55 lecture(entree);
56 EChaine is(get_chaine_lue());
57 Motcle accolade_ouverte("{"), accolade_fermee("}");
58 Motcle solver, motlu;
59 is >> solver; // On lit le solveur en premier puis les options du solveur
60 is >> motlu; // On lit l'accolade
61 if (motlu != accolade_ouverte)
62 {
63 Cerr << "[cuDSS] Error while reading the parameters of the solver " << solver << " { ... }" << finl;
64 Cerr << "We expected " << accolade_ouverte << " instead of " << motlu << finl;
65 exit();
66 }
67
68 if (solver==(Motcle)"LU")
69 {
70 std::cout<<"[cuDSS] LU is read, matrix is assumed non symmetric"<<std::endl;
71 mtype=CUDSS_MTYPE_GENERAL;
72 mview=CUDSS_MVIEW_FULL;
73 }
74 else if (solver==(Motcle)"Cholesky")
75 {
76 std::cout<<"[cuDSS] Cholesky is read, matrix is assumed symmetric"<<std::endl;
77 mtype=CUDSS_MTYPE_SYMMETRIC;
78 mview=CUDSS_MVIEW_UPPER;
79 }
80 // Lecture des parametres du solver (LU non symetric, cholesky symmetric)
81 // LU|Cholesky { algo name [impr] }
82 is >> motlu;
83 while (motlu!=accolade_fermee)
84 {
85 if (motlu==(Motcle)"impr")
86 {
87 fixer_limpr(1);
88 }
89 else if (motlu==(Motcle)"algo")
90 {
91 is >> motlu; // Lecture de l'algo
92
93 if (motlu==(Motcle)"0") { reorder_alg = CUDSS_ALG_DEFAULT;}
94 else if (motlu==(Motcle)"1") { reorder_alg = CUDSS_ALG_1;}
95 else if (motlu==(Motcle)"2") { reorder_alg = CUDSS_ALG_2;}
96 else if (motlu==(Motcle)"3") { reorder_alg = CUDSS_ALG_3;}
97 else if (motlu==(Motcle)"4") { reorder_alg = CUDSS_ALG_4;}
98 else if (motlu==(Motcle)"5") { reorder_alg = CUDSS_ALG_5;}
99 else
100 {
101 Cerr << "[cuDSS] algo number "<<motlu<<" unrecognized (0-5) only"<<finl;
103 }
104 Cerr << "[cuDSS] algo number "<<motlu<<" read. "<<finl;
105 Cerr <<"[cuDSS] Note from cuDSS doc: Different values represent different algorithms (for reordering, factorization, etc.) and can lead to significant differences in accuracy and performance. It is currently recommended to use CUDSS_ALG_DEFAULT (0) and only in case accuracy or performance are not sufficient, one can experiment with other values."<<finl;
106
107 }
108 else
109 {
110 Cerr << motlu << "[cuDSS] keyword not recognized for cuDSS solver " << solver << finl;
112 }
113 is >> motlu;
114 }
115#else
116 Process::exit("Sorry, cuDSS solvers not available with this build (cuDSS readon).");
117#endif
118 return entree;
119}
120
121
123{
124 // on relance la lecture .... Necessary for some reasons
125 EChaine recup(org.get_chaine_lue());
126 readOn(recup);
127}
128
130{
131#ifdef cuDSS_
132
133 if (Axb_are_built)
134 {
135 cudssMatrixDestroy(A);
136 cudssMatrixDestroy(b);
137 cudssMatrixDestroy(x);
138 }
139
140 if (solver_is_built)
141 {
142 cudssDataDestroy(handle, solverData);
143 cudssConfigDestroy(solverConfig);
144 cudssDestroy(handle);
145 }
146#endif
147}
148
149#include <Debog.h>
150int Solv_cuDSS::resoudre_systeme(const Matrice_Base& a, const DoubleVect& bvect, DoubleVect& xvect)
151{
152#ifdef cuDSS_
153 if ((nouvelle_matrice()||first_solve))
154 {
155 /* conversion matric base to csr */
156 /*build the csr matrix on host*/
158 const Matrice_Morse& csr = csr_.nb_lignes() ? csr_ : ref_cast(Matrice_Morse, a);
159
160 /* create cudss matrix, vector and solver and size them */
161 /* Does very few things if the matrix has not changed*/
162 Create_objects(csr);
163
164 /* give the device data / csr pointers to the cudss matrix */
165 set_pointers_A(csr);
166
167 /* sizes should never change */
168 assert(a.nb_lignes() == n);
169 assert(bvect.size_totale() == n);
170 assert(xvect.size_totale() == n);
171 }
172
173 /* analysis and facto can be only done when the matrix changes */
174 statistics().begin_count(STD_COUNTERS::gpu_library);
175 if ((nouvelle_matrice()||first_solve))
176 {
177 /* Symbolic factorization x, and b are unused*/
178 CUDSS_CALL_AND_CHECK(cudssExecute(handle, CUDSS_PHASE_ANALYSIS, solverConfig, solverData,
179 A, x, b), status, "cudssExecute for analysis");
180
181 /* Factorization x, and b are unused*/
182 CUDSS_CALL_AND_CHECK(cudssExecute(handle, CUDSS_PHASE_FACTORIZATION, solverConfig,
183 solverData, A, x, b), status, "cudssExecute for facto");
184 }
185
186
187 //The pointers to x and b should never change between calls to resoudre
188// assert(x_values_d==const_cast<double*>(xvect.view_rw<1>().data()));
189// assert(b_values_d==const_cast<double*>(bvect.view_ro<1>().data()));
190
191 /* give the device data / csr pointers to the cudss vectors x and b */
192 /* This has to be done at each solve to ensure sync mechanism*/
193 set_pointers_xb(bvect, xvect);
194
195 /*some checks*/
196 /* sizes should never change */
197 assert(a.nb_lignes()==n);
198 assert(bvect.size_totale()==n);
199 assert(xvect.size_totale()==n);
200
201
202 /* Solving */
203 CUDSS_CALL_AND_CHECK(cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, solverData,
204 A, x, b), status, "cudssExecute for solve");
205 cudaStreamSynchronize(nullptr); // Important factorization and solve phases are asynchronous
206 statistics().end_count(STD_COUNTERS::gpu_library);
207
208 /*compute error in debug mode */
209#ifndef NDEBUG
210 DoubleVect test(bvect);
211 test*=-1;
212 a.ajouter_multvect(xvect,test);
213 double vrai_residu = mp_norme_vect(test);
214 assert(vrai_residu<1e-5);
215 Cout << "||Ax-b||=" << vrai_residu << finl;
216#endif
217 first_solve = false;
219 return 1;
220#else
221 Process::exit("Sorry, cuDSS solvers not available with this build (resoudre_systeme).");
222 return -1;
223#endif
224}
225
226void Solv_cuDSS::Create_objects(const Matrice_Morse& csr)
227{
228#ifdef cuDSS_
229 /* Create the solver and matrix / vector objects */
230 /* Solver and vectors are created only in the first call */
231 /*Matrix can be destroyed and re-created if nnz changes*/
233 {
234 Process::exit("Sorry, cuDSS is a sequential solver.");
235 }
236
237 /* get dimensions and nnz */
238 /* check that n does not change between solves */
239 csr.set_tab1_int32();
240 int new_n = csr.get_tab1_int32().size_array()-1;
241 int new_nnz = (int)(csr.get_coeff().size_array());
242#ifndef NDEBUG
243 assert(((new_n==n)||(first_solve))); // n should never change between solves
244#endif
245
246
247 //Re-build A only if new nnz is different from old one, possible if matrix changes
248 if((first_solve)||(new_nnz != nnz))
249 {
250 nnz=new_nnz;
251 n = new_n;
252
253 /* destroy if A is built */
254 if (Axb_are_built)
255 cudssMatrixDestroy(A);
256
257 /* create matrix, we give nullptrs*/
258 CUDSS_CALL_AND_CHECK(cudssMatrixCreateCsr(&A, n, n, nnz, nullptr, nullptr,
259 nullptr, nullptr, CUDA_R_32I, CUDA_R_64F, mtype, mview,
260 base), status, "cudssMatrixCreateCsr");
261 }
262
263 if (first_solve)
264 {
265 /* create rhs and vector we give nullptrs*/
266 // n should never change between solves, no need to resize
267
268 CUDSS_CALL_AND_CHECK(cudssMatrixCreateDn(&b, n, nrhs, n, nullptr, CUDA_R_64F,
269 CUDSS_LAYOUT_COL_MAJOR), status, "cudssMatrixCreateDn for b");
270 CUDSS_CALL_AND_CHECK(cudssMatrixCreateDn(&x, n, nrhs, n, nullptr, CUDA_R_64F,
271 CUDSS_LAYOUT_COL_MAJOR), status, "cudssMatrixCreateDn for x");
272
273 Axb_are_built=true;
274
275 /* Create the solver */
276
277 /* Create handle */
278 CUDSS_CALL_AND_CHECK(cudssCreate(&handle), status, "cudssCreate");
279 /* Create config */
280 CUDSS_CALL_AND_CHECK(cudssConfigCreate(&solverConfig), status, "cudssConfigCreate");
281 /* set options to config */
282 CUDSS_CALL_AND_CHECK(cudssConfigSet(solverConfig, CUDSS_CONFIG_REORDERING_ALG,
283 &reorder_alg, sizeof(cudssAlgType_t)), status, "cudssConfigSet");
284 /* create solver data */
285 CUDSS_CALL_AND_CHECK(cudssDataCreate(handle, &solverData), status, "cudssDataCreate");
286
287 solver_is_built = true;
288 }
289#endif
290}
291
292void Solv_cuDSS::set_pointers_A(const Matrice_Morse& csr)
293{
294#ifdef cuDSS_
295 /* get pointers */
296 csr_offsets_d = const_cast<int*>(reinterpret_cast<const int*>(csr.get_tab1_int32().view_ro<1>().data()));
297 csr_columns_d = const_cast<int*>(csr.get_tab2().view_ro<1>().data());
298 csr_values_d = const_cast<double*>(csr.get_coeff().view_ro<1>().data());
299
300 /* give pointers to A*/
301 CUDSS_CALL_AND_CHECK(cudssMatrixSetCsrPointers(A,
302 csr_offsets_d, /*rowStart*/
303 nullptr,/*rowEnd*/
304 csr_columns_d,
305 csr_values_d), status, "cudssMatrixSetCsrPointers")
306#endif
307}
308
309void Solv_cuDSS::set_pointers_xb(const DoubleVect& bvect, DoubleVect& xvect)
310{
311#ifdef cuDSS_
312
313 /* get pointers */
314 x_values_d = const_cast<double*>(xvect.view_wo<1>().data());
315 b_values_d = const_cast<double*>(bvect.view_ro<1>().data());
316
317 /* give pointers to vectors*/
318 CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(x, x_values_d), status, "cudssMatrixSetValues for x");
319 CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(b, b_values_d), status, "cudssMatrixSetValues for b");
320#endif
321}
322
Une entree dont la source est une chaine de caracteres.
Definition EChaine.h:31
Class defining operators and methods for all reading operation in an input flow (file,...
Definition Entree.h:42
Classe Matrice_Base Classe de base de la hierarchie des matrices.
virtual int nb_lignes() const =0
Return local number of lines (=size on the current proc).
virtual DoubleVect & ajouter_multvect(const DoubleVect &x, DoubleVect &r) const
Operation de multiplication-accumulation (saxpy) matrice vecteur.
Classe Matrice_Morse Represente une matrice M (creuse), non necessairement carree.
const auto & get_tab2() const
const IntVect & get_tab1_int32() const
const auto & get_coeff() const
int nb_lignes() const override
Return local number of lines (=size on the current proc).
void set_tab1_int32() const
virtual Entree & readOn(Entree &)
Lecture d'un Objet_U sur un flot d'entree Methode a surcharger.
Definition Objet_U.cpp:293
virtual Sortie & printOn(Sortie &) const
Ecriture de l'objet sur un flot de sortie Methode a surcharger.
Definition Objet_U.cpp:282
static bool is_parallel()
Definition Process.cpp:110
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
Definition Process.cpp:455
void construit_matrice_morse_intermediaire(const Matrice_Base &, Matrice_Morse &)
~Solv_cuDSS() override
int resoudre_systeme(const Matrice_Base &a, const DoubleVect &b, DoubleVect &x, int niter_max) override
Definition Solv_cuDSS.h:41
const Nom & get_chaine_lue() const
bool nouvelle_matrice() const
void fixer_limpr(int l)
void fixer_nouvelle_matrice(bool i)
void lecture(Entree &)
Classe de base des flux de sortie.
Definition Sortie.h:52
_SIZE_ size_array() const
_TYPE_ * data()
_SIZE_ size_totale() const
Definition TRUSTVect.tpp:61