16#include <Solv_cuDSS.h>
21#include <Comm_Group_MPI.h>
22#include <MD_Vector_std.h>
23#include <MD_Vector_composite.h>
25#include <Perf_counters.h>
26#include <Array_tools.h>
29#define CUDSS_CALL_AND_CHECK(call, status, msg) \
32 if (status != CUDSS_STATUS_SUCCESS) { \
33 printf("Example FAILED: CUDSS call ended unsuccessfully with status = %d, details: " #msg "\n", status); \
54 Cerr <<
"[cuDSS] reading jdd "<< finl;
57 Motcle accolade_ouverte(
"{"), accolade_fermee(
"}");
61 if (motlu != accolade_ouverte)
63 Cerr <<
"[cuDSS] Error while reading the parameters of the solver " << solver <<
" { ... }" << finl;
64 Cerr <<
"We expected " << accolade_ouverte <<
" instead of " << motlu << finl;
68 if (solver==(Motcle)
"LU")
70 std::cout<<
"[cuDSS] LU is read, matrix is assumed non symmetric"<<std::endl;
71 mtype=CUDSS_MTYPE_GENERAL;
72 mview=CUDSS_MVIEW_FULL;
74 else if (solver==(Motcle)
"Cholesky")
76 std::cout<<
"[cuDSS] Cholesky is read, matrix is assumed symmetric"<<std::endl;
77 mtype=CUDSS_MTYPE_SYMMETRIC;
78 mview=CUDSS_MVIEW_UPPER;
83 while (motlu!=accolade_fermee)
85 if (motlu==(Motcle)
"impr")
89 else if (motlu==(Motcle)
"algo")
93 if (motlu==(Motcle)
"0") { reorder_alg = CUDSS_ALG_DEFAULT;}
94 else if (motlu==(Motcle)
"1") { reorder_alg = CUDSS_ALG_1;}
95 else if (motlu==(Motcle)
"2") { reorder_alg = CUDSS_ALG_2;}
96 else if (motlu==(Motcle)
"3") { reorder_alg = CUDSS_ALG_3;}
97 else if (motlu==(Motcle)
"4") { reorder_alg = CUDSS_ALG_4;}
98 else if (motlu==(Motcle)
"5") { reorder_alg = CUDSS_ALG_5;}
101 Cerr <<
"[cuDSS] algo number "<<motlu<<
" unrecognized (0-5) only"<<finl;
104 Cerr <<
"[cuDSS] algo number "<<motlu<<
" read. "<<finl;
105 Cerr <<
"[cuDSS] Note from cuDSS doc: Different values represent different algorithms (for reordering, factorization, etc.) and can lead to significant differences in accuracy and performance. It is currently recommended to use CUDSS_ALG_DEFAULT (0) and only in case accuracy or performance are not sufficient, one can experiment with other values."<<finl;
110 Cerr << motlu <<
"[cuDSS] keyword not recognized for cuDSS solver " << solver << finl;
116 Process::exit(
"Sorry, cuDSS solvers not available with this build (cuDSS readon).");
135 cudssMatrixDestroy(A);
136 cudssMatrixDestroy(b);
137 cudssMatrixDestroy(x);
142 cudssDataDestroy(handle, solverData);
143 cudssConfigDestroy(solverConfig);
144 cudssDestroy(handle);
174 statistics().begin_count(STD_COUNTERS::gpu_library);
178 CUDSS_CALL_AND_CHECK(cudssExecute(handle, CUDSS_PHASE_ANALYSIS, solverConfig, solverData,
179 A, x, b), status,
"cudssExecute for analysis");
182 CUDSS_CALL_AND_CHECK(cudssExecute(handle, CUDSS_PHASE_FACTORIZATION, solverConfig,
183 solverData, A, x, b), status,
"cudssExecute for facto");
193 set_pointers_xb(bvect, xvect);
203 CUDSS_CALL_AND_CHECK(cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, solverData,
204 A, x, b), status,
"cudssExecute for solve");
205 cudaStreamSynchronize(
nullptr);
206 statistics().end_count(STD_COUNTERS::gpu_library);
210 DoubleVect test(bvect);
213 double vrai_residu = mp_norme_vect(test);
214 assert(vrai_residu<1e-5);
215 Cout <<
"||Ax-b||=" << vrai_residu << finl;
221 Process::exit(
"Sorry, cuDSS solvers not available with this build (resoudre_systeme).");
241 int new_nnz = (int)(csr.
get_coeff().size_array());
243 assert(((new_n==n)||(first_solve)));
248 if((first_solve)||(new_nnz != nnz))
255 cudssMatrixDestroy(A);
258 CUDSS_CALL_AND_CHECK(cudssMatrixCreateCsr(&A, n, n, nnz,
nullptr,
nullptr,
259 nullptr,
nullptr, CUDA_R_32I, CUDA_R_64F, mtype, mview,
260 base), status,
"cudssMatrixCreateCsr");
268 CUDSS_CALL_AND_CHECK(cudssMatrixCreateDn(&b, n, nrhs, n,
nullptr, CUDA_R_64F,
269 CUDSS_LAYOUT_COL_MAJOR), status,
"cudssMatrixCreateDn for b");
270 CUDSS_CALL_AND_CHECK(cudssMatrixCreateDn(&x, n, nrhs, n,
nullptr, CUDA_R_64F,
271 CUDSS_LAYOUT_COL_MAJOR), status,
"cudssMatrixCreateDn for x");
278 CUDSS_CALL_AND_CHECK(cudssCreate(&handle), status,
"cudssCreate");
280 CUDSS_CALL_AND_CHECK(cudssConfigCreate(&solverConfig), status,
"cudssConfigCreate");
282 CUDSS_CALL_AND_CHECK(cudssConfigSet(solverConfig, CUDSS_CONFIG_REORDERING_ALG,
283 &reorder_alg,
sizeof(cudssAlgType_t)), status,
"cudssConfigSet");
285 CUDSS_CALL_AND_CHECK(cudssDataCreate(handle, &solverData), status,
"cudssDataCreate");
287 solver_is_built =
true;
296 csr_offsets_d =
const_cast<int*
>(
reinterpret_cast<const int*
>(csr.
get_tab1_int32().view_ro<1>().data()));
297 csr_columns_d =
const_cast<int*
>(csr.
get_tab2().view_ro<1>().data());
298 csr_values_d =
const_cast<double*
>(csr.
get_coeff().view_ro<1>().data());
301 CUDSS_CALL_AND_CHECK(cudssMatrixSetCsrPointers(A,
305 csr_values_d), status,
"cudssMatrixSetCsrPointers")
309void Solv_cuDSS::set_pointers_xb(
const DoubleVect& bvect, DoubleVect& xvect)
314 x_values_d =
const_cast<double*
>(xvect.view_wo<1>().
data());
315 b_values_d =
const_cast<double*
>(bvect.view_ro<1>().
data());
318 CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(x, x_values_d), status,
"cudssMatrixSetValues for x");
319 CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(b, b_values_d), status,
"cudssMatrixSetValues for b");
Une entree dont la source est une chaine de caracteres.
Class defining operators and methods for all reading operation in an input flow (file,...
Classe Matrice_Base Classe de base de la hierarchie des matrices.
virtual int nb_lignes() const =0
Return local number of lines (=size on the current proc).
virtual DoubleVect & ajouter_multvect(const DoubleVect &x, DoubleVect &r) const
Operation de multiplication-accumulation (saxpy) matrice vecteur.
Classe Matrice_Morse Represente une matrice M (creuse), non necessairement carree.
const auto & get_tab2() const
const IntVect & get_tab1_int32() const
const auto & get_coeff() const
int nb_lignes() const override
Return local number of lines (=size on the current proc).
void set_tab1_int32() const
virtual Entree & readOn(Entree &)
Lecture d'un Objet_U sur un flot d'entree Methode a surcharger.
virtual Sortie & printOn(Sortie &) const
Ecriture de l'objet sur un flot de sortie Methode a surcharger.
static bool is_parallel()
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
void construit_matrice_morse_intermediaire(const Matrice_Base &, Matrice_Morse &)
int resoudre_systeme(const Matrice_Base &a, const DoubleVect &b, DoubleVect &x, int niter_max) override
const Nom & get_chaine_lue() const
bool nouvelle_matrice() const
void fixer_nouvelle_matrice(bool i)
Classe de base des flux de sortie.
_SIZE_ size_array() const
_SIZE_ size_totale() const