16#include <Solv_Petsc.h>
19#include <petscdmshell.h>
20#include <petscsection.h>
21#ifdef PETSC_HAVE_HYPRE
22#include <HYPRE_config.h>
26#include <Matrice_Morse_Sym.h>
27#include <Matrice_Bloc_Sym.h>
28#include <Matrice_Bloc.h>
30#include <MD_Vector_tools.h>
32#include <Comm_Group_MPI.h>
38#include <Matrice_Petsc.h>
43#include <MD_Vector_composite.h>
46#include <Perf_counters.h>
280void check_not_defined(
option o)
284 Cerr <<
"Error! Option " << o.
name <<
" should not be defined with the preconditioner of this solver." << finl;
285 Cerr <<
"Change your data file." << finl;
292static bool is_number(
const std::string& s)
294 std::string::const_iterator it = s.begin();
295 while (it != s.end() && std::isdigit(*it)) ++it;
296 return !s.empty() && it == s.end();
300bool gmres_right_unpreconditionned=
true;
304 if (
amgx_ ||
gpu_ || std::getenv(
"TRUST_PETSC_VERBOSE"))
307 if(!std::is_same<PetscInt, trustIdType>::value)
308 Process::exit(
"Type mismatch!!! PetscInt and trustIdType should be equal!!! PETSc not compiled in 64b??");
310 Motcle accolade_ouverte(
"{");
311 Motcle accolade_fermee(
"}");
319 if (motlu != accolade_ouverte)
321 Cerr <<
"Error while reading the parameters of PETSc solver: " << ksp <<
" { ... }" << finl;
322 Cerr <<
"We expected " << accolade_ouverte <<
" instead of " << motlu << finl;
326 PetscBool isInitialized;
327 PetscInitialized(&isInitialized);
330 Cerr <<
"On this queuing system cluster, you need to use mpirun even on sequential mode" << finl;
331 Cerr <<
"(mpirun -np 1 ...) with a PETSc solver or the calculation will crash. " << finl;
332 Cerr <<
"You can use the trust script as a workaround:" << finl;
338 if (option_prefix_==
"??")
348 option_prefix_ +=
"solver";
350 option_prefix_ +=
"_";
363 KSPCreate(PETSC_COMM_WORLD, &SolveurPetsc_);
364 KSPGetPC(SolveurPetsc_, &PreconditionneurPetsc_);
372 petsc_TU+=
"_petsc.TU";
376#ifndef TRUST_USE_CUDA
378 add_option(
"log_view",petsc_TU);
379 PetscLogDefaultBegin();
394 les_solveurs[0] =
"CLI";
395 les_solveurs[1] =
"GCP";
396 les_solveurs[2] =
"GMRES";
397 les_solveurs[3] =
"CHOLESKY|MUMPS";
398 les_solveurs[4] =
"CHOLESKY_OUT_OF_CORE|MUMPS_OUT_OF_CORE";
399 les_solveurs[5] =
"BICGSTAB";
400 les_solveurs[6] =
"IBICGSTAB";
401 les_solveurs[7] =
"CHOLESKY_SUPERLU|LU_SUPERLU";
402 les_solveurs[8] =
"PGMRES";
403 les_solveurs[9] =
"LU";
404 les_solveurs[10] =
"PIPECG";
405 les_solveurs[11] =
"CHOLESKY_LAPACK";
406 les_solveurs[12] =
"CHOLESKY_UMFPACK";
407 les_solveurs[13] =
"CHOLESKY_PASTIX";
408 les_solveurs[14] =
"CLI_VERBOSE";
409 les_solveurs[15] =
"CLI_QUIET";
410 les_solveurs[16] =
"CHOLESKY_MUMPS_BLR|MUMPS_BLR";
411 les_solveurs[17] =
"CHOLESKY_CHOLMOD";
412 les_solveurs[18] =
"PIPECG2";
413 les_solveurs[19] =
"FGMRES";
414 les_solveurs[20] =
"LU_STRUMPACK";
416 int solver_supported_on_gpu_by_petsc=0;
417 int solver_supported_on_gpu_by_amgx=0;
419 int rang=les_solveurs.
search(ksp);
420 nommer(les_solveurs[rang]);
429 solver_supported_on_gpu_by_petsc=1;
430 solver_supported_on_gpu_by_amgx=1;
431 if (
limpr() >= 0) Cerr <<
"Reading of the " << (
amgx_ ?
"AmgX" :
"Petsc") <<
" commands:" << finl;
436 while (motlu!=accolade_fermee)
439 if (motlu ==
"-file")
444 Cerr <<
"Reading AmgX config file " << motlu <<
" :" << finl;
447 while (!config_amgx.
eof())
450 if (line.find(
"#") && line.find(
"config_version"))
452 Cerr << line << finl;
461 amgx_options_ += motlu;
462 amgx_options_ +=
"\n";
468 while (motlu!=accolade_fermee)
472 if (valeur.debute_par(
"-") || valeur==accolade_fermee)
474 add_option(motlu.suffix(
"-"),
"", 1);
480 if (motlu ==
"-ksp_max_it") ignore_nb_it_max_ = 1;
481 add_option(motlu.suffix(
"-"), valeur, 1);
488 add_option(
"ksp_view",
"");
489 add_option(
"options_view",
"");
490 add_option(
"options_left",
"");
500 if (!has_option(
option, current_pc))
504 add_option(
"pc_type",
"bjacobi");
505 add_option(
"sub_pc_type",
"ilu");
509 add_option(
"pc_type",
"ilu");
517 KSPSetType(SolveurPetsc_, KSPCG);
519 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
525 KSPCGUseSingleReduction(SolveurPetsc_,(PetscBool)1);
528 solver_supported_on_gpu_by_petsc=1;
529 solver_supported_on_gpu_by_amgx=1;
530 add_amgx_option(
"solver(s)",
"PCG");
536 KSPSetType(SolveurPetsc_, KSPPIPECG);
538 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
543 KSPSetType(SolveurPetsc_, KSPPIPECG2);
545 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
550 KSPSetType(SolveurPetsc_, KSPGMRES);
555 if (gmres_right_unpreconditionned)
557 KSPSetPCSide(SolveurPetsc_, PC_RIGHT);
558 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
560 solver_supported_on_gpu_by_petsc=1;
561 solver_supported_on_gpu_by_amgx=1;
564 add_amgx_option(
"solver(s)",
"GMRES");
565 Process::exit(
"Gmres solver on GPU with AmgX fails to return a valid solution. Try GCP, BiCGSTAB or FGMRES solvers.");
571 KSPSetType(SolveurPetsc_, KSPFGMRES);
572 KSPSetPCSide(SolveurPetsc_, PC_RIGHT);
573 KSPSetNormType(SolveurPetsc_, KSP_NORM_UNPRECONDITIONED);
574 solver_supported_on_gpu_by_petsc=1;
575 solver_supported_on_gpu_by_amgx=1;
576 add_amgx_option(
"solver(s)",
"FGMRES");
581 KSPSetType(SolveurPetsc_, KSPPGMRES);
585 KSPSetPCSide(SolveurPetsc_, PC_LEFT);
587 solver_supported_on_gpu_by_petsc=1;
596#ifdef PETSC_HAVE_MUMPS
599 if (rang == 4) add_option(
"mat_mumps_icntl_22",
"1");
605 <<
"Activating BLR factorization. For more info, see http://mumps.enseeiht.fr/doc/userguide_5.1.2.pdf (page 18, 51, 52)."
607 add_option(
"mat_mumps_icntl_35",
"1");
612 KSPSetType(SolveurPetsc_, KSPPREONLY);
620 KSPSetType(SolveurPetsc_, KSPPREONLY);
621 solver_supported_on_gpu_by_petsc=1;
625 add_option(
"mat_strumpack_gpu",
"1");
626 add_option(
"mat_strumpack_metis_nodeNDP",
"1");
632 KSPSetType(SolveurPetsc_, KSPBCGS);
633 solver_supported_on_gpu_by_petsc=1;
634 solver_supported_on_gpu_by_amgx=1;
636 add_amgx_option(
"solver(s)",
"PBICGSTAB");
641 KSPSetType(SolveurPetsc_, KSPIBCGS);
644 KSPSetLagNorm(SolveurPetsc_, PETSC_TRUE);
651 KSPSetType(SolveurPetsc_, KSPPREONLY);
654 solver_supported_on_gpu_by_petsc=1;
663 add_option(
"pc_factor_nonzeros_along_diagonal",
"");
664 KSPSetType(SolveurPetsc_, KSPPREONLY);
672 KSPSetType(SolveurPetsc_, KSPPREONLY);
674 add_option(
"mat_umfpack_pivot_tolerance",
"1.0");
681 KSPSetType(SolveurPetsc_, KSPPREONLY);
691 Cerr << ksp <<
" is only supported for symmetric linear system." << finl;
694 solver_supported_on_gpu_by_petsc=1;
695 KSPSetType(SolveurPetsc_, KSPPREONLY);
700 Cerr << ksp <<
" : solver not officially recognized by TRUST among those possible for the moment:" << finl;
701 Cerr << les_solveurs << finl;
702 Cerr <<
"You can try to access directly to Petsc solvers with the command line:" << finl;
703 Cerr <<
"PETSC CLI { -ksp_type solver_name -pc_type preconditioning_name -ksp_atol threshold -ksp_monitor }" << finl;
704 Cerr <<
"See the reference manual for all Petsc options." << finl;
712#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
713 Cerr <<
"GPU capabilities of PETSc will be used." << finl;
715 Cerr <<
"You can not use petsc_gpu keyword cause GPU" << finl;
716 Cerr <<
"capabilities will not work on your workstation with PETSc." << finl;
717 Cerr <<
"Check if you have a NVidia video card and its driver up to date." << finl;
718 Cerr <<
"Check petsc.log file under $TRUST_ROOT/lib/src/LIBPETSC for more details." << finl;
721 if (solver_supported_on_gpu_by_petsc==0)
723 Cerr << les_solveurs[rang] <<
" is not supported yet by PETSc on GPU." << finl;
730 Cerr <<
"GPU capabilities of AmgX will be used." << finl;
732 Cerr <<
"You can not use amgx keyword cause TRUST version is not build with CUDA support." << finl;
735 if (solver_supported_on_gpu_by_amgx==0)
737 Cerr << les_solveurs[rang] <<
" is not supported yet by AmgX on GPU." << finl;
742 int convergence_with_seuil=0;
744 if (motlu==accolade_ouverte)
747 Motcles les_parametres_solveur(31);
749 les_parametres_solveur[0] =
"impr";
750 les_parametres_solveur[1] =
"seuil";
751 les_parametres_solveur[2] =
"precond";
752 les_parametres_solveur[3] =
"precond_nul";
753 les_parametres_solveur[4] =
"nb_it_max";
754 les_parametres_solveur[5] =
"save_matrix_petsc_format";
755 les_parametres_solveur[6] =
"factored_matrix";
756 les_parametres_solveur[7] =
"read_matrix";
757 les_parametres_solveur[8] =
"controle_residu";
758 les_parametres_solveur[9] =
"cli";
759 les_parametres_solveur[10] =
"ordering";
760 les_parametres_solveur[11] =
"petsc_decide";
761 les_parametres_solveur[12] =
"aij";
762 les_parametres_solveur[13] =
"nb_cpus";
763 les_parametres_solveur[14] =
"divtol";
764 les_parametres_solveur[15] =
"save_matrice|save_matrix";
765 les_parametres_solveur[16] =
"quiet";
766 les_parametres_solveur[17] =
"restart";
767 les_parametres_solveur[18] =
"cli_verbose";
768 les_parametres_solveur[19] =
"dropping_parameter";
769 les_parametres_solveur[20] =
"rtol";
770 les_parametres_solveur[21] =
"atol";
771 les_parametres_solveur[22] =
"ignore_new_nonzero";
772 les_parametres_solveur[23] =
"rebuild_matrix";
773 les_parametres_solveur[24] =
"allow_realloc";
774 les_parametres_solveur[25] =
"clean_matrix";
775 les_parametres_solveur[26] =
"save_matrix_mtx_format";
776 les_parametres_solveur[27] =
"reuse_preconditioner_nb_it_max";
777 les_parametres_solveur[28] =
"reduce_ram";
778 les_parametres_solveur[29] =
"reorder_matrix";
779 les_parametres_solveur[30] =
"pcshell";
788 while (motlu!=accolade_fermee)
790 switch(les_parametres_solveur.
search(motlu))
802 add_option(
"mat_mumps_icntl_4",
"3");
804 add_option(
"mat_superlu_dist_printstat",
"");
808 add_option(
"mat_umfpack_prl",
"2");
810 add_option(
"mat_pastix_verbose",
"2");
812 add_option(
"mat_cholmod_print",
"3");
814 add_option(
"mat_strumpack_verbose",
"1");
817 Cerr <<
"impr not coded yet for this direct solver." << finl;
827 Cerr <<
"Definition of " << les_parametres_solveur[les_parametres_solveur.
search(motlu)] <<
" is useless for a direct method." << finl;
828 Cerr <<
"Suppress the keyword." << finl;
832 convergence_with_seuil=1;
833 add_amgx_option(
"s:convergence",
"ABSOLUTE");
834 add_amgx_option(
"s:tolerance",
Nom(seuil_,
"%e"));
841 if (motlu != accolade_ouverte)
843 Cerr <<
"Error while reading the parameters of the PETSC preconditioner: precond " << pc <<
" { ... }" << finl;
844 Cerr <<
"We expected " << accolade_ouverte <<
" instead of " << motlu << finl;
848 while (motlu!=accolade_fermee)
850 Motcles les_parametres_precond(4);
852 les_parametres_precond[0] = omega.
name;
853 les_parametres_precond[1] = level.
name;
854 les_parametres_precond[2] = epsilon.
name;
855 les_parametres_precond[3] = ordering.
name;
860 switch(les_parametres_precond.
search(motlu))
865 omega.
value()=tmp_double;
872 level.
value()=(int)tmp_int;
874 add_amgx_option(
"p:ilu_sparsity_level",(
Nom)level.
value());
876 add_amgx_option(
"p:coloring_level",(
Nom)
Nom(level.
value()+1));
882 epsilon.
value()=tmp_double;
889 ordering.
value()=tmp_string;
897 Cerr <<
"Reading option: " << motlu << finl;
898 add_amgx_option(motlu);
904 <<
" : unrecognized option among all of those possible on Petsc preconditioner:"
906 Cerr << les_parametres_precond << finl;
932 convergence_with_nb_it_max_=1;
933 add_amgx_option(
"s:max_iters",
Nom(nb_it_max_));
957 Cerr <<
"factored_matrix option is not available for parallel calculation." << finl;
963 Cerr <<
"Only cholesky_lapack keyword may be used with the option factored_matrix." << finl;
966 is >> factored_matrix_;
976 is >> controle_residu_;
982 if (motlu != accolade_ouverte)
984 Cerr <<
"We expected " << accolade_ouverte <<
" instead of " << motlu << finl;
987 Cerr <<
"Reading of the Petsc commands:" << finl;
990 while (motlu!=accolade_fermee)
995 bool negative_value = is_number(valeur.getSuffix(
"-").getString());
996 if ((valeur.debute_par(
"-") && !negative_value) || valeur==accolade_fermee)
998 add_option(motlu.suffix(
"-"),
"");
1003 add_option(motlu.suffix(
"-"), valeur);
1013 add_option(
"ksp_view",
"");
1014 add_option(
"options_view",
"");
1015 add_option(
"options_left",
"");
1025 Cerr <<
"Ordering keyword for a solver is limited to Cholesky only." << finl;
1031 mumps_ordering[0] =
"amd";
1032 mumps_ordering[1] =
"pt-scotch";
1033 mumps_ordering[2] =
"parmetis";
1034 mumps_ordering[3] =
"scotch";
1035 mumps_ordering[4] =
"pord";
1036 mumps_ordering[5] =
"metis";
1038 int rang_mumps=mumps_ordering.
search(motlu);
1044 Cerr << motlu <<
" : unrecognized ordering from those available for the MUMPS solver Cholesky:" << finl;
1045 Cerr << mumps_ordering << finl;
1048 else if (rang_mumps==1 || rang_mumps==2)
1052 Cerr <<
"You can't use the parallel ordering " << motlu <<
" during a sequential calculation." << finl;
1055 add_option(
"mat_mumps_icntl_28",
"2");
1056 add_option(
"mat_mumps_icntl_29",(
Nom)rang_mumps);
1060 add_option(
"mat_mumps_icntl_28",
"1");
1061 add_option(
"mat_mumps_icntl_7",(
Nom)rang_mumps);
1067 is >> petsc_decide_;
1068 different_partition_ = petsc_decide_;
1079 is >> petsc_nb_cpus_;
1080 different_partition_ = 1;
1082 petsc_cpus_selection_=1;
1083 else if (motlu==
"every")
1084 petsc_cpus_selection_=2;
1087 Cerr <<
"We should read the option first or every after the keyword nb_cpus." << finl;
1088 Cerr <<
"Or we read: " << motlu << finl;
1093 Cerr <<
"Incorrect number of CPUs selected for solving the PETSc matrix: " << petsc_nb_cpus_ << finl;
1105 KSPType type_ksp_method;
1106 KSPGetType(SolveurPetsc_, &type_ksp_method);
1107 if ((
Nom)type_ksp_method !=
Nom(
"gmres") && ((
Nom)type_ksp_method !=
Nom(
"fgmres")))
1109 Cerr <<
"restart option is available only with [f]gmres methods" << finl;
1113 is >> restart_gmres;
1114 KSPGMRESSetRestart(SolveurPetsc_,restart_gmres);
1122 Cerr << les_parametres_solveur[rang] <<
" keyword for a solver is limited to " << les_solveurs[14] <<
" only." << finl;
1125 double dropping_parameter;
1126 is >> dropping_parameter;
1127 add_option(
"mat_mumps_cntl_7",dropping_parameter);
1134 Cerr <<
"Definition of " << les_parametres_solveur[les_parametres_solveur.
search(motlu)] <<
" is useless for a direct method." << finl;
1135 Cerr <<
"Suppress the keyword." << finl;
1138 is >> seuil_relatif_;
1139 convergence_with_seuil=1;
1140 add_amgx_option(
"s:convergence",
"RELATIVE_INI_CORE");
1141 add_amgx_option(
"s:tolerance",
Nom(seuil_relatif_,
"%e"));
1176 Cerr <<
"Reading option: " << motlu << finl;
1177 add_amgx_option(motlu);
1181 Cerr << motlu <<
" : unrecognized option from those available in the Petsc solver:" << finl;
1182 Cerr << les_parametres_solveur << finl;
1190 if (petsc_decide_ && petsc_cpus_selection_)
1192 Cerr <<
"You can't use petsc_decide and nb_cpus option together." << finl;
1196 int pc_supported_on_gpu_by_petsc=0;
1197 int pc_supported_on_gpu_by_amgx=0;
1200 les_precond[0] =
"NULL";
1201 les_precond[1] =
"ILU";
1202 les_precond[2] =
"SSOR";
1203 les_precond[3] =
"EISENSTAT";
1204 les_precond[4] =
"SPAI";
1205 les_precond[5] =
"PILUT";
1206 les_precond[6] =
"DIAG|JACOBI";
1207 les_precond[7] =
"BOOMERAMG";
1208 les_precond[8] =
"BLOCK_JACOBI_ICC";
1209 les_precond[9] =
"BLOCK_JACOBI_ILU";
1210 les_precond[10] =
"C-AMG";
1211 les_precond[11] =
"SA-AMG";
1212 les_precond[12] =
"GS";
1213 les_precond[13] =
"PCSHELL";
1214 les_precond[14] =
"LU|MUMPS";
1215 les_precond[15] =
"UA-AMG";
1216 les_precond[16] =
"ILU_MUMPS";
1217 les_precond[17] =
"ILU_STRUMPACK";
1226 Cerr <<
"Using precond keyword with a direct method like Cholesky is useless" << finl;
1227 Cerr <<
"because for PETSc the LU factorization is used as a preconditioner." << finl;
1231 rang = les_precond.
search(pc);
1236 PCSetType(PreconditionneurPetsc_, PCNONE);
1237 pc_supported_on_gpu_by_petsc=1;
1238 pc_supported_on_gpu_by_amgx=1;
1239 add_amgx_option(
"s:preconditioner(p)",
"NOSOLVER");
1240 check_not_defined(omega);
1241 check_not_defined(level);
1242 check_not_defined(epsilon);
1243 check_not_defined(ordering);
1261 pc_supported_on_gpu_by_amgx=1;
1263 add_amgx_option(
"s:preconditioner(p)",
"MULTICOLOR_DILU");
1266 add_option(
"pc_type",
"ilu");
1267 add_option(
"pc_factor_mat_solver_type",
"cusparse");
1268 add_option(
"pc_factor_levels",(
Nom)level.
value());
1272 Cerr <<
"Error: CHANGES in the PETSc 3.6 version: Removed -pc_hypre_type euclid due to bit-rot."
1274 Cerr <<
"So the ILU { level k } preconditioner no longer available. " << finl;
1275 Cerr <<
"Change your data file." << finl;
1282 PCSetType(PreconditionneurPetsc_, PCSOR);
1286 PCSORSetOmega(PreconditionneurPetsc_, omega.
value());
1290 Cerr <<
"omega value for SSOR should be between 1 and 2" << finl;
1293 pc_supported_on_gpu_by_amgx=0;
1294 check_not_defined(level);
1295 check_not_defined(epsilon);
1296 check_not_defined(ordering);
1301 PCSetType(PreconditionneurPetsc_, PCEISENSTAT);
1304 PCEisenstatSetOmega(PreconditionneurPetsc_, omega.
value());
1308 Cerr <<
"omega value for EISENSTAT should be between 1 and 2" << finl;
1311 check_not_defined(level);
1312 check_not_defined(epsilon);
1313 check_not_defined(ordering);
1318 PCSetType(PreconditionneurPetsc_, PCHYPRE);
1319 PCHYPRESetType(PreconditionneurPetsc_,
"parasails");
1320 add_option(
"pc_hypre_parasails_nlevels",(
Nom)level.
value());
1321 add_option(
"pc_hypre_parasails_thresh",(
Nom)epsilon.
value());
1323 check_not_defined(omega);
1324 check_not_defined(ordering);
1326 KSPGetType(SolveurPetsc_, &type_ksp);
1327 Process::exit(
"SPAI preconditioner is not supported anymore.");
1332 PCSetType(PreconditionneurPetsc_, PCHYPRE);
1333 PCHYPRESetType(PreconditionneurPetsc_,
"pilut");
1334 add_option(
"pc_hypre_pilut_factorrowsize",(
Nom)level.
value());
1335 add_option(
"pc_hypre_pilut_tol",(
Nom)epsilon.
value());
1336 check_not_defined(omega);
1337 check_not_defined(ordering);
1348 PCSetType(PreconditionneurPetsc_, PCJACOBI);
1349 pc_supported_on_gpu_by_petsc=1;
1350 pc_supported_on_gpu_by_amgx=1;
1353 add_amgx_option(
"s:preconditioner(p)",
"BLOCK_JACOBI");
1354 Process::exit(
"Diagonal preconditioner on GPU with AmgX is slow to converge. Try GS (Gauss-Seidel) preconditioner.");
1356 check_not_defined(omega);
1357 check_not_defined(level);
1358 check_not_defined(epsilon);
1359 check_not_defined(ordering);
1365 if (rang==9) preconditionnement_non_symetrique_ = 1;
1366 pc_supported_on_gpu_by_amgx=1;
1367 pc_supported_on_gpu_by_petsc=1;
1369 add_amgx_option(
"s:preconditioner(p)",
"MULTICOLOR_DILU");
1372 add_option(
"sub_pc_type",rang==8 ?
"icc" :
"ilu");
1373 add_option(
"sub_pc_factor_levels",(
Nom)level.
value());
1377 if (ordering.
value()!=
"")
1379 add_option(
"sub_pc_factor_mat_ordering_type",ordering.
value());
1381 preconditionnement_non_symetrique_=1;
1385 PCSetType(PreconditionneurPetsc_, PCBJACOBI);
1386 check_not_defined(omega);
1387 check_not_defined(epsilon);
1394 add_option(
"sub_pc_type",
"lu");
1395 add_option(
"sub_pc_factor_mat_solver_type",
"mumps");
1396 if(
limpr()) add_option(
"mat_mumps_icntl_4",
"3");
1397 if (ordering.
value()!=
"")
1398 add_option(
"sub_pc_factor_mat_ordering_type",ordering.
value());
1399 PCSetType(PreconditionneurPetsc_, PCBJACOBI);
1400 check_not_defined(omega);
1401 check_not_defined(epsilon);
1406#ifdef HYPRE_USING_GPU
1410 PCSetType(PreconditionneurPetsc_, PCHYPRE);
1411 PCHYPRESetType(PreconditionneurPetsc_,
"boomeramg");
1412 pc_supported_on_gpu_by_petsc=1;
1416 if (!
gpu_) add_option(
"pc_hypre_boomeramg_relax_type_all",
"Jacobi");
1419 if (
dimension==3) add_option(
"pc_hypre_boomeramg_strong_threshold",
"0.7");
1420 if (
limpr()) add_option(
"pc_hypre_boomeramg_print_statistics",
"1");
1421 check_not_defined(omega);
1422 check_not_defined(level);
1423 check_not_defined(epsilon);
1424 check_not_defined(ordering);
1444 pc_supported_on_gpu_by_amgx=1;
1445 pc_supported_on_gpu_by_petsc=1;
1446 preconditionnement_non_symetrique_ = 1;
1449 add_amgx_option(
"s:preconditioner(p)",
"AMG");
1450 add_amgx_option(
"s:use_scalar_norm",
"1");
1451 add_amgx_option(
"p:error_scaling",
"0");
1452 add_amgx_option(
"p:print_grid_stats",
"1");
1453 add_amgx_option(
"p:max_iters",
"1");
1454 add_amgx_option(
"p:cycle",
"V");
1455 add_amgx_option(
"p:min_coarse_rows",
"2");
1456 add_amgx_option(
"p:max_levels",
"100");
1457 add_amgx_option(
"p:smoother(smoother)",
"BLOCK_JACOBI");
1458 add_amgx_option(
"p:presweeps",
"1");
1459 add_amgx_option(
"p:postsweeps",
"1");
1460 add_amgx_option(
"p:coarsest_sweeps",
"1");
1461 add_amgx_option(
"p:coarse_solver",
"DENSE_LU_SOLVER");
1462 add_amgx_option(
"p:dense_lu_num_rows",
"2");
1465 add_amgx_option(
"p:algorithm",
"CLASSICAL",
"Best choice if you have enough memory and fine tune carefully p:selector and p:strength parameters.");
1467 add_amgx_option(
"p:selector",
"HMIS",
"PMIS may offer faster setup but weaker (some issues for a high number of GPUs) than serial implementation!");
1468 add_amgx_option(
"p:interpolator",
"D2",
"Also available D1. D2 is considerably more expensive during both setup and solve phases, but convergence on difficult problems");
1469 Nom strength(
"AHAT");
1470 add_amgx_option(
"p:strength",strength,
"Choose the strength of connection metric to use. Allowable options are AHAT and ALL");
1473 if (strength==
"AHAT") add_amgx_option(
"p:strength_threshold",
"0.8",
"All edges with strength below this threshold will be discarded. Higher: faster setup, lower memory but lower convergence. You may try 0.25 for better convergence if enough memory.");
1477 add_amgx_option(
"p:algorithm",
"AGGREGATION");
1478 add_amgx_option(
"p:selector",
"SIZE_2");
1479 add_amgx_option(
"p:max_matching_iterations",
"100000");
1480 add_amgx_option(
"p:max_unassigned_percentage",
"0.0");
1486 add_amgx_option(
"smoother:relaxation_factor",
"0.8");
1491 add_option(
"pc_type",
"gamg");
1498 add_option(
"pc_gamg_type",
"classical");
1502 add_option(
"pc_gamg_type",
"agg");
1503 add_option(
"pc_gamg_agg_nsmooths",
"1");
1510 add_option(
"pc_gamg_type",
"agg");
1511 add_option(
"pc_gamg_agg_nsmooths",
"0");
1526 Cerr <<
"Relaxation value omega for GS should be between 0 and 2" << finl;
1529 pc_supported_on_gpu_by_amgx=1;
1532 add_amgx_option(
"s:preconditioner(p)",
"GS");
1534 add_amgx_option(
"p:relaxation_factor",
Nom(omega.
value()));
1537 check_not_defined(level);
1538 check_not_defined(epsilon);
1539 check_not_defined(ordering);
1545 PCSetType(PreconditionneurPetsc_, PCSHELL);
1548 auto PCShellUserApply = [](PC pc_apply, Vec x, Vec y)
1552 PCShellGetContext(pc_apply,(
void**) &pcstruct);
1554 return pcs->computePC_(pc_apply,x,y);
1557 auto PCShellUserPreSolve = [](PC pc_apply, KSP ksp_apply, Vec x, Vec y)
1561 PCShellGetContext(pc_apply,(
void**) &pcstruct);
1563 return pcs->preSolve_(pc_apply, ksp_apply, x, y);
1566 auto PCShellUserPostSolve = [](PC pc_apply, KSP ksp_apply, Vec x, Vec y)
1570 PCShellGetContext(pc_apply,(
void**) &pcstruct);
1572 return pcs->postSolve_(pc_apply, ksp_apply, x, y);
1575 auto PCShellUserDestroy = [](PC pc_apply)
1579 PCShellGetContext(pc_apply,(
void**) &pcstruct);
1581 return pcs->destroyPC_(pc_apply);
1584 PCShellSetApply(PreconditionneurPetsc_, PCShellUserApply);
1585 PCShellSetContext(PreconditionneurPetsc_, &pc_user_);
1586 PCShellSetDestroy(PreconditionneurPetsc_, PCShellUserDestroy);
1587 PCShellSetPreSolve(PreconditionneurPetsc_, PCShellUserPreSolve);
1588 PCShellSetPostSolve(PreconditionneurPetsc_, PCShellUserPostSolve);
1594 add_option(
"pc_type",
"cholesky");
1595 add_option(
"pc_factor_mat_solver_type",
"mumps");
1596 add_option(
"mat_mumps_icntl_35",
"1");
1599 add_option(
"mat_mumps_cntl_7", (
double)epsilon.
value());
1601 check_not_defined(level);
1607 add_option(
"pc_type",
"ilu");
1608 add_option(
"mat_type",
"aij");
1609 add_option(
"pc_factor_mat_solver_type",
"strumpack");
1610 add_option(
"mat_strumpack_compression",
"BLR");
1611 add_option(
"mat_strumpack_compression_rel_tol", (
double)epsilon.
value());
1615 check_not_defined(level);
1620 Cerr << pc <<
" : preconditioner not officially recognized by TRUST among those possible for the moment:" << finl;
1621 Cerr << les_precond << finl;
1622 Cerr <<
"You can try to access directly to Petsc preconditioners with the command line." << finl;
1623 Cerr <<
"See the reference manual of Petsc to do this." << finl;
1632 Cerr <<
"You forgot to define a preconditioner with the keyword precond." << finl;
1633 Cerr <<
"If you don't want a preconditioner, add for the solver definition:" << finl;
1634 Cerr <<
"precond null" << finl;
1640 pc_supported_on_gpu_by_petsc = solver_supported_on_gpu_by_petsc;
1641 pc_supported_on_gpu_by_amgx = solver_supported_on_gpu_by_amgx;
1645 if (
gpu_ && pc_supported_on_gpu_by_petsc==0)
1647 Cerr << les_precond[rang] <<
" is not supported yet by PETSc on GPU." << finl;
1650 if (
amgx_ && pc_supported_on_gpu_by_amgx==0)
1652 Cerr << les_precond[rang] <<
" is not supported yet by AmgX on GPU." << finl;
1662 KSPSetInitialGuessNonzero(SolveurPetsc_, PETSC_FALSE);
1663 PCSetType(PreconditionneurPetsc_, PCLU);
1667 KSPSetInitialGuessNonzero(SolveurPetsc_, PETSC_TRUE);
1668 if (convergence_with_nb_it_max_)
1670 if (convergence_with_seuil)
1672 Cerr <<
"You can only define solver convergence either by seuil or by nb_it_max." << finl;
1673 Cerr <<
"So suppress seuil keyword or nb_it_max keyword." << finl;
1678 add_option(
"ksp_check_norm_iteration",(
Nom)(nb_it_max_-1));
1679 nb_it_max_ = NB_IT_MAX_DEFINED;
1682 if (seuil_==0 && seuil_relatif_==_RTOL_MIN_)
1686 if (seuil_relatif_<_RTOL_MIN_)
1688 KSPSetTolerances(SolveurPetsc_, seuil_relatif_, seuil_, (divtol_==0 ? PETSC_DEFAULT : divtol_), nb_it_max_);
1694 KSPConvergedDefaultSetUIRNorm(SolveurPetsc_);
1697 KSPSetOptionsPrefix(SolveurPetsc_, option_prefix_);
1698 KSPSetFromOptions(SolveurPetsc_);
1699 PCSetOptionsPrefix(PreconditionneurPetsc_, option_prefix_);
1700 PCSetFromOptions(PreconditionneurPetsc_);
1704 KSPGetType(SolveurPetsc_, &type_ksp);
1705 type_ksp_=(
Nom)type_ksp;
1708 PCGetType(PreconditionneurPetsc_, &type_pc);
1709 if (type_pc) type_pc_=(
Nom)type_pc;
1712#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1713 if (type_pc_==
"hypre")
gpu_ =
true;
1721 s <<
"# AmgX config file" << finl <<
"config_version=2" << finl;
1722 add_amgx_option(
"s:print_config",
limpr() ?
"1" :
"0");
1723 add_amgx_option(
"s:print_solve_stats",
limpr() ?
"1" :
"0");
1724 add_amgx_option(
"s:obtain_timings",
limpr() ?
"1" :
"0");
1725 add_amgx_option(
"s:store_res_history",
"1");
1726 add_amgx_option(
"s:monitor_residual",
"1");
1727 add_amgx_option(
"s:max_iters",
"10000");
1730#ifdef MPIX_CUDA_AWARE_SUPPORT
1731 if (getenv(
"AMGX_USE_MPI_GPU_AWARE")) add_amgx_option(
"communicator",
"MPI_DIRECT",
"Enable GPU direct with MPI Cuda-Aware. No gain for the moment.");
1734 Cerr <<
"Writing the AmgX config file: " <<
config() << finl;
1737 Cerr <<
"Error, the code is not built with PETSc support." << finl;
1738 Cerr <<
"Contact TRUST support." << finl;
1747 str+=option_prefix_.
prefix(
"_");
1749 Process::exit(
"ToDo fix Solv_Petsc::config(): build config filename with numero_solve in the constructor!");
1751 str+=
amgx_ ?
".amgx" :
".petsc";
1758PetscLogStage Solv_Petsc::Create_Stage_=1;
1759PetscLogStage Solv_Petsc::KSPSolve_Stage_=2;
1768 for (
int i=0; i<M_nb_lignes; i++)
1771 for (
int j=0 ; j<M_nb_colonnes; j++ )
1774 if (j!=(M_nb_colonnes-1)) s<<
",";
1777 if (i!=(M_nb_lignes-1)) s<<
",";
1783void Solv_Petsc::SaveObjectsToFile(
const DoubleVect& secmem, DoubleVect& solution)
1789 MatGetInfo(MatricePetsc_,MAT_GLOBAL_SUM,&Info);
1790 auto nnz = (trustIdType)Info.nz_allocated;
1792 Nom filename(
"Matrix_");
1795 filename+=(Nom)
nproc();
1796 filename+=
"_cpus.petsc";
1798 Cerr <<
"Writing the global PETSc matrix (" <<
nb_rows_tot_<<
" rows and " << nnz <<
" nnz) in the binary file " << filename << finl;
1799 if (
Process::nproc()>32) Cerr <<
"It may take some minutes..." << finl;
1801 PetscViewerCreate(PETSC_COMM_WORLD, &viewer);
1807 PetscViewerSetType(viewer, PETSCVIEWERBINARY);
1808 PetscViewerBinarySetUseMPIIO(viewer, PETSC_TRUE);
1810 PetscViewerFileSetMode(viewer, FILE_MODE_WRITE);
1811 PetscViewerFileSetName(viewer, filename);
1812 statistics().
begin_count(STD_COUNTERS::backup_file,statistics().get_last_opened_counter_level()+1);
1814 MatView(MatricePetsc_, viewer);
1815 Cerr <<
"[IO] " << statistics().
get_time_since_last_open(STD_COUNTERS::backup_file) <<
" s to write matrix file." << finl;
1816 statistics().
end_count(STD_COUNTERS::backup_file, 1,
static_cast<int>(bytes));
1818 if (SecondMembrePetsc_!=
nullptr)
1820 Cerr <<
"Writing also the RHS in the file " << filename << finl;
1821 VecView(SecondMembrePetsc_, viewer);
1824 Process::exit(
"You can't export anymore matrix&RHS at PETSc format with AmgX solver. Switch to PETSc solver instead.");
1825 PetscViewerDestroy(&viewer);
1830 Cerr <<
"The global PETSc matrix is small so we also print it:" << finl;
1831 MatView(MatricePetsc_, PETSC_VIEWER_STDOUT_WORLD);
1839 filename +=
"_matrix";
1842 SFichier mtx(filename);
1844 mtx.setf(ios::scientific);
1846 const PetscInt *ia, *ja;
1848 MatGetRowIJ(MatricePetsc_, 0, PETSC_FALSE, PETSC_FALSE, &rows, &ia, &ja, &done);
1852 MatGetType(MatricePetsc_, &mat_type);
1854 if (strcmp(mat_type, MATSEQAIJ) == 0)
1857 MatSeqAIJGetArray(MatricePetsc_, &v);
1859 else if (strcmp(mat_type, MATSEQSBAIJ) == 0)
1862 MatSeqSBAIJGetArray(MatricePetsc_, &v);
1865 mtx <<
"%%MatrixMarket matrix coordinate real " << type << finl;
1866 Cerr <<
"Matrix (" << (int)rows <<
" lines) written into file: " << filename << finl;
1867 mtx <<
"%%matrix" << finl;
1868 mtx << (int)rows <<
" " << (
int)rows <<
" " << (int)ia[rows] << finl;
1869 for (
int row=0; row<rows; row++)
1870 for (PetscInt j=ia[row]; j<ia[row+1]; j++)
1871 mtx << row+1 <<
" " << (
int)ja[j]+1 <<
" " << v[j] << finl;
1877 SFichier rhs_mtx(filename);
1878 rhs_mtx.precision(14);
1879 rhs_mtx.setf(ios::scientific);
1880 rhs_mtx <<
"%%MatrixMarket matrix array real general" << finl;
1882 for (
int row=0; row<rows; row++)
1883 rhs_mtx << secmem(row) << finl;
1885 if (SecondMembrePetsc_!=
nullptr)
1889 rhs_filename +=
"_rhs";
1891 rhs_filename +=
".petsc";
1892 PetscViewerASCIIOpen(PETSC_COMM_WORLD,rhs_filename,&viewer);
1893 PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB);
1894 VecView(SecondMembrePetsc_, viewer);
1895 Cerr <<
"Save RHS into " << rhs_filename << finl;
1896 PetscViewerDestroy(&viewer);
1899 if (
verbose) Cout <<
"[Petsc] Time to write matrix: \t" << statistics().
compute_time(start) << finl;
1904void Solv_Petsc::RestoreMatrixFromFile()
1906 Nom filename(
"Matrix_");
1909 filename+=(Nom)
nproc();
1910 filename+=
"_cpus.petsc";
1911 add_option(
"viewer_binary_skip_info",
"");
1913 MatCreate(PETSC_COMM_WORLD,&MatricePetsc_);
1916 else if (petsc_cpus_selection_)
1918 Cerr <<
"Reading a PETSc matrix with a different number of CPUs is not implemented yet." << finl;
1919 Cerr <<
"Contact TRUST support." << finl;
1923 MatSetSizes(MatricePetsc_,
nb_rows_,
nb_rows_, PETSC_DECIDE, PETSC_DECIDE);
1925 Cerr <<
"Reading the global PETSc matrix in the binary file " << filename << finl;
1926 if (
Process::nproc()>32) Cerr <<
"It may take some minutes..." << finl;
1928 PetscViewerCreate(PETSC_COMM_WORLD, &viewer);
1932 PetscViewerSetType(viewer, PETSCVIEWERBINARY);
1933 PetscViewerBinarySetUseMPIIO(viewer, PETSC_TRUE);
1935 PetscViewerFileSetMode(viewer, FILE_MODE_READ);
1936 PetscViewerFileSetName(viewer, filename);
1937 statistics().
begin_count(STD_COUNTERS::backup_file,statistics().get_last_opened_counter_level()+1);
1938 MatLoad(MatricePetsc_, viewer);
1940 MatGetInfo(MatricePetsc_,MAT_GLOBAL_SUM,&Info);
1941 auto nnz = (trustIdType)Info.nz_allocated;
1943 Cerr <<
"[IO] " << statistics().
get_time_since_last_open(STD_COUNTERS::backup_file) <<
" s to read matrix file." << finl;
1944 statistics().
end_count(STD_COUNTERS::backup_file, 1,
static_cast<int>(bytes));
1946 PetscViewerDestroy(&viewer);
1949 Cerr <<
"Reading a non symmetric PETSc matrix is not supported yet in TRUST." << finl;
1953 MatSetOption(MatricePetsc_, MAT_SYMMETRIC, PETSC_TRUE);
1954#ifdef PETSC_HAVE_CUDA
1956 MatConvert(MatricePetsc_, MATAIJCUSPARSE, MAT_INPLACE_MATRIX, &MatricePetsc_);
1959#ifdef PETSC_HAVE_HIP
1961 MatConvert(MatricePetsc_, MATAIJHIPSPARSE, MAT_INPLACE_MATRIX, &MatricePetsc_);
1964 MatConvert(MatricePetsc_, MATSBAIJ, MAT_INPLACE_MATRIX, &MatricePetsc_);
1966 PetscInt nb_rows_tot,nb_cols_tot;
1967 MatGetSize(MatricePetsc_,&nb_rows_tot,&nb_cols_tot);
1968 Cerr <<
"The matrix read has " << (int)nb_rows_tot <<
" rows." << finl;
1969 PetscInt nb_local_rows, nb_local_cols;
1970 MatGetLocalSize(MatricePetsc_, &nb_local_rows, &nb_local_cols);
1973 Cerr <<
"The matrix read has " << (int)nb_local_rows <<
" local columns whereas" << finl;
1974 Cerr <<
"the RHS/Solution vectors have a size of " <<
nb_items_to_keep_ <<
"." << finl;
1975 Cerr <<
"Check your data file or the file containing the PETSc matrix." << finl;
1984bool Solv_Petsc::enable_ksp_view()
1986 Nom option=
"-ksp_view";
1988 char *option_value = strdup( empty );
1990 PetscOptionsGetString( PETSC_NULLPTR, PETSC_NULLPTR, option, option_value, empty.
longueur( ), &enable );
1992 free( option_value );
1996int Solv_Petsc::add_option(
const Nom& astring,
const double& value,
int cli)
1999 snprintf(nom_value,80,
"%e",value);
2000 return add_option(astring, (Nom)nom_value, cli);
2003void Solv_Petsc::add_amgx_option(
const Nom& key,
const Nom& value,
const std::string& comment)
2005 if (
amgx_ && !amgx_options_.contient(key))
2007 if (comment!=
"") amgx_options_+=
"# "+comment+
":\n";
2008 amgx_options_+=key+
"="+value+
"\n";
2012void Solv_Petsc::add_amgx_option(
const Nom& key_value)
2017 if (!amgx_options_.contient(key)) amgx_options_ += key_value +
"\n";
2021bool Solv_Petsc::has_option(
const Nom&
option,
Nom& current_value)
2025 char* tmp=strdup(vide);
2026 PetscOptionsGetString(PETSC_NULLPTR,PETSC_NULLPTR,option,tmp,vide.
longueur(),&flg);
2027 current_value = tmp;
2029 return current_value!=vide;
2032int Solv_Petsc::add_option(
const Nom& astring,
const Nom& value,
int cli)
2044 option+=option_prefix_;
2052 if (has_option(option, current_value))
2054 if (
limpr() >= 0) Cerr <<
"Option Petsc: " << option <<
" " << value <<
" not taken cause " << option <<
" already defined to " << current_value << finl;
2061 PetscOptionsSetValue(PETSC_NULLPTR, option, PETSC_NULLPTR);
2062 if (
limpr() >= 0) Cerr <<
"Option Petsc: " << option << finl;
2066 PetscOptionsSetValue(PETSC_NULLPTR, option, value);
2067 if (
limpr() >= 0) Cerr <<
"Option Petsc: " << option <<
" " << value << finl;
2073#ifndef PETSC_HAVE_HYPRE
2075PetscErrorCode PCHYPRESetType(PC,
const char[])
2077 Cerr <<
"HYPRE preconditioners are not available in this TRUST version." << finl;
2078 Cerr <<
"May be, HYPRE library has been deactivated during the PETSc build process." << finl;
2087PetscErrorCode MyKSPMonitor(KSP SolveurPetsc, PetscInt it, PetscReal residu,
void *dummy)
2090 Cout <<
"Norm of the residue: " << residu <<
" (1)";
2092 Cout << residu <<
" ";
2093 if ((it % 15) == 0) Cout << finl ;
2106 std::feholdexcept(&fenv);
2108 statistics().begin_count(STD_COUNTERS::petsc_solver,statistics().get_last_opened_counter_level()+1);
2109 statistics().end_count(STD_COUNTERS::petsc_solver);
2112 bool log_Create_Stage =
false;
2113 if (log_Create_Stage) PetscLogStagePush(Create_Stage_);
2118 if (MatricePetsc_!=
nullptr && hasChanged != 0)
2121 KSPSetOperators(SolveurPetsc_, MatricePetsc_, PETSC_NULLPTR);
2123 VecDestroy(&SecondMembrePetsc_);
2124 SecondMembrePetsc_ =
nullptr;
2125 VecDestroy(&SolutionPetsc_);
2126 SolutionPetsc_ =
nullptr;
2127 if (LocalSolutionPetsc_!=
nullptr)
2129 VecDestroy(&LocalSolutionPetsc_);
2130 LocalSolutionPetsc_ =
nullptr;
2131 VecScatterDestroy(&VecScatter_);
2134 MatDestroy(&MatricePetsc_);
2135 MatricePetsc_ =
nullptr;
2144 if (MatricePetsc_==
nullptr)
2152 RestoreMatrixFromFile();
2157 MatricePetsc_ = ref_cast(
Matrice_Petsc, la_matrice).getMat();
2162 if (
verbose) Cout <<
"[Petsc] Time to convert matrix: \t" << statistics().compute_time(start) << finl;
2165 check_aij(matrice_morse_intermediaire);
2167 bool la_matrice_est_morse_non_symetrique =
2170 : matrice_morse_intermediaire;
2174 nouveau_stencil_ =
true;
2176 nouveau_stencil_ = detect_new_stencil(matrice_morse);
2179 Create_vectors(secmem);
2184 if (nouveau_stencil_)
2185 Create_objects(matrice_morse, secmem.
line_size());
2187 Update_matrix(MatricePetsc_, matrice_morse);
2193 MatGetInfo(MatricePetsc_, MAT_GLOBAL_SUM, &
info);
2194 PetscInt nnz = (PetscInt)
info.nz_used;
2195 Cout <<
"Order of the PETSc matrix : " <<
nb_rows_tot_ <<
" (~ "
2197 <<
" unknowns per PETSc process ) " << (nouveau_stencil_ ?
"New stencil." :
"Same stencil.") <<
" nnz= " << nnz << finl;
2201 Update_vectors(secmem, solution);
2205 if (log_Create_Stage) PetscLogStagePop();
2209 int size_residu = nb_it_max_ + 1;
2211 ArrOfDouble residu(size_residu);
2212 int nbiter = solve(residu);
2216 double residu_relatif=(residu[0]>0?residu[nbiter]/residu[0]:residu[nbiter]);
2217 Cout << finl <<
"Final residue: " << residu[nbiter] <<
" ( " << residu_relatif <<
" )"<<finl;
2223 bool check_residual = controle_residu_;
2226 if (getenv(
"TRUST_CLOCK_ON")==
nullptr)
2228 Cerr <<
"Warning checking residual. D2H and H2D copies are possible..." << finl;
2229 check_residual =
true;
2232 if (check_residual && !MatricePetsc_ )
2234 DoubleVect test(secmem);
2237 double vrai_residu = mp_norme_vect(test);
2238 if (
verbose) Cout <<
"||Ax-b||=" << vrai_residu << finl;
2242 double precision_machine=1.e-12;
2243 if (residu[0]>0 && residu[nbiter]/residu[0]>precision_machine && vrai_residu>10*residu[nbiter])
2245 Cerr <<
"Error, computed solution x is false !" << finl;
2246 Cerr <<
"True residual (" << vrai_residu <<
") is much higher than convergence residual (" << residu[nbiter] <<
") !" << finl;
2252 std::fesetenv(&fenv);
2263void handleSignal(
int signum)
2267 Cerr <<
"SIGFPE from PETSc KSPSolve() !" << finl;
2270 else if (signum==11)
2272 Cerr <<
"SIGSEGV from PETSc KSPSolve() !" << finl;
2277 fprintf(stderr,
"Signal %d received from PETSc. Exiting...\n", signum);
2282void setupSignalHandlers(
bool on)
2285 struct sigaction sa_fpe;
2286 sa_fpe.sa_handler = on ? handleSignal : SIG_DFL;
2287 sigemptyset(&sa_fpe.sa_mask);
2288 sa_fpe.sa_flags = 0;
2291 if (sigaction(SIGFPE, &sa_fpe,
nullptr) == -1)
2293 fprintf(stderr,
"Error installing signal handler for SIGFPE.\n");
2298 struct sigaction sa_segv;
2299 sa_segv.sa_handler = on ? handleSignal : SIG_DFL;
2300 sigemptyset(&sa_segv.sa_mask);
2301 sa_segv.sa_flags = 0;
2304 if (sigaction(SIGSEGV, &sa_segv,
nullptr) == -1)
2306 fprintf(stderr,
"Error installing signal handler for SIGSEGV.\n");
2311int Solv_Petsc::solve(ArrOfDouble& residu)
2313 PetscLogStagePush(KSPSolve_Stage_);
2320 KSPMonitorSet(SolveurPetsc_, MyKSPMonitor, PETSC_NULLPTR, PETSC_NULLPTR);
2323 KSPMonitorCancel(SolveurPetsc_);
2326 KSPSetResidualHistory(SolveurPetsc_, residu.
addr(), residu.
size_array(), PETSC_TRUE);
2328 if (enable_ksp_view())
2329 KSPView(SolveurPetsc_, PETSC_VIEWER_STDOUT_WORLD);
2335 PetscOptionsHasName(PETSC_NULLPTR,option_prefix_,
"-ksp_reuse_preconditioner",&flg);
2341 if (!reuse_precond) Cout <<
"Matrix preconditioner is recomputed cause previous iterations number>" <<
reuse_preconditioner_nb_it_max_ <<
"..." << finl;
2344 if (
reuse_preconditioner()) Cout <<
"Matrix has changed but reusing previous preconditioner..." << finl;
2347 OWN_PTR(PCShell_base)& pcs=pc_user_.pc_shell;
2348 pcs->setUpPC_(PreconditionneurPetsc_, SolveurPetsc_, MatricePetsc_, SecondMembrePetsc_);
2354 setupSignalHandlers(
true);
2356 statistics().
begin_count(STD_COUNTERS::gpu_library,statistics().get_last_opened_counter_level()+1);
2357 KSPSolve(SolveurPetsc_, SecondMembrePetsc_, SolutionPetsc_);
2359 statistics().
end_count(STD_COUNTERS::gpu_library);
2360 setupSignalHandlers(
false);
2362 KSPConvergedReason Reason;
2363 KSPGetConvergedReason(SolveurPetsc_, &Reason);
2364 if (Reason==KSP_DIVERGED_ITS && (convergence_with_nb_it_max_ || ignore_nb_it_max_)) Reason = KSP_CONVERGED_ITS;
2367 Cerr <<
"No convergence on the resolution with the Petsc solver." << finl;
2368 Cerr <<
"Reason given by Petsc: ";
2369 if (Reason==KSP_DIVERGED_NULL) Cerr <<
"KSP_DIVERGED_NULL" << finl;
2370 else if (Reason==KSP_DIVERGED_DTOL) Cerr <<
"KSP_DIVERGED_DTOL" << finl;
2371 else if (Reason==KSP_DIVERGED_BREAKDOWN) Cerr <<
"KSP_DIVERGED_BREAKDOWN" << finl;
2372 else if (Reason==KSP_DIVERGED_BREAKDOWN_BICG) Cerr <<
"KSP_DIVERGED_BREAKDOWN_BICG" << finl;
2373 else if (Reason==KSP_DIVERGED_NONSYMMETRIC) Cerr <<
"KSP_DIVERGED_NONSYMMETRIC" << finl;
2374 else if (Reason==KSP_DIVERGED_INDEFINITE_PC) Cerr <<
"KSP_DIVERGED_INDEFINITE_PC" << finl;
2375 else if (Reason==KSP_DIVERGED_NANORINF) Cerr <<
"KSP_DIVERGED_NANORINF" << finl;
2376 else if (Reason==KSP_DIVERGED_INDEFINITE_MAT) Cerr <<
"KSP_DIVERGED_INDEFINITE_MAT" << finl;
2377 else if (Reason==KSP_DIVERGED_PC_FAILED) Cerr <<
"KSP_DIVERGED_PC_FAILED" << finl;
2378 else if (Reason==KSP_DIVERGED_ITS)
2380 Cerr <<
"KSP_DIVERGED_ITS" << finl;
2381 Cerr <<
"That means the solver didn't converge within the maximal iterations number." << finl;
2382 Cerr <<
"You can change the maximal number of iterations with the -ksp_max_it option." << finl;
2383#ifdef MPIX_CUDA_AWARE_SUPPORT
2387 Cerr <<
"It seems there is a convergence issue (bug?) with MPI GPU Aware library with PETSc CG and some preconditioners." << finl;
2388 Cerr <<
"Try using BICGSTAB instead of GCP to bypass the issue." << finl;
2392 else Cerr << (int)Reason << finl;
2397 KSPGetIterationNumber(SolveurPetsc_, &nbiter);
2398 int nbit = (int)nbiter;
2405 VecNorm(SecondMembrePetsc_, NORM_2, &residu[0]);
2414 VecScale(SecondMembrePetsc_, -1);
2415 MatMultAdd(MatricePetsc_, SolutionPetsc_, SecondMembrePetsc_, SecondMembrePetsc_);
2416 VecNorm(SecondMembrePetsc_, NORM_2, &residu[nbit]);
2419 if (
verbose) Cout << finl <<
"[Petsc] Time to solve system: \t" << statistics().
compute_time(start) << finl;
2421 return Reason < 0 ? (int)Reason : nbit;
2426void Solv_Petsc::Update_vectors(
const DoubleVect& secmem, DoubleVect& solution)
2431 if (
gpu_ && DataOnDevice && !isViennaCLVector())
2435 if (isKokkosVector())
2437#ifdef PETSC_HAVE_KOKKOS
2438 VecKokkosPlaceArray(SecondMembrePetsc_, addrOnDevice(
rhs_));
2439 VecKokkosPlaceArray(SolutionPetsc_, addrOnDevice(
lhs_));
2446#ifdef PETSC_HAVE_CUDA
2447 VecCUDAPlaceArray(SecondMembrePetsc_, addrOnDevice(
rhs_));
2448 VecCUDAPlaceArray(SolutionPetsc_, addrOnDevice(
lhs_));
2450#ifdef PETSC_HAVE_HIP
2451 VecHIPPlaceArray(SecondMembrePetsc_, addrOnDevice(
rhs_));
2452 VecHIPPlaceArray(SolutionPetsc_, addrOnDevice(
lhs_));
2456 if (different_partition_)
Process::exit(
"different_partition option is not supported yet on GPU");
2463 PetscInt size=
ix.size_array();
2465 statistics().
begin_count(STD_COUNTERS::gpu_copytodevice,statistics().get_last_opened_counter_level()+1);
2466 VecSetOption(SecondMembrePetsc_, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE);
2467 VecSetValues(SecondMembrePetsc_, size,
ix.addr(), secmem.
addr(), INSERT_VALUES);
2468 VecSetOption(SolutionPetsc_, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE);
2469 VecSetValues(SolutionPetsc_, size,
ix.addr(), solution.
addr(), INSERT_VALUES);
2470 VecAssemblyBegin(SecondMembrePetsc_);
2471 VecAssemblyEnd(SecondMembrePetsc_);
2472 VecAssemblyBegin(SolutionPetsc_);
2473 VecAssemblyEnd(SolutionPetsc_);
2475 statistics().
end_count(STD_COUNTERS::gpu_copytodevice);
2478 VecPermute(SecondMembrePetsc_, colperm, PETSC_FALSE);
2479 VecPermute(SolutionPetsc_, colperm, PETSC_FALSE);
2482 if (
verbose) Cout << finl <<
"[Petsc] Time to update vectors: \t" << statistics().
compute_time(start) << finl;
2488bool Solv_Petsc::isKokkosVector()
2491 VecGetType(SecondMembrePetsc_, &type);
2492 return strcmp(type, VECSEQKOKKOS)==0 || strcmp(type, VECMPIKOKKOS)==0;
2495bool Solv_Petsc::isViennaCLVector()
2498 VecGetType(SecondMembrePetsc_, &type);
2499 return strcmp(type, VECSEQVIENNACL)==0 || strcmp(type, VECMPIVIENNACL)==0;
2507 if (
gpu_ && DataOnDevice && !isViennaCLVector())
2510 if (isKokkosVector())
2512#ifdef PETSC_HAVE_KOKKOS
2513 VecKokkosResetArray(SecondMembrePetsc_);
2514 VecKokkosResetArray(SolutionPetsc_);
2521#ifdef PETSC_HAVE_CUDA
2522 VecCUDAResetArray(SecondMembrePetsc_);
2523 VecCUDAResetArray(SolutionPetsc_);
2525#ifdef PETSC_HAVE_HIP
2526 VecHIPResetArray(SecondMembrePetsc_);
2527 VecHIPResetArray(SolutionPetsc_);
2533 int size=
ix.size_array();
2535 VecPermute(SolutionPetsc_, rowperm, PETSC_TRUE);
2537 if (different_partition_)
2540 VecScatterBegin(VecScatter_, SolutionPetsc_, LocalSolutionPetsc_, INSERT_VALUES, SCATTER_FORWARD);
2541 VecScatterEnd (VecScatter_, SolutionPetsc_, LocalSolutionPetsc_, INSERT_VALUES, SCATTER_FORWARD);
2543 PetscInt colonne_locale=0;
2544 for (
int i=0; i<size; i++)
2547 VecGetValues(LocalSolutionPetsc_, 1, &colonne_locale, &solution(i));
2556 statistics().
begin_count(STD_COUNTERS::gpu_copyfromdevice,statistics().get_last_opened_counter_level()+1);
2557 VecGetValues(SolutionPetsc_, size,
ix.addr(), solution.
addr());
2559 statistics().
end_count(STD_COUNTERS::gpu_copyfromdevice);
2562 if (
verbose) Cout << finl <<
"[Petsc] Time to update solution: \t" << statistics().
compute_time(start) << finl;
2578 if (type_pc_==PCEISENSTAT) mataij_=1;
2581 if (
read_matrix() && type_pc_==PCHYPRE) mataij_=1;
2594#ifdef PETSC_HAVE_OPENMP
2606 if (factored_matrix_!=
"") mataij_=1;
2615#ifdef PETSC_HAVE_CUDA
2616 if (!
amgx_) check_matrice_symetrique_=
false;
2619 check_matrice_symetrique_=
false;
2626 if (check_matrice_symetrique_)
2628 Mat MatricePetscComplete;
2630 Create_MatricePetsc(MatricePetscComplete, 1, matrice);
2632 Create_MatricePetsc(MatricePetsc, mataij_, matrice);
2633 PetscBool matrices_identiques;
2636 MatMultAddEqual(MatricePetsc, MatricePetscComplete, n, &matrices_identiques);
2637 if (!matrices_identiques)
2639 Cerr <<
"Error: matrix PETSc are different according to the symmetric storage or not." << finl;
2641 Cerr <<
"Contact TRUST support team." << finl;
2644 MatView(MatricePetsc, PETSC_VIEWER_STDOUT_WORLD);
2645 MatView(MatricePetscComplete, PETSC_VIEWER_STDOUT_WORLD);
2649 MatDestroy(&MatricePetsc);
2650 MatDestroy(&MatricePetscComplete);
2656void Solv_Petsc::Create_objects(
const Matrice_Morse& mat,
int blocksize)
2659 Mat MatricePrecondionnementPetsc;
2663 preconditionnement_non_symetrique_ = 1;
2664 if (mataij_==1) preconditionnement_non_symetrique_ = 0;
2668 if (preconditionnement_non_symetrique_)
2669 Create_MatricePetsc(MatricePrecondionnementPetsc, 1, mat);
2674 if (MatricePetsc_!=
nullptr) MatDestroy(&MatricePetsc_);
2675 Create_MatricePetsc(MatricePetsc_, mataij_, mat);
2677 MatSetBlockSize(MatricePetsc_, blocksize);
2698 MatSetOption(MatricePetsc_, MAT_SYMMETRIC, PETSC_TRUE);
2699 if (type_pc_ == PCLU)
2703 PCSetType(PreconditionneurPetsc_, PCCHOLESKY);
2705 else if (type_pc_ == PCSOR)
2706 PCSORSetSymmetric(PreconditionneurPetsc_, SOR_LOCAL_SYMMETRIC_SWEEP);
2712 static int message_affi =
limpr() >= 0;
2718 Cout <<
"The LU decomposition of a matrix with ";
2719 Cout <<
"Cholesky from MUMPS may take several minutes, please wait..." << finl;
2721 <<
"If the decomposition fails/crashes cause a lack of memory, then increase the number of CPUs for your calculation"
2724 <<
"or add reduce_ram option (syntax: cholesky { reduce_ram }) to suppress preventive memory increase (INCTL(14))"
2727 <<
"or use Cholesky_out_of_core keyword to write the decomposition on the disk, thus saving memory but with an extra CPU cost during solve."
2730 <<
"To see the RAM required by the decomposition in the .out file, add impr option to the solver: petsc cholesky { impr }"
2733 <<
"If an error INFOG(1)=-8|-9|-11|-17|-20 is returned, you can try to increase the ICNTL(14) parameter of MUMPS by using the -mat_mumps_icntl_14 command line option."
2737 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERMUMPS);
2750 add_option(
"mat_mumps_icntl_14",
"40");
2752 add_option(
"mat_mumps_icntl_14",
"90");
2758 Cout <<
"Cholesky from SUPERLU_DIST may take several minutes, please wait..." << finl;
2759 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERSUPERLU_DIST);
2764 Cout <<
"Cholesky from PETSc may take several minutes, please wait...";
2765 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERPETSC);
2770 Cout <<
"Cholesky from UMFPACK may take several minutes, please wait...";
2771 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERUMFPACK);
2776 Cout <<
"Cholesky from Pastix may take several minutes, please wait...";
2777 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERPASTIX);
2782 Cout <<
"Cholesky from Cholmod may take several minutes, please wait...";
2783 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERCHOLMOD);
2788 Cout <<
"Cholesky from Strumpack may take several minutes, please wait...";
2789 PCFactorSetMatSolverType(PreconditionneurPetsc_, MATSOLVERSTRUMPACK);
2794 Cout <<
"LU factorization may take several minutes, please wait...";
2798 Cerr <<
"PCFactorSetMatSolverType not called for direct solver, solveur_direct_=" <<
solveur_direct_ << finl;
2799 Cerr <<
"Contact TRUST support." << finl;
2805 Cout <<
" OK " << finl;
2812 if (preconditionnement_non_symetrique_)
2814 KSPSetOperators(SolveurPetsc_, MatricePetsc_, MatricePrecondionnementPetsc);
2815 MatDestroy(&MatricePrecondionnementPetsc);
2819 KSPSetOperators(SolveurPetsc_, MatricePetsc_, MatricePetsc_);
2829 filename +=
"_Factored_Matrix.petsc";
2830 if (factored_matrix_ ==
"read" || factored_matrix_ ==
"disk")
2832 int filename_found = 0;
2838 if (!stat(filename, &f)) filename_found = 1;
2840 envoyer_broadcast(filename_found, 0);
2844 factored_matrix_ =
"read";
2849 Cerr <<
"File " << filename <<
" not found";
2850 if (factored_matrix_ ==
"read")
2852 Cerr <<
"!" << finl;
2857 Cerr <<
"." << finl;
2858 Cerr <<
"So we will compute the factored matrix and we will save it to disk." << finl;
2859 factored_matrix_ =
"save";
2863 if (factored_matrix_ ==
"read")
2866 PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &viewer);
2867 PCFactorSetUpMatSolverType(PreconditionneurPetsc_);
2869 PCFactorGetMatrix(PreconditionneurPetsc_, &FactoredMatrix);
2873 Cerr <<
"Reading the factored matrix in the file " << filename << finl;
2874 MatLoad(FactoredMatrix, viewer);
2875 PetscViewerDestroy(&viewer);
2877 else if (factored_matrix_ ==
"save")
2881 PCFactorSetUpMatSolverType(PreconditionneurPetsc_);
2882 PCSetUp(PreconditionneurPetsc_);
2884 PCFactorGetMatrix(PreconditionneurPetsc_, &FactoredMatrix);
2890 Cerr <<
"A=" << finl;
2891 MatView(MatricePetsc_, PETSC_VIEWER_STDOUT_WORLD);
2892 Cerr <<
"LU=" << finl;
2893 MatView(FactoredMatrix, PETSC_VIEWER_STDOUT_WORLD);
2896 Cerr <<
"Writing the factored matrix in the file " << filename << finl;
2897 Cerr <<
"Not implemented yet." << finl;
2900 PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_WRITE, &viewer);
2901 MatView(FactoredMatrix, viewer);
2902 PetscViewerDestroy(&viewer);
2909 else if (factored_matrix_ !=
"")
2911 Cerr <<
"Unknown option for factored_matrix option: " << factored_matrix_ << finl;
2912 Cerr <<
"Options are: read|save|disk" << finl;
2921 KSPSetUp(SolveurPetsc_);
2922 PCSetUpOnBlocks(PreconditionneurPetsc_);
2924 if (
limpr()>0 &&
chaine_lue_.contient(
"gamg_")) KSPView(SolveurPetsc_, PETSC_VIEWER_STDOUT_WORLD);
2925 if (
verbose) Cout <<
"[Petsc] Time to setup solver: \t" << statistics().
compute_time(start) << finl;
2928void Solv_Petsc::Create_vectors(
const DoubleVect& b)
2930 if (SecondMembrePetsc_!=
nullptr)
return;
2938 VecCreate(PETSC_COMM_WORLD,&SecondMembrePetsc_);
2941 VecSetSizes(SecondMembrePetsc_, PETSC_DECIDE,
nb_rows_tot_);
2942 else if (petsc_cpus_selection_)
2944 PetscInt nb_rows_petsc = compute_nb_rows_petsc(
nb_rows_tot_);
2945 VecSetSizes(SecondMembrePetsc_, nb_rows_petsc, PETSC_DECIDE);
2948 VecSetSizes(SecondMembrePetsc_,
nb_rows_, PETSC_DECIDE);
2951 VecType vtype = VECSTANDARD;
2955 if (getenv(
"PETSC_USE_KOKKOS")!=
nullptr)
2958#ifdef PETSC_HAVE_CUDA
2961#ifdef PETSC_HAVE_HIP
2966 VecSetType(SecondMembrePetsc_, vtype);
2967 VecSetOptionsPrefix(SecondMembrePetsc_, option_prefix_);
2968 VecSetFromOptions(SecondMembrePetsc_);
2970 VecDuplicate(SecondMembrePetsc_,&SolutionPetsc_);
2978 if (different_partition_)
2981 VecCreateSeq(PETSC_COMM_SELF,
nb_rows_, &LocalSolutionPetsc_);
2987 ISCreateGeneral(PETSC_COMM_WORLD, from.size_array(), from.addr(), PETSC_COPY_VALUES, &fromis);
2988 VecScatterCreate(SolutionPetsc_, fromis, LocalSolutionPetsc_, PETSC_NULLPTR, &VecScatter_);
2995void Solv_Petsc::Create_DM(
const DoubleVect& b)
2997 if (dm_!=
nullptr)
return;
3001 std::map<std::string, std::vector<PetscInt>> champ;
3003 std::vector<std::tuple<const MD_Vector_composite *, int, int, std::string>>
3009 while (mdc_list.size())
3011 const MD_Vector_composite& mdc = *std::get<0>(mdc_list.back());
3012 int idx = std::get<1>(mdc_list.back()), mult = std::get<2>(mdc_list.back()), un = 1;
3013 std::string prefix = std::get<3>(mdc_list.back());
3014 mdc_list.pop_back();
3015 for (
int i = 0; i < mdc.
nb_parts(); i++)
3019 if (sub_type(MD_Vector_composite, mdb))
3020 mdc_list.push_back(std::make_tuple(&ref_cast(MD_Vector_composite, mdb), idx, mult2,
3021 prefix + std::to_string((
long long) i)));
3024 std::vector<PetscInt> indices;
3029 champ[prefix + std::to_string((
long long) i)] = indices;
3037 PetscSectionCreate(PETSC_COMM_WORLD, &sec);
3038 PetscSectionSetNumFields(sec, (
int)champ.size());
3042 for (
auto &&kv : champ)
3044 PetscSectionSetFieldName(sec, idx, kv.first.c_str());
3045 if (
limpr() >= 0) Cerr <<
"Field " << kv.first <<
" available for PCFieldsplit." << finl;
3046 for (
int j = 0; j < (int) kv.second.size(); j++)
3047 PetscSectionSetDof(sec, kv.second[j], 1), PetscSectionSetFieldDof(sec, kv.second[j], idx, 1);
3050 PetscSectionSetUp(sec);
3053 DMShellCreate(PETSC_COMM_WORLD, &dm_);
3054 DMSetLocalSection(dm_, sec);
3056 PetscSectionDestroy(&sec);
3058 if (sub_type(MD_Vector_composite, b.
get_md_vector().
valeur())) PCSetDM(PreconditionneurPetsc_, dm_);
3061PetscInt Solv_Petsc::compute_nb_rows_petsc(PetscInt nb_rows_tot)
3064 PetscInt nb_rows_petsc = nb_rows_tot / petsc_nb_cpus_;
3066 if (
je_suis_maitre()) nb_rows_petsc = nb_rows_tot - (petsc_nb_cpus_ - 1) * nb_rows_petsc;
3068 if (petsc_cpus_selection_==1)
3071 if (
Process::me() >= petsc_nb_cpus_) nb_rows_petsc = 0;
3073 else if (petsc_cpus_selection_==2)
3076 if (
Process::me() % petsc_nb_cpus_ != 0) nb_rows_petsc = 0;
3080 Cerr <<
"Error: petsc_cpus_selection_=" << petsc_cpus_selection_ << finl;
3083 return nb_rows_petsc;
3087void Solv_Petsc::Create_MatricePetsc(Mat& MatricePetsc,
int mataij,
const Matrice_Morse& mat_morse)
3093 assert(!sub_type(Matrice_Morse_Sym, mat_morse));
3098 Journal() <<
"renum_=" << finl;
3100 Journal() <<
"items_to_keep_=" << finl;
3108 MatCreate(PETSC_COMM_WORLD, &MatricePetsc);
3111 else if (petsc_cpus_selection_)
3113 PetscInt nb_rows_petsc = compute_nb_rows_petsc(
nb_rows_tot_);
3114 Journal() <<
"Process " <<
Process::me() <<
" has " << nb_rows_petsc <<
" rows of the matrix PETSc." << finl;
3115 MatSetSizes(MatricePetsc, nb_rows_petsc, nb_rows_petsc, PETSC_DECIDE, PETSC_DECIDE);
3126 MatSetType(MatricePetsc, MATSBAIJ);
3131 MatType mtype = MATAIJ;
3135 if (getenv(
"PETSC_USE_KOKKOS")!=
nullptr)
3136 mtype = MATAIJKOKKOS;
3138#ifdef PETSC_HAVE_CUDA
3139 mtype = MATAIJCUSPARSE;
3141#ifdef PETSC_HAVE_HIP
3142 mtype = MATAIJHIPSPARSE;
3146 MatSetType(MatricePetsc, mtype);
3150 MatSetOptionsPrefix(MatricePetsc, option_prefix_);
3151 MatSetFromOptions(MatricePetsc);
3154 MatGetType(MatricePetsc, &mat_type);
3155 if (strcmp(mat_type, MATSEQAIJ) == 0 || strcmp(mat_type, MATMPIAIJ) == 0) mataij = 1;
3161 bool use_coo = mat_morse.
get_coeff().isDataOnDevice() &&
gpu_;
3164 if (
verbose) Cout <<
"[Petsc] Using COO to preallocate the matrix on the device." << finl;
3168 const auto& tab1 = mat_morse.
get_tab1();
3169 const auto& tab2 = mat_morse.
get_tab2();
3170 const int n = tab1.size_array() - 1;
3176 PetscMalloc2(nnz, &coo_i, nnz, &coo_j);
3177 ArrOfTID& renum_array =
renum_;
3178 int ligne_locale = 0;
3180 for (
int i = 0; i < n; i++)
3184 const auto k0 = tab1[i] - 1;
3185 const auto k1 = tab1[i + 1] - 1;
3186 for (
auto k = k0; k < k1; k++)
3188 const int colonne_locale = tab2[k] - 1;
3190 const PetscInt colonne_globale = renum_array[colonne_locale];
3191 coo_i[nnz] = ligne_globale;
3192 coo_j[nnz] = colonne_globale;
3198 MatSetPreallocationCOO(MatricePetsc, nnz, coo_i, coo_j);
3199 PetscFree2(coo_i, coo_j);
3206 ArrOfTID& renum_array =
renum_;
3209 const auto& tab1 = mat_morse.
get_tab1();
3210 const auto& tab2 = mat_morse.
get_tab2();
3212 const int n = tab1.size_array() - 1;
3213 for (
int i = 0; i < n; i++)
3217 const auto k0 = tab1[i] - 1;
3218 const auto k1 = tab1[i + 1] - 1;
3220 for (
auto k = k0; k < k1; k++)
3222 const int colonne_locale = tab2[k] - 1;
3223 const PetscInt colonne_globale = renum_array[colonne_locale];
3224 if (colonne_globale >= premiere_colonne_globale && colonne_globale < derniere_colonne_globale)
3234 Journal() <<
"nnz=" << nnz << finl;
3235 Journal() <<
"d_nnz=" << d_nnz << finl;
3236 Journal() <<
"o_nnz=" << o_nnz << finl;
3241 if (different_partition_)
3245 int nz =
Process::mp_max((nnz.size_array() == 0 ? 0 : (
int) max_array(
3247 MatSeqSBAIJSetPreallocation(MatricePetsc, block_size_, nz, PETSC_NULLPTR);
3248 MatMPISBAIJSetPreallocation(MatricePetsc, block_size_, nz, PETSC_NULLPTR, nz, PETSC_NULLPTR);
3252 MatSeqSBAIJSetPreallocation(MatricePetsc, block_size_, PETSC_DEFAULT, nnz.addr());
3254 MatMPISBAIJSetPreallocation(MatricePetsc, block_size_, (
nb_rows_ == 0 ? 0 : PETSC_DEFAULT), d_nnz.addr(),
3255 (
nb_rows_ == 0 ? 0 : PETSC_DEFAULT), o_nnz.addr());
3260 if (different_partition_)
3264 int nz =
Process::mp_max((nnz.size_array() == 0 ? 0 : (
int) max_array(
3266 MatSeqAIJSetPreallocation(MatricePetsc, nz, PETSC_NULLPTR);
3267 MatMPIAIJSetPreallocation(MatricePetsc, nz, PETSC_NULLPTR, nz, PETSC_NULLPTR);
3271 MatSeqAIJSetPreallocation(MatricePetsc, PETSC_DEFAULT, nnz.addr());
3273 MatMPIAIJSetPreallocation(MatricePetsc, (
nb_rows_ == 0 ? 0 : PETSC_DEFAULT), d_nnz.addr(),
3274 (
nb_rows_ == 0 ? 0 : PETSC_DEFAULT), o_nnz.addr());
3283 MatSetOption(MatricePetsc, MAT_IGNORE_ZERO_ENTRIES, PETSC_FALSE);
3286 MatSetOption(MatricePetsc, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE);
3289 ArrOfDouble nonzeros(2);
3291 nonzeros[1] = (double)mat_morse.
nb_coeff();
3292 for (
int i = 0; i < nonzeros[1]; i++)
3296 if (nonzeros[1] > 0)
3298 double ratio = 1 - (double) nonzeros[0] / (
double) nonzeros[1];
3300 Cout <<
"Warning! Trust matrix contains a lot of useless stored zeros: " << (int) (ratio * 100)
3301 <<
"% (" << nonzeros[1] - nonzeros[0] <<
"/" << nonzeros[1] <<
")" << finl;
3303 int zero_discarded = (int) (std::lrint(nonzeros[1] - nonzeros[0]));
3305 Cout <<
"[Petsc] Discarding " << zero_discarded
3306 <<
" zeros from TRUST matrix into the PETSc matrix ..." << finl;
3311 MatSetOption(MatricePetsc, MAT_NEW_NONZERO_ALLOCATION_ERR,
allow_realloc_ ? PETSC_FALSE : PETSC_TRUE);
3315 MatSetOption(MatricePetsc, MAT_USE_HASH_TABLE, PETSC_TRUE);
3316 if (
verbose) Cout <<
"[Petsc] Time to create the matrix: \t" << statistics().
compute_time(start) << finl;
3319 Solv_Petsc::Update_matrix(MatricePetsc, mat_morse);
3325 MatOrderingType ordering = MATORDERINGRCM;
3326 MatGetOrdering(MatricePetsc, ordering, &rowperm, &colperm);
3327 ISInvertPermutation(rowperm, PETSC_DECIDE, &inv_rowperm);
3328 ISInvertPermutation(colperm, PETSC_DECIDE, &inv_colperm);
3329 MatPermute(MatricePetsc, rowperm, colperm, &Aperm);
3330 MatDestroy(&MatricePetsc);
3331 MatricePetsc = Aperm;
3335void Solv_Petsc::Update_matrix(Mat& MatricePetsc,
const Matrice_Morse& mat_morse)
3345 bool use_coo = mat_morse.
get_coeff().isDataOnDevice() &&
gpu_;
3348 if (
verbose) Cout <<
"[Petsc] Using COO to fill the matrix on the device." << finl;
3350 const auto& tab_coeff = mat_morse.
get_coeff();
3352 DoubleTrav tab_v(nnz);
3353 CDoubleArrView coeff = tab_coeff.view_ro();
3354 CIntArrView indice = tab_indice.view_ro();
3355 DoubleArrView v =
static_cast<ArrOfDouble&
>(tab_v).view_wo();
3356 Kokkos::parallel_for(start_gpu_timer(__KERNEL_NAME__), nnz, KOKKOS_LAMBDA(
const int i)
3358 v[i] = coeff[indice[i]];
3360 end_gpu_timer(__KERNEL_NAME__);
3361 MatSetValuesCOO(MatricePetsc, v.data(), INSERT_VALUES);
3371 ArrOfTID& renum_array =
renum_;
3372 const auto& tab1 = mat_morse.
get_tab1();
3373 const auto& tab2 = mat_morse.
get_tab2();
3375 for (
int i = 0; i < tab1.size_array() - 1; i++)
3378 nnz[cpt] = (int)(tab1[i + 1] - tab1[i]);
3382 int size = (
nb_rows_ == 0 ? 0 : max_array(nnz));
3383 ArrOfDouble coeff_tmp(size);
3384 ArrOfPetscInt tab2_tmp(size);
3385 const auto& coeff = mat_morse.
get_coeff();
3387 const int n = tab1.size_array() - 1;
3388 for (
int i = 0; i < n; i++)
3394 const auto k0 = tab1[i] - 1;
3395 const auto k1 = tab1[i + 1] - 1;
3396 for (
auto k = k0; k < k1; k++)
3399 coeff_tmp[ncol] = coeff[k];
3400 tab2_tmp[ncol] = renum_array[tab2[k] - 1];
3406 Journal() << (int) ligne_globale <<
" ";
3407 for (
int j = 0; j < ncol; j++)
Journal() << coeff_tmp[j] <<
" ";
3412 MatSetValues(MatricePetsc, 1, &ligne_globale, ncol, tab2_tmp.addr(), coeff_tmp.addr(),
3418 Cerr <<
"We detect that the PETSc matrix coefficients are changed without pre-allocation." << finl;
3419 Cerr <<
"Try one of the following option:" << finl;
3420 Cerr <<
"- Rebuild the matrix each time instead of updating the coefficients (slower)." << finl;
3421 Cerr <<
"enable_allocation : Enable re-allocation of coefficients (slow)." << finl;
3422 Cerr <<
"- Discard new coefficients (risk!)" << finl;
3423 Cerr <<
"Try the two options and select the costly one." << finl;
3432 MatAssemblyBegin(MatricePetsc, MAT_FINAL_ASSEMBLY);
3433 MatAssemblyEnd(MatricePetsc, MAT_FINAL_ASSEMBLY);
3440 MatPermute(MatricePetsc, rowperm, colperm, &Aperm);
3441 MatDestroy(&MatricePetsc);
3442 MatricePetsc = Aperm;
3449 PetscBool IsSymmetric;
3450 MatIsSymmetric(MatricePetsc, 0.0, &IsSymmetric);
3451 if (IsSymmetric &&
limpr() >= 0 && !
amgx_) Cerr <<
"Warning: The PETSc matrix is aij but is symmetric. May be use sbaij ?" << finl;
3456 MatSetOption(MatricePetsc, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE);
3466 if (
verbose) Cout <<
"[Petsc] Time to fill the matrix: \t" << statistics().
compute_time(start) << finl;
3469bool Solv_Petsc::detect_new_stencil(
const Matrice_Morse& mat_morse)
3475 MatPermute(MatricePetsc_, inv_rowperm, inv_colperm, &Aperm);
3476 MatDestroy(&MatricePetsc_);
3477 MatricePetsc_ = Aperm;
3493 PetscInt nRowsLocal;
3494 const PetscInt *colIndices =
nullptr, *rowOffsets =
nullptr;
3498 localA = MatricePetsc_;
3503 MatMPIAIJGetLocalMat(MatricePetsc_, MAT_INITIAL_MATRIX, &localA);
3505 MatGetRowIJ(localA, 0, PETSC_FALSE, PETSC_FALSE, &nRowsLocal, &rowOffsets, &colIndices, &done);
3507 const auto& tab1 = mat_morse.
get_tab1();
3508 const auto& tab2 = mat_morse.
get_tab2();
3509 const auto& coeff = mat_morse.
get_coeff();
3510 const ArrOfTID& renum_array =
renum_;
3513 const int n = tab1.size_array() - 1;
3514 for (
int i = 0; i < n; i++)
3519 const auto k0 = tab1[i] - 1;
3520 const auto k1 = tab1[i + 1] - 1;
3523 for (
auto k = k0; k < k1; k++)
3524 if (coeff[k] != 0) nnz_row++;
3527 nnz_row += (int)(k1 - k0);
3528 const PetscInt kk0 = rowOffsets[RowLocal];
3529 const PetscInt kk1 = rowOffsets[RowLocal + 1];
3530 if (nnz_row != (
int)(kk1 - kk0))
3538 for (
auto k = k0; k < k1; k++)
3543 const PetscInt col = renum_array[tab2[k] - 1];
3545 for (PetscInt kk = kk0; kk < kk1; kk++)
3547 if (colIndices[kk] == col)
3555 Journal() <<
"Provisoire: mat_morse(" << RowGlobal <<
"," << col <<
")!=0 new " << finl;
3565 MatRestoreRowIJ(localA, 0, PETSC_FALSE, PETSC_FALSE, &nRowsLocal, &rowOffsets, &colIndices, &done);
3567 new_stencil =
mp_max(new_stencil);
3569 if (
verbose) Cout <<
"[Petsc] Time to check stencil: \t" << statistics().
compute_time(start) << finl;
: Classe Comm_Group_MPI, derivee de la classe abstraite Comm_Group.
Une entree dont la source est une chaine de caracteres.
Fichier en lecture Cette classe est a la classe C++ ifstream ce que la classe Entree est a la.
ifstream & get_ifstream()
Class defining operators and methods for all reading operation in an input flow (file,...
virtual int nb_items_seq_local() const
Nom get_name(int i) const
int get_shape(int i) const
const MD_Vector & get_desc_part(int i) const
const MD_Vector_base & valeur() const
Classe Matrice_Base Classe de base de la hierarchie des matrices.
virtual DoubleVect & ajouter_multvect(const DoubleVect &x, DoubleVect &r) const
Operation de multiplication-accumulation (saxpy) matrice vecteur.
Classe Matrice_Morse_Sym Represente une matrice M (creuse) symetrique stockee au format Morse.
Classe Matrice_Morse Represente une matrice M (creuse), non necessairement carree.
const auto & get_tab2() const
Sortie & imprimer_formatte(Sortie &s) const override
bool & constant_stencil() const
const auto & get_tab1() const
int nb_colonnes() const override
Return local number of columns (=size on the current proc).
const auto & get_coeff() const
int nb_lignes() const override
Return local number of lines (=size on the current proc).
Une chaine de caractere (Nom) en majuscules.
Un tableau d'objets de la classe Motcle.
int search(const Motcle &t) const
class Nom Une chaine de caractere pour nommer les objets de TRUST
virtual int debute_par(const char *const n) const
int longueur() const
Renvoie le nombre de caracteres de la chaine du Nom y compris le caractere zero de fin de chaine.
Nom & prefix(const char *const)
const std::string & getString() const
static const Type_info * info()
Donne des informations sur le type de l'Objet_U.
static bool disable_TU
Flag to disable or not the writing of the .TU files.
virtual Entree & readOn(Entree &)
Lecture d'un Objet_U sur un flot d'entree Methode a surcharger.
static const Nom & nom_du_cas()
Renvoie une reference constante vers le nom du cas.
virtual Sortie & printOn(Sortie &) const
Ecriture de l'objet sur un flot de sortie Methode a surcharger.
static const int & get_nb_groups()
static const Comm_Group & current_group()
renvoie une reference au groupe de processeurs actif courant
std::chrono::time_point< clock > time_point
void begin_count(const STD_COUNTERS &std_cnt, int counter_lvl=-100000)
double get_time_since_last_open(const STD_COUNTERS &name)
Give as a double the time (in second) elapsed in the operation tracked by the standard counter call n...
double compute_time(time_point start)
return time since start in seconds
time_point start_clock()
Start a clock, return a time_point, not a double.
void end_count(const std::string &custom_count_name, int count_increment=1, long int quantity_increment=0)
End the count of a counter and update the counter values.
static double mp_max(double)
static void mp_sum_for_each_item(TRUSTArray< _TYPE_ > &x, int n=-1)
static bool is_parallel()
static Sortie & Journal(int message_level=0)
Renvoie un objet statique de type Sortie qui sert de journal d'evenements.
static int nproc()
renvoie le nombre de processeurs dans le groupe courant Voir Comm_Group::nproc() et PE_Groups::curren...
static int me()
renvoie mon rang dans le groupe de communication courant.
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
static int je_suis_maitre()
renvoie 1 si on est sur le processeur maitre du groupe courant (c'est a dire me() == 0),...
static bool is_sequential()
Cette classe est a la classe C++ ofstream ce que la classe Sortie est a la classe C++ ostream Elle re...
public_for_cuda void Update_lhs_rhs(const DoubleVect &b, DoubleVect &x)
void Create_lhs_rhs_onDevice()
const ArrOfInt & indice_coeff_to_keep(const Matrice_Morse &)
void construit_matrice_morse_intermediaire(const Matrice_Base &, Matrice_Morse &)
void Update_solution(DoubleVect &x)
bool mat_ignore_zero_entries_
static public_for_cuda int instance
int reuse_preconditioner_nb_it_max_
static int numero_solveur
int resoudre_systeme(const Matrice_Base &, const DoubleVect &, DoubleVect &) override
void set_reuse_preconditioner(bool flag)
const Nom & get_chaine_lue() const
bool reuse_preconditioner()
bool nouvelle_matrice() const
void nommer(const Nom &nom) override
Donne un nom a l'Objet_U Methode virtuelle a surcharger.
void set_save_matrix(int flag)
void set_read_matrix(bool flag)
void fixer_nouvelle_matrice(bool i)
Classe de base des flux de sortie.
virtual void precision(int)
_SIZE_ size_array() const
bool isDataOnDevice() const
virtual const MD_Vector & get_md_vector() const
virtual void echange_espace_virtuel(IsExchangeBlocking exchange_type=IsExchangeBlocking::DefaultBlocking, const std::string kernel_name="noname")