next/Solv__GCP_8cpp_source.html

/****************************************************************************

* Copyright (c) 2026, CEA

* All rights reserved.

*

* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

*

* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*

*****************************************************************************/


#include <Solv_GCP.h>

#include <SSOR.h>

#include <Param.h>

#include <Matrice_Bloc_Sym.h>

#include <Sparskit.h>

#include <MD_Vector_base.h>

#include <MD_Vector_tools.h>

#include <communications.h>

#include <TRUSTTab_parts.h>

#include <Perf_counters.h>


// Class registration macro (defined outside this file). Its arguments are the class (Solv_GCP), the
// name string "Solv_GCP" and the base class (solv_iteratif). Going by the macro name ("sans
// constructeur"), it presumably generates the instantiable-class boilerplate except the constructor,
// which is written by hand below - TODO confirm against the macro definition.
Implemente_instanciable_sans_constructeur(Solv_GCP,"Solv_GCP",solv_iteratif);

// XD solv_gcp solveur_sys_base gcp BRACE Preconditioned conjugated gradient.


/*! @brief Default constructor.
 *
 * Only sets the convergence threshold seuil_ to 1e-12; no other member is touched here.
 */
Solv_GCP::Solv_GCP()
{
  // Default residual threshold; readOn() declares "seuil" as a required keyword.
  this->seuil_ = 1.0e-12;
}


/*! @brief Writes the solver parameters as a brace-delimited keyword list.
 *
 * The keywords written are the ones registered in readOn(), so that the text produced here can be
 * parsed back by readOn(). The flags "precond_diagonal" and "optimized" are written when set:
 * they are accepted by readOn(), and omitting them would silently drop these settings whenever a
 * solver is printed and read back.
 *
 * @param s output stream to write to
 * @return the same stream s
 */
Sortie& Solv_GCP::printOn(Sortie& s ) const
{
  s << " { seuil " << seuil_;

  // Preconditioner choice: either the preconditioner object itself or the explicit "none" keyword.
  if (le_precond_)
    s << " precond " << le_precond_;
  else
    s << " precond_nul ";

  // Flags read by readOn() that must survive a print/read round trip.
  if (precond_diag_) s << " precond_diagonal ";
  if (optimized_) s << " optimized ";

  // Verbosity: limpr() is 1 for "impr", -1 for "quiet" (see readOn()).
  if (limpr()==1) s << " impr ";
  if (limpr()==-1) s << " quiet ";

  if (save_matrice_) s << " save_matrice " << save_matrice_;
  // -1 is treated as "no user-defined iteration limit" and is therefore not written.
  if (nb_it_max_!=-1) s << " nb_it_max " << nb_it_max_;

  s << " } ";
  return s;
}


/*! @brief Reads the solver parameters from a brace-delimited block.
 *
 * Registers the supported keywords on a Param object, parses the block from the input stream and
 * then validates the result:
 *  - one of "precond", "precond_nul" or "precond_diagonal" must have been given, otherwise an
 *    error is printed and the process exits;
 *  - "precond_nul" detaches any preconditioner;
 *  - "impr" and "quiet" are mutually exclusive; they set limpr to 1 and -1 respectively
 *    (0 when neither is given).
 *
 * The "// XD" comments are kept verbatim next to the keyword they describe.
 *
 * @param is input stream positioned on the opening brace of the solver block
 * @return the same stream is
 */
Entree& Solv_GCP::readOn(Entree& is )
{
  bool no_precond_requested = false;
  bool verbose = false;
  bool silent = false;

  Param param(que_suis_je());
  param.ajouter("seuil",&seuil_,Param::REQUIRED);  // XD attr seuil floattant seuil REQ Value of the final residue. The
  // XD_CONT gradient ceases iteration when the Euclidean residue standard ||Ax-B|| is less than this value.
  param.ajouter("nb_it_max",&nb_it_max_); // XD attr nb_it_max entier nb_it_max OPT Keyword to set the maximum
  // XD_CONT iterations number for the Gcp.
  param.ajouter_flag("impr",&verbose);   // XD attr impr rien impr OPT Keyword which is used to request display of the
  // XD_CONT Euclidean residue standard each time this iterates through the conjugated gradient (display to the standard
  // XD_CONT outlet).
  param.ajouter_flag("quiet",&silent); // XD attr quiet rien quiet OPT To not displaying any outputs of the solver.
  param.ajouter("save_matrice|save_matrix",&save_matrice_); // XD attr save_matrice|save_matrix entier save_matrice OPT
  // XD_CONT to save the matrix in a file.
  param.ajouter("precond",&le_precond_);  // XD attr precond precond_base precond OPT Keyword to define system
  // XD_CONT preconditioning in order to accelerate resolution by the conjugated gradient. Many parallel preconditioning
  // XD_CONT methods are not equivalent to their sequential counterpart, and you should therefore expect differences,
  // XD_CONT especially when you select a high value of the final residue (seuil). The result depends on the number of
  // XD_CONT processors and on the mesh splitting. It is sometimes useful to run the solver with no preconditioning at
  // XD_CONT all. In particular: NL2 - when the solver does not converge during initial projection, NL2 - when comparing
  // XD_CONT sequential and parallel computations. NL2 With no preconditioning, except in some particular cases (no open
  // XD_CONT boundary), the sequential and the parallel computations should provide exactly the same results within fpu
  // XD_CONT accuracy. If not, there might be a coding error or the system of equations is singular.
  param.ajouter_flag("precond_nul",&no_precond_requested);  // XD attr precond_nul rien precond_nul OPT Keyword to not use a
  // XD_CONT preconditioning method.
  param.ajouter_flag("precond_diagonal", &precond_diag_); // XD attr precond_diagonal rien precond_diagonal OPT Keyword
  // XD_CONT to use diagonal preconditioning.
  param.ajouter_flag("optimized", &optimized_);  // XD attr optimized rien optimized OPT This keyword triggers a memory
  // XD_CONT and network optimized algorithms useful for strong scaling (when computing less than 100 000 elements per
  // XD_CONT processor). The matrix and the vectors are duplicated, common items removed and only virtual items really
  // XD_CONT used in the matrix are exchanged.NL2 Warning: this is experimental and known to fail in some VEF
  // XD_CONT computations (L2 projection step will not converge). Works well in VDF.
  param.lire_avec_accolades_depuis(is);

  // The data set must state a preconditioning choice explicitly.
  if (!le_precond_ && !no_precond_requested && !precond_diag_)
    {
      Cerr << "You forgot to define a preconditionner with the keyword precond." << finl;
      Cerr << "If you don't want a preconditionner, add for the solver definition:" << finl;
      Cerr << "precond_nul" << finl;
      Process::exit();
    }

  // "precond_nul" wins over any preconditioner that may also have been read.
  if (no_precond_requested)
    le_precond_.detach();

  assert(seuil_>0);

  // Verbosity level: 1 = impr, -1 = quiet, 0 = default.
  if (verbose && silent)
    {
      Cerr << "'impr' and 'quiet' keywords in Solv_GCP are not compatible. Use only one of them." << finl;
      Process::exit();
    }
  else
    {
      int level = 0;
      if (verbose)
        level = 1;
      else if (silent)
        level = -1;
      fixer_limpr(level);
    }

  return is;
}


/*! @brief Solves matrice * solution = secmem, forwarding to resoudre_() with an nmax value of 100.
 *
 * The STD_COUNTERS::system_solver counter is ended before the call to resoudre_() and begun again
 * afterwards, one level above the last opened counter level.
 *
 * @param matrice system matrix
 * @param secmem right-hand side
 * @param solution initial guess on input, computed solution on output
 * @return the value returned by resoudre_()
 */
int Solv_GCP::resoudre_systeme(const Matrice_Base& matrice, const DoubleVect& secmem, DoubleVect& solution)
{
  const int default_nmax = 100;

  statistics().end_count(STD_COUNTERS::system_solver, -1, 0);
  const int nb_iterations = resoudre_(matrice, secmem, solution, default_nmax);
  statistics().begin_count(STD_COUNTERS::system_solver,
                           statistics().get_last_opened_counter_level() + 1);
  return nb_iterations;
}


/*! @brief Solves matrice * solution = secmem, forwarding to resoudre_() with the caller's nmax.
 *
 * The STD_COUNTERS::system_solver counter is ended before the call to resoudre_() and begun again
 * afterwards, one level above the last opened counter level.
 *
 * @param matrice system matrix
 * @param secmem right-hand side
 * @param solution initial guess on input, computed solution on output
 * @param nmax iteration hint passed unchanged to resoudre_()
 * @return the value returned by resoudre_()
 */
int Solv_GCP::resoudre_systeme(const Matrice_Base& matrice, const DoubleVect& secmem, DoubleVect& solution,
                               int nmax)
{
  statistics().end_count(STD_COUNTERS::system_solver, -1, 0);
  const int nb_iterations = resoudre_(matrice, secmem, solution, nmax);
  statistics().begin_count(STD_COUNTERS::system_solver,
                           statistics().get_last_opened_counter_level() + 1);
  return nb_iterations;
}


/*! @brief Flags the cached data as stale, then reinitialises the base class and the preconditioner.
 *
 * reinit_ is lowered to 1 when it is greater than 1; a value of 0 (or 1) is left untouched.
 * prepare_data() tests this member to decide how much of its cached data must be rebuilt.
 */
void Solv_GCP::reinit()
{
  // Never raise the state: 0 must stay 0.
  if (reinit_ >= 2)
    {
      reinit_ = 1;
    }

  SolveurSys_base::reinit();

  // Forward to the preconditioner only when one is attached.
  if (le_precond_)
    {
      le_precond_->reinit();
    }
}


// Builds or refreshes the private working data (tmp_mat_, tmp_mat_virt_, resu_, residu_, tmp_p_,
// tmp_p_avec_items_virt_, tmp_solution_) used by resoudre_() when the "optimized" flag is set.
//
// The amount of work depends on the member reinit_:
//   reinit_ == 0 : everything is rebuilt (renumbering, memory block, index arrays); reinit_ becomes 1.
//   reinit_ <  2 : the matrix coefficients are copied again from "matrice"; reinit_ becomes 2.
//   in all cases : "solution" is copied into tmp_solution_ and "secmem" into resu_.
//
// Parameters:
//   matrice  : cast to Matrice_Bloc; block (0,0) is cast to Matrice_Morse_Sym (real/real part) and
//              block (0,1) to Matrice_Morse (real/virtual part).
//   secmem   : right-hand side; its line_size() must be 1, otherwise the process exits.
//   solution : initial guess, copied into tmp_solution_.
//
// All working arrays are carved out of the single block tmp_data_block_ through ref_data().
// The precond_diag_ variant is not implemented: both places that would need it call exit().
void Solv_GCP::prepare_data(const Matrice_Base& matrice, const DoubleVect& secmem, DoubleVect& solution)

{

  if (reinit_ == 0)

    {

      // Rebuild the entire structure (index arrays and coefficients)


      if (secmem.line_size() != 1)

        {

          Cerr << "Error line_size>1 not coded (GCP)" << finl;

          exit();

        }


      const Matrice_Bloc& mat_bloc = ref_cast(Matrice_Bloc, matrice);

      const Matrice_Morse_Sym& mat = ref_cast(Matrice_Morse_Sym, mat_bloc.get_bloc(0,0).valeur());

      const Matrice_Morse& mat_virt = ref_cast(Matrice_Morse, mat_bloc.get_bloc(0,1).valeur());


      // Determine the number of items actually used:

      {

        const int sztot_source = secmem.size_array();

        const int sz = secmem.size();

        renum_.reset();

        renum_.resize(sztot_source, RESIZE_OPTIONS::NOCOPY_NOINIT);

        // Flag convention in renum_ at this stage: 0 = keep the item, -1 = drop the item.
        renum_ = 0;

        // Remove virtual items

        int i;

        for (i = sz; i < sztot_source; i++)

          renum_[i] = -1;

        renum_.set_md_vector(secmem.get_md_vector());

        // Put back the virtual items that are referenced by a column index of mat_virt.
        const auto& tab2 = mat_virt.get_tab2();

        const auto n = tab2.size_array();

        for (i = 0; i < n; i++)

          {

            // Note: tab2 of the real-virtual part contains indices

            //  relative to the start of the virtual part (hence "+ sz")

            const int j = tab2[i]-1 + sz; // fortran -> c

            renum_[j] = 0;

          }

      }

      // Determine the number of non-empty rows of mat_virt

      int nb_lignes_mat_virt = 0;

      {

        const int n = mat_virt.get_tab1().size_array() - 1;

        for (int i = 0; i < n; i++)

          if (mat_virt.get_tab1()(i+1) - mat_virt.get_tab1()(i) > 0)

            nb_lignes_mat_virt++;

      }


      // Descriptor containing only the useful items:
      // (renum_ is passed by non-const reference; it is used below as an old-index -> new-index
      //  table, so it is presumably overwritten with the new numbering here - TODO confirm)

      MD_Vector md;

      MD_Vector_tools::creer_md_vect_renum_auto(renum_, md);

      const int sz_tot = md->get_nb_items_tot();

      const int sz = md->get_nb_items_reels();


      // Compute the required memory size:

      int mem_size = 0;

      mem_size += sz_tot * (int)sizeof(double); //  vector with virtual space (tmp_p_)

      mem_size += sz * (int)sizeof(double) * 3; // vectors without virtual space

      const int nb_lignes_mat = sz;

      // real/real matrix

      // Declared this way only to obtain the element type of tab1; the value is reset right after.
      auto nnz_reel_reel(mat.get_tab1()(0));

      nnz_reel_reel = 0;

      mem_size += (sz + 1) * (int)sizeof(nnz_reel_reel); // for tab1_

      assert(mat.get_tab1().size_array() == sz + 1);

      assert(mat.get_tab2().size_array() == mat.get_coeff().size_array());

      if (! precond_diag_)

        {

          nnz_reel_reel = mat.get_coeff().size_array();

        }

      else

        {

          // Diagonal coefficients are not stored:

          nnz_reel_reel = mat.get_coeff().size_array() - sz;

        }

      mem_size += (int)(nnz_reel_reel * (int)sizeof(double)); // for the coefficients

      mem_size += (int)(nnz_reel_reel * (int)sizeof(int)); // for the indices

      // real/virtual matrix

      mem_size += (nb_lignes_mat_virt+1) * (int)sizeof(nnz_reel_reel); // for tab1_

      const auto nnz_reel_virtuel = mat_virt.get_coeff().size_array();

      assert(mat_virt.get_tab2().size_array() == nnz_reel_virtuel);

      mem_size += (int)(nnz_reel_virtuel * (int)sizeof(double)); // for the coefficients

      mem_size += (int)(nnz_reel_virtuel * (int)sizeof(int)); // for the indices

      // size of tmp_mat_virt_.lignes_non_vides_

      mem_size += nb_lignes_mat_virt * (int)sizeof(int);

      // align size to a multiple of 8

      if (mem_size % 8 != 0)

        mem_size = (mem_size/8+1)*8;


      // Allocate arrays:

      // (double arrays first, then integer arrays at the end;

      //  otherwise padding would be needed to realign doubles after integers)

      //

      Journal() << "Solv_GCP::prepare allocating data chunk : " << mem_size << " bytes" << finl;

      // mem_size is in bytes, the block is an array of doubles (8 bytes each).
      tmp_data_block_.resize_array(mem_size/8, RESIZE_OPTIONS::NOCOPY_NOINIT);


      // Layout of the doubles: resu_ | residu_ | tmp_p_ (+ virtual part) | tmp_solution_ |
      //                        coefficients of tmp_mat_ | coefficients of tmp_mat_virt_
      double *ptr = tmp_data_block_.addr();

      resu_.ref_data(ptr, sz);

      ptr += sz;

      residu_.ref_data(ptr, sz);

      ptr += sz;

      tmp_p_avec_items_virt_.ref_data(ptr, sz_tot); // with virtual space

      tmp_p_avec_items_virt_.set_md_vector(md);

      // tmp_p_ points to the same memory region:

      tmp_p_.ref_data(ptr, sz); // without the virtual space

      ptr += sz_tot;

      tmp_solution_.ref_data(ptr, sz);

      ptr += sz;

      // Allocate arrays for the matrices:

      tmp_mat_.get_set_coeff().ref_data(ptr, nnz_reel_reel);

      ptr += nnz_reel_reel;

      tmp_mat_virt_.get_set_coeff().ref_data(ptr, nnz_reel_virtuel);

      ptr += nnz_reel_virtuel;

      // Done with doubles; now switch to integer arrays:

      // tab1_ stores trustIdType values, tab2_ stores int values

      // Layout of the integers: tab1 of tmp_mat_ | tab1 of tmp_mat_virt_ | tab2 of tmp_mat_ |
      //                         lignes_non_vides_ | tab2 of tmp_mat_virt_
      using tab1_ptr_t = decltype(tmp_mat_.get_set_tab1().addr());

      auto * tidptr = static_cast<tab1_ptr_t>(static_cast<void*>(ptr));

      tmp_mat_.get_set_tab1().ref_data(tidptr, nb_lignes_mat + 1);

      tidptr += nb_lignes_mat + 1;

      tmp_mat_virt_.get_set_tab1().ref_data(tidptr, nb_lignes_mat_virt + 1);

      tidptr += nb_lignes_mat_virt + 1;

      int * iptr = (int*)tidptr;

      tmp_mat_.get_set_tab2().ref_data(iptr, (int)nnz_reel_reel);

      iptr += nnz_reel_reel;

      tmp_mat_virt_.lignes_non_vides_.ref_data(iptr, nb_lignes_mat_virt);

      iptr += nb_lignes_mat_virt;

      tmp_mat_virt_.get_set_tab2().ref_data(iptr, (int)nnz_reel_virtuel);

      iptr += nnz_reel_virtuel;

      // Allocation complete.

      // Sanity check: the carved-out arrays must not run past the end of the block.
      assert(((char*)iptr) <= ((char*)tmp_data_block_.addr() + mem_size));


      // Fill index arrays (tab1_, tab2_ and lignes_non_vides_)

      if (! precond_diag_)

        {

          tmp_mat_.get_set_tab1().inject_array(mat.get_tab1());

          {

            // fill tab2 (with optional renumbering)

            for (auto i = 0; i < nnz_reel_reel; i++)

              {

                int j = mat.get_tab2()(i)-1; // fortran->c

                int rj = renum_[j];

                assert(rj < sz_tot);

                tmp_mat_.get_set_tab2()(i) = rj+1; // c->fortran

              }

          }

          tmp_mat_.set_nb_columns( sz_tot );

        }

      else

        {

          // Build the matrix D^(-1/2) * A * D^(-1/2)

          // diagonal coefficients are not stored

          // Filling tab1_ is non-trivial, hence:

          {

            auto src_index = 0; // index into mat.tab2_ and coeff_

            auto dest_index = 0; // index into tmp_mat_.tab2_ and coeff_

            int i_ligne;

            for (i_ligne = 0; i_ligne < nb_lignes_mat; i_ligne++)

              {

                // Each row has one fewer coefficient than in the original matrix

                // (the diagonal coefficient is not stored)

                tmp_mat_.get_set_tab1()(i_ligne) = dest_index + 1; // fortran index of the start of the row

                const int ncoeff = (int)(mat.get_tab1()(i_ligne+1) - mat.get_tab1()(i_ligne) - 1);

                // Do not insert the diagonal coefficient

                // (the first stored entry of each row is expected to be the diagonal one)
                assert(mat.get_tab2()(src_index) == i_ligne + 1); // fortran index

                src_index++;

                // Insert the remaining coefficients:

                for (int i = 0; i < ncoeff; i++, src_index++, dest_index++)

                  tmp_mat_.get_set_tab2()(dest_index) = mat.get_tab2()(src_index);


              }

            // End of the last row:

            tmp_mat_.get_set_tab1()(i_ligne) = dest_index + 1; // fortran index of the start of the row

          }

          tmp_mat_.set_nb_columns( sz_tot );

        }

      // fill tmp_mat_virt_

      // Only the non-empty rows of mat_virt are stored; lignes_non_vides_ records, for each stored
      // row, the (fortran) index of the original row.
      {

        tmp_mat_virt_.get_set_tab1()[0] = 1;

        int i_ligne_dest = 0;

        int dest_index = 0;

        for (int i_ligne = 0; i_ligne < nb_lignes_mat; i_ligne++)

          {

            const int count = (int)(mat_virt.get_tab1()(i_ligne+1) - mat_virt.get_tab1()(i_ligne));

            if (count > 0)

              {

                tmp_mat_virt_.lignes_non_vides_[i_ligne_dest] = i_ligne + 1; // fortran index

                tmp_mat_virt_.get_set_tab1()[i_ligne_dest] = dest_index + 1; // index fortran

                i_ligne_dest++;

                auto src_index = mat_virt.get_tab1()(i_ligne) - 1; // fortran->c

                for (int i = 0; i < count; i++, src_index++, dest_index++)

                  {

                    // mat_virt contains fortran indices relative to the start of the virtual part,

                    // they are converted to C indices relative to the complete vector

                    int j = mat_virt.get_tab2()(src_index) + sz - 1;

                    int rj = renum_[j];

                    // tmp_mat_virt stores column indices relative to the complete vector

                    // (not only the virtual part)

                    tmp_mat_virt_.get_set_tab2()[dest_index] = rj + 1; // fortran index

                  }

              }

          }

        // End of the last row:

        tmp_mat_virt_.get_set_tab1()[i_ligne_dest] = dest_index + 1; // index fortran

      }

      // Structure is now valid; the coefficients still have to be copied (next block).
      reinit_ = 1;

    }


  if (reinit_ < 2)

    {

      // Refresh the coefficients only (the index arrays built above are reused).
      const Matrice_Bloc& mat_bloc = ref_cast(Matrice_Bloc, matrice);

      const Matrice_Morse_Sym& mat = ref_cast(Matrice_Morse_Sym, mat_bloc.get_bloc(0,0).valeur());

      const Matrice_Morse& mat_virt = ref_cast(Matrice_Morse, mat_bloc.get_bloc(0,1).valeur());

      if (!precond_diag_)

        {

          tmp_mat_.get_set_coeff() = mat.get_coeff();

          tmp_mat_virt_.get_set_coeff() = mat_virt.get_coeff();

        }

      else

        {

          // compute D^(-1/2)

          // Not implemented: the process exits here.
          exit();

          // compute the product D^(-1/2) * A * D^(-1/2)


        }

      reinit_ = 2;

    }


  // Done at every call: load the initial guess and the right-hand side into the working vectors.
  if (!precond_diag_)

    {

      tmp_solution_.inject_array(solution, tmp_solution_.size());

      resu_.inject_array(secmem, resu_.size_array());

    }

  else

    {

      // Not implemented for diagonal preconditioning: the process exits here.
      exit();

    }

}


// In-place update vx[i] = vx[i] * alpha - vy[i].
// The number of entries processed is vx.size() when vx.size_reelle_ok() is true and
// vx.size_totale() otherwise. vy is only read and must hold at least that many entries
// (checked by an assert). Entries are handled two at a time, with a final single entry
// when the count is odd.
static void multiply_sub(DoubleVect& vx, DoubleVect& vy, double alpha)
{
  const int count = vx.size_reelle_ok() ? vx.size() : vx.size_totale();
  assert(vy.size_totale() >= count);

  double *x = vx.addr();
  const double *y = vy.addr();

  // Index one past the last complete pair.
  const int paired_end = count - (count % 2);
  for (int i = 0; i < paired_end; i += 2)
    {
      const double first = x[i] * alpha - y[i];
      const double second = x[i + 1] * alpha - y[i + 1];
      x[i] = first;
      x[i + 1] = second;
    }

  // Leftover entry when the count is odd.
  if (paired_end < count)
    x[paired_end] = x[paired_end] * alpha - y[paired_end];
}


// In-place update vx[i] += alpha * vy[i] over the vx.size() entries of vx.
// Returns the sum, local to this processor, of vx[i]*vx[i] computed on the updated values.
// The squares are accumulated in two partial sums (even-index entries, odd-index entries)
// which are added together at the end.
// Warning: not implemented for shared items.
static double ajoute_alpha_v_norme(DoubleVect& vx, double alpha, DoubleVect& vy)
{
  const int count = vx.size();
  assert(vy.size() == count);

  double *x = vx.addr();
  const double *y = vy.addr();

  double sum_even = 0.;
  double sum_odd = 0.;

  // Index one past the last complete pair.
  const int paired_end = count - (count % 2);
  for (int i = 0; i < paired_end; i += 2)
    {
      const double first = x[i] + alpha * y[i];
      const double second = x[i + 1] + alpha * y[i + 1];
      x[i] = first;
      x[i + 1] = second;
      sum_even += first * first;
      sum_odd += second * second;
    }

  // Leftover entry when the count is odd: it has an even index.
  if (paired_end < count)
    {
      const double last = x[paired_end] + alpha * y[paired_end];
      x[paired_end] = last;
      sum_even += last * last;
    }

  return sum_even + sum_odd;
}


// Preconditioned conjugate gradient iteration for matrice * solution = secmem.
//
// Parameters:
//   matrice  : system matrix; in the "optimized" mode it is only read by prepare_data() and the
//              products are done with the private copies tmp_mat_ / tmp_mat_virt_.
//   secmem   : right-hand side.
//   solution : initial guess on input, result on output. Its virtual space is updated at the end
//              only if get_flag_updated_result() is set.
//   nmax     : iteration hint. The effective value is min(max(nmax, nb_items_seq_tot * line_size),
//              10000000). A non-negative nb_it_max_ adds a second limit to the loop.
//
// Returns niter, the loop counter.
// NOTE(review): the loop condition evaluates "niter++" whenever the residual test passes, including
// on the evaluation that ends the loop because a limit is reached. In that case niter is one more
// than the number of iterations actually performed.
//
// If the residual norm is still above seuil_ after the loop and no nb_it_max_ was given
// (nb_it_max_ < 0), an error is printed and the process exits.
int Solv_GCP::resoudre_(const Matrice_Base& matrice,

                        const DoubleVect& secmem,

                        DoubleVect& solution,

                        int nmax)

{

  const int n_items_reels = solution.size_reelle_ok() ? solution.size_reelle() : solution.size_totale();

  // Effective iteration cap: at least the total number of unknowns, at most 10 000 000.
  {

    const auto nb_items_seq = solution.get_md_vector()->nb_items_seq_tot();

    const int ls = secmem.line_size();

    const auto nb_inco_tot = nb_items_seq * ls;

    auto nmax0 = std::max(nb_inco_tot, (trustIdType)nmax);

    auto nmaxmax = 10000000;

    nmax = static_cast<int>(std::min<trustIdType>(nmax0, nmaxmax));

  }


  const int avec_precond = bool(le_precond_);

  const int precond_requires_echange_espace_virtuel =

    avec_precond && (le_precond_->get_flag_updated_input());


  const int optimized = optimized_;


  if (optimized)

    {

      // Working vectors and matrices are views into the private data block.
      prepare_data(matrice, secmem, solution);

    }

  else

    {

      // Working vectors get the same structure as "solution"; tmp_solution_ references "solution"
      // directly and resu_ receives the right-hand side.
      resu_.reset();

      residu_.reset();

      tmp_p_avec_items_virt_.reset();

      resu_.copy(solution, RESIZE_OPTIONS::NOCOPY_NOINIT);

      residu_.copy(solution, RESIZE_OPTIONS::NOCOPY_NOINIT);

      tmp_p_avec_items_virt_.copy(solution, RESIZE_OPTIONS::NOCOPY_NOINIT);

      tmp_p_.ref(tmp_p_avec_items_virt_);

      tmp_solution_.ref(solution);

      resu_.inject_array(secmem);

    }


  // Initial residual: residu_ = A * x0 - b (x0 is temporarily stored in tmp_p_).
  tmp_p_avec_items_virt_.inject_array(tmp_solution_, n_items_reels);

  tmp_p_avec_items_virt_.echange_espace_virtuel();

  // residu does not need an up-to-date virtual space

  // but tmp_p_ does need its virtual space to be up to date...

  if (optimized)

    {

      tmp_mat_.multvect_(tmp_p_avec_items_virt_, residu_);

      // compute the product; the scalar product is not used:

      tmp_mat_virt_.ajouter_mult_vect_et_prodscal(tmp_p_avec_items_virt_, residu_);

    }

  else

    {

      matrice.multvect(tmp_p_avec_items_virt_, residu_);

    }

  // WARNING: it is assumed that secmem has been copied into resu_

  operator_sub(residu_, resu_, VECT_REAL_ITEMS); // do not touch the virtual space


  // First search direction: preconditioned residual, or the residual itself without preconditioner
  // (the sign is flipped a few lines below).
  if (avec_precond)

    {

      if (precond_requires_echange_espace_virtuel)

        residu_.echange_espace_virtuel();


      if (optimized)

        le_precond_->preconditionner(tmp_mat_, residu_, tmp_p_);

      else

        le_precond_->preconditionner(matrice, residu_, tmp_p_);


    }

  else

    {

      tmp_p_.inject_array(residu_, n_items_reels);

    }


  // Reduce 3 mp_sum calls to 1 by using mp_sum_for_each

  double dold = local_prodscal(residu_, tmp_p_);

  operator_negate(tmp_p_, VECT_REAL_ITEMS);

  double norme = local_carre_norme_vect(residu_);

  double norm_b = local_carre_norme_vect(resu_);

  Process::mp_sum_for_each(dold, norme, norm_b);

  norme = sqrt(norme);

  double norme_b = sqrt(norm_b);


  if (limpr()==1)

    {

      double norme_relative=(norme_b>DMINFLOAT?norme/(norme_b+DMINFLOAT):norme);

      Cout << "Norm of the residue: " << norme << " (" << norme_relative << ")" << finl;

    }

  int niter = 0;

  // Second iteration limit: the user value nb_it_max_ when it is non-negative, nmax otherwise.
  int nb_it_max=nmax;

  if (nb_it_max_>-1)

    nb_it_max=nb_it_max_;

  while ( ( norme > seuil_ ) && (niter++ < nmax) &&( niter<nb_it_max))

    {

      // Precondition for multvect

      // (the only virtual space exchange of the algorithm, unless the preconditioner needs one)

      tmp_p_avec_items_virt_.echange_espace_virtuel();

      // On the other hand, the virtual space of resu does not need to be up to date:

      // resu_ = A * p and p.(A p)
      double resu_scalaire_p_local;

      if (optimized)

        {

          resu_scalaire_p_local = tmp_mat_.multvect_et_prodscal(tmp_p_avec_items_virt_, resu_);

          resu_scalaire_p_local += tmp_mat_virt_.ajouter_mult_vect_et_prodscal(tmp_p_avec_items_virt_, resu_);

        }

      else

        {

          matrice.multvect(tmp_p_avec_items_virt_, resu_);

          resu_scalaire_p_local = local_prodscal(resu_, tmp_p_avec_items_virt_);

        }

      const double resu_scalaire_p = mp_sum(resu_scalaire_p_local);

      // Step length, then update of the solution: x += alfa * p
      const double alfa = dold / resu_scalaire_p;

      ajoute_alpha_v(tmp_solution_, alfa, tmp_p_, VECT_REAL_ITEMS);


      // Update of the residual: residu_ += alfa * A p, and local squared norm of the new residual.
      double norme_residu_locale;

      if (optimized)

        {

          norme_residu_locale = ajoute_alpha_v_norme(residu_, alfa, resu_);

        }

      else

        {

          ajoute_alpha_v(residu_, alfa, resu_);

          norme_residu_locale = local_carre_norme_vect(residu_);

        }


      if(avec_precond)

        {

          if (precond_requires_echange_espace_virtuel)

            residu_.echange_espace_virtuel();

          // resu_ is reused to hold the preconditioned residual.
          if (optimized)

            le_precond_->preconditionner(tmp_mat_, residu_, resu_);

          else

            le_precond_->preconditionner(matrice, residu_, resu_);


          double residu_scalaire_resu = local_prodscal(residu_, resu_);

          norme = norme_residu_locale;

          // optimisation: both sums are computed in a single pass

          mp_sum_for_each(residu_scalaire_resu, norme);

          assert(residu_scalaire_resu >= 0);

          // New direction: p = p * (dnew / dold) - (preconditioned residual)
          multiply_sub(tmp_p_, resu_, residu_scalaire_resu / dold);

          dold = residu_scalaire_resu;

        }

      else

        {

          // NOTE(review): dnew and norme both look like the global squared norm of residu_,
          // obtained through two separate reductions - confirm whether one could be reused.
          const double dnew = mp_carre_norme_vect(residu_);

          norme = mp_sum(norme_residu_locale);

          assert(dnew >= 0);

          // New direction: p = p * (dnew / dold) - residual
          multiply_sub(tmp_p_, residu_, dnew / dold);

          dold = dnew;

        }

      norme = sqrt(norme);


      if (limpr()==1)

        {

          Cout << norme << " ";

          if ((niter % 15) == 0) Cout << finl ;

        }

    }

  // Non-convergence is fatal only when the user did not set nb_it_max.
  if ((nb_it_max_<0)&& (norme > seuil_))

    {

      Cerr << "No convergence after : " << niter << " iterations\n";

      Cerr << " Residue : "<< norme << "\n";

      Cerr << " threshold : "<< seuil_ << "\n";

      Cerr << "Change your data set." << finl;

      exit();

    }


  // In optimized mode the iterations ran on the private copy tmp_solution_: copy the result back.
  if (optimized)

    solution.inject_array(tmp_solution_, n_items_reels);


  // The user wants a result with updated virtual space:

  if (get_flag_updated_result())

    solution.echange_espace_virtuel();


  // Display the final residual unless "quiet" was requested (limpr() == -1)

  if (limpr()>-1)

    {

      double norme_relative=(norme_b>0?norme/(norme_b+DMINFLOAT):norme);

      Cout << finl;

      Cout << "Final residue: " << norme << " ( " << norme_relative << " )"<<finl;

    }

  return(niter);


}


Entree
Class defining the operators and methods for all read operations on an input stream (file,...
Definition Entree.h:42

MD_Vector_base::get_nb_items_tot
virtual int get_nb_items_tot() const
Definition MD_Vector_base.h:41

MD_Vector_base::nb_items_seq_tot
virtual trustIdType nb_items_seq_tot() const
Definition MD_Vector_base.h:42

MD_Vector_base::get_nb_items_reels
virtual int get_nb_items_reels() const
Definition MD_Vector_base.h:40

MD_Vector_tools::creer_md_vect_renum_auto
static void creer_md_vect_renum_auto(IntVect &flags_renum, MD_Vector &md_vect)
Same as creer_md_vect_renum() but creates a default numbering.
Definition MD_Vector_tools.cpp:334

MD_Vector
This class is an OWN_PTR, but the pointed-to object is shared among multiple...
Definition MD_Vector.h:48

Matrice_Base
Matrice_Base class - Base class of the matrix hierarchy.
Definition Matrice_Base.h:34

Matrice_Base::multvect
virtual DoubleVect & multvect(const DoubleVect &, DoubleVect &) const
Multiplication of a vector by the matrix.
Definition Matrice_Base.h:103

Matrice_Bloc
Definition Matrice_Bloc.h:51

Matrice_Bloc::get_bloc
virtual const Matrice & get_bloc(int i, int j) const
Definition Matrice_Bloc.cpp:601

Matrice_Morse_Sym
Matrice_Morse_Sym class - Represents a sparse symmetric matrix M stored in Morse format.
Definition Matrice_Morse_Sym.h:34

Matrice_Morse
Matrice_Morse class - Represents a (sparse) matrix M, not necessarily square,.
Definition Matrice_Morse.h:50

Matrice_Morse::get_tab2
const auto & get_tab2() const
Definition Matrice_Morse.h:111

Matrice_Morse::get_tab1
const auto & get_tab1() const
Definition Matrice_Morse.h:110

Matrice_Morse::get_coeff
const auto & get_coeff() const
Definition Matrice_Morse.h:112

Objet_U::readOn
virtual Entree & readOn(Entree &)
Reads an Objet_U from an input stream. Virtual method to override.
Definition Objet_U.cpp:289

Objet_U::printOn
virtual Sortie & printOn(Sortie &) const
Writes the object to an output stream. Virtual method to override.
Definition Objet_U.cpp:278

Param::REQUIRED
@ REQUIRED
Definition Param.h:115

Process::mp_sum_for_each
static void mp_sum_for_each(T &arg1, T &arg2)
C++14 compatible mp_sum_for_each: combine multiple mp_sum calls into one collective operation Usage: ...
Definition Process.cpp:208

Process::Journal
static Sortie & Journal(int message_level=0)
Returns a static Sortie object used as an event journal.
Definition Process.cpp:592

Process::mp_sum
static double mp_sum(double)
Computes the sum of x over all processors in the current group.
Definition Process.cpp:145

Process::exit
static void exit(int exit_code=-1)
Exit routine for TRUST within a Kokkos region.
Definition Process.cpp:466

Solv_GCP
Definition Solv_GCP.h:26

Solv_GCP::resu_
DoubleVect resu_
Definition Solv_GCP.h:62

Solv_GCP::optimized_
bool optimized_
Definition Solv_GCP.h:47

Solv_GCP::tmp_solution_
DoubleVect tmp_solution_
Definition Solv_GCP.h:69

Solv_GCP::nb_it_max_
int nb_it_max_
Definition Solv_GCP.h:87

Solv_GCP::residu_
DoubleVect residu_
Definition Solv_GCP.h:63

Solv_GCP::tmp_data_block_
ArrOfDouble tmp_data_block_
Definition Solv_GCP.h:77

Solv_GCP::tmp_p_
DoubleVect tmp_p_
Definition Solv_GCP.h:68

Solv_GCP::reinit
void reinit() override
Definition Solv_GCP.cpp:129

Solv_GCP::precond_diag_
bool precond_diag_
Definition Solv_GCP.h:57

Solv_GCP::tmp_mat_virt_
Matrice_SuperMorse tmp_mat_virt_
Definition Solv_GCP.h:73

Solv_GCP::renum_
IntVect renum_
Definition Solv_GCP.h:80

Solv_GCP::resoudre_
int resoudre_(const Matrice_Base &, const DoubleVect &, DoubleVect &, int)
Definition Solv_GCP.cpp:422

Solv_GCP::reinit_
int reinit_
Definition Solv_GCP.h:86

Solv_GCP::resoudre_systeme
int resoudre_systeme(const Matrice_Base &, const DoubleVect &, DoubleVect &) override
Definition Solv_GCP.cpp:111

Solv_GCP::Solv_GCP
Solv_GCP()
Definition Solv_GCP.cpp:30

Solv_GCP::prepare_data
void prepare_data(const Matrice_Base &matrice, const DoubleVect &secmem, DoubleVect &solution)
Definition Solv_GCP.cpp:138

Solv_GCP::tmp_p_avec_items_virt_
DoubleVect tmp_p_avec_items_virt_
Definition Solv_GCP.h:59

Solv_GCP::tmp_mat_
Matrice_Morse_Sym tmp_mat_
Definition Solv_GCP.h:71

SolveurSys_base::limpr
int limpr() const
Definition SolveurSys_base.h:41

SolveurSys_base::save_matrice_
int save_matrice_
Definition SolveurSys_base.h:79

SolveurSys_base::get_flag_updated_result
int get_flag_updated_result() const
Definition SolveurSys_base.h:60

SolveurSys_base::reinit
virtual void reinit()
Definition SolveurSys_base.h:48

SolveurSys_base::fixer_limpr
void fixer_limpr(int l)
Definition SolveurSys_base.h:38

Sortie
Base class for output streams.
Definition Sortie.h:52

TRUSTArray::size_array
_SIZE_ size_array() const
Definition TRUSTArray.tpp:187

TRUSTArray::addr
_TYPE_ * addr()
Definition TRUSTArray.tpp:159

TRUSTArray::inject_array
TRUSTArray & inject_array(const TRUSTArray &source, _SIZE_ nb_elements=-1, _SIZE_ first_element_dest=0, _SIZE_ first_element_source=0)
Definition TRUSTArray.cpp:195

TRUSTVect::size
_SIZE_ size() const
Definition TRUSTVect.tpp:45

TRUSTVect::size_totale
_SIZE_ size_totale() const
Definition TRUSTVect.tpp:61

TRUSTVect::line_size
int line_size() const
Definition TRUSTVect.tpp:67

TRUSTVect::size_reelle
_SIZE_ size_reelle() const
Definition TRUSTVect.tpp:27

TRUSTVect::size_reelle_ok
_SIZE_ size_reelle_ok() const
Definition TRUSTVect.tpp:38

TRUSTVect::get_md_vector
virtual const MD_Vector & get_md_vector() const
Definition TRUSTVect.h:123

TRUSTVect::echange_espace_virtuel
virtual void echange_espace_virtuel(IsExchangeBlocking exchange_type=IsExchangeBlocking::DefaultBlocking, const std::string kernel_name="noname")
Definition TRUSTVect.tpp:282

solv_iteratif
Definition solv_iteratif.h:22

solv_iteratif::seuil_
double seuil_
Definition solv_iteratif.h:31