17#include <TRUSTVect_tools.tpp>
20#include <View_Types.h>
23#include <MD_Vector_seq.h>
26#include <Perf_counters.h>
// Builds the Block_Iter<_SIZE_> that the vector kernels walk, and reports a
// block count to the caller through `nblocs_left`.
// NOTE(review): the statements that fill nblocs_left and build the returned
// iterator are not visible here; the summary above is inferred from the
// signature and from how callers use the result - confirm.
//   opt           : which items are concerned (VECT_ALL_ITEMS, VECT_SEQUENTIAL_ITEMS, VECT_REAL_ITEMS)
//   md            : parallel descriptor; tested for validity and queried with use_blocks()
//   vect_size_tot : total number of entries of the vector
//   line_size     : number of entries per item
//   nblocs_left   : output, passed by reference
37template <
typename _SIZE_>
38Block_Iter<_SIZE_> determine_blocks(Mp_vect_options opt,
const MD_Vector& md,
const _SIZE_ vect_size_tot,
const int line_size,
int& nblocs_left)
// Blocks are used only when a subset of items is requested, md is valid, and
// md itself reports use_blocks().
48 const bool use_blocks = (opt != VECT_ALL_ITEMS && md && md->
use_blocks());
// Debug-only check: any option other than VECT_ALL_ITEMS must be one of these two.
52 assert(opt == VECT_SEQUENTIAL_ITEMS || opt == VECT_REAL_ITEMS);
// Debug-only check that this point is not reached with _SIZE_ == std::int64_t.
// NOTE(review): the enclosing branch is not visible here - confirm which path this guards.
55 assert( (!std::is_same<_SIZE_,std::int64_t>::value) );
// Non-empty vector case (the guarded statements are not visible here).
65 if (vect_size_tot > 0)
// Explicit instantiations of determine_blocks for the two size types used in
// this file: int and trustIdType.
76template Block_Iter<int> determine_blocks(Mp_vect_options opt,
const MD_Vector& md,
const int vect_size_tot,
const int line_size,
int& nblocs_left);
78template Block_Iter<trustIdType> determine_blocks(Mp_vect_options opt,
const MD_Vector& md,
const trustIdType vect_size_tot,
const int line_size,
int& nblocs_left);
// Element-wise update resu += alpha * x * y over the blocks returned by
// determine_blocks(), followed by invalidate_data(resu, opt).
// NOTE(review): the signature line is not visible here; the names resu, vx,
// vy, alpha and opt are taken from their uses in the body.
82template<
typename _TYPE_,
typename _SIZE_>
// NOTE(review): presumably marks this routine as still to be ported to Kokkos
// (the loop below works on raw pointers) - confirm what ToDo_Kokkos does.
86 ToDo_Kokkos(
"critical");
101 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
// Nothing to do when there is no block.
103 if (bloc_itr.
empty())
return;
// Base addresses of the three arrays (addr() accessor).
105 _TYPE_ *resu_base = resu.
addr();
106 const _TYPE_ *x_base = vx.
addr();
107 const _TYPE_ *y_base = vy.
addr();
// One iteration per block; the iterator yields begin then end, both in items,
// converted to entry offsets by multiplying by line_size.
108 for (; nblocs_left; nblocs_left--)
// NOTE(review): offsets are stored in int although the iterator is
// Block_Iter<_SIZE_>; check for narrowing if _SIZE_ can be 64-bit here.
111 const int begin_bloc = (*(bloc_itr++)) * line_size, end_bloc = (*(bloc_itr++)) * line_size;
112 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
113 _TYPE_ *resu_ptr = resu_base + begin_bloc;
114 const _TYPE_ *x_ptr = x_base + begin_bloc;
115 const _TYPE_ *y_ptr = y_base + begin_bloc;
116 int count = end_bloc - begin_bloc;
117 for (; count; count--)
// NOTE(review): y_ptr and resu_ptr are post-incremented below, but no
// increment of x_ptr is visible here - confirm it is advanced as well,
// otherwise every entry of the block is multiplied by the same x.
119 const _TYPE_ x = *x_ptr;
120 const _TYPE_ y = *(y_ptr++);
121 _TYPE_& p_resu = *(resu_ptr++);
122 p_resu += alpha * x * y;
// Called once after all blocks have been processed.
128 invalidate_data(resu, opt);
// Kernel that multiplies entries of `resu` by the matching entry x of `vx`,
// or by 1/x, launching one Kokkos::parallel_for per block. Each entry of vx
// drives delta_line_size consecutive entries of resu.
// NOTE(review): the first line of the signature and the lines selecting
// between the two update statements are not visible here; IS_MUL presumably
// selects `*= x` and the other case `*= 1/x` - confirm.
144template<
typename ExecSpace,
typename _TYPE_,
typename _SIZE_,
bool IS_MUL>
146 Block_Iter<_SIZE_>& bloc_itr,
const int line_size_vx,
const _SIZE_ vect_size_tot,
const int delta_line_size)
// data() of the rank-1 views in ExecSpace: read-only for vx, read-write for resu.
148 auto vx_view= vx.template view_ro<1, ExecSpace>().
data();
149 auto resu_view= resu.template view_rw<1, ExecSpace>().
data();
// Each block costs one kernel launch below; more than 3 blocks is reported.
151 if (nblocs_left>3) ToDo_Kokkos(
"nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
153 for (; nblocs_left; nblocs_left--)
// Block bounds expressed as entry indices of vx (item index * line_size_vx).
// NOTE(review): stored in int here while sibling kernels use _SIZE_ - check
// for narrowing when _SIZE_ is a 64-bit type.
156 const int begin_bloc = (*(bloc_itr++)) * line_size_vx;
157 const int end_bloc = (*(bloc_itr++)) * line_size_vx;
159 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
// NOTE(review): the policy below iterates absolute indices i in
// [begin_bloc, end_bloc), and resu_idx is computed as
// resu_start_idx + i * delta_line_size + j. With resu_start_idx equal to
// begin_bloc * delta_line_size, the block start appears to be counted twice
// whenever begin_bloc != 0 - confirm the intended indexing.
162 const int resu_start_idx = begin_bloc * delta_line_size;
164 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
// Timing is only recorded when statistics().get_use_gpu() is true.
165 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
166 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
const int i)
168 const _TYPE_ x = vx_view[i];
// Apply the same factor to the delta_line_size entries of resu tied to vx[i].
171 for (
int j = 0; j < delta_line_size; ++j)
173 const int resu_idx = resu_start_idx + i * delta_line_size + j;
175 resu_view[resu_idx] *= x;
177 resu_view[resu_idx] *= ((_TYPE_)1 / x);
180 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// Entry point for the "tres generic" special operation: checks the line
// sizes, gets the blocks to process, runs the kernel on either the default or
// the default host execution space, then calls invalidate_data(resu, opt).
// NOTE(review): the signature and the condition choosing between the two
// kernel calls are not visible here.
186template<TYPE_OPERATION_VECT_SPEC_GENERIC _TYPE_OP_,
typename _TYPE_,
typename _SIZE_>
// Compile-time flag forwarded to the kernel: true for the MUL_ operation.
192 static constexpr bool IS_MUL = (_TYPE_OP_ == TYPE_OPERATION_VECT_SPEC_GENERIC::MUL_);
// resu's line size must be a positive multiple of vx's line size.
198 assert(line_size > 0 && line_size_vx > 0 && line_size % line_size_vx == 0);
199 const int delta_line_size = line_size / line_size_vx;
// Debug-only consistency check between the size of vx and the size of resu.
200 assert(vx.
size_totale() * delta_line_size == vect_size_tot);
205 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
// NOTE(review): the statement guarded by this test is not visible here
// (sibling entry points return at this point) - confirm.
207 if (bloc_itr.
empty())
214 operation_speciale_tres_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, IS_MUL>(resu, vx, nblocs_left, bloc_itr, line_size_vx, vect_size_tot, delta_line_size);
216 operation_speciale_tres_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, IS_MUL>(resu, vx, nblocs_left, bloc_itr, line_size_vx, vect_size_tot, delta_line_size);
220 invalidate_data(resu, opt);
// NOTE(review): presumably an alternative build path where this routine is
// unavailable; the enclosing conditional is not visible here.
224 Cerr <<
"Error! operation_speciale_tres_generic can't be called in your project!" << finl;
// Kernel adding either alpha * x or alpha * x * x to each entry of `resu`,
// x being the matching entry of `vx`; one Kokkos::parallel_for per block.
// NOTE(review): the signature and the lines selecting between the two update
// statements are not visible here; IS_ADD presumably selects `alpha * x` -
// confirm.
239template<
typename ExecSpace,
typename _TYPE_,
typename _SIZE_,
bool IS_ADD>
// data() of the rank-1 views in ExecSpace: read-only for vx, read-write for resu.
243 auto vx_view= vx.template view_ro<1, ExecSpace>().
data();
244 auto resu_view= resu.template view_rw<1, ExecSpace>().
data();
// Each block costs one kernel launch below; more than 3 blocks is reported.
246 if (nblocs_left>3) ToDo_Kokkos(
"nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
248 for (; nblocs_left; nblocs_left--)
// Block bounds as entry indices (item index * line_size).
251 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
252 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
254 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
256 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
// Timing is only recorded when statistics().get_use_gpu() is true.
257 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
// NOTE(review): the lambda index is int while the policy bounds are _SIZE_;
// other kernels in this file use `const _SIZE_ i` - check for consistency.
258 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
const int i)
260 const _TYPE_ x = vx_view[i];
263 resu_view[i] += alpha * x;
265 resu_view[i] += alpha * x * x;
267 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// Entry point for the special "add" operations: gets the blocks to process,
// runs operation_speciale_generic_kernel on either the default or the default
// host execution space, then calls invalidate_data(resu, opt).
// NOTE(review): the signature and the condition choosing between the two
// kernel calls are not visible here.
273template <TYPE_OPERATION_VECT_SPEC _TYPE_OP_ ,
typename _TYPE_,
typename _SIZE_>
// Compile-time flag forwarded to the kernel: true for the ADD_ operation.
278 static constexpr bool IS_ADD = (_TYPE_OP_ == TYPE_OPERATION_VECT_SPEC::ADD_);
287 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
// Nothing to do when there is no block.
289 if (bloc_itr.
empty())
return;
294 operation_speciale_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, IS_ADD>(resu, vx, alpha, nblocs_left, bloc_itr, vect_size_tot, line_size);
296 operation_speciale_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, IS_ADD>(resu, vx, alpha, nblocs_left, bloc_itr, vect_size_tot, line_size);
300 invalidate_data(resu, opt);
// NOTE(review): presumably an alternative build path where this routine is
// unavailable; the enclosing conditional is not visible here.
304 Cerr <<
"Error! ajoute_operation_speciale_generic can't be called in your project!" << finl;
// Kernel applying one element-wise operator between `resu` and `vx`
// (resu op= vx, or resu = vx), selected at compile time by _TYPE_OP_.
// Two implementations are visible: a Kokkos one (views + parallel_for) and a
// raw-pointer one (data() + nested loops).
// NOTE(review): the signature and whatever selects between the two
// implementations (presumably a preprocessor conditional) are not visible here.
318template<
typename ExecSpace,
typename _TYPE_,
typename _SIZE_, TYPE_OPERATOR_VECT _TYPE_OP_>
// Exactly one of these flags is true for a given _TYPE_OP_ among the five listed.
322 static constexpr bool IS_ADD = (_TYPE_OP_ == TYPE_OPERATOR_VECT::ADD_), IS_SUB = (_TYPE_OP_ == TYPE_OPERATOR_VECT::SUB_),
323 IS_MULT = (_TYPE_OP_ == TYPE_OPERATOR_VECT::MULT_), IS_DIV = (_TYPE_OP_ == TYPE_OPERATOR_VECT::DIV_),
324 IS_EGAL = (_TYPE_OP_ == TYPE_OPERATOR_VECT::EGAL_);
// --- Kokkos implementation ---
// data() of the rank-1 views in ExecSpace: read-only for vx, read-write for resu.
327 auto vx_view= vx.template view_ro<1, ExecSpace>().
data();
328 auto resu_view= resu.template view_rw<1, ExecSpace>().
data();
// Each block costs one kernel launch below; more than 3 blocks is reported.
330 if (nblocs_left>3) ToDo_Kokkos(
"nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
332 for (; nblocs_left; nblocs_left--)
// Block bounds as entry indices (item index * line_size).
335 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
336 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
338 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
339 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
// Timing is only recorded when statistics().get_use_gpu() is true.
340 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
341 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
const _SIZE_ i)
// The flags are compile-time constants, so only one statement is active.
343 const _TYPE_ x = vx_view[i];
344 if (IS_ADD) resu_view[i] += x;
345 if (IS_SUB) resu_view[i] -= x;
346 if (IS_MULT) resu_view[i] *= x;
347 if (IS_DIV) resu_view[i] /= x;
348 if (IS_EGAL) resu_view[i] = x;
350 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// --- Raw-pointer implementation ---
354 _TYPE_ *resu_base = resu.
data();
355 const _TYPE_ *x_base = vx.
data();
356 for (; nblocs_left; nblocs_left--)
359 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size, end_bloc = (*(bloc_itr++)) * line_size;
360 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
// Pointers to the first entry of the block in each array.
361 _TYPE_ *resu_ptr = resu_base + begin_bloc;
362 const _TYPE_ *x_ptr = x_base + begin_bloc;
363 for (_SIZE_ count = 0; count < end_bloc - begin_bloc ; count++)
365 const _TYPE_& x = x_ptr[count];
366 _TYPE_ &p_resu = resu_ptr[count];
367 if (IS_ADD) p_resu += x;
368 if (IS_SUB) p_resu -= x;
369 if (IS_MULT) p_resu *= x;
370 if (IS_EGAL) p_resu = x;
371 if (IS_DIV) p_resu /= x;
// Entry point for vector/vector operators: gets the blocks to process, runs
// operator_vect_vect_generic_kernel on either the default or the default host
// execution space, then calls invalidate_data(resu, opt).
// NOTE(review): the signature and the condition choosing between the two
// kernel calls are not visible here.
380template <
typename _TYPE_,
typename _SIZE_, TYPE_OPERATOR_VECT _TYPE_OP_>
391 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
// Nothing to do when there is no block.
393 if (bloc_itr.
empty())
return;
398 operator_vect_vect_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(resu, vx, nblocs_left, bloc_itr, vect_size_tot, line_size);
400 operator_vect_vect_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(resu, vx, nblocs_left, bloc_itr, vect_size_tot, line_size);
403 invalidate_data(resu, opt);
// NOTE(review): presumably an alternative build path where this routine is
// unavailable; the enclosing conditional is not visible here.
407 Cerr <<
"Error! operator_vect_vect_generic can't be called in your project!" << finl;
// Kernel applying one operator to every selected entry of `resu`, selected at
// compile time by _TYPE_OP_. ADD/SUB/MULT/DIV/EGAL combine the entry with the
// scalar x; NEGATE/INV/ABS/SQRT/SQUARE use only the entry itself.
// NOTE(review): the signature is not visible here; x is used as a scalar in
// the lambda below.
431template<
typename ExecSpace,
typename _TYPE_,
typename _SIZE_, TYPE_OPERATOR_SINGLE _TYPE_OP_>
// One compile-time flag per supported operation.
435 static constexpr bool IS_ADD = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::ADD_), IS_SUB = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::SUB_),
436 IS_MULT = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::MULT_), IS_DIV = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::DIV_), IS_EGAL = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::EGAL_),
437 IS_NEGATE = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::NEGATE_), IS_INV = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::INV_), IS_ABS = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::ABS_),
438 IS_SQRT = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::SQRT_), IS_SQUARE = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::SQUARE_);
// data() of the read-write rank-1 view of resu in ExecSpace.
440 auto resu_view= resu.template view_rw<1, ExecSpace>().
data();
// Each block costs one kernel launch below; more than 3 blocks is reported.
442 if (nblocs_left>3) ToDo_Kokkos(
"nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
444 for (; nblocs_left; nblocs_left--)
// Block bounds as entry indices (item index * line_size).
447 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
448 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
450 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
451 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
// Timing is only recorded when statistics().get_use_gpu() is true.
452 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
453 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(
const _SIZE_ i)
// The flags are compile-time constants, so only one statement is active.
455 if (IS_SUB) resu_view[i] -= x;
456 if (IS_ADD) resu_view[i] += x;
457 if (IS_MULT) resu_view[i] *= x;
458 if (IS_EGAL) resu_view[i] = x;
459 if (IS_NEGATE) resu_view[i] = -resu_view[i];
// Results of Kokkos::abs / Kokkos::sqrt are cast back to _TYPE_.
460 if (IS_ABS) resu_view[i] = (_TYPE_) Kokkos::abs(resu_view[i]);
461 if (IS_SQRT) resu_view[i] = (_TYPE_) Kokkos::sqrt(resu_view[i]);
462 if (IS_SQUARE) resu_view[i] = resu_view[i]*resu_view[i];
463 if (IS_DIV) resu_view[i] /= x;
// Inverse computed in _TYPE_ arithmetic (integer division for integer _TYPE_).
464 if (IS_INV) resu_view[i] = (_TYPE_) ((_TYPE_)1 /resu_view[i]);
466 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// Entry point for vector/scalar operators: gets the blocks to process, runs
// operator_vect_single_generic_kernel on either the default or the default
// host execution space, then calls invalidate_data(resu, opt).
// NOTE(review): the signature line and the condition choosing between the two
// kernel calls are not visible here.
473template <
typename _TYPE_,
typename _SIZE_, TYPE_OPERATOR_SINGLE _TYPE_OP_ >
482 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
// Nothing to do when there is no block.
484 if (bloc_itr.
empty())
return;
489 operator_vect_single_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(resu, x, nblocs_left, bloc_itr, vect_size_tot, line_size);
491 operator_vect_single_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(resu, x, nblocs_left, bloc_itr, vect_size_tot, line_size);
495 invalidate_data(resu, opt);
// NOTE(review): presumably an alternative build path where this routine is
// unavailable; the enclosing conditional is not visible here.
499 Cerr <<
"Error! operator_vect_single_generic can't be called in your project!" << finl;
// Explicit instantiations of operator_vect_single_generic.
//
// With `int` as size type, every operation (ADD_, SUB_, MULT_, DIV_, EGAL_,
// NEGATE_, INV_, ABS_, SQRT_, SQUARE_) is instantiated for double, int and float.
// --- int size type: ADD_ ---
504template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::ADD_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
505template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::ADD_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
506template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::ADD_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// --- int size type: SUB_ ---
507template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::SUB_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
508template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::SUB_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
509template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::SUB_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// --- int size type: MULT_ ---
510template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::MULT_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
511template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::MULT_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
512template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::MULT_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// --- int size type: DIV_ ---
513template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::DIV_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
514template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::DIV_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
515template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::DIV_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// --- int size type: EGAL_ ---
516template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::EGAL_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
517template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::EGAL_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
518template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::EGAL_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// --- int size type: NEGATE_ ---
519template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::NEGATE_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
520template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::NEGATE_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
521template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::NEGATE_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// --- int size type: INV_ ---
522template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::INV_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
523template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::INV_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
524template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::INV_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// --- int size type: ABS_ ---
525template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::ABS_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
526template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::ABS_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
527template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::ABS_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// --- int size type: SQRT_ ---
528template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::SQRT_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
529template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::SQRT_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
530template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::SQRT_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// --- int size type: SQUARE_ ---
531template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::SQUARE_>(
TRUSTVect<double, int>& resu,
const double x, Mp_vect_options opt);
532template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::SQUARE_>(
TRUSTVect<int, int>& resu,
const int x, Mp_vect_options opt);
533template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::SQUARE_>(
TRUSTVect<float, int>& resu,
const float x, Mp_vect_options opt);
// With `trustIdType` as size type only a subset is instantiated:
// ADD_ and SUB_ for trustIdType/int/float/double, MULT_ for double/float,
// DIV_ for double.
// --- trustIdType size type: ADD_ ---
536template void operator_vect_single_generic<trustIdType, trustIdType, TYPE_OPERATOR_SINGLE::ADD_>(
TRUSTVect<trustIdType, trustIdType>& resu,
const trustIdType x, Mp_vect_options opt);
537template void operator_vect_single_generic<int, trustIdType, TYPE_OPERATOR_SINGLE::ADD_>(
TRUSTVect<int, trustIdType>& resu,
const int x, Mp_vect_options opt);
538template void operator_vect_single_generic<float, trustIdType, TYPE_OPERATOR_SINGLE::ADD_>(
TRUSTVect<float, trustIdType>& resu,
const float x, Mp_vect_options opt);
539template void operator_vect_single_generic<double, trustIdType, TYPE_OPERATOR_SINGLE::ADD_>(
TRUSTVect<double, trustIdType>& resu,
const double x, Mp_vect_options opt);
// --- trustIdType size type: SUB_ ---
541template void operator_vect_single_generic<trustIdType, trustIdType, TYPE_OPERATOR_SINGLE::SUB_>(
TRUSTVect<trustIdType, trustIdType>& resu,
const trustIdType x, Mp_vect_options opt);
542template void operator_vect_single_generic<int, trustIdType, TYPE_OPERATOR_SINGLE::SUB_>(
TRUSTVect<int, trustIdType>& resu,
const int x, Mp_vect_options opt);
543template void operator_vect_single_generic<float, trustIdType, TYPE_OPERATOR_SINGLE::SUB_>(
TRUSTVect<float, trustIdType>& resu,
const float x, Mp_vect_options opt);
544template void operator_vect_single_generic<double, trustIdType, TYPE_OPERATOR_SINGLE::SUB_>(
TRUSTVect<double, trustIdType>& resu,
const double x, Mp_vect_options opt);
// --- trustIdType size type: MULT_ ---
546template void operator_vect_single_generic<double, trustIdType, TYPE_OPERATOR_SINGLE::MULT_>(
TRUSTVect<double, trustIdType>& resu,
const double x, Mp_vect_options opt);
547template void operator_vect_single_generic<float, trustIdType, TYPE_OPERATOR_SINGLE::MULT_>(
TRUSTVect<float, trustIdType>& resu,
const float x, Mp_vect_options opt);
// --- trustIdType size type: DIV_ ---
549template void operator_vect_single_generic<double, trustIdType, TYPE_OPERATOR_SINGLE::DIV_>(
TRUSTVect<double, trustIdType>& resu,
const double x, Mp_vect_options opt);
// Kernel searching the minimum or maximum of `vx` (optionally of its absolute
// value) over the selected blocks, with one Kokkos::parallel_reduce per block.
// The best value found so far is kept in `min_max_val` and its location in
// `i_min_max`; both are in/out parameters.
// NOTE(review): the first line of the signature is not visible here.
557template<
typename ExecSpace,
typename _TYPE_,
typename _SIZE_,
typename _TYPE_RETURN_, TYPE_OPERATION_VECT _TYPE_OP_>
559 const _SIZE_ vect_size_tot,
const int line_size, _TYPE_& min_max_val,
int& i_min_max)
// No block: leave min_max_val and i_min_max untouched.
562 if (bloc_itr.
empty()) return ;
564 static constexpr bool IS_IMAX = (_TYPE_OP_ == TYPE_OPERATION_VECT::IMAX_), IS_IMIN = (_TYPE_OP_ == TYPE_OPERATION_VECT::IMIN_), IS_MAX = (_TYPE_OP_ == TYPE_OPERATION_VECT::MAX_),
565 IS_MIN = (_TYPE_OP_ == TYPE_OPERATION_VECT::MIN_), IS_MAX_ABS = (_TYPE_OP_ == TYPE_OPERATION_VECT::MAX_ABS_), IS_MIN_ABS = (_TYPE_OP_ == TYPE_OPERATION_VECT::MIN_ABS_);
// Group the six operations: maximum-like, minimum-like, and absolute-value based.
568 static constexpr bool IS_MAXS = (IS_MAX || IS_MAX_ABS || IS_IMAX);
569 static constexpr bool IS_MINS = (IS_MIN || IS_MIN_ABS || IS_IMIN);
570 static constexpr bool IS_ABS = (IS_MAX_ABS || IS_MIN_ABS);
// MaxLoc for maximum-like operations, MinLoc otherwise.
// NOTE(review): the location type is int, independently of _SIZE_ - check
// that indices always fit when _SIZE_ is a 64-bit type.
573 using reducer =
typename std::conditional<IS_MAXS, Kokkos::MaxLoc<_TYPE_, int>, Kokkos::MinLoc<_TYPE_, int>>::type;
575 using reducer_value_type =
typename reducer::value_type;
// Any _TYPE_OP_ outside the six handled operations aborts.
577 if (not(IS_MAXS || IS_MINS)) {
Process::exit(
"Wrong operation type in local_extrema_vect_generic_kernel");}
579 auto vx_view= vx.template view_ro<1, ExecSpace>().
data();
// Each block costs one kernel launch below; more than 3 blocks is reported.
581 if (nblocs_left>3) ToDo_Kokkos(
"nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
583 for (; nblocs_left; nblocs_left--)
// Block bounds as entry indices (item index * line_size).
586 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
587 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
590 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
593 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
// Per-block result (value + location) filled through reducer(bloc_min_max).
596 reducer_value_type bloc_min_max;
// Timing is only recorded when statistics().get_use_gpu() is true.
599 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
600 Kokkos::parallel_reduce(policy,
601 KOKKOS_LAMBDA(
const int i, reducer_value_type& local_min_max)
603 const _TYPE_ val = (IS_ABS) ? Kokkos::abs(vx_view[i]) : vx_view[i];
// Strict comparison: an equal value does not replace the current extremum.
605 if ( (IS_MAXS && val>local_min_max.val) || (IS_MINS && val<local_min_max.val) )
// NOTE(review): the matching update of local_min_max.loc is not visible
// here - confirm it is set alongside val.
607 local_min_max.val=val;
611 ,reducer(bloc_min_max));
612 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// Merge the block result into the running extremum (strict comparison again).
615 if ( (IS_MAXS && bloc_min_max.val > min_max_val) || (IS_MINS && bloc_min_max.val < min_max_val) )
617 min_max_val=bloc_min_max.val;
618 i_min_max= bloc_min_max.loc;
// Entry point for local extrema: gets the blocks to process, runs
// local_extrema_vect_generic_kernel on either the default or the default host
// execution space, and returns the index for IMAX_/IMIN_ or the value
// otherwise, cast to _TYPE_RETURN_.
// NOTE(review): the signature, the declaration of i_min_max and the condition
// choosing between the two kernel calls are not visible here.
625template <
typename _TYPE_,
typename _SIZE_,
typename _TYPE_RETURN_, TYPE_OPERATION_VECT _TYPE_OP_ >
642 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
// Starting value of the search, given by neutral_value for this type and operation.
645 _TYPE_ min_max_val = neutral_value<_TYPE_,_TYPE_OP_>();
// The kernel returns immediately when bloc_itr is empty.
653 local_extrema_vect_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, _TYPE_RETURN_, _TYPE_OP_>(vx, nblocs_left, bloc_itr, vect_size_tot, line_size, min_max_val, i_min_max);
655 local_extrema_vect_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, _TYPE_RETURN_, _TYPE_OP_>(vx, nblocs_left, bloc_itr, vect_size_tot, line_size, min_max_val, i_min_max);
658 static constexpr bool IS_IMAX = (_TYPE_OP_ == TYPE_OPERATION_VECT::IMAX_), IS_IMIN = (_TYPE_OP_ == TYPE_OPERATION_VECT::IMIN_);
660 return (IS_IMAX || IS_IMIN) ? (_TYPE_RETURN_)i_min_max : (_TYPE_RETURN_)min_max_val;
// NOTE(review): presumably an alternative build path where this routine is
// unavailable; the enclosing conditional is not visible here.
663 Cerr <<
"Error! local_extrema_vect_generic can't be called in your project!" << finl;
665 return (_TYPE_RETURN_)0;
// Explicit instantiations of local_extrema_vect_generic.
//
// TRUSTVect<double, int>, returning double: all six operations.
669template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::IMAX_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
670template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::IMIN_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
671template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::MAX_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
672template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::MIN_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
673template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::MAX_ABS_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
674template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::MIN_ABS_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
// TRUSTVect<double, int>, returning int: all six operations.
675template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::IMAX_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
676template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::IMIN_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
677template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::MAX_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
678template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::MIN_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
679template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::MAX_ABS_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
680template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::MIN_ABS_>(
const TRUSTVect<double, int>& vx, Mp_vect_options opt);
// TRUSTVect<int, int>, returning int: all six operations.
681template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::IMAX_>(
const TRUSTVect<int, int>& vx, Mp_vect_options opt);
682template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::IMIN_>(
const TRUSTVect<int, int>& vx, Mp_vect_options opt);
683template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::MAX_>(
const TRUSTVect<int, int>& vx, Mp_vect_options opt);
684template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::MIN_>(
const TRUSTVect<int, int>& vx, Mp_vect_options opt);
685template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::MAX_ABS_>(
const TRUSTVect<int, int>& vx, Mp_vect_options opt);
686template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::MIN_ABS_>(
const TRUSTVect<int, int>& vx, Mp_vect_options opt);
// trustIdType size type: only MAX_ABS_ on double, and MAX_ on int and trustIdType.
689template double local_extrema_vect_generic<double, trustIdType, double, TYPE_OPERATION_VECT::MAX_ABS_>(
const TRUSTVect<double, trustIdType>& vx, Mp_vect_options opt);
690template int local_extrema_vect_generic<int, trustIdType, int, TYPE_OPERATION_VECT::MAX_>(
const TRUSTVect<int, trustIdType>& vx, Mp_vect_options opt);
691template trustIdType local_extrema_vect_generic<trustIdType, trustIdType, trustIdType, TYPE_OPERATION_VECT::MAX_>(
const TRUSTVect<trustIdType, trustIdType>& vx, Mp_vect_options opt);
// Kernel reducing `vx` to a sum of its entries (SOMME_) or of their squares
// (SQUARE_). Two paths are visible: a single parallel_reduce driven by the
// iterator's item list, and a per-block loop of parallel_reduce calls.
// NOTE(review): the first line of the signature, the condition choosing
// between the two paths and the accumulation into `sum` are not visible here.
697template<
typename ExecSpace,
typename _TYPE_,
typename _SIZE_, TYPE_OPERATION_VECT_BIS _TYPE_OP_>
699 Block_Iter<_SIZE_>& bloc_itr,
const _SIZE_ vect_size_tot,
const int line_size, _TYPE_& sum)
701 static constexpr bool IS_SQUARE = (_TYPE_OP_ == TYPE_OPERATION_VECT_BIS::SQUARE_), IS_SUM = (_TYPE_OP_ == TYPE_OPERATION_VECT_BIS::SOMME_);
704 auto vx_view = vx.template view_ro<1, ExecSpace>().
data();
// --- Path 1: one reduction over the item list held by the iterator ---
708 auto items = bloc_itr.items_->template view_ro<1, ExecSpace>().data();
// Timing is only recorded when statistics().get_use_gpu() is true.
710 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
711 Kokkos::parallel_reduce(__KERNEL_NAME__,
712 Kokkos::RangePolicy<ExecSpace>(0, bloc_itr.items_->size_array()),
713 KOKKOS_LAMBDA(
const int i, _TYPE_& local_sum)
// NOTE(review): only the entry at items[i] * line_size is read, i.e. one
// entry per listed item - confirm this path is restricted to line_size == 1
// or that reading a single component is intended.
715 _SIZE_ item = items[i] * line_size;
716 const _TYPE_ x = vx_view[item];
717 if (IS_SQUARE) local_sum += x * x;
718 if (IS_SUM) local_sum += x;
720 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// --- Path 2: one reduction per block ---
724 for (; nblocs_left; nblocs_left--)
// Block bounds as entry indices (item index * line_size).
727 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
728 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
730 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
732 Kokkos::RangePolicy <ExecSpace> policy(begin_bloc, end_bloc);
736 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
737 Kokkos::parallel_reduce(policy, KOKKOS_LAMBDA(
// NOTE(review): the lambda parameter list below is cut short in this copy;
// the body uses `local_sum`, so the second parameter is presumably
// `_TYPE_& local_sum` - confirm.
738 const _SIZE_ i, _TYPE_
741 const _TYPE_ x = vx_view[i];
742 if (IS_SQUARE) local_sum += x * x;
743 if (IS_SUM) local_sum += x;
746 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// Entry point for the local sum / sum-of-squares reductions: gets the blocks
// to process, runs local_operations_vect_bis_generic_kernel on either the
// default or the default host execution space, and returns `sum`.
// NOTE(review): the signature, the declarations of master_vect and sum, the
// final return and the condition choosing between the two kernel calls are
// not visible here.
756template <
typename _TYPE_,
typename _SIZE_, TYPE_OPERATION_VECT_BIS _TYPE_OP_ >
// Sizes are read from master_vect.
763 const int line_size = master_vect.
line_size();
764 const _SIZE_ vect_size_tot = master_vect.
size_totale();
771 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
// No block: return sum as it stands at this point.
773 if (bloc_itr.
empty())
return sum;
778 local_operations_vect_bis_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(vx, nblocs_left, bloc_itr, vect_size_tot, line_size, sum);
780 local_operations_vect_bis_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(vx, nblocs_left, bloc_itr, vect_size_tot, line_size, sum);
// NOTE(review): presumably an alternative build path where this routine is
// unavailable; the enclosing conditional is not visible here.
784 Cerr <<
"Error! local_operations_vect_bis_generic can't be called in your project!" << finl;
// Explicit instantiations of local_operations_vect_bis_generic.
//
// int size type: SQUARE_ and SOMME_ for double, int and float.
790template double local_operations_vect_bis_generic<double, int, TYPE_OPERATION_VECT_BIS::SQUARE_>(
const TRUSTVect<double, int>& vx,Mp_vect_options opt);
791template int local_operations_vect_bis_generic<int, int, TYPE_OPERATION_VECT_BIS::SQUARE_>(
const TRUSTVect<int, int>& vx,Mp_vect_options opt);
792template float local_operations_vect_bis_generic<float, int, TYPE_OPERATION_VECT_BIS::SQUARE_>(
const TRUSTVect<float, int>& vx,Mp_vect_options opt);
793template double local_operations_vect_bis_generic<double, int, TYPE_OPERATION_VECT_BIS::SOMME_>(
const TRUSTVect<double, int>& vx,Mp_vect_options opt);
794template int local_operations_vect_bis_generic<int, int, TYPE_OPERATION_VECT_BIS::SOMME_>(
const TRUSTVect<int, int>& vx,Mp_vect_options opt);
795template float local_operations_vect_bis_generic<float, int, TYPE_OPERATION_VECT_BIS::SOMME_>(
const TRUSTVect<float, int>& vx,Mp_vect_options opt);
// trustIdType size type: only SOMME_ on double.
798template double local_operations_vect_bis_generic<double, trustIdType, TYPE_OPERATION_VECT_BIS::SOMME_>(
const TRUSTVect<double, trustIdType>& vx,Mp_vect_options opt);
// Kernel overwriting ranges of `resu` with a sentinel value. items_blocs is
// read in pairs: for each pair, the range [i, line_size * items_blocs[k]) is
// overwritten, then i moves to line_size * items_blocs[k+1]. A final range is
// overwritten after the loop.
// NOTE(review): the first line of the signature, the declaration and initial
// value of `i`, and the definition of bloc_end used by the final range are
// not visible here - confirm the final range runs to the end of the vector.
809template<
typename ExecSpace,
typename _TYPE_,
typename _SIZE_>
811 const ArrOfInt& items_blocs,
const int line_size,
const int blocs_size)
// Sentinel written into the overwritten entries.
813 _TYPE_ invalid = (_TYPE_)-987654321;
814 auto resu_view= resu.template view_rw<1, ExecSpace>().
data();
// items_blocs is consumed two entries at a time.
817 for (
int blocs_idx = 0; blocs_idx < blocs_size; blocs_idx += 2)
819 const int bloc_end = line_size * items_blocs[blocs_idx];
821 Kokkos::RangePolicy<ExecSpace> policy(i, bloc_end);
// Timing is only recorded when statistics().get_use_gpu() is true.
823 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
824 Kokkos::parallel_for(policy,KOKKOS_LAMBDA(
const int count)
826 resu_view[count]=invalid;
828 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// Next range to overwrite starts at the second entry of the current pair.
829 i = items_blocs[blocs_idx+1] * line_size;
// Final range, after the last pair.
833 Kokkos::RangePolicy<ExecSpace> policy(i, bloc_end);
835 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
836 Kokkos::parallel_for(policy,KOKKOS_LAMBDA(
const int count)
838 resu_view[count]=invalid;
840 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// Entry point overwriting with a sentinel the entries of `resu` handled by
// invalidate_data_kernel. Does nothing when opt is VECT_ALL_ITEMS or when md
// is not valid.
// NOTE(review): the signature, the declarations of md, items_blocs and
// line_size, and the condition choosing between the two kernel calls are not
// visible here.
845template <
typename _TYPE_,
typename _SIZE_>
// All items were computed, or there is no descriptor: nothing to invalidate.
853 if (opt == VECT_ALL_ITEMS || (!md))
return;
854 assert(opt == VECT_SEQUENTIAL_ITEMS || opt == VECT_REAL_ITEMS);
// Number of entries in items_blocs; the kernel reads them in pairs.
856 const int blocs_size = items_blocs.
size_array();
861 invalidate_data_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_>(resu, items_blocs, line_size, blocs_size);
863 invalidate_data_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_>(resu, items_blocs, line_size, blocs_size);
// NOTE(review): presumably an alternative build path where this routine is
// unavailable; the enclosing conditional is not visible here.
865 Cerr <<
"Error! invalidate_data can't be called in your project!" << finl;
// Kernel computing the local dot product of `vx` and `vy` over the selected
// blocks, with one Kokkos::parallel_reduce (Kokkos::Sum) per block.
// NOTE(review): the first line of the signature, the declaration of bloc_sum
// and its accumulation into `sum` are not visible here.
880template<
typename ExecSpace,
typename _TYPE_,
typename _SIZE_>
// NOTE(review): vect_size_tot is `const int` here while the sibling kernels
// take `const _SIZE_` - check for narrowing when _SIZE_ is a 64-bit type.
882 Block_Iter<_SIZE_>& bloc_itr,
const int vect_size_tot,
const int line_size, _TYPE_& sum)
// data() of the read-only rank-1 views of both operands in ExecSpace.
884 auto vx_view= vx.template view_ro<1, ExecSpace>().
data();
885 auto vy_view= vy.template view_ro<1, ExecSpace>().
data();
// Each block costs one kernel launch below; more than 3 blocks is reported.
887 if (nblocs_left>3) ToDo_Kokkos(
"nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
889 for (; nblocs_left; nblocs_left--)
// Block bounds as entry indices (item index * line_size).
892 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
893 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
896 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
899 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
// Timing is only recorded when statistics().get_use_gpu() is true.
905 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
906 Kokkos::parallel_reduce(policy, KOKKOS_LAMBDA(
const _SIZE_ i, _TYPE_& local_sum)
908 local_sum += vx_view[i]*vy_view[i];
// The block's partial result is delivered in bloc_sum.
910 , Kokkos::Sum<_TYPE_>(bloc_sum));
913 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
// Entry point for the local dot product: gets the blocks to process, runs
// local_prodscal_kernel on either the default or the default host execution
// space, and returns `sum`.
// NOTE(review): the signature, the declaration of sum, the final return and
// the condition choosing between the two kernel calls are not visible here.
922template<
typename _TYPE_,
typename _SIZE_>
937 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
// No block: return sum as it stands at this point.
939 if (bloc_itr.
empty())
return sum;
944 local_prodscal_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_>(vx, vy, nblocs_left, bloc_itr, vect_size_tot,line_size, sum);
946 local_prodscal_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_>(vx, vy, nblocs_left, bloc_itr, vect_size_tot,line_size, sum);
// NOTE(review): presumably an alternative build path where this routine is
// unavailable; the enclosing conditional is not visible here.
951 Cerr <<
"Error! local_prodscal can't be called in your project!" << finl;
virtual const ArrOfInt & get_blocs_items_to_sum() const =0
virtual const ArrOfInt & get_items_to_sum() const =0
virtual bool use_blocks() const =0
virtual const ArrOfInt & get_items_to_compute() const =0
virtual const ArrOfInt & get_blocs_items_to_compute() const =0
: Cette classe est un OWN_PTR mais l'objet pointe est partage entre plusieurs
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
static bool is_sequential()
_SIZE_ size_array() const
_SIZE_ size_totale() const
virtual const MD_Vector & get_md_vector() const