TrioCFD 1.9.8
TrioCFD documentation
Loading...
Searching...
No Matches
TRUSTVect_tools.cpp
1/****************************************************************************
2* Copyright (c) 2026, CEA
3* All rights reserved.
4*
5* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9*
10* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
11* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
12* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*
14*****************************************************************************/
15
16#include <TRUSTVect.h>
17#include <TRUSTVect_tools.tpp>
18#include <TRUSTTabs.h>
19#ifndef LATATOOLS
20#include <View_Types.h>
21#include <TRUSTTrav.h>
22#endif
23#include <MD_Vector_seq.h>
24
25#ifndef LATATOOLS
26#include <Perf_counters.h>
27#endif
28
29/**************************************************************************************/
30/* Warning ! This kernels are critical for performance into several TRUST applications !
31 * Do not change implementation without using performance regression testing !
32 * You are warned.
33 */
34
35/*! Determine which blocks of indices should be used to perform an operation.
36 */
37template <typename _SIZE_>
38Block_Iter<_SIZE_> determine_blocks(Mp_vect_options opt, const MD_Vector& md, const _SIZE_ vect_size_tot, const int line_size, int& nblocs_left)
39{
40 nblocs_left = 1;
41#ifndef LATATOOLS
42 // Should we use the members bloc_items_to_* of the MD_Vector_* classes ?
43 // (this must be avoided when
44 // - md.valeur() is not defined (md is nul)
45 // - we want all items (VECT_SEQUENTIAL_ITEMS)
46 // - md.valeur() is a MD_Vector_seq
47 // - or md.valeur() is a MD_Vector_composite when it is an aggregation of several MD_Vector_seq)
48 const bool use_blocks = (opt != VECT_ALL_ITEMS && md && md->use_blocks());
49
50 if (use_blocks)
51 {
52 assert(opt == VECT_SEQUENTIAL_ITEMS || opt == VECT_REAL_ITEMS);
53#if INT_is_64_ == 2
54 // Should never use parallel patterns in 64b:
55 assert( (!std::is_same<_SIZE_,std::int64_t>::value) );
56#endif
57 const ArrOfInt& items_blocs = (opt == VECT_SEQUENTIAL_ITEMS) ? md->get_blocs_items_to_sum() : md->get_blocs_items_to_compute();
58 const ArrOfInt& items = (opt == VECT_SEQUENTIAL_ITEMS) ? md->get_items_to_sum() : md->get_items_to_compute();
59 assert(items_blocs.size_array() % 2 == 0);
60 nblocs_left = items_blocs.size_array() >> 1;
61 return Block_Iter<_SIZE_>(items_blocs, items); // iterator on int, but operator*() will cast and return a _SIZE_
62 }
63 else
64#endif
65 if (vect_size_tot > 0)
66 {
67 // Warning, if vect_size_tot is 0, line_size might be 0 too
68 // Compute all data, in the vector (including virtual data), build a big bloc:
69 nblocs_left = 1;
70 return Block_Iter<_SIZE_>(0, vect_size_tot / line_size); // iterator on a single (big) block
71 }
72 return Block_Iter<_SIZE_>();
73}
74
75// Explicit instanciations
76template Block_Iter<int> determine_blocks(Mp_vect_options opt, const MD_Vector& md, const int vect_size_tot, const int line_size, int& nblocs_left);
77#if INT_is_64_ == 2
78template Block_Iter<trustIdType> determine_blocks(Mp_vect_options opt, const MD_Vector& md, const trustIdType vect_size_tot, const int line_size, int& nblocs_left);
79#endif
80
81
82template<typename _TYPE_, typename _SIZE_>
83void ajoute_produit_scalaire(TRUSTVect<_TYPE_,_SIZE_>& resu, _TYPE_ alpha, const TRUSTVect<_TYPE_,_SIZE_>& vx, const TRUSTVect<_TYPE_,_SIZE_>& vy, Mp_vect_options opt)
84{
85#ifndef LATATOOLS
86 ToDo_Kokkos("critical"); // Ne semble pas utilise...
87 resu.ensureDataOnHost();
90 // Master vect donne la structure de reference, les autres vecteurs doivent avoir la meme structure.
91 const TRUSTVect<_TYPE_,_SIZE_>& master_vect = resu;
92 const int line_size = master_vect.line_size(), vect_size_tot = master_vect.size_totale();
93 const MD_Vector& md = master_vect.get_md_vector();
94 assert(vx.line_size() == line_size && vy.line_size() == line_size);
95 assert(vx.size_totale() == vect_size_tot && vy.size_totale() == vect_size_tot); // this test is necessary if md is null
96#ifndef LATATOOLS
97 assert(vx.get_md_vector() == md && vy.get_md_vector() == md);
98#endif
99 // Determine blocs of data to process, depending on " opt"
100 int nblocs_left;
101 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
102 // Shortcut for empty arrays (avoid case line_size == 0)
103 if (bloc_itr.empty()) return;
104
105 _TYPE_ *resu_base = resu.addr();
106 const _TYPE_ *x_base = vx.addr();
107 const _TYPE_ *y_base = vy.addr();
108 for (; nblocs_left; nblocs_left--)
109 {
110 // Get index of next bloc start:
111 const int begin_bloc = (*(bloc_itr++)) * line_size, end_bloc = (*(bloc_itr++)) * line_size;
112 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
113 _TYPE_ *resu_ptr = resu_base + begin_bloc;
114 const _TYPE_ *x_ptr = x_base + begin_bloc;
115 const _TYPE_ *y_ptr = y_base + begin_bloc;
116 int count = end_bloc - begin_bloc;
117 for (; count; count--)
118 {
119 const _TYPE_ x = *x_ptr;
120 const _TYPE_ y = *(y_ptr++);
121 _TYPE_& p_resu = *(resu_ptr++);
122 p_resu += alpha * x * y;
123 x_ptr++;
124 }
125 }
126 // In debug mode, put invalid values where data has not been computed
127#ifndef NDEBUG
128 invalidate_data(resu, opt);
129#endif
130 return;
131#endif // LATATOOLS
132}
133
134// Explicit instanciation for templates:
135template void ajoute_produit_scalaire<double, int>(TRUSTVect<double, int>& resu, double alpha, const TRUSTVect<double, int>& vx, const TRUSTVect<double, int>& vy, Mp_vect_options opt);
136template void ajoute_produit_scalaire<float, int>(TRUSTVect<float, int>& resu, float alpha, const TRUSTVect<float, int>& vx, const TRUSTVect<float, int>& vy, Mp_vect_options opt);
137
138
139//Process bloc function used below in operation_speciale_tres_generic
140//It is templated as a function of the in/out view location and execution spaces (Device/Host)
141#ifndef LATATOOLS
142namespace
143{
144template<typename ExecSpace, typename _TYPE_, typename _SIZE_, bool IS_MUL>
145void operation_speciale_tres_generic_kernel(TRUSTVect<_TYPE_, _SIZE_>& resu, const TRUSTVect<_TYPE_, _SIZE_>& vx, int nblocs_left,
146 Block_Iter<_SIZE_>& bloc_itr, const int line_size_vx, const _SIZE_ vect_size_tot, const int delta_line_size)
147{
148 auto vx_view= vx.template view_ro<1, ExecSpace>().data();
149 auto resu_view= resu.template view_rw<1, ExecSpace>().data();
150#ifdef TRUST_USE_GPU
151 if (nblocs_left>3) ToDo_Kokkos("nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
152#endif
153 for (; nblocs_left; nblocs_left--)
154 {
155 // Get index of next bloc start:
156 const int begin_bloc = (*(bloc_itr++)) * line_size_vx;
157 const int end_bloc = (*(bloc_itr++)) * line_size_vx;
158
159 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
160
161 // Adjust pointers to indices
162 const int resu_start_idx = begin_bloc * delta_line_size;
163
164 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
165 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
166 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i)
167 {
168 const _TYPE_ x = vx_view[i];
169
170 //The // for could be also placed there
171 for (int j = 0; j < delta_line_size; ++j)
172 {
173 const int resu_idx = resu_start_idx + i * delta_line_size + j;
174 if (IS_MUL)
175 resu_view[resu_idx] *= x;
176 else //If it's not MUL, it's DIV
177 resu_view[resu_idx] *= ((_TYPE_)1 / x);
178 }
179 });
180 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
181 }
182}
183}
184#endif
185
186template<TYPE_OPERATION_VECT_SPEC_GENERIC _TYPE_OP_, typename _TYPE_, typename _SIZE_>
187void operation_speciale_tres_generic(TRUSTVect<_TYPE_, _SIZE_>& resu, const TRUSTVect<_TYPE_,_SIZE_>& vx, Mp_vect_options opt)
188{
189#ifndef LATATOOLS
190
191 // Check the nature of the operation
192 static constexpr bool IS_MUL = (_TYPE_OP_ == TYPE_OPERATION_VECT_SPEC_GENERIC::MUL_); //it's either MUL or DIV
193
194 // get info for computation
195 const int line_size = resu.line_size(), line_size_vx = vx.line_size(), vect_size_tot = resu.size_totale();
196 const MD_Vector& md = resu.get_md_vector();
197 // Le line_size du vecteur resu doit etre un multiple du line_size du vecteur vx
198 assert(line_size > 0 && line_size_vx > 0 && line_size % line_size_vx == 0);
199 const int delta_line_size = line_size / line_size_vx;
200 assert(vx.size_totale() * delta_line_size == vect_size_tot); // this test is necessary if md is null
201 assert(vx.get_md_vector() == md);
202
203 // Determine blocs of data to process, depending on " opt"
204 int nblocs_left;
205 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
206 // Shortcut for empty arrays (avoid case line_size == 0)
207 if (bloc_itr.empty())
208 return;
209
210 bool kernelOnDevice = resu.checkDataOnDevice(vx);
211
212 //Lauch computation with the execution space and view types as (template) parameters
213 if (kernelOnDevice)
214 operation_speciale_tres_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, IS_MUL>(resu, vx, nblocs_left, bloc_itr, line_size_vx, vect_size_tot, delta_line_size);
215 else
216 operation_speciale_tres_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, IS_MUL>(resu, vx, nblocs_left, bloc_itr, line_size_vx, vect_size_tot, delta_line_size);
217
218#ifndef NDEBUG
219 // In debug mode, put invalid values where data has not been computed
220 invalidate_data(resu, opt);
221#endif
222 return;
223#else
224 Cerr << "Error! operation_speciale_tres_generic can't be called in your project!" << finl;
226#endif
227}
228
229
230// Explicit instanciation for templates:
231template void operation_speciale_tres_generic<TYPE_OPERATION_VECT_SPEC_GENERIC::MUL_, double, int>(TRUSTVect<double, int>& resu, const TRUSTVect<double, int>& vx, Mp_vect_options opt);
232template void operation_speciale_tres_generic<TYPE_OPERATION_VECT_SPEC_GENERIC::MUL_, float, int>(TRUSTVect<float, int>& resu, const TRUSTVect<float, int>& vx, Mp_vect_options opt);
233template void operation_speciale_tres_generic<TYPE_OPERATION_VECT_SPEC_GENERIC::DIV_, double, int>(TRUSTVect<double, int>& resu, const TRUSTVect<double, int>& vx, Mp_vect_options opt);
234template void operation_speciale_tres_generic<TYPE_OPERATION_VECT_SPEC_GENERIC::DIV_, float, int>(TRUSTVect<float, int>& resu, const TRUSTVect<float, int>& vx, Mp_vect_options opt);
235
236#ifndef LATATOOLS
237namespace
238{
239template<typename ExecSpace, typename _TYPE_, typename _SIZE_, bool IS_ADD>
240void operation_speciale_generic_kernel(TRUSTVect<_TYPE_, _SIZE_>& resu, const TRUSTVect<_TYPE_, _SIZE_>& vx, _TYPE_ alpha, int nblocs_left,
241 Block_Iter<_SIZE_>& bloc_itr, const _SIZE_ vect_size_tot, const int line_size)
242{
243 auto vx_view= vx.template view_ro<1, ExecSpace>().data();
244 auto resu_view= resu.template view_rw<1, ExecSpace>().data();
245#ifdef TRUST_USE_GPU
246 if (nblocs_left>3) ToDo_Kokkos("nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
247#endif
248 for (; nblocs_left; nblocs_left--)
249 {
250 // Get index of next bloc start:
251 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
252 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
253
254 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
255
256 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
257 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
258 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const int i)
259 {
260 const _TYPE_ x = vx_view[i];
261
262 if (IS_ADD) //done at compile time
263 resu_view[i] += alpha * x;
264 else //If it's not ADD, it's SQUARE
265 resu_view[i] += alpha * x * x;
266 });
267 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
268 }
269}
270}
271#endif
272
273template <TYPE_OPERATION_VECT_SPEC _TYPE_OP_ ,typename _TYPE_, typename _SIZE_>
274void ajoute_operation_speciale_generic(TRUSTVect<_TYPE_,_SIZE_>& resu, _TYPE_ alpha,
275 const TRUSTVect<_TYPE_,_SIZE_>& vx, Mp_vect_options opt)
276{
277#ifndef LATATOOLS
278 static constexpr bool IS_ADD = (_TYPE_OP_ == TYPE_OPERATION_VECT_SPEC::ADD_);
279
280 const int line_size = resu.line_size();
281 const _SIZE_ vect_size_tot = resu.size_totale();
282 const MD_Vector& md = resu.get_md_vector();
283 assert(vx.line_size() == line_size);
284 assert(vx.size_totale() == vect_size_tot); // this test is necessary if md is null
285 // Determine blocs of data to process, depending on " opt"
286 int nblocs_left;
287 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
288 // Shortcut for empty arrays (avoid case line_size == 0)
289 if (bloc_itr.empty()) return;
290
291 bool kernelOnDevice = resu.checkDataOnDevice(vx);
292
293 if (kernelOnDevice)
294 operation_speciale_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, IS_ADD>(resu, vx, alpha, nblocs_left, bloc_itr, vect_size_tot, line_size);
295 else
296 operation_speciale_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, IS_ADD>(resu, vx, alpha, nblocs_left, bloc_itr, vect_size_tot, line_size);
297
298
299#ifndef NDEBUG
300 invalidate_data(resu, opt);
301#endif
302 return;
303#else
304 Cerr << "Error! ajoute_operation_speciale_generic can't be called in your project!" << finl;
306#endif
307}
308
309// Explicit instanciation for templates:
310template void ajoute_operation_speciale_generic<TYPE_OPERATION_VECT_SPEC::ADD_, double, int>(TRUSTVect<double, int>& resu, double alpha, const TRUSTVect<double, int>& vx, Mp_vect_options opt);
311template void ajoute_operation_speciale_generic<TYPE_OPERATION_VECT_SPEC::ADD_, float, int>(TRUSTVect<float, int>& resu, float alpha, const TRUSTVect<float, int>& vx, Mp_vect_options opt);
312template void ajoute_operation_speciale_generic<TYPE_OPERATION_VECT_SPEC::SQUARE_, double, int>(TRUSTVect<double, int>& resu, double alpha, const TRUSTVect<double, int>& vx, Mp_vect_options opt);
313template void ajoute_operation_speciale_generic<TYPE_OPERATION_VECT_SPEC::SQUARE_, float, int>(TRUSTVect<float, int>& resu, float alpha, const TRUSTVect<float, int>& vx, Mp_vect_options opt);
314
315#ifndef LATATOOLS
316namespace
317{
318template<typename ExecSpace, typename _TYPE_, typename _SIZE_, TYPE_OPERATOR_VECT _TYPE_OP_>
319void operator_vect_vect_generic_kernel(TRUSTVect<_TYPE_,_SIZE_>& resu, const TRUSTVect<_TYPE_, _SIZE_>& vx, int nblocs_left,
320 Block_Iter<_SIZE_>& bloc_itr, const _SIZE_ vect_size_tot, const int line_size)
321{
322 static constexpr bool IS_ADD = (_TYPE_OP_ == TYPE_OPERATOR_VECT::ADD_), IS_SUB = (_TYPE_OP_ == TYPE_OPERATOR_VECT::SUB_),
323 IS_MULT = (_TYPE_OP_ == TYPE_OPERATOR_VECT::MULT_), IS_DIV = (_TYPE_OP_ == TYPE_OPERATOR_VECT::DIV_),
324 IS_EGAL = (_TYPE_OP_ == TYPE_OPERATOR_VECT::EGAL_);
325
326#ifdef TRUST_USE_GPU
327 auto vx_view= vx.template view_ro<1, ExecSpace>().data();
328 auto resu_view= resu.template view_rw<1, ExecSpace>().data();
329#ifdef TRUST_USE_GPU
330 if (nblocs_left>3) ToDo_Kokkos("nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
331#endif
332 for (; nblocs_left; nblocs_left--)
333 {
334 // Get index of next bloc start:
335 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
336 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
337
338 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
339 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
340 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
341 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const _SIZE_ i)
342 {
343 const _TYPE_ x = vx_view[i];
344 if (IS_ADD) resu_view[i] += x;
345 if (IS_SUB) resu_view[i] -= x;
346 if (IS_MULT) resu_view[i] *= x;
347 if (IS_DIV) resu_view[i] /= x;
348 if (IS_EGAL) resu_view[i] = x;
349 });
350 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
351 }
352#else
353 // Need to keep C++ optimized (pointer) implementation for PolyMAC_CDO in Flica5
354 _TYPE_ *resu_base = resu.data();
355 const _TYPE_ *x_base = vx.data();
356 for (; nblocs_left; nblocs_left--)
357 {
358 // Get index of next bloc start:
359 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size, end_bloc = (*(bloc_itr++)) * line_size;
360 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
361 _TYPE_ *resu_ptr = resu_base + begin_bloc;
362 const _TYPE_ *x_ptr = x_base + begin_bloc;
363 for (_SIZE_ count = 0; count < end_bloc - begin_bloc ; count++)
364 {
365 const _TYPE_& x = x_ptr[count];
366 _TYPE_ &p_resu = resu_ptr[count];
367 if (IS_ADD) p_resu += x;
368 if (IS_SUB) p_resu -= x;
369 if (IS_MULT) p_resu *= x;
370 if (IS_EGAL) p_resu = x;
371 if (IS_DIV) p_resu /= x;
372 }
373 }
374#endif
375}
376}
377#endif
378
379
380template <typename _TYPE_, typename _SIZE_, TYPE_OPERATOR_VECT _TYPE_OP_>
381void operator_vect_vect_generic(TRUSTVect<_TYPE_,_SIZE_>& resu, const TRUSTVect<_TYPE_,_SIZE_>& vx, Mp_vect_options opt)
382{
383#ifndef LATATOOLS
384 const int line_size = resu.line_size();
385 const _SIZE_ vect_size_tot = resu.size_totale();
386 const MD_Vector& md = resu.get_md_vector();
387 assert(vx.line_size() == line_size);
388 assert(vx.size_totale() == vect_size_tot); // this test is necessary if md is null
389 // Determine blocs of data to process, depending on " opt"
390 int nblocs_left;
391 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
392 // Shortcut for empty arrays (avoid case line_size == 0)
393 if (bloc_itr.empty()) return;
394
395 bool kernelOnDevice = resu.checkDataOnDevice(vx);
396
397 if (kernelOnDevice)
398 operator_vect_vect_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(resu, vx, nblocs_left, bloc_itr, vect_size_tot, line_size);
399 else
400 operator_vect_vect_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(resu, vx, nblocs_left, bloc_itr, vect_size_tot, line_size);
401 // In debug mode, put invalid values where data has not been computed
402#ifndef NDEBUG
403 invalidate_data(resu, opt);
404#endif
405 return;
406#else
407 Cerr << "Error! operator_vect_vect_generic can't be called in your project!" << finl;
409#endif
410}
411// Explicit instanciation for templates:
412template void operator_vect_vect_generic<double, int, TYPE_OPERATOR_VECT::ADD_>(TRUSTVect<double, int>& resu, const TRUSTVect<double, int>& vx, Mp_vect_options opt);
413template void operator_vect_vect_generic<int, int, TYPE_OPERATOR_VECT::ADD_>(TRUSTVect<int, int>& resu, const TRUSTVect<int, int>& vx, Mp_vect_options opt);
414template void operator_vect_vect_generic<float, int, TYPE_OPERATOR_VECT::ADD_>(TRUSTVect<float, int>& resu, const TRUSTVect<float, int>& vx, Mp_vect_options opt);
415template void operator_vect_vect_generic<double, int, TYPE_OPERATOR_VECT::SUB_>(TRUSTVect<double, int>& resu, const TRUSTVect<double, int>& vx, Mp_vect_options opt);
416template void operator_vect_vect_generic<int, int, TYPE_OPERATOR_VECT::SUB_>(TRUSTVect<int, int>& resu, const TRUSTVect<int, int>& vx, Mp_vect_options opt);
417template void operator_vect_vect_generic<float, int, TYPE_OPERATOR_VECT::SUB_>(TRUSTVect<float, int>& resu, const TRUSTVect<float, int>& vx, Mp_vect_options opt);
418template void operator_vect_vect_generic<double, int, TYPE_OPERATOR_VECT::MULT_>(TRUSTVect<double, int>& resu, const TRUSTVect<double, int>& vx, Mp_vect_options opt);
419template void operator_vect_vect_generic<int, int, TYPE_OPERATOR_VECT::MULT_>(TRUSTVect<int, int>& resu, const TRUSTVect<int, int>& vx, Mp_vect_options opt);
420template void operator_vect_vect_generic<float, int, TYPE_OPERATOR_VECT::MULT_>(TRUSTVect<float, int>& resu, const TRUSTVect<float, int>& vx, Mp_vect_options opt);
421template void operator_vect_vect_generic<double, int, TYPE_OPERATOR_VECT::DIV_>(TRUSTVect<double, int>& resu, const TRUSTVect<double, int>& vx, Mp_vect_options opt);
422template void operator_vect_vect_generic<int, int, TYPE_OPERATOR_VECT::DIV_>(TRUSTVect<int, int>& resu, const TRUSTVect<int, int>& vx, Mp_vect_options opt);
423template void operator_vect_vect_generic<float, int, TYPE_OPERATOR_VECT::DIV_>(TRUSTVect<float, int>& resu, const TRUSTVect<float, int>& vx, Mp_vect_options opt);
424template void operator_vect_vect_generic<double, int, TYPE_OPERATOR_VECT::EGAL_>(TRUSTVect<double, int>& resu, const TRUSTVect<double, int>& vx, Mp_vect_options opt);
425template void operator_vect_vect_generic<int, int, TYPE_OPERATOR_VECT::EGAL_>(TRUSTVect<int, int>& resu, const TRUSTVect<int, int>& vx, Mp_vect_options opt);
426template void operator_vect_vect_generic<float, int, TYPE_OPERATOR_VECT::EGAL_>(TRUSTVect<float, int>& resu, const TRUSTVect<float, int>& vx, Mp_vect_options opt);
427
428#ifndef LATATOOLS
429namespace
430{
431template<typename ExecSpace, typename _TYPE_, typename _SIZE_, TYPE_OPERATOR_SINGLE _TYPE_OP_>
432void operator_vect_single_generic_kernel(TRUSTVect<_TYPE_,_SIZE_>& resu, const _TYPE_ x, int nblocs_left,
433 Block_Iter<_SIZE_>& bloc_itr, const _SIZE_ vect_size_tot, const int line_size)
434{
435 static constexpr bool IS_ADD = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::ADD_), IS_SUB = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::SUB_),
436 IS_MULT = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::MULT_), IS_DIV = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::DIV_), IS_EGAL = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::EGAL_),
437 IS_NEGATE = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::NEGATE_), IS_INV = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::INV_), IS_ABS = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::ABS_),
438 IS_SQRT = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::SQRT_), IS_SQUARE = (_TYPE_OP_ == TYPE_OPERATOR_SINGLE::SQUARE_);
439
440 auto resu_view= resu.template view_rw<1, ExecSpace>().data();
441#ifdef TRUST_USE_GPU
442 if (nblocs_left>3) ToDo_Kokkos("nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
443#endif
444 for (; nblocs_left; nblocs_left--)
445 {
446 // Get index of next bloc start:
447 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
448 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
449
450 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
451 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
452 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
453 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const _SIZE_ i)
454 {
455 if (IS_SUB) resu_view[i] -= x;
456 if (IS_ADD) resu_view[i] += x;
457 if (IS_MULT) resu_view[i] *= x;
458 if (IS_EGAL) resu_view[i] = x;
459 if (IS_NEGATE) resu_view[i] = -resu_view[i];
460 if (IS_ABS) resu_view[i] = (_TYPE_) Kokkos::abs(resu_view[i]);
461 if (IS_SQRT) resu_view[i] = (_TYPE_) Kokkos::sqrt(resu_view[i]);
462 if (IS_SQUARE) resu_view[i] = resu_view[i]*resu_view[i];
463 if (IS_DIV) resu_view[i] /= x;
464 if (IS_INV) resu_view[i] = (_TYPE_) ((_TYPE_)1 /resu_view[i]);
465 });
466 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
467 }
468}
469}
470#endif
471
472
473template <typename _TYPE_, typename _SIZE_, TYPE_OPERATOR_SINGLE _TYPE_OP_ >
474void operator_vect_single_generic(TRUSTVect<_TYPE_,_SIZE_>& resu, const _TYPE_ x, Mp_vect_options opt)
475{
476#ifndef LATATOOLS
477 const int line_size = resu.line_size();
478 const _SIZE_ vect_size_tot = resu.size_totale();
479 const MD_Vector& md = resu.get_md_vector();
480 // Determine blocs of data to process, depending on " opt"
481 int nblocs_left;
482 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
483 // Shortcut for empty arrays (avoid case line_size == 0)
484 if (bloc_itr.empty()) return;
485
486 bool kernelOnDevice = resu.checkDataOnDevice();
487
488 if (kernelOnDevice)
489 operator_vect_single_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(resu, x, nblocs_left, bloc_itr, vect_size_tot, line_size);
490 else
491 operator_vect_single_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(resu, x, nblocs_left, bloc_itr, vect_size_tot, line_size);
492
493 // In debug mode, put invalid values where data has not been computed
494#ifndef NDEBUG
495 invalidate_data(resu, opt);
496#endif
497 return;
498#else
499 Cerr << "Error! operator_vect_single_generic can't be called in your project!" << finl;
501#endif
502}
503// Explicit instanciation for templates:
504template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::ADD_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
505template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::ADD_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
506template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::ADD_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
507template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::SUB_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
508template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::SUB_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
509template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::SUB_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
510template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::MULT_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
511template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::MULT_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
512template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::MULT_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
513template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::DIV_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
514template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::DIV_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
515template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::DIV_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
516template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::EGAL_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
517template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::EGAL_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
518template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::EGAL_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
519template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::NEGATE_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
520template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::NEGATE_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
521template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::NEGATE_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
522template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::INV_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
523template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::INV_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
524template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::INV_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
525template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::ABS_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
526template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::ABS_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
527template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::ABS_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
528template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::SQRT_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
529template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::SQRT_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
530template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::SQRT_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
531template void operator_vect_single_generic<double, int, TYPE_OPERATOR_SINGLE::SQUARE_>(TRUSTVect<double, int>& resu, const double x, Mp_vect_options opt);
532template void operator_vect_single_generic<int, int, TYPE_OPERATOR_SINGLE::SQUARE_>(TRUSTVect<int, int>& resu, const int x, Mp_vect_options opt);
533template void operator_vect_single_generic<float, int, TYPE_OPERATOR_SINGLE::SQUARE_>(TRUSTVect<float, int>& resu, const float x, Mp_vect_options opt);
534
535#if INT_is_64_ == 2
536template void operator_vect_single_generic<trustIdType, trustIdType, TYPE_OPERATOR_SINGLE::ADD_>(TRUSTVect<trustIdType, trustIdType>& resu, const trustIdType x, Mp_vect_options opt);
537template void operator_vect_single_generic<int, trustIdType, TYPE_OPERATOR_SINGLE::ADD_>(TRUSTVect<int, trustIdType>& resu, const int x, Mp_vect_options opt);
538template void operator_vect_single_generic<float, trustIdType, TYPE_OPERATOR_SINGLE::ADD_>(TRUSTVect<float, trustIdType>& resu, const float x, Mp_vect_options opt);
539template void operator_vect_single_generic<double, trustIdType, TYPE_OPERATOR_SINGLE::ADD_>(TRUSTVect<double, trustIdType>& resu, const double x, Mp_vect_options opt);
540
541template void operator_vect_single_generic<trustIdType, trustIdType, TYPE_OPERATOR_SINGLE::SUB_>(TRUSTVect<trustIdType, trustIdType>& resu, const trustIdType x, Mp_vect_options opt);
542template void operator_vect_single_generic<int, trustIdType, TYPE_OPERATOR_SINGLE::SUB_>(TRUSTVect<int, trustIdType>& resu, const int x, Mp_vect_options opt);
543template void operator_vect_single_generic<float, trustIdType, TYPE_OPERATOR_SINGLE::SUB_>(TRUSTVect<float, trustIdType>& resu, const float x, Mp_vect_options opt);
544template void operator_vect_single_generic<double, trustIdType, TYPE_OPERATOR_SINGLE::SUB_>(TRUSTVect<double, trustIdType>& resu, const double x, Mp_vect_options opt);
545
546template void operator_vect_single_generic<double, trustIdType, TYPE_OPERATOR_SINGLE::MULT_>(TRUSTVect<double, trustIdType>& resu, const double x, Mp_vect_options opt);
547template void operator_vect_single_generic<float, trustIdType, TYPE_OPERATOR_SINGLE::MULT_>(TRUSTVect<float, trustIdType>& resu, const float x, Mp_vect_options opt);
548
549template void operator_vect_single_generic<double, trustIdType, TYPE_OPERATOR_SINGLE::DIV_>(TRUSTVect<double, trustIdType>& resu, const double x, Mp_vect_options opt);
550
551
552#endif
553
554#ifndef LATATOOLS
555namespace
556{
557template<typename ExecSpace, typename _TYPE_, typename _SIZE_,typename _TYPE_RETURN_, TYPE_OPERATION_VECT _TYPE_OP_>
558void local_extrema_vect_generic_kernel(const TRUSTVect<_TYPE_,_SIZE_>& vx, int nblocs_left, Block_Iter<_SIZE_>& bloc_itr,
559 const _SIZE_ vect_size_tot, const int line_size, _TYPE_& min_max_val, int& i_min_max)
560{
561 // Shortcut for empty arrays (avoid case line_size == 0)
562 if (bloc_itr.empty()) return ;
563
564 static constexpr bool IS_IMAX = (_TYPE_OP_ == TYPE_OPERATION_VECT::IMAX_), IS_IMIN = (_TYPE_OP_ == TYPE_OPERATION_VECT::IMIN_), IS_MAX = (_TYPE_OP_ == TYPE_OPERATION_VECT::MAX_),
565 IS_MIN = (_TYPE_OP_ == TYPE_OPERATION_VECT::MIN_), IS_MAX_ABS = (_TYPE_OP_ == TYPE_OPERATION_VECT::MAX_ABS_), IS_MIN_ABS = (_TYPE_OP_ == TYPE_OPERATION_VECT::MIN_ABS_);
566
567 //For clearer code, is it max, is it a min, is it done on absolute values ?
568 static constexpr bool IS_MAXS = (IS_MAX || IS_MAX_ABS || IS_IMAX);
569 static constexpr bool IS_MINS = (IS_MIN || IS_MIN_ABS || IS_IMIN);
570 static constexpr bool IS_ABS = (IS_MAX_ABS || IS_MIN_ABS);
571
572 // Define the reducer, based on the reduction type
573 using reducer = typename std::conditional<IS_MAXS, Kokkos::MaxLoc<_TYPE_, int>, Kokkos::MinLoc<_TYPE_, int>>::type;
574 // Define the type of what the reducer will return ( a value + a index)
575 using reducer_value_type = typename reducer::value_type;
576
577 if (not(IS_MAXS || IS_MINS)) {Process::exit("Wrong operation type in local_extrema_vect_generic_kernel");}
578
579 auto vx_view= vx.template view_ro<1, ExecSpace>().data();
580#ifdef TRUST_USE_GPU
581 if (nblocs_left>3) ToDo_Kokkos("nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
582#endif
583 for (; nblocs_left; nblocs_left--)
584 {
585 // Get index of next bloc start:
586 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
587 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
588
589 //Asserts
590 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
591
592 //Define Policy
593 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
594
595 // Define the object in which the reduction is saved
596 reducer_value_type bloc_min_max;
597
598 //Reduction
599 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
600 Kokkos::parallel_reduce(policy,
601 KOKKOS_LAMBDA(const int i, reducer_value_type& local_min_max)
602 {
603 const _TYPE_ val = (IS_ABS) ? Kokkos::abs(vx_view[i]) : vx_view[i];
604
605 if ( (IS_MAXS && val>local_min_max.val) || (IS_MINS && val<local_min_max.val) )
606 {
607 local_min_max.val=val;
608 local_min_max.loc=i; // not begin_bloc + i ? This seems to be what was done before, although this is weird to me (dont we want the global index ?)
609 }
610 }
611 ,reducer(bloc_min_max)); //Reduce in bloc_min_max
612 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
613
614 //Bloc-level reduction
615 if ( (IS_MAXS && bloc_min_max.val > min_max_val) || (IS_MINS && bloc_min_max.val < min_max_val) )
616 {
617 min_max_val=bloc_min_max.val;
618 i_min_max= bloc_min_max.loc;
619 }
620 }
621}
622}
623#endif
624
625template <typename _TYPE_, typename _SIZE_, typename _TYPE_RETURN_, TYPE_OPERATION_VECT _TYPE_OP_ >
626_TYPE_RETURN_ local_extrema_vect_generic(const TRUSTVect<_TYPE_,_SIZE_>& vx, Mp_vect_options opt)
627{
628#ifndef LATATOOLS
629
630 //Array info
631 const int line_size = vx.line_size();
632 const _SIZE_ vect_size_tot = vx.size_totale();
633 const MD_Vector& md = vx.get_md_vector();
634
635 //Asserts
636 assert(vx.line_size() == line_size);
637 assert(vx.size_totale() == vect_size_tot); // this test is necessary if md is null
638 assert(vx.get_md_vector() == md);
639
640 // Determine blocs of data to process, depending on " opt"
641 int nblocs_left;
642 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
643
644 //Initialize results
645 _TYPE_ min_max_val = neutral_value<_TYPE_,_TYPE_OP_>(); // _TYPE_ et pas _TYPE_RETURN_ desole ...
646 int i_min_max = -1 ; // seulement pour IMAX_ et IMIN_
647
648 //Localize data
649 bool kernelOnDevice = vx.checkDataOnDevice();
650
651 //Compute reduction
652 if (kernelOnDevice)
653 local_extrema_vect_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, _TYPE_RETURN_, _TYPE_OP_>(vx, nblocs_left, bloc_itr, vect_size_tot, line_size, min_max_val, i_min_max);
654 else
655 local_extrema_vect_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, _TYPE_RETURN_, _TYPE_OP_>(vx, nblocs_left, bloc_itr, vect_size_tot, line_size, min_max_val, i_min_max);
656
657 //Return index or value
658 static constexpr bool IS_IMAX = (_TYPE_OP_ == TYPE_OPERATION_VECT::IMAX_), IS_IMIN = (_TYPE_OP_ == TYPE_OPERATION_VECT::IMIN_);
659
660 return (IS_IMAX || IS_IMIN) ? (_TYPE_RETURN_)i_min_max : (_TYPE_RETURN_)min_max_val;
661
662#else
663 Cerr << "Error! local_extrema_vect_generic can't be called in your project!" << finl;
665 return (_TYPE_RETURN_)0; // For compil in latatools
666#endif
667}
668// Explicit instanciation for templates:
669template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::IMAX_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
670template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::IMIN_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
671template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::MAX_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
672template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::MIN_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
673template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::MAX_ABS_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
674template double local_extrema_vect_generic<double, int, double, TYPE_OPERATION_VECT::MIN_ABS_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
675template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::IMAX_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
676template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::IMIN_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
677template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::MAX_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
678template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::MIN_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
679template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::MAX_ABS_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
680template int local_extrema_vect_generic<double, int, int, TYPE_OPERATION_VECT::MIN_ABS_>(const TRUSTVect<double, int>& vx, Mp_vect_options opt);
681template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::IMAX_>(const TRUSTVect<int, int>& vx, Mp_vect_options opt);
682template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::IMIN_>(const TRUSTVect<int, int>& vx, Mp_vect_options opt);
683template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::MAX_>(const TRUSTVect<int, int>& vx, Mp_vect_options opt);
684template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::MIN_>(const TRUSTVect<int, int>& vx, Mp_vect_options opt);
685template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::MAX_ABS_>(const TRUSTVect<int, int>& vx, Mp_vect_options opt);
686template int local_extrema_vect_generic<int, int, int, TYPE_OPERATION_VECT::MIN_ABS_>(const TRUSTVect<int, int>& vx, Mp_vect_options opt);
687
688#if INT_is_64_ == 2
689template double local_extrema_vect_generic<double, trustIdType, double, TYPE_OPERATION_VECT::MAX_ABS_>(const TRUSTVect<double, trustIdType>& vx, Mp_vect_options opt);
690template int local_extrema_vect_generic<int, trustIdType, int, TYPE_OPERATION_VECT::MAX_>(const TRUSTVect<int, trustIdType>& vx, Mp_vect_options opt);
691template trustIdType local_extrema_vect_generic<trustIdType, trustIdType, trustIdType, TYPE_OPERATION_VECT::MAX_>(const TRUSTVect<trustIdType, trustIdType>& vx, Mp_vect_options opt);
692#endif
693
694#ifndef LATATOOLS
695namespace
696{
697template<typename ExecSpace, typename _TYPE_, typename _SIZE_, TYPE_OPERATION_VECT_BIS _TYPE_OP_>
698void local_operations_vect_bis_generic_kernel(const TRUSTVect<_TYPE_,_SIZE_>& vx, int nblocs_left,
699 Block_Iter<_SIZE_>& bloc_itr, const _SIZE_ vect_size_tot, const int line_size, _TYPE_& sum)
700{
701 static constexpr bool IS_SQUARE = (_TYPE_OP_ == TYPE_OPERATION_VECT_BIS::SQUARE_), IS_SUM = (_TYPE_OP_ == TYPE_OPERATION_VECT_BIS::SOMME_);
702 // Performance important point for TRUSTArray dynamic kernel to have serial mode performance:
703 // Use pointer access into Kokkos loop with [] and getting raw pointer to view with .data() !
704 auto vx_view = vx.template view_ro<1, ExecSpace>().data();
705 if (nblocs_left>3)
706 {
707 // We use flattened items_blocs cause possible huge number in parallel of nblocs_left/kernel launch (e.g. during moyenne(Ps))
708 auto items = bloc_itr.items_->template view_ro<1, ExecSpace>().data();
709 // Reduction
710 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
711 Kokkos::parallel_reduce(__KERNEL_NAME__,
712 Kokkos::RangePolicy<ExecSpace>(0, bloc_itr.items_->size_array()),
713 KOKKOS_LAMBDA(const int i, _TYPE_& local_sum)
714 {
715 _SIZE_ item = items[i] * line_size;
716 const _TYPE_ x = vx_view[item];
717 if (IS_SQUARE) local_sum += x * x;
718 if (IS_SUM) local_sum += x;
719 },sum);
720 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
721 }
722 else
723 {
724 for (; nblocs_left; nblocs_left--)
725 {
726 // Get index of next bloc start:
727 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
728 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
729 //Asserts
730 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
731 //Define Policy
732 Kokkos::RangePolicy <ExecSpace> policy(begin_bloc, end_bloc);
733 // Define the bloc sum
734 _TYPE_ bloc_sum = 0;
735 //Reduction
736 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
737 Kokkos::parallel_reduce(policy, KOKKOS_LAMBDA(
738 const _SIZE_ i, _TYPE_
739 &local_sum)
740 {
741 const _TYPE_ x = vx_view[i];
742 if (IS_SQUARE) local_sum += x * x;
743 if (IS_SUM) local_sum += x;
744 }
745 ,bloc_sum); //Reduce in bloc_sum
746 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
747
748 //Bloc-level reduction
749 sum += bloc_sum;
750 }
751 }
752}
753}
754#endif
755
756template <typename _TYPE_, typename _SIZE_, TYPE_OPERATION_VECT_BIS _TYPE_OP_ >
757_TYPE_ local_operations_vect_bis_generic(const TRUSTVect<_TYPE_,_SIZE_>& vx,Mp_vect_options opt)
758{
759#ifndef LATATOOLS
760 _TYPE_ sum = 0;
761 // Master vect donne la structure de reference, les autres vecteurs doivent avoir la meme structure.
762 const TRUSTVect<_TYPE_,_SIZE_>& master_vect = vx;
763 const int line_size = master_vect.line_size();
764 const _SIZE_ vect_size_tot = master_vect.size_totale();
765 const MD_Vector& md = master_vect.get_md_vector();
766 assert(vx.line_size() == line_size);
767 assert(vx.size_totale() == vect_size_tot); // this test is necessary if md is null
768 assert(vx.get_md_vector() == md);
769 // Determine blocs of data to process, depending on " opt"
770 int nblocs_left;
771 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
772 // Shortcut for empty arrays (avoid case line_size == 0)
773 if (bloc_itr.empty()) return sum;
774
775 bool kernelOnDevice = vx.checkDataOnDevice();
776
777 if (kernelOnDevice)
778 local_operations_vect_bis_generic_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(vx, nblocs_left, bloc_itr, vect_size_tot, line_size, sum);
779 else
780 local_operations_vect_bis_generic_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_, _TYPE_OP_>(vx, nblocs_left, bloc_itr, vect_size_tot, line_size, sum);
781
782 return sum;
783#else
784 Cerr << "Error! local_operations_vect_bis_generic can't be called in your project!" << finl;
786 return (_TYPE_)0; // For compil in latatools
787#endif
788}
789// Explicit instanciation for templates:
790template double local_operations_vect_bis_generic<double, int, TYPE_OPERATION_VECT_BIS::SQUARE_>(const TRUSTVect<double, int>& vx,Mp_vect_options opt);
791template int local_operations_vect_bis_generic<int, int, TYPE_OPERATION_VECT_BIS::SQUARE_>(const TRUSTVect<int, int>& vx,Mp_vect_options opt);
792template float local_operations_vect_bis_generic<float, int, TYPE_OPERATION_VECT_BIS::SQUARE_>(const TRUSTVect<float, int>& vx,Mp_vect_options opt);
793template double local_operations_vect_bis_generic<double, int, TYPE_OPERATION_VECT_BIS::SOMME_>(const TRUSTVect<double, int>& vx,Mp_vect_options opt);
794template int local_operations_vect_bis_generic<int, int, TYPE_OPERATION_VECT_BIS::SOMME_>(const TRUSTVect<int, int>& vx,Mp_vect_options opt);
795template float local_operations_vect_bis_generic<float, int, TYPE_OPERATION_VECT_BIS::SOMME_>(const TRUSTVect<float, int>& vx,Mp_vect_options opt);
796
797#if INT_is_64_ == 2
798template double local_operations_vect_bis_generic<double, trustIdType, TYPE_OPERATION_VECT_BIS::SOMME_>(const TRUSTVect<double, trustIdType>& vx,Mp_vect_options opt);
799#endif
800
801// ==================================================================================================================================
802// BEGIN code for debug
803#ifndef NDEBUG
804// INVALID_SCALAR is used to fill arrays when values are not computed (virtual space might not be computed by operators).
805// The value below probably triggers errors on parallel test cases but does not prevent from doing "useless" computations with it.
806#ifndef LATATOOLS
807namespace
808{
809template<typename ExecSpace, typename _TYPE_, typename _SIZE_>
810void invalidate_data_kernel(TRUSTVect<_TYPE_,_SIZE_>& resu,
811 const ArrOfInt& items_blocs, const int line_size, const int blocs_size)
812{
813 _TYPE_ invalid = (_TYPE_)-987654321;
814 auto resu_view= resu.template view_rw<1, ExecSpace>().data();
815
816 int i = 0;
817 for (int blocs_idx = 0; blocs_idx < blocs_size; blocs_idx += 2) // process data until beginning of next bloc, or end of array
818 {
819 const int bloc_end = line_size * items_blocs[blocs_idx];
820 //Define Policy
821 Kokkos::RangePolicy<ExecSpace> policy(i, bloc_end);
822 //Loop
823 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
824 Kokkos::parallel_for(policy,KOKKOS_LAMBDA(const int count)
825 {
826 resu_view[count]=invalid;
827 });
828 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
829 i = items_blocs[blocs_idx+1] * line_size;
830 }
831 const _SIZE_ bloc_end = resu.size_array(); // Process until end of vector
832 //Define Policy
833 Kokkos::RangePolicy<ExecSpace> policy(i, bloc_end);
834 //Loop
835 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
836 Kokkos::parallel_for(policy,KOKKOS_LAMBDA(const int count)
837 {
838 resu_view[count]=invalid;
839 });
840 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
841}
842}
843#endif
844
845template <typename _TYPE_, typename _SIZE_>
846void invalidate_data(TRUSTVect<_TYPE_,_SIZE_>& resu, Mp_vect_options opt)
847{
848#ifndef LATATOOLS
849 if (Process::is_sequential()) return; // no invalid values in sequential
850
851 const MD_Vector& md = resu.get_md_vector();
852 const int line_size = resu.line_size();
853 if (opt == VECT_ALL_ITEMS || (!md)) return; // no invalid values
854 assert(opt == VECT_SEQUENTIAL_ITEMS || opt == VECT_REAL_ITEMS);
855 const ArrOfInt& items_blocs = (opt == VECT_SEQUENTIAL_ITEMS) ? md->get_blocs_items_to_sum() : md->get_blocs_items_to_compute();
856 const int blocs_size = items_blocs.size_array();
857
858 bool kernelOnDevice = resu.checkDataOnDevice();
859
860 if (kernelOnDevice)
861 invalidate_data_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_>(resu, items_blocs, line_size, blocs_size);
862 else
863 invalidate_data_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_>(resu, items_blocs, line_size, blocs_size);
864#else
865 Cerr << "Error! invalidate_data can't be called in your project!" << finl;
867#endif
868}
869//END code for debug
870// ==================================================================================================================================
871// Explicit instanciation for templates:
872template void invalidate_data<double>(TRUSTVect<double, int>& resu, Mp_vect_options opt);
873template void invalidate_data<float>(TRUSTVect<float, int>& resu, Mp_vect_options opt);
874template void invalidate_data<int>(TRUSTVect<int, int>& resu, Mp_vect_options opt);
875#endif /* NDEBUG */
876
877#ifndef LATATOOLS
878namespace
879{
880template<typename ExecSpace, typename _TYPE_, typename _SIZE_>
881void local_prodscal_kernel(const TRUSTVect<_TYPE_,_SIZE_>& vx, const TRUSTVect<_TYPE_,_SIZE_>& vy, int nblocs_left,
882 Block_Iter<_SIZE_>& bloc_itr, const int vect_size_tot, const int line_size, _TYPE_& sum)
883{
884 auto vx_view= vx.template view_ro<1, ExecSpace>().data();
885 auto vy_view= vy.template view_ro<1, ExecSpace>().data();
886#ifdef TRUST_USE_GPU
887 if (nblocs_left>3) ToDo_Kokkos("nblocs_left too high, optimize by rewriting as local_operations_vect_bis_generic_kernel");
888#endif
889 for (; nblocs_left; nblocs_left--)
890 {
891 // Get index of next bloc start:
892 const _SIZE_ begin_bloc = (*(bloc_itr++)) * line_size;
893 const _SIZE_ end_bloc = (*(bloc_itr++)) * line_size;
894
895 //Asserts
896 assert(begin_bloc >= 0 && end_bloc <= vect_size_tot && end_bloc >= begin_bloc);
897
898 //Define Policy
899 Kokkos::RangePolicy<ExecSpace> policy(begin_bloc, end_bloc);
900
901 // Define the bloc sum
902 _TYPE_ bloc_sum=0;
903
904 //Reduction
905 if (statistics().get_use_gpu()) start_gpu_timer(__KERNEL_NAME__);
906 Kokkos::parallel_reduce(policy, KOKKOS_LAMBDA(const _SIZE_ i, _TYPE_& local_sum)
907 {
908 local_sum += vx_view[i]*vy_view[i];
909 }
910 , Kokkos::Sum<_TYPE_>(bloc_sum)); //Reduce in bloc_sum
911
912 //timer
913 if (statistics().get_use_gpu()) end_gpu_timer(__KERNEL_NAME__, is_default_exec_space<ExecSpace>);
914
915 //Bloc-level reduction
916 sum += bloc_sum;
917 }
918}
919}
920#endif
921
922template<typename _TYPE_, typename _SIZE_>
923_TYPE_ local_prodscal(const TRUSTVect<_TYPE_,_SIZE_>& vx, const TRUSTVect<_TYPE_,_SIZE_>& vy, Mp_vect_options opt)
924{
925#ifndef LATATOOLS
926 _TYPE_ sum = 0;
927
928 const int line_size = vx.line_size();
929 const _SIZE_ vect_size_tot = vx.size_totale();
930 const MD_Vector& md = vx.get_md_vector();
931
932 assert(vx.line_size() == line_size && vy.line_size() == line_size);
933 assert(vx.size_totale() == vect_size_tot && vy.size_totale() == vect_size_tot); // this test is necessary if md is null
934 assert(vx.get_md_vector() == md && vy.get_md_vector() == md);
935 // Determine blocs of data to process, depending on " opt"
936 int nblocs_left;
937 Block_Iter<_SIZE_> bloc_itr = ::determine_blocks(opt, md, vect_size_tot, line_size, nblocs_left);
938 // Shortcut for empty arrays (avoid case line_size == 0)
939 if (bloc_itr.empty()) return sum;
940
941 bool kernelOnDevice = const_cast<TRUSTVect<_TYPE_,_SIZE_>&>(vx).checkDataOnDevice(vy);
942
943 if (kernelOnDevice)
944 local_prodscal_kernel<Kokkos::DefaultExecutionSpace, _TYPE_, _SIZE_>(vx, vy, nblocs_left, bloc_itr, vect_size_tot,line_size, sum);
945 else
946 local_prodscal_kernel<Kokkos::DefaultHostExecutionSpace, _TYPE_, _SIZE_>(vx, vy, nblocs_left, bloc_itr, vect_size_tot,line_size, sum);
947
948 return sum;
949
950#else
951 Cerr << "Error! local_prodscal can't be called in your project!" << finl;
953 return (_TYPE_)0; // For compil in latatools
954#endif
955}
956// Explicit instanciation for templates:
957template double local_prodscal(const TRUSTVect<double, int>& vx, const TRUSTVect<double, int>& vy, Mp_vect_options opt);
958template float local_prodscal(const TRUSTVect<float, int>& vx, const TRUSTVect<float, int>& vy, Mp_vect_options opt);
virtual const ArrOfInt & get_blocs_items_to_sum() const =0
virtual const ArrOfInt & get_items_to_sum() const =0
virtual bool use_blocks() const =0
virtual const ArrOfInt & get_items_to_compute() const =0
virtual const ArrOfInt & get_blocs_items_to_compute() const =0
: Cette classe est un OWN_PTR mais l'objet pointe est partage entre plusieurs
Definition MD_Vector.h:48
static void exit(int exit_code=-1)
Routine de sortie de TRUST dans une region Kokkos.
Definition Process.cpp:455
static bool is_sequential()
Definition Process.cpp:115
_SIZE_ size_array() const
_TYPE_ * addr()
_TYPE_ * data()
_SIZE_ size_totale() const
Definition TRUSTVect.tpp:61
int line_size() const
Definition TRUSTVect.tpp:67
virtual const MD_Vector & get_md_vector() const
Definition TRUSTVect.h:123
bool empty() const