21 #ifndef ROCPRIM_BLOCK_BLOCK_REDUCE_HPP_ 22 #define ROCPRIM_BLOCK_BLOCK_REDUCE_HPP_ 24 #include <type_traits> 26 #include "../config.hpp" 27 #include "../detail/various.hpp" 29 #include "../intrinsics.hpp" 30 #include "../functional.hpp" 32 #include "detail/block_reduce_warp_reduce.hpp" 33 #include "detail/block_reduce_raking_reduce.hpp" 39 BEGIN_ROCPRIM_NAMESPACE
59 template<block_reduce_algorithm Algorithm>
65 template<
class T,
unsigned int BlockSizeX,
unsigned int BlockSizeY,
unsigned int BlockSizeZ>
72 template<
class T,
unsigned int BlockSizeX,
unsigned int BlockSizeY,
unsigned int BlockSizeZ>
79 template<
class T,
unsigned int BlockSizeX,
unsigned int BlockSizeY,
unsigned int BlockSizeZ>
134 unsigned int BlockSizeX,
136 unsigned int BlockSizeY = 1,
137 unsigned int BlockSizeZ = 1
140 #ifndef DOXYGEN_SHOULD_SKIP_THIS
202 template<
class BinaryFunction = ::rocprim::plus<T>>
203 ROCPRIM_DEVICE ROCPRIM_INLINE
207 BinaryFunction reduce_op = BinaryFunction())
227 template<
class BinaryFunction = ::rocprim::plus<T>>
228 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
231 BinaryFunction reduce_op = BinaryFunction())
284 unsigned int ItemsPerThread,
285 class BinaryFunction = ::rocprim::plus<T>
287 ROCPRIM_DEVICE ROCPRIM_INLINE
291 BinaryFunction reduce_op = BinaryFunction())
313 unsigned int ItemsPerThread,
314 class BinaryFunction = ::rocprim::plus<T>
316 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
319 BinaryFunction reduce_op = BinaryFunction())
370 template<
class BinaryFunction = ::rocprim::plus<T>>
371 ROCPRIM_DEVICE ROCPRIM_INLINE
374 unsigned int valid_items,
376 BinaryFunction reduce_op = BinaryFunction())
398 template<
class BinaryFunction = ::rocprim::plus<T>>
399 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
402 unsigned int valid_items,
403 BinaryFunction reduce_op = BinaryFunction())
409 END_ROCPRIM_NAMESPACE
414 #endif // ROCPRIM_BLOCK_BLOCK_REDUCE_HPP_
Definition: block_reduce_raking_reduce.hpp:118
ROCPRIM_DEVICE ROCPRIM_INLINE void reduce(T input, T &output, storage_type &storage, BinaryFunction reduce_op=BinaryFunction())
Performs reduction across threads in a block.
Definition: block_reduce.hpp:204
Default block_reduce algorithm.
A warp_reduce based algorithm.
Definition: block_reduce_warp_reduce.hpp:45
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void reduce(T input, T &output, BinaryFunction reduce_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_reduce.hpp:229
block_reduce_algorithm
Available algorithms for block_reduce primitive.
Definition: block_reduce.hpp:42
hipError_t reduce(void *temporary_storage, size_t &storage_size, InputIterator input, OutputIterator output, const InitValueType initial_value, const size_t size, BinaryFunction reduce_op=BinaryFunction(), const hipStream_t stream=0, bool debug_synchronous=false)
Parallel reduction primitive for device level.
Definition: device_reduce.hpp:374
An algorithm which limits calculations to a single hardware warp.
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void reduce(T(&input)[ItemsPerThread], T &output, BinaryFunction reduce_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_reduce.hpp:317
ROCPRIM_DEVICE ROCPRIM_INLINE void reduce(T(&input)[ItemsPerThread], T &output, storage_type &storage, BinaryFunction reduce_op=BinaryFunction())
Performs reduction across threads in a block.
Definition: block_reduce.hpp:288
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
The block_reduce class is a block level parallel primitive which provides methods for performing reduction operations on items partitioned across threads in a block.
Definition: block_reduce.hpp:139
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void reduce(T input, T &output, unsigned int valid_items, BinaryFunction reduce_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_reduce.hpp:400
raking reduce that supports only commutative operators
Definition: block_reduce.hpp:60
Default block_histogram algorithm.
typename base_type::storage_type storage_type
Struct used to allocate a temporary memory that is required for thread communication during operations provided by the related parallel primitive.
Definition: block_reduce.hpp:154
ROCPRIM_DEVICE ROCPRIM_INLINE void reduce(T input, T &output, unsigned int valid_items, storage_type &storage, BinaryFunction reduce_op=BinaryFunction())
Performs reduction across threads in a block.
Definition: block_reduce.hpp:372