21 #ifndef ROCPRIM_BLOCK_BLOCK_SCAN_HPP_ 22 #define ROCPRIM_BLOCK_BLOCK_SCAN_HPP_ 24 #include <type_traits> 26 #include "../config.hpp" 27 #include "../detail/various.hpp" 29 #include "../intrinsics.hpp" 30 #include "../functional.hpp" 32 #include "detail/block_scan_warp_scan.hpp" 33 #include "detail/block_scan_reduce_then_scan.hpp" 38 BEGIN_ROCPRIM_NAMESPACE
56 template<block_scan_algorithm Algorithm>
62 template<
class T,
unsigned int BlockSizeX,
unsigned int BlockSizeY,
unsigned int BlockSizeZ>
69 template<
class T,
unsigned int BlockSizeX,
unsigned int BlockSizeY,
unsigned int BlockSizeZ>
72 using type =
typename std::conditional<
129 unsigned int BlockSizeX,
131 unsigned int BlockSizeY = 1,
132 unsigned int BlockSizeZ = 1
135 #ifndef DOXYGEN_SHOULD_SKIP_THIS
197 template<
class BinaryFunction = ::rocprim::plus<T>>
198 ROCPRIM_DEVICE ROCPRIM_INLINE
202 BinaryFunction scan_op = BinaryFunction())
222 template<
class BinaryFunction = ::rocprim::plus<T>>
223 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
226 BinaryFunction scan_op = BinaryFunction())
281 template<
class BinaryFunction = ::rocprim::plus<T>>
282 ROCPRIM_DEVICE ROCPRIM_INLINE
287 BinaryFunction scan_op = BinaryFunction())
308 template<
class BinaryFunction = ::rocprim::plus<T>>
309 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
313 BinaryFunction scan_op = BinaryFunction())
395 class PrefixCallback,
396 class BinaryFunction = ::rocprim::plus<T>
398 ROCPRIM_DEVICE ROCPRIM_INLINE
402 PrefixCallback& prefix_callback_op,
403 BinaryFunction scan_op)
456 unsigned int ItemsPerThread,
457 class BinaryFunction = ::rocprim::plus<T>
459 ROCPRIM_DEVICE ROCPRIM_INLINE
461 T (&output)[ItemsPerThread],
463 BinaryFunction scan_op = BinaryFunction())
465 if(ItemsPerThread == 1)
492 unsigned int ItemsPerThread,
493 class BinaryFunction = ::rocprim::plus<T>
495 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
497 T (&output)[ItemsPerThread],
498 BinaryFunction scan_op = BinaryFunction())
500 if(ItemsPerThread == 1)
561 unsigned int ItemsPerThread,
562 class BinaryFunction = ::rocprim::plus<T>
564 ROCPRIM_DEVICE ROCPRIM_INLINE
566 T (&output)[ItemsPerThread],
569 BinaryFunction scan_op = BinaryFunction())
571 if(ItemsPerThread == 1)
599 unsigned int ItemsPerThread,
600 class BinaryFunction = ::rocprim::plus<T>
602 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
604 T (&output)[ItemsPerThread],
606 BinaryFunction scan_op = BinaryFunction())
608 if(ItemsPerThread == 1)
696 unsigned int ItemsPerThread,
697 class PrefixCallback,
700 ROCPRIM_DEVICE ROCPRIM_INLINE
702 T (&output)[ItemsPerThread],
704 PrefixCallback& prefix_callback_op,
705 BinaryFunction scan_op)
707 if(ItemsPerThread == 1)
767 template<
class BinaryFunction = ::rocprim::plus<T>>
768 ROCPRIM_DEVICE ROCPRIM_INLINE
773 BinaryFunction scan_op = BinaryFunction())
795 template<
class BinaryFunction = ::rocprim::plus<T>>
796 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
800 BinaryFunction scan_op = BinaryFunction())
859 template<
class BinaryFunction = ::rocprim::plus<T>>
860 ROCPRIM_DEVICE ROCPRIM_INLINE
866 BinaryFunction scan_op = BinaryFunction())
889 template<
class BinaryFunction = ::rocprim::plus<T>>
890 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
895 BinaryFunction scan_op = BinaryFunction())
977 class PrefixCallback,
978 class BinaryFunction = ::rocprim::plus<T>
980 ROCPRIM_DEVICE ROCPRIM_INLINE
984 PrefixCallback& prefix_callback_op,
985 BinaryFunction scan_op)
1042 unsigned int ItemsPerThread,
1043 class BinaryFunction = ::rocprim::plus<T>
1045 ROCPRIM_DEVICE ROCPRIM_INLINE
1047 T (&output)[ItemsPerThread],
1050 BinaryFunction scan_op = BinaryFunction())
1052 if(ItemsPerThread == 1)
1081 unsigned int ItemsPerThread,
1082 class BinaryFunction = ::rocprim::plus<T>
1084 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
1086 T (&output)[ItemsPerThread],
1088 BinaryFunction scan_op = BinaryFunction())
1090 if(ItemsPerThread == 1)
1156 unsigned int ItemsPerThread,
1157 class BinaryFunction = ::rocprim::plus<T>
1159 ROCPRIM_DEVICE ROCPRIM_INLINE
1161 T (&output)[ItemsPerThread],
1165 BinaryFunction scan_op = BinaryFunction())
1167 if(ItemsPerThread == 1)
1197 unsigned int ItemsPerThread,
1198 class BinaryFunction = ::rocprim::plus<T>
1200 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
1202 T (&output)[ItemsPerThread],
1205 BinaryFunction scan_op = BinaryFunction())
1207 if(ItemsPerThread == 1)
1295 unsigned int ItemsPerThread,
1296 class PrefixCallback,
1297 class BinaryFunction
1299 ROCPRIM_DEVICE ROCPRIM_INLINE
1301 T (&output)[ItemsPerThread],
1303 PrefixCallback& prefix_callback_op,
1304 BinaryFunction scan_op)
1306 if(ItemsPerThread == 1)
1317 END_ROCPRIM_NAMESPACE
1322 #endif // ROCPRIM_BLOCK_BLOCK_SCAN_HPP_
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void inclusive_scan(T input, T &output, T &reduction, BinaryFunction scan_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_scan.hpp:310
An algorithm which limits calculations to a single hardware warp.
typename base_type::storage_type storage_type
Struct used to allocate a temporary memory that is required for thread communication during operations provided by the related parallel primitive.
Definition: block_scan.hpp:149
Default block_scan algorithm.
ROCPRIM_DEVICE ROCPRIM_INLINE void inclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], T &reduction, storage_type &storage, BinaryFunction scan_op=BinaryFunction())
Performs inclusive scan and reduction across threads in a block.
Definition: block_scan.hpp:565
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void exclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], T init, T &reduction, BinaryFunction scan_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_scan.hpp:1201
ROCPRIM_DEVICE ROCPRIM_INLINE void exclusive_scan(T input, T &output, T init, storage_type &storage, BinaryFunction scan_op=BinaryFunction())
Performs exclusive scan across threads in a block.
Definition: block_scan.hpp:769
Definition: block_scan_warp_scan.hpp:45
The block_scan class is a block level parallel primitive which provides methods for performing inclusive and exclusive scan operations of items partitioned across threads in a block.
Definition: block_scan.hpp:134
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void inclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], T &reduction, BinaryFunction scan_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_scan.hpp:603
ROCPRIM_DEVICE ROCPRIM_INLINE constexpr unsigned int device_warp_size()
Returns a number of threads in a hardware warp for the actual target.
Definition: thread.hpp:70
Definition: block_scan_reduce_then_scan.hpp:45
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void exclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], T init, BinaryFunction scan_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_scan.hpp:1085
Definition: block_scan.hpp:57
A warp_scan based algorithm.
hipError_t exclusive_scan(void *temporary_storage, size_t &storage_size, InputIterator input, OutputIterator output, const InitValueType initial_value, const size_t size, BinaryFunction scan_op=BinaryFunction(), const hipStream_t stream=0, bool debug_synchronous=false)
Parallel exclusive scan primitive for device level.
Definition: device_scan.hpp:651
ROCPRIM_DEVICE ROCPRIM_INLINE void exclusive_scan(T input, T &output, storage_type &storage, PrefixCallback &prefix_callback_op, BinaryFunction scan_op)
Performs exclusive scan across threads in a block, and uses prefix_callback_op to generate prefix value for the whole block.
Definition: block_scan.hpp:981
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void exclusive_scan(T input, T &output, T init, BinaryFunction scan_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_scan.hpp:797
ROCPRIM_DEVICE ROCPRIM_INLINE void exclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], T init, storage_type &storage, BinaryFunction scan_op=BinaryFunction())
Performs exclusive scan across threads in a block.
Definition: block_scan.hpp:1046
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void inclusive_scan(T input, T &output, BinaryFunction scan_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_scan.hpp:224
ROCPRIM_DEVICE ROCPRIM_INLINE void inclusive_scan(T input, T &output, storage_type &storage, BinaryFunction scan_op=BinaryFunction())
Performs inclusive scan across threads in a block.
Definition: block_scan.hpp:199
ROCPRIM_DEVICE ROCPRIM_INLINE void inclusive_scan(T input, T &output, T &reduction, storage_type &storage, BinaryFunction scan_op=BinaryFunction())
Performs inclusive scan and reduction across threads in a block.
Definition: block_scan.hpp:283
hipError_t inclusive_scan(void *temporary_storage, size_t &storage_size, InputIterator input, OutputIterator output, const size_t size, BinaryFunction scan_op=BinaryFunction(), const hipStream_t stream=0, bool debug_synchronous=false)
Parallel inclusive scan primitive for device level.
Definition: device_scan.hpp:539
ROCPRIM_DEVICE ROCPRIM_INLINE void inclusive_scan(T input, T &output, storage_type &storage, PrefixCallback &prefix_callback_op, BinaryFunction scan_op)
Performs inclusive scan across threads in a block, and uses prefix_callback_op to generate prefix value for the whole block.
Definition: block_scan.hpp:399
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void exclusive_scan(T input, T &output, T init, T &reduction, BinaryFunction scan_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_scan.hpp:891
ROCPRIM_DEVICE ROCPRIM_INLINE void exclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], storage_type &storage, PrefixCallback &prefix_callback_op, BinaryFunction scan_op)
Performs exclusive scan across threads in a block, and uses prefix_callback_op to generate prefix value for the whole block.
Definition: block_scan.hpp:1300
ROCPRIM_DEVICE ROCPRIM_INLINE void exclusive_scan(T input, T &output, T init, T &reduction, storage_type &storage, BinaryFunction scan_op=BinaryFunction())
Performs exclusive scan and reduction across threads in a block.
Definition: block_scan.hpp:861
ROCPRIM_DEVICE ROCPRIM_INLINE void exclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], T init, T &reduction, storage_type &storage, BinaryFunction scan_op=BinaryFunction())
Performs exclusive scan and reduction across threads in a block.
Definition: block_scan.hpp:1160
ROCPRIM_DEVICE ROCPRIM_INLINE void inclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], storage_type &storage, BinaryFunction scan_op=BinaryFunction())
Performs inclusive scan across threads in a block.
Definition: block_scan.hpp:460
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void inclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], BinaryFunction scan_op=BinaryFunction())
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_scan.hpp:496
Default block_histogram algorithm.
block_scan_algorithm
Available algorithms for block_scan primitive.
Definition: block_scan.hpp:41
ROCPRIM_DEVICE ROCPRIM_INLINE void inclusive_scan(T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], storage_type &storage, PrefixCallback &prefix_callback_op, BinaryFunction scan_op)
Performs inclusive scan across threads in a block, and uses prefix_callback_op to generate prefix value for the whole block.
Definition: block_scan.hpp:701