21 #ifndef ROCPRIM_WARP_DETAIL_WARP_REDUCE_DPP_HPP_ 22 #define ROCPRIM_WARP_DETAIL_WARP_REDUCE_DPP_HPP_ 24 #include <type_traits> 26 #include "../../config.hpp" 27 #include "../../intrinsics.hpp" 28 #include "../../types.hpp" 29 #include "../../detail/various.hpp" 31 #include "warp_reduce_shuffle.hpp" 33 BEGIN_ROCPRIM_NAMESPACE
40 unsigned int WarpSize,
46 static_assert(detail::is_power_of_two(WarpSize),
"WarpSize must be power of 2");
50 template<
class BinaryFunction>
51 ROCPRIM_DEVICE ROCPRIM_INLINE
52 void reduce(T input, T& output, BinaryFunction reduce_op)
59 output = reduce_op(warp_move_dpp<T, 0xb1>(output), output);
64 output = reduce_op(warp_move_dpp<T, 0x4e>(output), output);
69 output = reduce_op(warp_move_dpp<T, 0x114>(output), output);
74 output = reduce_op(warp_move_dpp<T, 0x118>(output), output);
80 output = reduce_op(warp_swizzle<T, 0x1e0>(output), output);
86 output = reduce_op(warp_move_dpp<T, 0x142>(output), output);
91 output = reduce_op(warp_move_dpp<T, 0x143>(output), output);
98 template<
class BinaryFunction>
99 ROCPRIM_DEVICE ROCPRIM_INLINE
103 this->
reduce(input, output, reduce_op);
106 template<
class BinaryFunction>
107 ROCPRIM_DEVICE ROCPRIM_INLINE
108 void reduce(T input, T& output,
unsigned int valid_items, BinaryFunction reduce_op)
112 .
reduce(input, output, valid_items, reduce_op);
115 template<
class BinaryFunction>
116 ROCPRIM_DEVICE ROCPRIM_INLINE
117 void reduce(T input, T& output,
unsigned int valid_items,
121 this->
reduce(input, output, valid_items, reduce_op);
124 template<
class Flag,
class BinaryFunction>
125 ROCPRIM_DEVICE ROCPRIM_INLINE
126 void head_segmented_reduce(T input, T& output, Flag flag, BinaryFunction reduce_op)
130 .head_segmented_reduce(input, output, flag, reduce_op);
133 template<
class Flag,
class BinaryFunction>
134 ROCPRIM_DEVICE ROCPRIM_INLINE
135 void tail_segmented_reduce(T input, T& output, Flag flag, BinaryFunction reduce_op)
139 .tail_segmented_reduce(input, output, flag, reduce_op);
142 template<
class Flag,
class BinaryFunction>
143 ROCPRIM_DEVICE ROCPRIM_INLINE
144 void head_segmented_reduce(T input, T& output, Flag flag,
149 .head_segmented_reduce(input, output, flag, storage, reduce_op);
152 template<
class Flag,
class BinaryFunction>
153 ROCPRIM_DEVICE ROCPRIM_INLINE
154 void tail_segmented_reduce(T input, T& output, Flag flag,
159 .tail_segmented_reduce(input, output, flag, storage, reduce_op);
165 END_ROCPRIM_NAMESPACE
167 #endif // ROCPRIM_WARP_DETAIL_WARP_REDUCE_DPP_HPP_ Definition: warp_reduce_dpp.hpp:43
ROCPRIM_DEVICE ROCPRIM_INLINE T warp_shuffle(const T &input, const int src_lane, const int width=device_warp_size())
Shuffle for any data type.
Definition: warp_shuffle.hpp:172
Definition: benchmark_block_reduce.cpp:63
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: warp_reduce_shuffle.hpp:43
Definition: various.hpp:52