21 #ifndef ROCPRIM_WARP_DETAIL_WARP_SCAN_DPP_HPP_ 22 #define ROCPRIM_WARP_DETAIL_WARP_SCAN_DPP_HPP_ 24 #include <type_traits> 26 #include "../../config.hpp" 27 #include "../../detail/various.hpp" 29 #include "../../intrinsics.hpp" 30 #include "../../types.hpp" 32 BEGIN_ROCPRIM_NAMESPACE
// Compile-time guard: the build fails with the message below unless
// detail::is_power_of_two(WarpSize) evaluates to true.
44 static_assert(detail::is_power_of_two(WarpSize),
"WarpSize must be power of 2");
// Core of the inclusive scan: a sequence of cross-lane combine steps.
// NOTE(review): the function signature, the braces and any guards around the
// individual steps are not visible here; `output`, `lane_id` and `scan_op`
// are introduced on those missing lines.
48 template<
class BinaryFunction>
49 ROCPRIM_DEVICE ROCPRIM_INLINE
// Position of this lane inside its group of min(16, WarpSize) lanes.
53 const unsigned int row_lane_id = lane_id %
::rocprim::min(16u, WarpSize);
// Four steps with thresholds 1, 2, 4 and 8. Each one combines the value
// obtained through warp_move_dpp with the lane's own value (fetched value is
// the left operand of scan_op) and commits the result only in lanes whose
// row_lane_id is at least the threshold; other lanes keep their value.
// NOTE(review): 0x111 / 0x112 / 0x114 / 0x118 look like DPP "row shift right"
// controls by 1 / 2 / 4 / 8 lanes - confirm against the ISA DPP_CTRL table.
59 T t = scan_op(warp_move_dpp<T, 0x111>(output), output);
60 if(row_lane_id >= 1) output = t;
64 T t = scan_op(warp_move_dpp<T, 0x112>(output), output);
65 if(row_lane_id >= 2) output = t;
69 T t = scan_op(warp_move_dpp<T, 0x114>(output), output);
70 if(row_lane_id >= 4) output = t;
74 T t = scan_op(warp_move_dpp<T, 0x118>(output), output);
75 if(row_lane_id >= 8) output = t;
// The next two steps share one commit condition (upper 16 lanes of every
// group of 32) and differ only in how the neighbour value is fetched:
// warp_swizzle<T, 0x1e0> versus warp_move_dpp<T, 0x142>.
// NOTE(review): presumably these are alternative implementations of the same
// step selected by conditionals that are not visible here - confirm.
80 T t = scan_op(warp_swizzle<T, 0x1e0>(output), output);
81 if(lane_id % 32 >= 16) output = t;
86 T t = scan_op(warp_move_dpp<T, 0x142>(output), output);
87 if(lane_id % 32 >= 16) output = t;
// Last step: only lanes 32 and above commit the combined value.
// NOTE(review): 0x143 looks like a DPP row-broadcast control - confirm.
91 T t = scan_op(warp_move_dpp<T, 0x143>(output), output);
92 if(lane_id >= 32) output = t;
97 template<
class BinaryFunction>
98 ROCPRIM_DEVICE ROCPRIM_INLINE
106 template<
class BinaryFunction>
107 ROCPRIM_DEVICE ROCPRIM_INLINE
109 BinaryFunction scan_op)
116 template<
class BinaryFunction>
117 ROCPRIM_DEVICE ROCPRIM_INLINE
// Exclusive scan seeded with `init`; the result is written to `output`.
// NOTE(review): the statement that fills `output` before the conversion below
// is not visible here - presumably an inclusive scan of `input`; confirm.
125 template<
class BinaryFunction>
126 ROCPRIM_DEVICE ROCPRIM_INLINE
127 void exclusive_scan(T input, T& output, T init, BinaryFunction scan_op)
// In-place conversion: `output` is both the inclusive source and the
// exclusive destination of to_exclusive.
131 to_exclusive(output, output, init, scan_op);
134 template<
class BinaryFunction>
135 ROCPRIM_DEVICE ROCPRIM_INLINE
143 template<
class BinaryFunction>
144 ROCPRIM_DEVICE ROCPRIM_INLINE
151 to_exclusive(output, output);
154 template<
class BinaryFunction>
155 ROCPRIM_DEVICE ROCPRIM_INLINE
157 BinaryFunction scan_op)
163 to_exclusive(output, output, init, scan_op);
166 template<
class BinaryFunction>
167 ROCPRIM_DEVICE ROCPRIM_INLINE
// Produces both scan flavours in one call: `inclusive_output` and
// `exclusive_output`, the latter seeded with `init`.
// NOTE(review): the statement that fills `inclusive_output` is not visible
// here - presumably an inclusive scan of `input`; confirm.
175 template<
class BinaryFunction>
176 ROCPRIM_DEVICE ROCPRIM_INLINE
177 void scan(T input, T& inclusive_output, T& exclusive_output, T init,
178 BinaryFunction scan_op)
// Derive the exclusive result from the inclusive one; inclusive_output is
// passed by value to to_exclusive and is therefore left unchanged by it.
182 to_exclusive(inclusive_output, exclusive_output, init, scan_op);
// Forwarding overload of scan.
// NOTE(review): the tail of this signature is not visible here - presumably it
// adds `BinaryFunction scan_op` and an unused `storage_type&`; confirm.
185 template<
class BinaryFunction>
186 ROCPRIM_DEVICE ROCPRIM_INLINE
187 void scan(T input, T& inclusive_output, T& exclusive_output, T init,
// Delegates to the five-argument scan overload.
191 scan(input, inclusive_output, exclusive_output, init, scan_op);
// scan overload without an initial value.
// NOTE(review): the rest of the signature and the statement that fills
// `inclusive_output` are not visible here; confirm against the full file.
194 template<
class BinaryFunction>
195 ROCPRIM_DEVICE ROCPRIM_INLINE
196 void scan(T input, T& inclusive_output, T& exclusive_output,
// Derive the exclusive result through the two-argument to_exclusive
// (no init, no scan_op).
202 to_exclusive(inclusive_output, exclusive_output);
// scan overload that additionally reports a `reduction` value.
// NOTE(review): the statement that fills `inclusive_output` is not visible
// here - presumably an inclusive scan of `input`; confirm.
205 template<
class BinaryFunction>
206 ROCPRIM_DEVICE ROCPRIM_INLINE
207 void scan(T input, T& inclusive_output, T& exclusive_output, T init, T& reduction,
208 BinaryFunction scan_op)
// reduction := inclusive_output as held by source lane WarpSize-1, read via
// warp_shuffle with width WarpSize. It is taken before the exclusive
// conversion below, so it does not include `init`.
212 reduction =
warp_shuffle(inclusive_output, WarpSize-1, WarpSize);
// Derive the exclusive result, seeded with init.
214 to_exclusive(inclusive_output, exclusive_output, init, scan_op);
// Forwarding overload of the reduction-reporting scan.
// NOTE(review): the tail of this signature is not visible here - presumably it
// adds `BinaryFunction scan_op` and an unused `storage_type&`; confirm.
217 template<
class BinaryFunction>
218 ROCPRIM_DEVICE ROCPRIM_INLINE
219 void scan(T input, T& inclusive_output, T& exclusive_output, T init, T& reduction,
// Delegates to the six-argument scan overload.
223 scan(input, inclusive_output, exclusive_output, init, reduction, scan_op);
226 ROCPRIM_DEVICE ROCPRIM_INLINE
227 T broadcast(T input,
const unsigned int src_lane,
storage_type& storage)
// to_exclusive overload that accepts a storage_type&. The visible body does
// not pass `storage` on; it delegates to the two-argument overload.
234 ROCPRIM_DEVICE ROCPRIM_INLINE
235 void to_exclusive(T inclusive_input, T& exclusive_output,
storage_type& storage)
// Returning a void call expression from a void function is valid C++.
238 return to_exclusive(inclusive_input, exclusive_output);
// Converts an inclusive scan value into an exclusive one using `init`.
243 template<
class BinaryFunction>
244 ROCPRIM_DEVICE ROCPRIM_INLINE
245 void to_exclusive(T inclusive_input, T& exclusive_output, T init,
246 BinaryFunction scan_op)
// Combine init (left operand) with this lane's inclusive value.
249 exclusive_output = scan_op(init, inclusive_input);
// NOTE(review): a cross-lane shift of exclusive_output presumably sits between
// the statement above and the check below; it is not visible here - confirm.
// Logical lane 0 receives init itself.
252 if(detail::logical_lane_id<WarpSize>() == 0)
254 exclusive_output = init;
258 ROCPRIM_DEVICE ROCPRIM_INLINE
259 void to_exclusive(T inclusive_input, T& exclusive_output)
268 END_ROCPRIM_NAMESPACE
270 #endif // ROCPRIM_WARP_DETAIL_WARP_SCAN_DPP_HPP_ Definition: benchmark_block_scan.cpp:63
ROCPRIM_DEVICE ROCPRIM_INLINE T warp_shuffle(const T &input, const int src_lane, const int width=device_warp_size())
Shuffle for any data type.
Definition: warp_shuffle.hpp:172
ROCPRIM_DEVICE ROCPRIM_INLINE T warp_shuffle_up(const T &input, const unsigned int delta, const int width=device_warp_size())
Shuffle up for any data type.
Definition: warp_shuffle.hpp:197
ROCPRIM_HOST_DEVICE constexpr T min(const T &a, const T &b)
Returns the minimum of its arguments.
Definition: functional.hpp:63
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: various.hpp:52
Definition: benchmark_block_scan.cpp:100
Definition: warp_scan_dpp.hpp:41
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int lane_id()
Returns thread identifier in a warp.
Definition: thread.hpp:93