21 #ifndef ROCPRIM_WARP_DETAIL_WARP_SCAN_SHUFFLE_HPP_ 22 #define ROCPRIM_WARP_DETAIL_WARP_SCAN_SHUFFLE_HPP_ 24 #include <type_traits> 26 #include "../../config.hpp" 27 #include "../../detail/various.hpp" 29 #include "../../intrinsics.hpp" 30 #include "../../types.hpp" 32 BEGIN_ROCPRIM_NAMESPACE
// Compile-time guard: compilation fails with the message below whenever
// detail::is_power_of_two(WarpSize) evaluates to false.
// NOTE(review): presumably required because the scan loop in this class doubles
// its offset each step (1, 2, 4, ...) — confirm against the full header.
44 static_assert(detail::is_power_of_two(WarpSize),
"WarpSize must be power of 2");
// Fragment of a device-side scan routine templated on the combining functor.
// NOTE(review): the signature line and several body lines are missing from this
// extract (the listing jumps 49 -> 55 -> 57 -> 60); the function name and the
// initialisation of `output`/`value` must be checked against the original header.
48 template<
class BinaryFunction>
49 ROCPRIM_DEVICE ROCPRIM_INLINE
// `id` is whatever detail::logical_lane_id<WarpSize>() reports for the calling
// lane (presumably its index inside the WarpSize-wide logical warp — confirm).
55 const unsigned int id = detail::logical_lane_id<WarpSize>();
// `offset` takes the values 1, 2, 4, ... for as long as it stays below WarpSize.
57 for(
unsigned int offset = 1; offset < WarpSize; offset *= 2)
// Only lanes with id >= offset combine `value` (left operand) with their running
// `output` (right operand); lanes below `offset` keep `output` unchanged here.
// NOTE(review): `value` is assigned on a line not present in this extract —
// presumably read from the lane `offset` positions lower; confirm.
60 if(
id >= offset) output = scan_op(value, output);
64 template<
class BinaryFunction>
65 ROCPRIM_DEVICE ROCPRIM_INLINE
73 template<
class BinaryFunction>
74 ROCPRIM_DEVICE ROCPRIM_INLINE
76 BinaryFunction scan_op)
83 template<
class BinaryFunction>
84 ROCPRIM_DEVICE ROCPRIM_INLINE
// exclusive_scan overload taking an initial value and a combining functor.
//   input   - this lane's input value (its use is on lines missing from this extract)
//   output  - receives the result; rewritten in place by the to_exclusive call below
//   init    - initial value forwarded to to_exclusive
//   scan_op - binary functor forwarded to to_exclusive
// NOTE(review): listing lines 95-97 are missing here; presumably they compute the
// inclusive result into `output` before the conversion — confirm.
92 template<
class BinaryFunction>
93 ROCPRIM_DEVICE ROCPRIM_INLINE
94 void exclusive_scan(T input, T& output, T init, BinaryFunction scan_op)
// `output` is passed as both the (by-value) inclusive input and the destination,
// so the conversion to an exclusive result happens in place.
98 to_exclusive(output, output, init, scan_op);
101 template<
class BinaryFunction>
102 ROCPRIM_DEVICE ROCPRIM_INLINE
// Fragment of a scan routine templated on the combining functor.
// NOTE(review): the signature (listing lines 112-117) is missing from this extract,
// so the function name and parameter list cannot be confirmed from here.
110 template<
class BinaryFunction>
111 ROCPRIM_DEVICE ROCPRIM_INLINE
// Converts `output` in place using the to_exclusive overload that takes neither
// an initial value nor a combining functor.
118 to_exclusive(output, output);
// Fragment of a scan routine whose last parameter is the combining functor.
// NOTE(review): the first signature line (listing line 123) and body lines
// 125-129 are missing from this extract; name and remaining parameters unverified.
121 template<
class BinaryFunction>
122 ROCPRIM_DEVICE ROCPRIM_INLINE
124 BinaryFunction scan_op)
// In-place conversion of `output`, forwarding `init` and `scan_op` to to_exclusive.
130 to_exclusive(output, output, init, scan_op);
133 template<
class BinaryFunction>
134 ROCPRIM_DEVICE ROCPRIM_INLINE
// scan overload that yields both an inclusive and an exclusive result.
//   input            - this lane's input value (its use is on lines missing here)
//   inclusive_output - inclusive result; read by the to_exclusive call below
//   exclusive_output - written by the to_exclusive call below
//   init             - initial value forwarded to to_exclusive
//   scan_op          - binary functor forwarded to to_exclusive
// NOTE(review): listing lines 146-148 are missing; presumably they fill
// `inclusive_output` before the conversion — confirm.
142 template<
class BinaryFunction>
143 ROCPRIM_DEVICE ROCPRIM_INLINE
144 void scan(T input, T& inclusive_output, T& exclusive_output, T init,
145 BinaryFunction scan_op)
// to_exclusive takes its first argument by value, so `inclusive_output` is not
// modified by this call; only `exclusive_output` is written.
149 to_exclusive(inclusive_output, exclusive_output, init, scan_op);
// scan overload that delegates to the (input, inclusive_output, exclusive_output,
// init, scan_op) overload of scan.
// NOTE(review): the rest of the parameter list (listing lines 155-157) is missing
// from this extract; any extra parameter it declares is not forwarded by the call below.
152 template<
class BinaryFunction>
153 ROCPRIM_DEVICE ROCPRIM_INLINE
154 void scan(T input, T& inclusive_output, T& exclusive_output, T init,
158 scan(input, inclusive_output, exclusive_output, init, scan_op);
// scan overload without an initial value that yields both results.
// NOTE(review): the rest of the parameter list and the statements at listing
// lines 164-168 are missing from this extract.
161 template<
class BinaryFunction>
162 ROCPRIM_DEVICE ROCPRIM_INLINE
163 void scan(T input, T& inclusive_output, T& exclusive_output,
// Derives `exclusive_output` from `inclusive_output` via the to_exclusive overload
// that takes no initial value or combining functor.
169 to_exclusive(inclusive_output, exclusive_output);
// scan overload that yields inclusive and exclusive results plus a reduction value.
//   input            - this lane's input value (its use is on lines missing here)
//   inclusive_output - inclusive result; source for both statements below
//   exclusive_output - written by to_exclusive
//   init             - initial value forwarded to to_exclusive
//   reduction        - receives the value shuffled from lane WarpSize-1
//   scan_op          - binary functor forwarded to to_exclusive
// NOTE(review): listing lines 176-178 are missing; presumably they fill
// `inclusive_output` first — confirm.
172 template<
class BinaryFunction>
173 ROCPRIM_DEVICE ROCPRIM_INLINE
174 void scan(T input, T& inclusive_output, T& exclusive_output, T init, T& reduction,
175 BinaryFunction scan_op)
// Every lane reads `inclusive_output` from source lane WarpSize-1 (shuffle width
// WarpSize), i.e. the last lane's inclusive value becomes `reduction`.
179 reduction =
warp_shuffle(inclusive_output, WarpSize-1, WarpSize);
// The reduction is taken before this call; `inclusive_output` is passed by value
// and stays unchanged while `exclusive_output` is written.
181 to_exclusive(inclusive_output, exclusive_output, init, scan_op);
// scan overload that delegates to the (input, inclusive_output, exclusive_output,
// init, reduction, scan_op) overload of scan.
// NOTE(review): the rest of the parameter list (listing lines 187-189) is missing
// from this extract; any extra parameter it declares is not forwarded by the call below.
184 template<
class BinaryFunction>
185 ROCPRIM_DEVICE ROCPRIM_INLINE
186 void scan(T input, T& inclusive_output, T& exclusive_output, T init, T& reduction,
190 scan(input, inclusive_output, exclusive_output, init, reduction, scan_op);
193 ROCPRIM_DEVICE ROCPRIM_INLINE
194 T broadcast(T input,
const unsigned int src_lane,
storage_type& storage)
// to_exclusive overload that accepts a storage reference.
//   inclusive_input  - inclusive value, taken by value
//   exclusive_output - destination for the exclusive value
//   storage          - not passed on by the call visible below
// NOTE(review): listing lines 203-204 are missing from this extract.
201 ROCPRIM_DEVICE ROCPRIM_INLINE
202 void to_exclusive(T inclusive_input, T& exclusive_output,
storage_type& storage)
// Delegates to the two-argument to_exclusive overload; `return` on a void call
// simply forwards control.
205 return to_exclusive(inclusive_input, exclusive_output);
// to_exclusive overload taking an initial value and a combining functor.
//   inclusive_input  - inclusive value, taken by value
//   exclusive_output - destination for the exclusive value
//   init             - initial value; also the final result on logical lane 0
//   scan_op          - binary functor used to fold `init` into the inclusive value
210 template<
class BinaryFunction>
211 ROCPRIM_DEVICE ROCPRIM_INLINE
212 void to_exclusive(T inclusive_input, T& exclusive_output, T init,
213 BinaryFunction scan_op)
// Fold the initial value into this lane's inclusive value, with `init` as the
// left operand of scan_op.
216 exclusive_output = scan_op(init, inclusive_input);
// NOTE(review): listing line 218 is missing from this extract — presumably it
// shifts `exclusive_output` up by one lane; confirm against the original header.
// The lane for which logical_lane_id<WarpSize>() is 0 ends up holding `init`.
219 if(detail::logical_lane_id<WarpSize>() == 0)
221 exclusive_output = init;
225 ROCPRIM_DEVICE ROCPRIM_INLINE
226 void to_exclusive(T inclusive_input, T& exclusive_output)
235 END_ROCPRIM_NAMESPACE
237 #endif // ROCPRIM_WARP_DETAIL_WARP_SCAN_SHUFFLE_HPP_
Definition: benchmark_block_scan.cpp:63
ROCPRIM_DEVICE ROCPRIM_INLINE T warp_shuffle(const T &input, const int src_lane, const int width=device_warp_size())
Shuffle for any data type.
Definition: warp_shuffle.hpp:172
ROCPRIM_DEVICE ROCPRIM_INLINE T warp_shuffle_up(const T &input, const unsigned int delta, const int width=device_warp_size())
Shuffle up for any data type.
Definition: warp_shuffle.hpp:197
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: warp_scan_shuffle.hpp:41
Definition: various.hpp:52
Definition: benchmark_block_scan.cpp:100