21 #ifndef ROCPRIM_WARP_DETAIL_WARP_SCAN_SHARED_MEM_HPP_ 22 #define ROCPRIM_WARP_DETAIL_WARP_SCAN_SHARED_MEM_HPP_ 24 #include <type_traits> 26 #include "../../config.hpp" 27 #include "../../detail/various.hpp" 29 #include "../../intrinsics.hpp" 30 #include "../../types.hpp" 32 BEGIN_ROCPRIM_NAMESPACE
// Scan over the per-lane slots in `storage_.threads`: each lane keeps a running
// value `me` and combines it with values from lower lanes via `scan_op`.
// NOTE(review): this listing is incomplete — the embedded line numbers jump
// (51 -> 55, 56 -> 59, 67 -> 72), so the function name/parameter list, the
// declaration of `me`, the braces, and any guards or synchronization between
// the statements below are not visible. Presumably this is the inclusive-scan
// entry point — confirm against the full header.
50 template<
class BinaryFunction>
51 ROCPRIM_DEVICE ROCPRIM_INLINE
// This lane's index as reported by detail::logical_lane_id<WarpSize>().
55 const unsigned int lid = detail::logical_lane_id<WarpSize>();
56 storage_type_& storage_ = storage.get();
// Publish this lane's current value in its own slot; other lanes read it below.
59 storage_.threads[lid] = me;
// The stride `i` doubles every iteration (1, 2, 4, ...) while it is below WarpSize.
61 for(
unsigned int i = 1; i < WarpSize; i *= 2)
// True for lanes that have a partner slot `i` positions lower.
63 const bool do_op = lid >= i;
// The partner's value is the left operand of scan_op, this lane's value the right.
// Presumably guarded by `do_op` (lid - i would wrap for lid < i) — guard not visible here.
66 T other = storage_.threads[lid - i];
67 me = scan_op(other, me);
// Write the updated running value back to this lane's slot for the next stride.
72 storage_.threads[lid] = me;
// Overload that additionally reports a warp-wide `reduction`.
// NOTE(review): the signature and the statements before the read below are absent
// from this listing (embedded numbers jump 80 -> 84); the function name is not
// visible. Presumably a scan fills `storage_.threads` first — confirm.
79 template<
class BinaryFunction>
80 ROCPRIM_DEVICE ROCPRIM_INLINE
84 storage_type_& storage_ = storage.get();
// `reduction` is taken from the slot at index WarpSize - 1.
86 reduction = storage_.threads[WarpSize - 1];
// NOTE(review): the signature and everything before this call are absent from the
// listing (embedded numbers jump 90 -> 95); the function name is not visible.
// Presumably an exclusive-scan overload that takes `init` — confirm.
89 template<
class BinaryFunction>
90 ROCPRIM_DEVICE ROCPRIM_INLINE
// Produce `output` through the to_exclusive overload that takes `init` and `scan_op`.
95 to_exclusive(output, init, storage, scan_op);
// NOTE(review): the signature and everything before this call are absent from the
// listing (embedded numbers jump 99 -> 104); the function name is not visible.
// Presumably an exclusive-scan overload without an initial value — confirm.
98 template<
class BinaryFunction>
99 ROCPRIM_DEVICE ROCPRIM_INLINE
// Produce `output` through the to_exclusive overload that takes only the output and storage.
104 to_exclusive(output, storage);
// Overload that reports both a warp-wide `reduction` and a per-lane `output`.
// NOTE(review): the signature and the statements before the reduction read are
// absent from this listing (embedded numbers jump 108 -> 112); the function name
// is not visible. Presumably a scan fills `storage_.threads` first — confirm.
107 template<
class BinaryFunction>
108 ROCPRIM_DEVICE ROCPRIM_INLINE
112 storage_type_& storage_ = storage.get();
// `reduction` is taken from the slot at index WarpSize - 1; `init` is not applied on this line.
114 reduction = storage_.threads[WarpSize - 1];
// Produce `output` through the to_exclusive overload that takes `init` and `scan_op`.
115 to_exclusive(output, init, storage, scan_op);
// scan overload with an initial value: has both an inclusive and an exclusive output.
// NOTE(review): the rest of the parameter list and the step that fills
// `inclusive_output` are absent from this listing (embedded numbers jump
// 120 -> 124); only the exclusive half is visible.
118 template<
class BinaryFunction>
119 ROCPRIM_DEVICE ROCPRIM_INLINE
120 void scan(T input, T& inclusive_output, T& exclusive_output, T init,
// Produce `exclusive_output` through the to_exclusive overload that takes `init` and `scan_op`.
124 to_exclusive(exclusive_output, init, storage, scan_op);
// scan overload without an initial value: has both an inclusive and an exclusive output.
// NOTE(review): the rest of the parameter list and the step that fills
// `inclusive_output` are absent from this listing (embedded numbers jump
// 129 -> 133); only the exclusive half is visible.
127 template<
class BinaryFunction>
128 ROCPRIM_DEVICE ROCPRIM_INLINE
129 void scan(T input, T& inclusive_output, T& exclusive_output,
// Produce `exclusive_output` through the to_exclusive overload that takes only the output and storage.
133 to_exclusive(exclusive_output, storage);
// scan overload with an initial value that also reports a warp-wide `reduction`.
// NOTE(review): the rest of the parameter list and the step that fills
// `inclusive_output` are absent from this listing (embedded numbers jump
// 138 -> 141 and skip 142, 144); only the lines below are visible.
136 template<
class BinaryFunction>
137 ROCPRIM_DEVICE ROCPRIM_INLINE
138 void scan(T input, T& inclusive_output, T& exclusive_output, T init, T& reduction,
141 storage_type_& storage_ = storage.get();
// `reduction` is taken from the slot at index WarpSize - 1; `init` is not applied on this line.
143 reduction = storage_.threads[WarpSize - 1];
// Produce `exclusive_output` through the to_exclusive overload that takes `init` and `scan_op`.
145 to_exclusive(exclusive_output, init, storage, scan_op);
// Returns the value found in slot `src_lane` of the storage, after the lane whose
// logical id equals `src_lane` has written its `input` there.
// NOTE(review): braces and the lines between the write and the read are absent
// from this listing (embedded numbers jump 154 -> 157); presumably a
// synchronization call sits there so the read observes the write — confirm.
148 ROCPRIM_DEVICE ROCPRIM_INLINE
149 T broadcast(T input,
const unsigned int src_lane,
storage_type& storage)
151 storage_type_& storage_ = storage.get();
// Only the source lane stores its input; all other lanes skip the write.
152 if(src_lane == detail::logical_lane_id<WarpSize>())
154 storage_.threads[src_lane] = input;
// Every calling lane returns the contents of the source lane's slot.
157 return storage_.threads[src_lane];
// Overload that accepts an `inclusive_input` argument but does not use it; the
// work is forwarded to the (exclusive_output, storage) overload.
// NOTE(review): the opening and closing braces are absent from this listing.
161 ROCPRIM_DEVICE ROCPRIM_INLINE
162 void to_exclusive(T inclusive_input, T& exclusive_output,
storage_type& storage)
// Cast to void marks the parameter as intentionally unused.
164 (void) inclusive_input;
165 return to_exclusive(exclusive_output, storage);
// Computes `exclusive_output` for this lane from `init` and the value stored in
// the slot one below this lane's own (`storage_.threads[lid - 1]`).
// NOTE(review): the rest of the parameter list, the braces, and the condition
// around the second assignment are absent from this listing (embedded numbers
// jump 172 -> 175 and 177 -> 180).
170 template<
class BinaryFunction>
171 ROCPRIM_DEVICE ROCPRIM_INLINE
172 void to_exclusive(T& exclusive_output, T init,
175 const unsigned int lid = detail::logical_lane_id<WarpSize>();
176 storage_type_& storage_ = storage.get();
// Start from `init`; this is the result wherever the assignment below does not run.
177 exclusive_output = init;
// `init` is the left operand of scan_op, the lower neighbour's stored value the right.
// Presumably executed only for lid > 0 (lid - 1 would wrap for lane 0) — guard not visible here.
180 exclusive_output = scan_op(init, storage_.threads[lid - 1]);
// Sets `exclusive_output` for this lane to the value stored in the slot one below
// this lane's own (`storage_.threads[lid - 1]`); no initial value is involved.
// NOTE(review): the braces and the lines around the assignment are absent from
// this listing (embedded numbers jump 188 -> 191). Presumably the read is guarded
// for lid > 0, since lid - 1 would wrap for lane 0 — confirm what lane 0 receives.
184 ROCPRIM_DEVICE ROCPRIM_INLINE
185 void to_exclusive(T& exclusive_output,
storage_type& storage)
187 const unsigned int lid = detail::logical_lane_id<WarpSize>();
188 storage_type_& storage_ = storage.get();
191 exclusive_output = storage_.threads[lid - 1];
198 END_ROCPRIM_NAMESPACE
200 #endif // ROCPRIM_WARP_DETAIL_WARP_SCAN_SHARED_MEM_HPP_ Definition: benchmark_block_scan.cpp:63
ROCPRIM_DEVICE ROCPRIM_INLINE void wave_barrier()
Synchronize all threads in the wavefront.
Definition: thread.hpp:235
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: warp_scan_shared_mem.hpp:41
Definition: benchmark_block_scan.cpp:100