21 #ifndef ROCPRIM_WARP_DETAIL_WARP_SCAN_SHARED_MEM_HPP_    22 #define ROCPRIM_WARP_DETAIL_WARP_SCAN_SHARED_MEM_HPP_    24 #include <type_traits>    26 #include "../../config.hpp"    27 #include "../../detail/various.hpp"    29 #include "../../intrinsics.hpp"    30 #include "../../types.hpp"    32 BEGIN_ROCPRIM_NAMESPACE
    50     template<
class BinaryFunction>
    51     ROCPRIM_DEVICE ROCPRIM_INLINE
    55         const unsigned int lid = detail::logical_lane_id<WarpSize>();
    56         storage_type_& storage_ = storage.get();
    59         storage_.threads[lid] = me;
    61         for(
unsigned int i = 1; i < WarpSize; i *= 2)
    63             const bool do_op = lid >= i;
    66                 T other = storage_.threads[lid - i];
    67                 me      = scan_op(other, me);
    72                 storage_.threads[lid] = me;
    79     template<
class BinaryFunction>
    80     ROCPRIM_DEVICE ROCPRIM_INLINE
    84         storage_type_& storage_ = storage.get();
    86         reduction = storage_.threads[WarpSize - 1];
    89     template<
class BinaryFunction>
    90     ROCPRIM_DEVICE ROCPRIM_INLINE
    95         to_exclusive(output, init, storage, scan_op);
    98     template<
class BinaryFunction>
    99     ROCPRIM_DEVICE ROCPRIM_INLINE
   104         to_exclusive(output, storage);
   107     template<
class BinaryFunction>
   108     ROCPRIM_DEVICE ROCPRIM_INLINE
   112         storage_type_& storage_ = storage.get();
   114         reduction = storage_.threads[WarpSize - 1];
   115         to_exclusive(output, init, storage, scan_op);
   // scan overload that takes an initial value `init` and produces both an
   // inclusive and an exclusive result through reference parameters.
   // NOTE(review): this listing is missing the end of the parameter list and
   // the statements between the signature and the call below (the embedded
   // line numbers jump from 120 to 124) - confirm against the original header.
   118     template<
class BinaryFunction>
   119     ROCPRIM_DEVICE ROCPRIM_INLINE
   120     void scan(T input, T& inclusive_output, T& exclusive_output, T init,
   // Fills exclusive_output via the to_exclusive overload that takes `init`
   // and `scan_op`.
   124         to_exclusive(exclusive_output, init, storage, scan_op);
   // scan overload without an initial value; produces both an inclusive and an
   // exclusive result through reference parameters.
   // NOTE(review): this listing is missing the end of the parameter list and
   // the statements between the signature and the call below (the embedded
   // line numbers jump from 129 to 133) - confirm against the original header.
   127     template<
class BinaryFunction>
   128     ROCPRIM_DEVICE ROCPRIM_INLINE
   129     void scan(T input, T& inclusive_output, T& exclusive_output,
   // Fills exclusive_output via the to_exclusive overload that takes no `init`.
   133         to_exclusive(exclusive_output, storage);
   // scan overload that takes an initial value `init` and additionally reports
   // a `reduction` value through a reference parameter.
   // NOTE(review): this listing is missing the end of the parameter list and
   // several body lines (embedded numbers 139-140, 142 and 144 are absent) -
   // confirm against the original header.
   136     template<
class BinaryFunction>
   137     ROCPRIM_DEVICE ROCPRIM_INLINE
   138     void scan(T input, T& inclusive_output, T& exclusive_output, T init, T& reduction,
   141         storage_type_& storage_ = storage.get();
   // reduction is read from the slot of the last logical lane (WarpSize - 1).
   143         reduction = storage_.threads[WarpSize - 1];
   // Fills exclusive_output via the to_exclusive overload that takes `init`
   // and `scan_op`.
   145         to_exclusive(exclusive_output, init, storage, scan_op);
   // Returns the value stored in storage_.threads[src_lane]. The lane whose
   // logical lane id equals src_lane first writes its own `input` into that slot.
   // NOTE(review): the lines between the write and the final read (embedded
   // numbers 155-156) are not present in this listing; presumably a
   // wavefront-level synchronization separates them - TODO confirm.
   148     ROCPRIM_DEVICE ROCPRIM_INLINE
   149     T broadcast(T input, 
const unsigned int src_lane, 
storage_type& storage)
   151         storage_type_& storage_ = storage.get();
   // Only the source lane publishes its value.
   152         if(src_lane == detail::logical_lane_id<WarpSize>())
   154             storage_.threads[src_lane] = input;
   // Every calling lane returns the value held in the source lane's slot.
   157         return storage_.threads[src_lane];
   // Overload that accepts an inclusive_input argument but ignores it and
   // forwards to to_exclusive(exclusive_output, storage).
   161     ROCPRIM_DEVICE ROCPRIM_INLINE
   162     void to_exclusive(T inclusive_input, T& exclusive_output, 
storage_type& storage)
   // The cast to void marks the parameter as intentionally unused.
   164         (void) inclusive_input;
   165         return to_exclusive(exclusive_output, storage);
   // Writes an exclusive result into exclusive_output using the values held in
   // storage_.threads and the initial value `init`.
   // NOTE(review): the end of the parameter list and the condition guarding the
   // last assignment (embedded numbers 173-174 and 178-179) are not present in
   // this listing - confirm against the original header.
   170     template<
class BinaryFunction>
   171     ROCPRIM_DEVICE ROCPRIM_INLINE
   172     void to_exclusive(T& exclusive_output, T init,
   175         const unsigned int lid = detail::logical_lane_id<WarpSize>();
   176         storage_type_& storage_ = storage.get();
   // Default result is `init`.
   177         exclusive_output = init;
   // Combines `init` with the value stored by the preceding lane (lid - 1).
   // Presumably executed only for lanes with lid > 0, since it indexes
   // lid - 1 - TODO confirm.
   180             exclusive_output = scan_op(init, storage_.threads[lid - 1]);
   // Writes an exclusive result into exclusive_output by reading the value
   // stored by the preceding lane; no initial value is involved.
   // NOTE(review): the condition guarding the assignment (embedded numbers
   // 189-190) is not present in this listing - confirm against the original header.
   184     ROCPRIM_DEVICE ROCPRIM_INLINE
   185     void to_exclusive(T& exclusive_output, 
storage_type& storage)
   187         const unsigned int lid = detail::logical_lane_id<WarpSize>();
   188         storage_type_& storage_ = storage.get();
   // Takes the value stored by lane lid - 1. Presumably executed only for
   // lanes with lid > 0, since it indexes lid - 1 - TODO confirm.
   191             exclusive_output = storage_.threads[lid - 1];
   198 END_ROCPRIM_NAMESPACE
   200 #endif // ROCPRIM_WARP_DETAIL_WARP_SCAN_SHARED_MEM_HPP_ Definition: benchmark_block_scan.cpp:63
ROCPRIM_DEVICE ROCPRIM_INLINE void wave_barrier()
Synchronize all threads in the wavefront. 
Definition: thread.hpp:235
Deprecated: Configuration of device-level scan primitives. 
Definition: block_histogram.hpp:62
Definition: warp_scan_shared_mem.hpp:41
Definition: benchmark_block_scan.cpp:100