21 #ifndef ROCPRIM_WARP_DETAIL_WARP_REDUCE_DPP_HPP_    22 #define ROCPRIM_WARP_DETAIL_WARP_REDUCE_DPP_HPP_    24 #include <type_traits>    26 #include "../../config.hpp"    27 #include "../../intrinsics.hpp"    28 #include "../../types.hpp"    29 #include "../../detail/various.hpp"    31 #include "warp_reduce_shuffle.hpp"    33 BEGIN_ROCPRIM_NAMESPACE
    40     unsigned int WarpSize,
    46     static_assert(detail::is_power_of_two(WarpSize), 
"WarpSize must be power of 2");
    50     template<
class BinaryFunction>
    51     ROCPRIM_DEVICE ROCPRIM_INLINE
    52     void reduce(T input, T& output, BinaryFunction reduce_op)
    59             output = reduce_op(warp_move_dpp<T, 0xb1>(output), output);
    64             output = reduce_op(warp_move_dpp<T, 0x4e>(output), output);
    69             output = reduce_op(warp_move_dpp<T, 0x114>(output), output);
    74             output = reduce_op(warp_move_dpp<T, 0x118>(output), output);
    80             output = reduce_op(warp_swizzle<T, 0x1e0>(output), output);
    86             output = reduce_op(warp_move_dpp<T, 0x142>(output), output);
    91             output = reduce_op(warp_move_dpp<T, 0x143>(output), output);
    98     template<
class BinaryFunction>
    99     ROCPRIM_DEVICE ROCPRIM_INLINE
   103         this->
reduce(input, output, reduce_op);
   106     template<
class BinaryFunction>
   107     ROCPRIM_DEVICE ROCPRIM_INLINE
   108     void reduce(T input, T& output, 
unsigned int valid_items, BinaryFunction reduce_op)
   112             .
reduce(input, output, valid_items, reduce_op);
   115     template<
class BinaryFunction>
   116     ROCPRIM_DEVICE ROCPRIM_INLINE
   117     void reduce(T input, T& output, 
unsigned int valid_items,
   121         this->
reduce(input, output, valid_items, reduce_op);
   124     template<
class Flag, 
class BinaryFunction>
   125     ROCPRIM_DEVICE ROCPRIM_INLINE
   126     void head_segmented_reduce(T input, T& output, Flag flag, BinaryFunction reduce_op)
   130             .head_segmented_reduce(input, output, flag, reduce_op);
   133     template<
class Flag, 
class BinaryFunction>
   134     ROCPRIM_DEVICE ROCPRIM_INLINE
   135     void tail_segmented_reduce(T input, T& output, Flag flag, BinaryFunction reduce_op)
   139             .tail_segmented_reduce(input, output, flag, reduce_op);
   142     template<
class Flag, 
class BinaryFunction>
   143     ROCPRIM_DEVICE ROCPRIM_INLINE
   144     void head_segmented_reduce(T input, T& output, Flag flag,
   149             .head_segmented_reduce(input, output, flag, storage, reduce_op);
   152     template<
class Flag, 
class BinaryFunction>
   153     ROCPRIM_DEVICE ROCPRIM_INLINE
   154     void tail_segmented_reduce(T input, T& output, Flag flag,
   159             .tail_segmented_reduce(input, output, flag, storage, reduce_op);
   165 END_ROCPRIM_NAMESPACE
   167 #endif // ROCPRIM_WARP_DETAIL_WARP_REDUCE_DPP_HPP_ Definition: warp_reduce_dpp.hpp:43
ROCPRIM_DEVICE ROCPRIM_INLINE T warp_shuffle(const T &input, const int src_lane, const int width=device_warp_size())
Shuffle for any data type. 
Definition: warp_shuffle.hpp:172
Definition: benchmark_block_reduce.cpp:63
Deprecated: Configuration of device-level scan primitives. 
Definition: block_histogram.hpp:62
Definition: warp_reduce_shuffle.hpp:43
Definition: various.hpp:52