21 #ifndef ROCPRIM_WARP_DETAIL_WARP_REDUCE_SHUFFLE_HPP_    22 #define ROCPRIM_WARP_DETAIL_WARP_REDUCE_SHUFFLE_HPP_    24 #include <type_traits>    26 #include "../../config.hpp"    27 #include "../../intrinsics.hpp"    28 #include "../../types.hpp"    29 #include "../../detail/various.hpp"    31 #include "warp_segment_bounds.hpp"    33 BEGIN_ROCPRIM_NAMESPACE
    40     unsigned int WarpSize,
    46     static_assert(detail::is_power_of_two(WarpSize), 
"WarpSize must be power of 2");
    50     template<
class BinaryFunction>
    51     ROCPRIM_DEVICE ROCPRIM_INLINE
    52     void reduce(T input, T& output, BinaryFunction reduce_op)
    58         for(
unsigned int offset = 1; offset < WarpSize; offset *= 2)
    61             output = reduce_op(output, value);
    63         set_output<UseAllReduce>(output);
    66     template<
class BinaryFunction>
    67     ROCPRIM_DEVICE ROCPRIM_INLINE
    71         this->
reduce(input, output, reduce_op);
    74     template<
bool UseAllReduceDummy = UseAllReduce, 
class BinaryFunction>
    75     ROCPRIM_DEVICE ROCPRIM_INLINE
    76     void reduce(T input, T& output, 
unsigned int valid_items, BinaryFunction reduce_op)
    82         for(
unsigned int offset = 1; offset < WarpSize; offset *= 2)
    85             unsigned int id = detail::logical_lane_id<WarpSize>();
    86             if (
id + offset < valid_items) output = reduce_op(output, value);
    88         set_output<UseAllReduceDummy>(output);
    91     template<
class BinaryFunction>
    92     ROCPRIM_DEVICE ROCPRIM_INLINE
    93     void reduce(T input, T& output, 
unsigned int valid_items,
    97         this->
reduce(input, output, valid_items, reduce_op);
   100     template<
class Flag, 
class BinaryFunction>
   101     ROCPRIM_DEVICE ROCPRIM_INLINE
   102     void head_segmented_reduce(T input, T& output, Flag flag, BinaryFunction reduce_op)
   104         this->segmented_reduce<true>(input, output, flag, reduce_op);
   107     template<
class Flag, 
class BinaryFunction>
   108     ROCPRIM_DEVICE ROCPRIM_INLINE
   109     void tail_segmented_reduce(T input, T& output, Flag flag, BinaryFunction reduce_op)
   111         this->segmented_reduce<false>(input, output, flag, reduce_op);
   114     template<
class Flag, 
class BinaryFunction>
   115     ROCPRIM_DEVICE ROCPRIM_INLINE
   116     void head_segmented_reduce(T input, T& output, Flag flag,
   120         this->segmented_reduce<true>(input, output, flag, reduce_op);
   123     template<
class Flag, 
class BinaryFunction>
   124     ROCPRIM_DEVICE ROCPRIM_INLINE
   125     void tail_segmented_reduce(T input, T& output, Flag flag,
   129         this->segmented_reduce<false>(input, output, flag, reduce_op);
   133     template<
bool HeadSegmented, 
class Flag, 
class BinaryFunction>
   134     ROCPRIM_DEVICE ROCPRIM_INLINE
   135     void segmented_reduce(T input, T& output, Flag flag, BinaryFunction reduce_op)
   139         auto valid_items_in_segment = last_in_warp_segment<HeadSegmented, WarpSize>(flag) + 1U;
   140         this->
reduce<false>(input, output, valid_items_in_segment, reduce_op);
   143     template<
bool Switch>
   144     ROCPRIM_DEVICE ROCPRIM_INLINE
   145     typename std::enable_if<(Switch == false)>::type
   146     set_output(T& output)
   152     template<
bool Switch>
   153     ROCPRIM_DEVICE ROCPRIM_INLINE
   154     typename std::enable_if<(Switch == true)>::type
   155     set_output(T& output)
   163 END_ROCPRIM_NAMESPACE
   165 #endif // ROCPRIM_WARP_DETAIL_WARP_REDUCE_SHUFFLE_HPP_
ROCPRIM_DEVICE ROCPRIM_INLINE T warp_shuffle(const T &input, const int src_lane, const int width=device_warp_size())
Shuffle for any data type. 
Definition: warp_shuffle.hpp:172
Definition: benchmark_block_reduce.cpp:63
ROCPRIM_DEVICE ROCPRIM_INLINE T warp_shuffle_down(const T &input, const unsigned int delta, const int width=device_warp_size())
Shuffle down for any data type. 
Definition: warp_shuffle.hpp:222
Deprecated: Configuration of device-level scan primitives. 
Definition: block_histogram.hpp:62
Definition: warp_reduce_shuffle.hpp:43
Definition: various.hpp:52