21 #ifndef ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_SORT_HPP_    22 #define ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_SORT_HPP_    24 #include <type_traits>    26 #include "../../config.hpp"    27 #include "../../detail/various.hpp"    29 #include "../../intrinsics.hpp"    30 #include "../../functional.hpp"    32 #include "../block_radix_sort.hpp"    33 #include "../block_discontinuity.hpp"    35 BEGIN_ROCPRIM_NAMESPACE
    42     unsigned int BlockSizeX,
    43     unsigned int BlockSizeY,
    44     unsigned int BlockSizeZ,
    45     unsigned int ItemsPerThread,
    50     static constexpr 
unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
    52         std::is_convertible<T, unsigned int>::value,
    53         "T must be convertible to unsigned int"    67             unsigned int start[Bins];
    68             unsigned int end[Bins];
    74     template<
class Counter>
    75     ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
    76     void composite(T (&input)[ItemsPerThread],
    80         this->composite(input, hist, storage);
    83     template<
class Counter>
    84     ROCPRIM_DEVICE ROCPRIM_INLINE
    85     void composite(T (&input)[ItemsPerThread],
    94         constexpr 
auto tile_size = BlockSize * ItemsPerThread;
    95         const auto flat_tid = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
    96         unsigned int head_flags[ItemsPerThread];
    97         discontinuity_op flags_op(storage);
   104         for(
unsigned int offset = 0; offset < Bins; offset += BlockSize)
   106             const unsigned int offset_tid = offset + flat_tid;
   107             if(offset_tid < Bins)
   109                 storage_.start[offset_tid] = tile_size;
   110                 storage_.end[offset_tid] = tile_size;
   124             storage_.start[
static_cast<unsigned int>(input[0])] = 0;
   129         for(
unsigned int offset = 0; offset < Bins; offset += BlockSize)
   131             const unsigned int offset_tid = offset + flat_tid;
   132             if(offset_tid < Bins)
   134                 Counter count = 
static_cast<Counter
>(storage_.end[offset_tid] - storage_.start[offset_tid]);
   135                 hist[offset_tid] += count;
   141     struct discontinuity_op
   145         ROCPRIM_DEVICE ROCPRIM_INLINE
   146         discontinuity_op(
storage_type &storage) : storage(storage)
   150         ROCPRIM_DEVICE ROCPRIM_INLINE
   151         bool operator()(
const T& a, 
const T& b, 
unsigned int b_index)
 const   154             if(static_cast<unsigned int>(a) != static_cast<unsigned int>(b))
   156                 storage_.start[
static_cast<unsigned int>(b)] = b_index;
   157                 storage_.end[
static_cast<unsigned int>(a)] = b_index;
   170 END_ROCPRIM_NAMESPACE
   172 #endif // ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_SORT_HPP_
The block_discontinuity class is a block level parallel primitive which provides methods for flagging...
Definition: block_discontinuity.hpp:82
Definition: block_histogram_sort.hpp:48
The block_radix_sort class is a block level parallel primitive which provides methods for sorting of ...
Definition: block_radix_sort.hpp:97
Deprecated: Configuration of device-level scan primitives. 
Definition: block_histogram.hpp:62
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags head_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_discontinuity.hpp:156
ROCPRIM_DEVICE ROCPRIM_INLINE void syncthreads()
Synchronizes all threads in a block (tile).
Definition: thread.hpp:216
ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key(&keys)[ItemsPerThread], storage_type &storage, unsigned int begin_bit=0, unsigned int end_bit=8 *sizeof(Key))
Performs ascending radix sort over keys partitioned across threads in a block. 
Definition: block_radix_sort.hpp:179
Definition: block_histogram_sort.hpp:61