21 #ifndef ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_SORT_HPP_ 22 #define ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_SORT_HPP_ 24 #include <type_traits> 26 #include "../../config.hpp" 27 #include "../../detail/various.hpp" 29 #include "../../intrinsics.hpp" 30 #include "../../functional.hpp" 32 #include "../block_radix_sort.hpp" 33 #include "../block_discontinuity.hpp" 35 BEGIN_ROCPRIM_NAMESPACE
42 unsigned int BlockSizeX,
43 unsigned int BlockSizeY,
44 unsigned int BlockSizeZ,
45 unsigned int ItemsPerThread,
50 static constexpr
unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
52 std::is_convertible<T, unsigned int>::value,
53 "T must be convertible to unsigned int" 67 unsigned int start[Bins];
68 unsigned int end[Bins];
74 template<
class Counter>
75 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
76 void composite(T (&input)[ItemsPerThread],
80 this->composite(input, hist, storage);
83 template<
class Counter>
84 ROCPRIM_DEVICE ROCPRIM_INLINE
85 void composite(T (&input)[ItemsPerThread],
94 constexpr
auto tile_size = BlockSize * ItemsPerThread;
95 const auto flat_tid = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
96 unsigned int head_flags[ItemsPerThread];
97 discontinuity_op flags_op(storage);
104 for(
unsigned int offset = 0; offset < Bins; offset += BlockSize)
106 const unsigned int offset_tid = offset + flat_tid;
107 if(offset_tid < Bins)
109 storage_.start[offset_tid] = tile_size;
110 storage_.end[offset_tid] = tile_size;
124 storage_.start[
static_cast<unsigned int>(input[0])] = 0;
129 for(
unsigned int offset = 0; offset < Bins; offset += BlockSize)
131 const unsigned int offset_tid = offset + flat_tid;
132 if(offset_tid < Bins)
134 Counter count =
static_cast<Counter
>(storage_.end[offset_tid] - storage_.start[offset_tid]);
135 hist[offset_tid] += count;
141 struct discontinuity_op
145 ROCPRIM_DEVICE ROCPRIM_INLINE
146 discontinuity_op(
storage_type &storage) : storage(storage)
150 ROCPRIM_DEVICE ROCPRIM_INLINE
151 bool operator()(
const T& a,
const T& b,
unsigned int b_index)
const 154 if(static_cast<unsigned int>(a) != static_cast<unsigned int>(b))
156 storage_.start[
static_cast<unsigned int>(b)] = b_index;
157 storage_.end[
static_cast<unsigned int>(a)] = b_index;
170 END_ROCPRIM_NAMESPACE
172 #endif // ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_SORT_HPP_
The block_discontinuity class is a block-level parallel primitive which provides methods for flagging...
Definition: block_discontinuity.hpp:82
Definition: block_histogram_sort.hpp:48
The block_radix_sort class is a block-level parallel primitive which provides methods for sorting of ...
Definition: block_radix_sort.hpp:97
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags head_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_discontinuity.hpp:156
ROCPRIM_DEVICE ROCPRIM_INLINE void syncthreads()
Synchronizes all threads in a block (tile).
Definition: thread.hpp:216
ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key(&keys)[ItemsPerThread], storage_type &storage, unsigned int begin_bit=0, unsigned int end_bit=8 *sizeof(Key))
Performs ascending radix sort over keys partitioned across threads in a block.
Definition: block_radix_sort.hpp:179
Definition: block_histogram_sort.hpp:61