21 #ifndef ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_ 22 #define ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_ 24 #include <type_traits> 26 #include "../config.hpp" 27 #include "../detail/various.hpp" 29 #include "../intrinsics.hpp" 30 #include "../functional.hpp" 32 #include "detail/block_histogram_atomic.hpp" 33 #include "detail/block_histogram_sort.hpp" 38 BEGIN_ROCPRIM_NAMESPACE
67 template<block_histogram_algorithm Algorithm>
73 template<
class T,
unsigned int BlockSizeX,
unsigned int BlockSizeY,
unsigned int BlockSizeZ,
unsigned int ItemsPerThread,
unsigned int Bins>
80 template<
class T,
unsigned int BlockSizeX,
unsigned int BlockSizeY,
unsigned int BlockSizeZ,
unsigned int ItemsPerThread,
unsigned int Bins>
128 unsigned int BlockSizeX,
129 unsigned int ItemsPerThread,
132 unsigned int BlockSizeY = 1,
133 unsigned int BlockSizeZ = 1
136 #ifndef DOXYGEN_SHOULD_SKIP_THIS
141 static constexpr
unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
158 template<
class Counter>
159 ROCPRIM_DEVICE ROCPRIM_INLINE
162 const auto flat_tid = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
165 for(
unsigned int offset = 0; offset < Bins; offset += BlockSize)
167 const unsigned int offset_tid = offset + flat_tid;
168 if(offset_tid < Bins)
170 hist[offset_tid] = Counter();
222 template<
class Counter>
223 ROCPRIM_DEVICE ROCPRIM_INLINE
228 base_type::composite(input, hist, storage);
242 template<
class Counter>
243 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
247 base_type::composite(input, hist);
290 template<
class Counter>
291 ROCPRIM_DEVICE ROCPRIM_INLINE
296 init_histogram(hist);
298 composite(input, hist, storage);
312 template<
class Counter>
313 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
317 init_histogram(hist);
319 composite(input, hist);
323 END_ROCPRIM_NAMESPACE
328 #endif // ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_
The block_histogram class is a block-level parallel primitive which provides methods for constructing block-wide histograms from items partitioned across threads in a block.
Definition: block_histogram.hpp:135
Definition: block_histogram_sort.hpp:48
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: block_histogram_atomic.hpp:45
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void histogram(T(&input)[ItemsPerThread], Counter hist[Bins])
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_histogram.hpp:314
ROCPRIM_DEVICE ROCPRIM_INLINE void syncthreads()
Synchronize all threads in a block (tile)
Definition: thread.hpp:216
ROCPRIM_DEVICE ROCPRIM_INLINE void histogram(T(&input)[ItemsPerThread], Counter hist[Bins], storage_type &storage)
Construct a new block-wide histogram.
Definition: block_histogram.hpp:292
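A two-phase operation is used: data is first sorted using radix sort, then "runs" of same-valued keys are detected using discontinuity, and the run lengths are the bin counts.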
typename base_type::storage_type storage_type
Struct used to allocate temporary memory that is required for thread communication during operations provided by the related parallel primitive.
Definition: block_histogram.hpp:151
Definition: block_histogram.hpp:68
block_histogram_algorithm
Available algorithms for block_histogram primitive.
Definition: block_histogram.hpp:41
Atomic addition is used to update bin count directly.
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void composite(T(&input)[ItemsPerThread], Counter hist[Bins])
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: block_histogram.hpp:244
ROCPRIM_DEVICE ROCPRIM_INLINE void init_histogram(Counter hist[Bins])
Initialize histogram counters to zero.
Definition: block_histogram.hpp:160
Default block_histogram algorithm.
ROCPRIM_DEVICE ROCPRIM_INLINE void composite(T(&input)[ItemsPerThread], Counter hist[Bins], storage_type &storage)
Update an existing block-wide histogram.
Definition: block_histogram.hpp:224