21 #ifndef ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_ATOMIC_HPP_    22 #define ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_ATOMIC_HPP_    24 #include <type_traits>    26 #include "../../config.hpp"    27 #include "../../detail/various.hpp"    29 #include "../../intrinsics.hpp"    30 #include "../../functional.hpp"    32 BEGIN_ROCPRIM_NAMESPACE
    39     unsigned int BlockSizeX,
    40     unsigned int BlockSizeY,
    41     unsigned int BlockSizeZ,
    42     unsigned int ItemsPerThread,
    47     static constexpr 
unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
    49         std::is_convertible<T, unsigned int>::value,
    50         "T must be convertible to unsigned int"    54     using storage_type = typename ::rocprim::detail::empty_storage_type;
    56     template<
class Counter>
    57     ROCPRIM_DEVICE ROCPRIM_INLINE
    58     void composite(T (&input)[ItemsPerThread],
    62             std::is_same<Counter, unsigned int>::value || std::is_same<Counter, int>::value ||
    63             std::is_same<Counter, float>::value || std::is_same<Counter, unsigned long long>::value,
    64             "Counter must be type that is supported by atomics (float, int, unsigned int, unsigned long long)"    67         for(
unsigned int i = 0; i < ItemsPerThread; ++i)
    69             const unsigned int bin = 
static_cast<unsigned int>(input[i]);
    74             for(
unsigned int b = 1; b < Bins; b <<= 1)
    76                 const unsigned int bit_set      = bin & b;
    77                 const auto         bit_set_mask = 
ballot(bit_set);
    78                 peer_mask &= (bit_set ? bit_set_mask : ~bit_set_mask);
    82             const unsigned int bin_count = 
bit_count(peer_mask);
    89             if(peer_digit_prefix == 0)
    91                 detail::atomic_add(&hist[bin], Counter(bin_count));
    97     template<
class Counter>
    98     ROCPRIM_DEVICE ROCPRIM_INLINE
    99     void composite(T (&input)[ItemsPerThread],
   101                    storage_type& storage)
   104         this->composite(input, hist);
   110 END_ROCPRIM_NAMESPACE
   112 #endif // ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_ATOMIC_HPP_ ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int masked_bit_count(lane_mask_type x, unsigned int add=0)
Masked bit count. 
Definition: warp.hpp:48
Deprecated: Configuration of device-level scan primitives. 
Definition: block_histogram.hpp:62
Definition: block_histogram_atomic.hpp:45
ROCPRIM_DEVICE ROCPRIM_INLINE lane_mask_type ballot(int predicate)
Evaluate predicate for all active work-items in the warp and return an integer whose i-th bit is set if and only if predicate is true for the i-th thread of the warp and the i-th thread is active.
Definition: warp.hpp:38
ROCPRIM_DEVICE ROCPRIM_INLINE void syncthreads()
Synchronize all threads in a block (tile).
Definition: thread.hpp:216
unsigned long long int lane_mask_type
The lane_mask_type is an integer that contains one bit per thread. 
Definition: types.hpp:164
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int bit_count(unsigned int x)
Bit count. 
Definition: bit.hpp:42