21 #ifndef ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_ATOMIC_HPP_ 22 #define ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_ATOMIC_HPP_ 24 #include <type_traits> 26 #include "../../config.hpp" 27 #include "../../detail/various.hpp" 29 #include "../../intrinsics.hpp" 30 #include "../../functional.hpp" 32 BEGIN_ROCPRIM_NAMESPACE
39 unsigned int BlockSizeX,
40 unsigned int BlockSizeY,
41 unsigned int BlockSizeZ,
42 unsigned int ItemsPerThread,
47 static constexpr
unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
49 std::is_convertible<T, unsigned int>::value,
50 "T must be convertible to unsigned int" 54 using storage_type = typename ::rocprim::detail::empty_storage_type;
56 template<
class Counter>
57 ROCPRIM_DEVICE ROCPRIM_INLINE
58 void composite(T (&input)[ItemsPerThread],
62 std::is_same<Counter, unsigned int>::value || std::is_same<Counter, int>::value ||
63 std::is_same<Counter, float>::value || std::is_same<Counter, unsigned long long>::value,
64 "Counter must be type that is supported by atomics (float, int, unsigned int, unsigned long long)" 67 for(
unsigned int i = 0; i < ItemsPerThread; ++i)
69 const unsigned int bin =
static_cast<unsigned int>(input[i]);
74 for(
unsigned int b = 1; b < Bins; b <<= 1)
76 const unsigned int bit_set = bin & b;
77 const auto bit_set_mask =
ballot(bit_set);
78 peer_mask &= (bit_set ? bit_set_mask : ~bit_set_mask);
82 const unsigned int bin_count =
bit_count(peer_mask);
89 if(peer_digit_prefix == 0)
91 detail::atomic_add(&hist[bin], Counter(bin_count));
97 template<
class Counter>
98 ROCPRIM_DEVICE ROCPRIM_INLINE
99 void composite(T (&input)[ItemsPerThread],
101 storage_type& storage)
104 this->composite(input, hist);
110 END_ROCPRIM_NAMESPACE
112 #endif // ROCPRIM_BLOCK_DETAIL_BLOCK_HISTOGRAM_ATOMIC_HPP_ ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int masked_bit_count(lane_mask_type x, unsigned int add=0)
Masked bit count.
Definition: warp.hpp:48
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: block_histogram_atomic.hpp:45
ROCPRIM_DEVICE ROCPRIM_INLINE lane_mask_type ballot(int predicate)
Evaluate predicate for all active work-items in the warp and return an integer whose i-th bit is set if and only if predicate is true for the i-th thread of the warp and the i-th thread is active.
Definition: warp.hpp:38
ROCPRIM_DEVICE ROCPRIM_INLINE void syncthreads()
Synchronize all threads in a block (tile)
Definition: thread.hpp:216
unsigned long long int lane_mask_type
The lane_mask_type is an integer that contains one bit per thread.
Definition: types.hpp:164
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int bit_count(unsigned int x)
Bit count.
Definition: bit.hpp:42