rocPRIM
block_histogram.hpp
1 // Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_
22 #define ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_
23 
24 #include <type_traits>
25 
26 #include "../config.hpp"
27 #include "../detail/various.hpp"
28 
29 #include "../intrinsics.hpp"
30 #include "../functional.hpp"
31 
32 #include "detail/block_histogram_atomic.hpp"
33 #include "detail/block_histogram_sort.hpp"
34 
37 
38 BEGIN_ROCPRIM_NAMESPACE
39 
42 {
49 
56  using_sort,
57 
60 };
61 
62 namespace detail
63 {
64 
65 // Selector for block_histogram algorithm which gives block histogram implementation
66 // type based on passed block_histogram_algorithm enum
67 template<block_histogram_algorithm Algorithm>
69 
70 template<>
72 {
73  template<class T, unsigned int BlockSizeX, unsigned int BlockSizeY, unsigned int BlockSizeZ, unsigned int ItemsPerThread, unsigned int Bins>
75 };
76 
77 template<>
79 {
80  template<class T, unsigned int BlockSizeX, unsigned int BlockSizeY, unsigned int BlockSizeZ, unsigned int ItemsPerThread, unsigned int Bins>
82 };
83 
84 } // end namespace detail
85 
126 template<
127  class T,
128  unsigned int BlockSizeX,
129  unsigned int ItemsPerThread,
130  unsigned int Bins,
132  unsigned int BlockSizeY = 1,
133  unsigned int BlockSizeZ = 1
134 >
136 #ifndef DOXYGEN_SHOULD_SKIP_THIS
137  : private detail::select_block_histogram_impl<Algorithm>::template type<T, BlockSizeX, BlockSizeY, BlockSizeZ, ItemsPerThread, Bins>
138 #endif
139 {
140  using base_type = typename detail::select_block_histogram_impl<Algorithm>::template type<T, BlockSizeX, BlockSizeY, BlockSizeZ, ItemsPerThread, Bins>;
141  static constexpr unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
142 public:
151  using storage_type = typename base_type::storage_type;
152 
158  template<class Counter>
159  ROCPRIM_DEVICE ROCPRIM_INLINE
160  void init_histogram(Counter hist[Bins])
161  {
162  const auto flat_tid = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
163 
164  ROCPRIM_UNROLL
165  for(unsigned int offset = 0; offset < Bins; offset += BlockSize)
166  {
167  const unsigned int offset_tid = offset + flat_tid;
168  if(offset_tid < Bins)
169  {
170  hist[offset_tid] = Counter();
171  }
172  }
173  }
174 
222  template<class Counter>
223  ROCPRIM_DEVICE ROCPRIM_INLINE
224  void composite(T (&input)[ItemsPerThread],
225  Counter hist[Bins],
226  storage_type& storage)
227  {
228  base_type::composite(input, hist, storage);
229  }
230 
242  template<class Counter>
243  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
244  void composite(T (&input)[ItemsPerThread],
245  Counter hist[Bins])
246  {
247  base_type::composite(input, hist);
248  }
249 
290  template<class Counter>
291  ROCPRIM_DEVICE ROCPRIM_INLINE
292  void histogram(T (&input)[ItemsPerThread],
293  Counter hist[Bins],
294  storage_type& storage)
295  {
296  init_histogram(hist);
298  composite(input, hist, storage);
299  }
300 
312  template<class Counter>
313  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
314  void histogram(T (&input)[ItemsPerThread],
315  Counter hist[Bins])
316  {
317  init_histogram(hist);
319  composite(input, hist);
320  }
321 };
322 
323 END_ROCPRIM_NAMESPACE
324 
326 // end of group blockmodule
327 
328 #endif // ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_
The block_histogram class is a block level parallel primitive which provides methods for constructing...
Definition: block_histogram.hpp:135
Definition: block_histogram_sort.hpp:48
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: block_histogram_atomic.hpp:45
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void histogram(T(&input)[ItemsPerThread], Counter hist[Bins])
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_histogram.hpp:314
ROCPRIM_DEVICE ROCPRIM_INLINE void syncthreads()
Synchronize all threads in a block (tile)
Definition: thread.hpp:216
ROCPRIM_DEVICE ROCPRIM_INLINE void histogram(T(&input)[ItemsPerThread], Counter hist[Bins], storage_type &storage)
Construct a new block-wide histogram.
Definition: block_histogram.hpp:292
A two-phase operation is used.
typename base_type::storage_type storage_type
Struct used to allocate a temporary memory that is required for thread communication during operation...
Definition: block_histogram.hpp:151
Definition: block_histogram.hpp:68
block_histogram_algorithm
Available algorithms for block_histogram primitive.
Definition: block_histogram.hpp:41
Atomic addition is used to update bin count directly.
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void composite(T(&input)[ItemsPerThread], Counter hist[Bins])
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_histogram.hpp:244
ROCPRIM_DEVICE ROCPRIM_INLINE void init_histogram(Counter hist[Bins])
Initialize histogram counters to zero.
Definition: block_histogram.hpp:160
Default block_histogram algorithm.
ROCPRIM_DEVICE ROCPRIM_INLINE void composite(T(&input)[ItemsPerThread], Counter hist[Bins], storage_type &storage)
Update an existing block-wide histogram.
Definition: block_histogram.hpp:224