rocPRIM
block_radix_rank.hpp
1 // Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCPRIM_BLOCK_BLOCK_RADIX_RANK_HPP_
22 #define ROCPRIM_BLOCK_BLOCK_RADIX_RANK_HPP_
23 
24 #include "../config.hpp"
25 #include "../functional.hpp"
26 
27 #include "../detail/radix_sort.hpp"
28 
29 #include "block_scan.hpp"
30 
31 #include "detail/block_radix_rank_basic.hpp"
32 #include "detail/block_radix_rank_match.hpp"
33 
36 
37 BEGIN_ROCPRIM_NAMESPACE
38 
41 {
43  basic,
48  match,
51 };
52 
53 namespace detail
54 {
55 // Selector for block radix rank algorithm that gives the radix rank implementation.
56 template<block_radix_rank_algorithm Algorithm>
58 
59 template<>
61 {
62  template<unsigned int BlockSizeX,
63  unsigned int RadixBits,
64  unsigned int BlockSizeY,
65  unsigned int BlockSizeZ>
67 };
68 
69 template<>
71 {
72  template<unsigned int BlockSizeX,
73  unsigned int RadixBits,
74  unsigned int BlockSizeY,
75  unsigned int BlockSizeZ>
77 };
78 
79 template<>
81 {
82  template<unsigned int BlockSizeX,
83  unsigned int RadixBits,
84  unsigned int BlockSizeY,
85  unsigned int BlockSizeZ>
87 };
88 } // namespace detail
89 
138 template<unsigned int BlockSizeX,
139  unsigned int RadixBits,
141  unsigned int BlockSizeY = 1,
142  unsigned int BlockSizeZ = 1>
144 #ifndef DOXYGEN_SHOULD_SKIP_THIS
146  Algorithm>::template type<BlockSizeX, RadixBits, BlockSizeY, BlockSizeZ>
147 #endif
148 {
149  using base_type = typename detail::select_block_radix_rank_impl<
150  Algorithm>::template type<BlockSizeX, RadixBits, BlockSizeY, BlockSizeZ>;
151 
152 public:
154  static constexpr unsigned int digits_per_thread = base_type::digits_per_thread;
155 
164  using storage_type = typename base_type::storage_type;
165 
208  template<typename Key, unsigned ItemsPerThread>
209  ROCPRIM_DEVICE void rank_keys(const Key (&keys)[ItemsPerThread],
210  unsigned int (&ranks)[ItemsPerThread],
211  storage_type& storage,
212  unsigned int begin_bit = 0,
213  unsigned int pass_bits = RadixBits)
214  {
215  base_type::rank_keys(keys, ranks, storage, begin_bit, pass_bits);
216  }
217 
231  template<typename Key, unsigned ItemsPerThread>
232  ROCPRIM_DEVICE void rank_keys(const Key (&keys)[ItemsPerThread],
233  unsigned int (&ranks)[ItemsPerThread],
234  unsigned int begin_bit = 0,
235  unsigned int pass_bits = RadixBits)
236  {
237  ROCPRIM_SHARED_MEMORY storage_type storage;
238  base_type::rank_keys(keys, ranks, storage, begin_bit, pass_bits);
239  }
240 
283  template<typename Key, unsigned ItemsPerThread>
284  ROCPRIM_DEVICE void rank_keys_desc(const Key (&keys)[ItemsPerThread],
285  unsigned int (&ranks)[ItemsPerThread],
286  storage_type& storage,
287  unsigned int begin_bit = 0,
288  unsigned int pass_bits = RadixBits)
289  {
290  base_type::rank_keys_desc(keys, ranks, storage, begin_bit, pass_bits);
291  }
292 
306  template<typename Key, unsigned ItemsPerThread>
307  ROCPRIM_DEVICE void rank_keys_desc(const Key (&keys)[ItemsPerThread],
308  unsigned int (&ranks)[ItemsPerThread],
309  unsigned int begin_bit = 0,
310  unsigned int pass_bits = RadixBits)
311  {
312  ROCPRIM_SHARED_MEMORY storage_type storage;
313  base_type::rank_keys_desc(keys, ranks, storage, begin_bit, pass_bits);
314  }
315 
366  template<typename Key, unsigned ItemsPerThread, typename DigitExtractor>
367  ROCPRIM_DEVICE void rank_keys(const Key (&keys)[ItemsPerThread],
368  unsigned int (&ranks)[ItemsPerThread],
369  storage_type& storage,
370  DigitExtractor digit_extractor)
371  {
372  base_type::rank_keys(keys, ranks, storage, digit_extractor);
373  }
374 
393  template<typename Key, unsigned ItemsPerThread, typename DigitExtractor>
394  ROCPRIM_DEVICE void rank_keys(const Key (&keys)[ItemsPerThread],
395  unsigned int (&ranks)[ItemsPerThread],
396  DigitExtractor digit_extractor)
397  {
398  ROCPRIM_SHARED_MEMORY storage_type storage;
399  base_type::rank_keys(keys, ranks, storage, digit_extractor);
400  }
401 
452  template<typename Key, unsigned ItemsPerThread, typename DigitExtractor>
453  ROCPRIM_DEVICE void rank_keys_desc(const Key (&keys)[ItemsPerThread],
454  unsigned int (&ranks)[ItemsPerThread],
455  storage_type& storage,
456  DigitExtractor digit_extractor)
457  {
458  base_type::rank_keys_desc(keys, ranks, storage, digit_extractor);
459  }
460 
479  template<typename Key, unsigned ItemsPerThread, typename DigitExtractor>
480  ROCPRIM_DEVICE void rank_keys_desc(const Key (&keys)[ItemsPerThread],
481  unsigned int (&ranks)[ItemsPerThread],
482  DigitExtractor digit_extractor)
483  {
484  ROCPRIM_SHARED_MEMORY storage_type storage;
485  base_type::rank_keys_desc(keys, ranks, storage, digit_extractor);
486  }
487 
540  template<typename Key, unsigned ItemsPerThread, typename DigitExtractor>
541  ROCPRIM_DEVICE void rank_keys(const Key (&keys)[ItemsPerThread],
542  unsigned int (&ranks)[ItemsPerThread],
543  storage_type& storage,
544  DigitExtractor digit_extractor,
545  unsigned int (&prefix)[digits_per_thread],
546  unsigned int (&counts)[digits_per_thread])
547  {
548  base_type::rank_keys(keys, ranks, storage, digit_extractor, prefix, counts);
549  }
550 };
551 
552 END_ROCPRIM_NAMESPACE
553 
555 // end of group blockmodule
556 
557 #endif // ROCPRIM_BLOCK_BLOCK_RADIX_RANK_HPP_
block_radix_rank_algorithm
Available algorithms for the block_radix_rank primitive.
Definition: block_radix_rank.hpp:40
The basic block radix rank algorithm, configured to memoize intermediate values.
ROCPRIM_DEVICE void rank_keys_desc(const Key(&keys)[ItemsPerThread], unsigned int(&ranks)[ItemsPerThread], DigitExtractor digit_extractor)
Perform descending radix rank over bit keys partitioned across threads in a block.
Definition: block_radix_rank.hpp:480
Definition: block_radix_rank.hpp:57
ROCPRIM_DEVICE void rank_keys_desc(const Key(&keys)[ItemsPerThread], unsigned int(&ranks)[ItemsPerThread], storage_type &storage, unsigned int begin_bit=0, unsigned int pass_bits=RadixBits)
Perform ascending radix rank over bit keys partitioned across threads in a block. ...
Definition: block_radix_rank.hpp:284
Definition: block_radix_rank_match.hpp:42
ROCPRIM_DEVICE void rank_keys(const Key(&keys)[ItemsPerThread], unsigned int(&ranks)[ItemsPerThread], storage_type &storage, unsigned int begin_bit=0, unsigned int pass_bits=RadixBits)
Perform ascending radix rank over keys partitioned across threads in a block.
Definition: block_radix_rank.hpp:209
Definition: block_radix_rank_basic.hpp:42
The block_radix_rank class is a block level parallel primitive that provides methods for ranking item...
Definition: block_radix_rank.hpp:143
The default radix ranking algorithm.
ROCPRIM_DEVICE void rank_keys(const Key(&keys)[ItemsPerThread], unsigned int(&ranks)[ItemsPerThread], storage_type &storage, DigitExtractor digit_extractor, unsigned int(&prefix)[digits_per_thread], unsigned int(&counts)[digits_per_thread])
Perform ascending radix rank over bit keys partitioned across threads in a block. ...
Definition: block_radix_rank.hpp:541
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
typename base_type::storage_type storage_type
Struct used to allocate a temporary memory that is required for thread communication during operation...
Definition: block_radix_rank.hpp:164
ROCPRIM_DEVICE void rank_keys(const Key(&keys)[ItemsPerThread], unsigned int(&ranks)[ItemsPerThread], DigitExtractor digit_extractor)
Perform ascending radix rank over bit keys partitioned across threads in a block. ...
Definition: block_radix_rank.hpp:394
The basic block radix rank algorithm. Keys and ranks are assumed in blocked order.
ROCPRIM_DEVICE void rank_keys(const Key(&keys)[ItemsPerThread], unsigned int(&ranks)[ItemsPerThread], unsigned int begin_bit=0, unsigned int pass_bits=RadixBits)
Perform ascending radix rank over keys partitioned across threads in a block.
Definition: block_radix_rank.hpp:232
ROCPRIM_DEVICE void rank_keys_desc(const Key(&keys)[ItemsPerThread], unsigned int(&ranks)[ItemsPerThread], storage_type &storage, DigitExtractor digit_extractor)
Perform descending radix rank over bit keys partitioned across threads in a block.
Definition: block_radix_rank.hpp:453
ROCPRIM_DEVICE void rank_keys(const Key(&keys)[ItemsPerThread], unsigned int(&ranks)[ItemsPerThread], storage_type &storage, DigitExtractor digit_extractor)
Perform ascending radix rank over bit keys partitioned across threads in a block. ...
Definition: block_radix_rank.hpp:367
Warp-based radix ranking algorithm. Keys and ranks are assumed in warp-striped order for this algorit...
Default block_histogram algorithm.
ROCPRIM_DEVICE void rank_keys_desc(const Key(&keys)[ItemsPerThread], unsigned int(&ranks)[ItemsPerThread], unsigned int begin_bit=0, unsigned int pass_bits=RadixBits)
Perform descending radix rank over keys partitioned across threads in a block.
Definition: block_radix_rank.hpp:307