BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block.
More...
#include <block_radix_rank.hpp>
|
enum | { BINS_TRACKED_PER_THREAD = base_type::digits_per_thread
} |
|
using | TempStorage = typename base_type::storage_type |
|
|
|
|
HIPCUB_DEVICE | BlockRadixRank () |
| Collective constructor using a private static allocation of shared memory as temporary storage.
|
|
HIPCUB_DEVICE | BlockRadixRank (TempStorage &temp_storage) |
| Collective constructor using the specified memory allocation as temporary storage. More...
|
|
|
|
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT > |
HIPCUB_DEVICE void | RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor) |
| Rank keys. More...
|
|
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT > |
HIPCUB_DEVICE void | RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks) [KEYS_PER_THREAD], DigitExtractorT digit_extractor, int(&exclusive_digit_prefix) [BINS_TRACKED_PER_THREAD]) |
| Rank keys. More...
|
|
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = HIPCUB_ARCH>
class BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >
BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block.
- Template Parameters
-
BLOCK_DIM_X | The thread block length in threads along the X dimension |
RADIX_BITS | The number of radix bits per digit place |
IS_DESCENDING | Whether or not the sorted-order is high-to-low |
MEMOIZE_OUTER_SCAN | [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: false). See BlockScanAlgorithm::BLOCK_SCAN_RAKING_MEMOIZE for more details. |
INNER_SCAN_ALGORITHM | [optional] The hipcub::BlockScanAlgorithm algorithm to use (default: hipcub::BLOCK_SCAN_WARP_SCANS) |
SMEM_CONFIG | [optional] Shared memory bank mode (default: hipSharedMemBankSizeFourByte ) |
BLOCK_DIM_Y | [optional] The thread block length in threads along the Y dimension (default: 1) |
BLOCK_DIM_Z | [optional] The thread block length in threads along the Z dimension (default: 1) |
ARCH | [optional] PTX version (default: HIPCUB_ARCH) |
- Overview
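- BlockRadixRank computes, for each key in a block-wide tile, the key's local rank within the tile, based on the digit selected from the key by a caller-supplied digit extractor.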
- Keys must be in a form suitable for radix ranking (i.e., unsigned bits).
- Performance Considerations
-
- Examples
- Example 1: Simple radix rank of 32-bit integer keys
template <int BLOCK_THREADS>
__global__ void ExampleKernel(...)
{
◆ anonymous enum
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = HIPCUB_ARCH>
Enumerator |
---|
BINS_TRACKED_PER_THREAD | Number of bin-starting offsets tracked per thread.
|
◆ BlockRadixRank()
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = HIPCUB_ARCH>
HIPCUB_DEVICE BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >::BlockRadixRank |
( |
TempStorage & |
temp_storage | ) |
|
|
inline |
Collective constructor using the specified memory allocation as temporary storage.
- Parameters
-
[in] | temp_storage | Reference to memory allocation having layout type TempStorage |
◆ RankKeys() [1/2]
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = HIPCUB_ARCH>
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT >
HIPCUB_DEVICE void BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >::RankKeys |
( |
UnsignedBits(&) |
keys[KEYS_PER_THREAD], |
|
|
int(&) |
ranks[KEYS_PER_THREAD], |
|
|
DigitExtractorT |
digit_extractor |
|
) |
| |
|
inline |
Rank keys.
- Parameters
-
[in] | keys | Keys for this tile |
[out] | ranks | For each key, the local rank within the tile |
[in] | digit_extractor | The digit extractor |
◆ RankKeys() [2/2]
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, hipSharedMemConfig SMEM_CONFIG = hipSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int ARCH = HIPCUB_ARCH>
template<typename UnsignedBits , int KEYS_PER_THREAD, typename DigitExtractorT >
HIPCUB_DEVICE void BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, ARCH >::RankKeys |
( |
UnsignedBits(&) |
keys[KEYS_PER_THREAD], |
|
|
int(&) |
ranks[KEYS_PER_THREAD], |
|
|
DigitExtractorT |
digit_extractor, |
|
|
int(&) |
exclusive_digit_prefix[BINS_TRACKED_PER_THREAD] |
|
) |
| |
|
inline |
Rank keys.
For the lower RADIX_DIGITS
threads, the exclusive prefix sums of the digit counts are returned in exclusive_digit_prefix for the digits tracked by the corresponding thread.
- Parameters
-
[in] | keys | Keys for this tile |
[out] | ranks | For each key, the local rank within the tile |
[in] | digit_extractor | The digit extractor |
[out] | exclusive_digit_prefix | The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] |
The documentation for this class was generated from the following file: