21 #ifndef ROCPRIM_DEVICE_DEVICE_REDUCE_BY_KEY_CONFIG_HPP_    22 #define ROCPRIM_DEVICE_DEVICE_REDUCE_BY_KEY_CONFIG_HPP_    24 #include "config_types.hpp"    26 #include "../block/block_load.hpp"    27 #include "../block/block_scan.hpp"    28 #include "../block/block_store.hpp"    30 #include "../config.hpp"    37 BEGIN_ROCPRIM_NAMESPACE
    50 template<
unsigned int         BlockSize,
    51          unsigned int         ItemsPerThread,
    55          unsigned int         TilesPerBlock    = 1,
    56          unsigned int         SizeLimit        = ROCPRIM_GRID_SIZE_LIMIT>
    93 template<
class ScanConfig, 
class ReduceConfig>
    98     using scan = ScanConfig;
   100     using reduce = ReduceConfig;
   109 template<
typename Key, 
typename Value>
   112     static constexpr 
unsigned int size_memory_per_item = 
std::max(
sizeof(Key), 
sizeof(Value));
   114     static constexpr 
unsigned int item_scale
   115         = 
static_cast<unsigned int>(ceiling_div(size_memory_per_item, 2 * 
sizeof(
int)));
   121                                                            items_per_thread * size_memory_per_item,
   122                                                            ROCPRIM_WARP_SIZE_64>::value,
   130 template<
unsigned int TargetArch, 
class Key, 
class Value>
   132     : std::conditional_t<std::max(sizeof(Key), sizeof(Value)) <= 16,
   133                          rocprim::reduce_by_key_config_v2<256,
   135                                                           block_load_method::block_load_transpose,
   136                                                           block_load_method::block_load_transpose,
   137                                                           block_scan_algorithm::using_warp_scan,
   138                                                           sizeof(Value) < 16 ? 1 : 2>,
   139                          typename reduce_by_key::fallback_config<Key, Value>::type>
   146 END_ROCPRIM_NAMESPACE
 ROCPRIM_HOST_DEVICE constexpr T max(const T &a, const T &b)
Returns the maximum of its arguments. 
Definition: functional.hpp:55
static constexpr block_load_method load_values_method
A rocprim::block_load_method enum value indicating how the values should be loaded. 
Definition: device_reduce_by_key_config.hpp:74
Definition: config_types.hpp:109
static constexpr unsigned int tiles_per_block
Number of tiles (BlockSize * ItemsPerThread items) to process per block. 
Definition: device_reduce_by_key_config.hpp:63
static constexpr unsigned int block_size
Number of threads in a block. 
Definition: device_reduce_by_key_config.hpp:60
Special type used to show that the given device-level operation will be executed with optimal configu...
Definition: config_types.hpp:45
A warp_scan based algorithm. 
Definition: benchmark_block_reduce.cpp:63
Definition: device_reduce_by_key_config.hpp:110
static constexpr block_scan_algorithm scan_algorithm
A rocprim::block_scan_algorithm enum value indicating how the reduction should be done...
Definition: device_reduce_by_key_config.hpp:78
struct [[deprecated("use reduce_by_key_config_v2")]] reduce_by_key_config
Legacy configuration of device-level reduce-by-key operation. 
Definition: device_reduce_by_key_config.hpp:94
Deprecated: Configuration of device-level scan primitives. 
Definition: block_histogram.hpp:62
block_load_method
block_load_method enumerates the methods available to load data from contiguous memory into a blocked...
Definition: block_load.hpp:41
A striped arrangement of data from contiguous memory is locally transposed into a blocked arrangement...
static constexpr unsigned int items_per_thread
Number of items processed by each thread per tile. 
Definition: device_reduce_by_key_config.hpp:66
hipError_t reduce_by_key(void *temporary_storage, size_t &storage_size, KeysInputIterator keys_input, ValuesInputIterator values_input, const size_t size, UniqueOutputIterator unique_output, AggregatesOutputIterator aggregates_output, UniqueCountOutputIterator unique_count_output, BinaryFunction reduce_op=BinaryFunction(), KeyCompareFunction key_compare_op=KeyCompareFunction(), hipStream_t stream=0, bool debug_synchronous=false)
Parallel reduce-by-key primitive for device level. 
Definition: device_reduce_by_key.hpp:478
Configuration of device-level reduce-by-key operation. 
Definition: device_reduce_by_key_config.hpp:57
static constexpr block_load_method load_keys_method
A rocprim::block_load_method enum value indicating how the keys should be loaded. ...
Definition: device_reduce_by_key_config.hpp:70
static constexpr unsigned int size_limit
Maximum possible number of values. Defaults to ROCPRIM_GRID_SIZE_LIMIT. 
Definition: device_reduce_by_key_config.hpp:81
block_scan_algorithm
Available algorithms for block_scan primitive. 
Definition: block_scan.hpp:41