21 #ifndef ROCPRIM_DEVICE_DEVICE_SEGMENTED_RADIX_SORT_CONFIG_HPP_ 22 #define ROCPRIM_DEVICE_DEVICE_SEGMENTED_RADIX_SORT_CONFIG_HPP_ 25 #include <type_traits> 27 #include "../config.hpp" 28 #include "../detail/various.hpp" 29 #include "../functional.hpp" 31 #include "config_types.hpp" 36 BEGIN_ROCPRIM_NAMESPACE
56 template<
unsigned int LogicalWarpSizeSmall,
57 unsigned int ItemsPerThreadSmall,
58 unsigned int BlockSizeSmall = 256,
59 unsigned int PartitioningThreshold = 3000,
60 bool EnableUnpartitionedWarpSort =
true,
61 unsigned int LogicalWarpSizeMedium =
std::max(32u, LogicalWarpSizeSmall),
62 unsigned int ItemsPerThreadMedium =
std::max(4u, ItemsPerThreadSmall),
63 unsigned int BlockSizeMedium = 256>
66 static_assert(LogicalWarpSizeSmall * ItemsPerThreadSmall
67 <= LogicalWarpSizeMedium * ItemsPerThreadMedium,
68 "The number of items processed by a small warp cannot be larger than the number " 69 "of items processed by a medium warp");
118 template<
class Key,
unsigned int MediumWarpSize = ROCPRIM_WARP_SIZE_32>
120 = std::conditional_t<
sizeof(Key) < 2,
149 unsigned int LongRadixBits,
150 unsigned int ShortRadixBits,
157 static constexpr
unsigned int long_radix_bits = LongRadixBits;
159 static constexpr
unsigned int short_radix_bits = ShortRadixBits;
169 template<
class Key,
class Value>
172 static constexpr
unsigned int item_scale =
173 ::rocprim::detail::ceiling_div<unsigned int>(
::rocprim::max(
sizeof(Key),
sizeof(Value)),
sizeof(int));
175 using type = select_type<
177 (
sizeof(Key) == 1 &&
sizeof(Value) <= 8),
181 (
sizeof(Key) == 2 &&
sizeof(Value) <= 8),
185 (
sizeof(Key) == 4 &&
sizeof(Value) <= 8),
189 (
sizeof(Key) == 8 &&
sizeof(Value) <= 8),
199 select_type_case<sizeof(Key) == 1, segmented_radix_sort_config<8, 7, kernel_config<256, 10>, select_warp_sort_config_t<Key> > >,
200 select_type_case<sizeof(Key) == 2, segmented_radix_sort_config<8, 7, kernel_config<256, 10>, select_warp_sort_config_t<Key> > >,
201 select_type_case<sizeof(Key) == 4, segmented_radix_sort_config<7, 6, kernel_config<256, 9>, select_warp_sort_config_t<Key> > >,
202 select_type_case<sizeof(Key) == 8, segmented_radix_sort_config<7, 6, kernel_config<256, 7>, select_warp_sort_config_t<Key> > >
205 template<
class Key,
class Value>
208 static constexpr
unsigned int item_scale =
209 ::rocprim::detail::ceiling_div<unsigned int>(
::rocprim::max(
sizeof(Key),
sizeof(Value)),
sizeof(int));
211 using type = select_type<
213 (
sizeof(Key) == 1 &&
sizeof(Value) <= 8),
217 (
sizeof(Key) == 2 &&
sizeof(Value) <= 8),
221 (
sizeof(Key) == 4 &&
sizeof(Value) <= 8),
225 (
sizeof(Key) == 8 &&
sizeof(Value) <= 8),
235 select_type_case<sizeof(Key) == 1, segmented_radix_sort_config<4, 3, kernel_config<256, 10>, select_warp_sort_config_t<Key> > >,
236 select_type_case<sizeof(Key) == 2, segmented_radix_sort_config<6, 5, kernel_config<256, 10>, select_warp_sort_config_t<Key> > >,
237 select_type_case<sizeof(Key) == 4, segmented_radix_sort_config<7, 6, kernel_config<256, 17>, select_warp_sort_config_t<Key> > >,
238 select_type_case<sizeof(Key) == 8, segmented_radix_sort_config<7, 6, kernel_config<256, 15>, select_warp_sort_config_t<Key> > >
241 template<
class Key,
class Value>
244 static constexpr
unsigned int item_scale =
245 ::rocprim::detail::ceiling_div<unsigned int>(
::rocprim::max(
sizeof(Key),
sizeof(Value)),
sizeof(int));
247 using type = select_type<
249 (
sizeof(Key) == 1 &&
sizeof(Value) <= 8),
255 (
sizeof(Key) == 2 &&
sizeof(Value) <= 8),
261 (
sizeof(Key) == 4 &&
sizeof(Value) <= 8),
262 segmented_radix_sort_config<7,
265 select_warp_sort_config_t<Key, ROCPRIM_WARP_SIZE_64>>>,
267 (
sizeof(Key) == 8 &&
sizeof(Value) <= 8),
268 segmented_radix_sort_config<7,
271 select_warp_sort_config_t<Key, ROCPRIM_WARP_SIZE_64>>>,
272 segmented_radix_sort_config<7,
275 select_warp_sort_config_t<Key, ROCPRIM_WARP_SIZE_64>>>;
283 segmented_radix_sort_config<4,
285 kernel_config<256, 10>,
286 select_warp_sort_config_t<Key, ROCPRIM_WARP_SIZE_64>>>,
289 segmented_radix_sort_config<6,
291 kernel_config<256, 10>,
292 select_warp_sort_config_t<Key, ROCPRIM_WARP_SIZE_64>>>,
295 segmented_radix_sort_config<7,
297 kernel_config<256, 17>,
298 select_warp_sort_config_t<Key, ROCPRIM_WARP_SIZE_64>>>,
301 segmented_radix_sort_config<7,
303 kernel_config<256, 15>,
304 select_warp_sort_config_t<Key, ROCPRIM_WARP_SIZE_64>>>>
307 template<
class Key,
class Value>
310 static constexpr
unsigned int item_scale =
311 ::rocprim::detail::ceiling_div<unsigned int>(
::rocprim::max(
sizeof(Key),
sizeof(Value)),
sizeof(int));
313 using type = select_type<
315 (
sizeof(Key) == 1 &&
sizeof(Value) <= 8),
319 (
sizeof(Key) == 2 &&
sizeof(Value) <= 8),
323 (
sizeof(Key) == 4 &&
sizeof(Value) <= 8),
327 (
sizeof(Key) == 8 &&
sizeof(Value) <= 8),
337 select_type_case<sizeof(Key) == 1, segmented_radix_sort_config<4, 3, kernel_config<256, 10>, select_warp_sort_config_t<Key> > >,
338 select_type_case<sizeof(Key) == 2, segmented_radix_sort_config<6, 5, kernel_config<256, 10>, select_warp_sort_config_t<Key> > >,
339 select_type_case<sizeof(Key) == 4, segmented_radix_sort_config<7, 6, kernel_config<256, 17>, select_warp_sort_config_t<Key> > >,
340 select_type_case<sizeof(Key) == 8, segmented_radix_sort_config<7, 6, kernel_config<256, 15>, select_warp_sort_config_t<Key> > >
343 template<
unsigned int TargetArch,
class Key,
class Value>
347 select_arch_case<803, detail::segmented_radix_sort_config_803<Key, Value>>,
348 select_arch_case<900, detail::segmented_radix_sort_config_900<Key, Value>>,
349 select_arch_case<906, detail::segmented_radix_sort_config_90a<Key, Value>>,
350 select_arch_case<908, detail::segmented_radix_sort_config_90a<Key, Value>>,
351 select_arch_case<ROCPRIM_ARCH_90a, detail::segmented_radix_sort_config_90a<Key, Value>>,
352 select_arch_case<1030, detail::segmented_radix_sort_config_1030<Key, Value>>,
353 detail::segmented_radix_sort_config_900<Key, Value>>
358 END_ROCPRIM_NAMESPACE
363 #endif // ROCPRIM_DEVICE_DEVICE_SEGMENTED_RADIX_SORT_CONFIG_HPP_ Empty type used as a placeholder, usually used to flag that a given template parameter should not be used.
Definition: types.hpp:135
static constexpr unsigned int items_per_thread_small
The number of items processed by a thread in the small segment processing kernel.
Definition: device_segmented_radix_sort_config.hpp:73
Definition: device_segmented_radix_sort_config.hpp:206
ROCPRIM_HOST_DEVICE constexpr T max(const T &a, const T &b)
Returns the maximum of its arguments.
Definition: functional.hpp:55
Definition: device_segmented_radix_sort_config.hpp:344
static constexpr bool enable_unpartitioned_warp_sort
If set to true, warp sort can be used to sort the small segments, even if the total number of segments is below partitioning_threshold.
Definition: device_segmented_radix_sort_config.hpp:81
Indicates if the warp-level sorting is disabled in the device segmented radix sort configuration.
Definition: device_segmented_radix_sort_config.hpp:92
static constexpr unsigned int partitioning_threshold
If the number of segments is at least partitioning_threshold, then the segments are partitioned into ...
Definition: device_segmented_radix_sort_config.hpp:78
Configuration of device-level segmented radix sort operation.
Definition: device_segmented_radix_sort_config.hpp:154
SortConfig sort
Configuration of radix sort kernel.
Definition: device_segmented_radix_sort_config.hpp:161
Definition: various.hpp:236
static constexpr unsigned int block_size_small
The number of threads per block in the small segment processing kernel.
Definition: device_segmented_radix_sort_config.hpp:75
Definition: device_segmented_radix_sort_config.hpp:170
Definition: device_segmented_radix_sort_config.hpp:242
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: device_segmented_radix_sort_config.hpp:308
static constexpr unsigned int logical_warp_size_small
The number of threads in the logical warp in the small segment processing kernel.
Definition: device_segmented_radix_sort_config.hpp:71
static constexpr unsigned int items_per_thread_medium
The number of items processed by a thread in the medium segment processing kernel.
Definition: device_segmented_radix_sort_config.hpp:85
Definition: config_types.hpp:140
Configuration of the warp sort part of the device segmented radix sort operation.
Definition: device_segmented_radix_sort_config.hpp:64
static constexpr unsigned int logical_warp_size_medium
The number of threads in the logical warp in the medium segment processing kernel.
Definition: device_segmented_radix_sort_config.hpp:83
static constexpr unsigned int block_size_medium
The number of threads per block in the medium segment processing kernel.
Definition: device_segmented_radix_sort_config.hpp:87
std::conditional_t<sizeof(Key) < 2, DisabledWarpSortConfig, WarpSortConfig<32, 4, 256, 3000, (sizeof(Key) > 2), MediumWarpSize, 4, 256>> select_warp_sort_config_t
Selects the appropriate WarpSortConfig based on the size of the key type.
Definition: device_segmented_radix_sort_config.hpp:130