21 #ifndef ROCPRIM_DEVICE_DETAIL_DEVICE_SEGMENTED_RADIX_SORT_HPP_ 22 #define ROCPRIM_DEVICE_DETAIL_DEVICE_SEGMENTED_RADIX_SORT_HPP_ 24 #include <type_traits> 27 #include "../../config.hpp" 28 #include "../../detail/various.hpp" 30 #include "../../intrinsics.hpp" 31 #include "../../functional.hpp" 32 #include "../../types.hpp" 34 #include "../../block/block_load.hpp" 35 #include "../../block/block_store.hpp" 36 #include "../../block/block_scan.hpp" 38 #include "../../warp/warp_load.hpp" 39 #include "../../warp/warp_sort.hpp" 40 #include "../../warp/warp_store.hpp" 42 #include "../device_segmented_radix_sort_config.hpp" 43 #include "device_radix_sort.hpp" 45 BEGIN_ROCPRIM_NAMESPACE
// Template parameters and member aliases of a radix-sort helper class.
// NOTE(review): the `template<...> class` header and several declarations
// (e.g. key_type, storage_type) are not visible in this listing; only the
// lines shown here are described.
53 unsigned int WarpSize,
54 unsigned int BlockSize,
55 unsigned int ItemsPerThread,
56 unsigned int RadixBits,
// Number of distinct digit values for a RadixBits-wide digit (2^RadixBits).
61 static constexpr
unsigned int radix_size = 1 << RadixBits;
64 using value_type = Value;
// Block-level scan over unsigned int, instantiated with radix_size as its
// second template argument.
67 using scan_type = typename ::rocprim::block_scan<unsigned int, radix_size>;
// Tail of a template argument list whose head is not visible here.
// NOTE(review): presumably the sort-and-scatter helper alias - confirm.
69 BlockSize, ItemsPerThread, RadixBits, Descending,
70 key_type, value_type,
unsigned int>;
// sort (iterator overload with temporary buffers): picks the source and
// destination buffers for one radix pass and forwards them to another call.
//
// The body reads `bit`, `end_bit` and `keys_tmp`, but their parameter
// declarations are among the lines missing from this listing.
// NOTE(review): the condition choosing between the two argument lists in each
// branch is not visible - presumably a `to_output` flag; confirm.
81 class KeysInputIterator,
82 class KeysOutputIterator,
83 class ValuesInputIterator,
84 class ValuesOutputIterator
86 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
87 void sort(KeysInputIterator keys_input,
89 KeysOutputIterator keys_output,
90 ValuesInputIterator values_input,
91 value_type * values_tmp,
92 ValuesOutputIterator values_output,
94 unsigned int begin_offset,
95 unsigned int end_offset,
97 unsigned int begin_bit,
// Clamp the digit width so the last pass does not read bits past end_bit.
103 const unsigned int current_radix_bits =
::rocprim::min(RadixBits, end_bit - bit);
// The first pass must read from the caller's input; later passes read from
// whichever buffer the previous pass wrote.
105 const bool is_first_iteration = (bit == begin_bit);
107 if(is_first_iteration)
// First pass, variant A: input -> output.
112 keys_input, keys_output, values_input, values_output,
113 begin_offset, end_offset,
114 bit, current_radix_bits,
// First pass, variant B: input -> tmp.
121 keys_input, keys_tmp, values_input, values_tmp,
122 begin_offset, end_offset,
123 bit, current_radix_bits,
// Later pass, variant A: tmp -> output.
133 keys_tmp, keys_output, values_tmp, values_output,
134 begin_offset, end_offset,
135 bit, current_radix_bits,
// Later pass, variant B: output -> tmp.
142 keys_output, keys_tmp, values_output, values_tmp,
143 begin_offset, end_offset,
144 bit, current_radix_bits,
// sort (raw-pointer overload with temporary buffers): same buffer selection as
// the iterator overload, but because every buffer is a plain pointer the
// choice is stored in four local pointers and a single forwarding argument
// list is used at the end.
//
// NOTE(review): the conditions guarding the second, third and fourth
// assignment groups are not visible in this listing - presumably
// `to_output` / the else-branch of is_first_iteration; confirm.
152 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
153 void sort(key_type * keys_input,
155 key_type * keys_output,
156 value_type * values_input,
157 value_type * values_tmp,
158 value_type * values_output,
160 unsigned int begin_offset,
161 unsigned int end_offset,
163 unsigned int begin_bit,
164 unsigned int end_bit,
// Clamp the digit width so the last pass does not read bits past end_bit.
169 const unsigned int current_radix_bits =
::rocprim::min(RadixBits, end_bit - bit);
171 const bool is_first_iteration = (bit == begin_bit);
// Source/destination pointers for this pass, filled in below.
173 key_type * current_keys_input;
174 key_type * current_keys_output;
175 value_type * current_values_input;
176 value_type * current_values_output;
177 if(is_first_iteration)
// input -> output
181 current_keys_input = keys_input;
182 current_keys_output = keys_output;
183 current_values_input = values_input;
184 current_values_output = values_output;
// input -> tmp
188 current_keys_input = keys_input;
189 current_keys_output = keys_tmp;
190 current_values_input = values_input;
191 current_values_output = values_tmp;
// tmp -> output
198 current_keys_input = keys_tmp;
199 current_keys_output = keys_output;
200 current_values_input = values_tmp;
201 current_values_output = values_output;
// output -> tmp
205 current_keys_input = keys_output;
206 current_keys_output = keys_tmp;
207 current_values_input = values_output;
208 current_values_output = values_tmp;
// Single forwarding argument list using the pointers selected above.
212 current_keys_input, current_keys_output, current_values_input, current_values_output,
213 begin_offset, end_offset,
214 bit, current_radix_bits,
// sort (single radix pass): processes the items in [begin_offset, end_offset)
// for the digit described by `bit` / `current_radix_bits`.
//
// Visible steps: (1) obtain a per-digit count in digit_count, (2) exclusive
// scan of the counts to get each digit's start position, (3) pass that start
// position on together with the input/output buffers.
// NOTE(review): the names of the two helper calls (the ones receiving
// storage.count_helper and storage.sort_and_scatter_helper) are not visible
// in this listing; the step descriptions are inferred from the storage member
// names - confirm against the full source.
222 class KeysInputIterator,
223 class KeysOutputIterator,
224 class ValuesInputIterator,
225 class ValuesOutputIterator
227 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
228 void sort(KeysInputIterator keys_input,
229 KeysOutputIterator keys_output,
230 ValuesInputIterator values_input,
231 ValuesOutputIterator values_output,
232 unsigned int begin_offset,
233 unsigned int end_offset,
235 unsigned int current_radix_bits,
238 unsigned int digit_count;
241 begin_offset, end_offset,
242 bit, current_radix_bits,
243 storage.count_helper,
247 unsigned int digit_start;
// Exclusive prefix sum of digit_count with initial value 0.
248 scan_type().exclusive_scan(digit_count, digit_start, 0);
// Convert the segment-relative start into an absolute offset.
249 digit_start += begin_offset;
254 keys_input, keys_output, values_input, values_output,
255 begin_offset, end_offset,
256 bit, current_radix_bits,
258 storage.sort_and_scatter_helper
// Template parameters, type aliases and storage members of a helper that
// loads, sorts and stores a segment with block-level primitives.
// NOTE(review): the class header, the key_codec alias and the aggregate that
// wraps the storage members are not visible in this listing.
268 unsigned int BlockSize,
269 unsigned int ItemsPerThread,
274 using key_type = Key;
275 using value_type = Value;
// Bit representation of a key as defined by key_codec.
278 using bit_key_type =
typename key_codec::bit_key_type;
// Keys and values are loaded and stored with the transpose block methods.
279 using keys_load_type = ::rocprim::block_load<
280 key_type, BlockSize, ItemsPerThread,
281 ::rocprim::block_load_method::block_load_transpose>;
282 using values_load_type = ::rocprim::block_load<
283 value_type, BlockSize, ItemsPerThread,
284 ::rocprim::block_load_method::block_load_transpose>;
// Block-wide radix sort of key/value pairs.
285 using sort_type = ::rocprim::block_radix_sort<key_type, BlockSize, ItemsPerThread, value_type>;
286 using keys_store_type = ::rocprim::block_store<
287 key_type, BlockSize, ItemsPerThread,
288 ::rocprim::block_store_method::block_store_transpose>;
289 using values_store_type = ::rocprim::block_store<
290 value_type, BlockSize, ItemsPerThread,
291 ::rocprim::block_store_method::block_store_transpose>;
// False for keys-only sorting, i.e. when Value is ::rocprim::empty_type.
293 static constexpr
bool with_values = !std::is_same<value_type, ::rocprim::empty_type>::value;
// One storage member per primitive used by sort().
// NOTE(review): whether these overlap (union) or not is not visible here.
299 typename keys_load_type::storage_type keys_load;
300 typename values_load_type::storage_type values_load;
301 typename sort_type::storage_type sort;
302 typename keys_store_type::storage_type keys_store;
303 typename values_store_type::storage_type values_store;
// sort (iterator overload with temporary buffers): forwards to another call,
// writing either to the output buffers or to the temporary buffers.
// NOTE(review): the condition selecting between the two argument lists and
// the `keys_tmp` / `to_output` / `storage` parameters are not visible in this
// listing - presumably the choice is made on `to_output`; confirm.
307 class KeysInputIterator,
308 class KeysOutputIterator,
309 class ValuesInputIterator,
310 class ValuesOutputIterator
312 ROCPRIM_DEVICE ROCPRIM_INLINE
313 void sort(KeysInputIterator keys_input,
315 KeysOutputIterator keys_output,
316 ValuesInputIterator values_input,
317 value_type * values_tmp,
318 ValuesOutputIterator values_output,
320 unsigned int begin_offset,
321 unsigned int end_offset,
322 unsigned int begin_bit,
323 unsigned int end_bit,
// Variant A: input -> output.
329 keys_input, keys_output, values_input, values_output,
330 begin_offset, end_offset,
// Variant B: input -> tmp.
338 keys_input, keys_tmp, values_input, values_tmp,
339 begin_offset, end_offset,
// sort (raw-pointer overload with temporary buffers): because all buffers are
// plain pointers, the destination is chosen inline with `to_output` and a
// single forwarding argument list is used.
// NOTE(review): the `keys_tmp`, `to_output` and `storage` parameters are used
// or implied here but their declarations are not visible in this listing.
347 ROCPRIM_DEVICE ROCPRIM_INLINE
348 void sort(key_type * keys_input,
350 key_type * keys_output,
351 value_type * values_input,
352 value_type * values_tmp,
353 value_type * values_output,
355 unsigned int begin_offset,
356 unsigned int end_offset,
357 unsigned int begin_bit,
358 unsigned int end_bit,
// Destination is the output buffers when to_output is true, else the tmp buffers.
362 keys_input, (to_output ? keys_output : keys_tmp), values_input, (to_output ? values_output : values_tmp),
363 begin_offset, end_offset,
// sort (bool-returning worker): sorts the items in [begin_offset, end_offset)
// using block-level load / radix sort / store.
//
// Before doing the work itself it tries a "shorter" variant of the same
// helper instantiated with ItemsPerThread / 2.
// NOTE(review): the bodies of the two early `if`s and the return statements
// are not visible in this listing. Presumably the function returns false when
// the segment exceeds items_per_block and true once the segment has been
// sorted - confirm against the full source.
370 class KeysInputIterator,
371 class KeysOutputIterator,
372 class ValuesInputIterator,
373 class ValuesOutputIterator
375 ROCPRIM_DEVICE ROCPRIM_INLINE
376 bool sort(KeysInputIterator keys_input,
377 KeysOutputIterator keys_output,
378 ValuesInputIterator values_input,
379 ValuesOutputIterator values_output,
380 unsigned int begin_offset,
381 unsigned int end_offset,
382 unsigned int begin_bit,
383 unsigned int end_bit,
// Maximum number of items one block can hold with this configuration.
386 constexpr
unsigned int items_per_block = BlockSize * ItemsPerThread;
// Template arguments of the "shorter" helper: same types, half the items per thread.
389 key_type, value_type,
390 BlockSize, ItemsPerThread / 2, Descending
// Segment does not fit into one block with this configuration.
394 if(end_offset - begin_offset > items_per_block)
// Give the shorter helper a chance first; it reuses this helper's storage
// object, reinterpreted as its own storage_type.
400 const bool processed_by_shorter =
401 shorter_single_block_helper().sort(
402 keys_input, keys_output, values_input, values_output,
403 begin_offset, end_offset,
405 reinterpret_cast<typename shorter_single_block_helper::storage_type&>(storage)
407 if(processed_by_shorter)
412 key_type keys[ItemsPerThread];
413 value_type values[ItemsPerThread];
414 const unsigned int valid_count = end_offset - begin_offset;
// Default key for slots past valid_count: the key whose bit representation
// is all ones, as decoded by key_codec.
416 const key_type out_of_bounds = key_codec::decode(bit_key_type(-1));
417 keys_load_type().load(keys_input + begin_offset, keys, valid_count, out_of_bounds, storage.keys_load);
// NOTE(review): the guard around the value load/store (presumably
// `with_values`) is not visible here.
421 values_load_type().load(values_input + begin_offset, values, valid_count, storage.values_load);
425 sort_block<Descending>(sort_type(), keys, values, storage.sort, begin_bit, end_bit);
// Only the first valid_count items are written back.
428 keys_store_type().store(keys_output + begin_offset, keys, valid_count, storage.keys_store);
432 values_store_type().store(values_output + begin_offset, values, valid_count, storage.values_store);
// Fragment of a helper variant that needs no storage: storage_type is empty
// and sort() takes unnamed (i.e. unused) parameters.
// NOTE(review): the class header and the function body are not visible in
// this listing. Presumably this is the terminating specialization for the
// ItemsPerThread / 2 recursion of the single-block helper - confirm.
442 unsigned int BlockSize,
449 struct storage_type { };
452 class KeysInputIterator,
453 class KeysOutputIterator,
454 class ValuesInputIterator,
455 class ValuesOutputIterator
457 ROCPRIM_DEVICE ROCPRIM_INLINE
458 bool sort(KeysInputIterator,
461 ValuesOutputIterator,
// Compile-time configuration carriers for the warp-sort helper and the alias
// templates that derive them from a user-facing Config.
// NOTE(review): the struct names/headers are not visible in this listing.
//
// Configuration template: exposes its three template arguments as constants.
474 template<
unsigned int LogicalWarpSize,
unsigned int ItemsPerThread,
unsigned int BlockSize>
477 static constexpr
unsigned int logical_warp_size = LogicalWarpSize;
478 static constexpr
unsigned int items_per_thread = ItemsPerThread;
479 static constexpr
unsigned int block_size = BlockSize;
// A second configuration with fixed values of 1.
// NOTE(review): presumably DisabledWarpSortHelperConfig - confirm.
484 static constexpr
unsigned int logical_warp_size = 1;
485 static constexpr
unsigned int items_per_thread = 1;
// Helper configuration for "small" segments: a std::conditional_t keyed on
// whether Config is DisabledWarpSortConfig; the enabled alternative is built
// from the *_small members of Config.
489 template<
class Config>
490 using select_warp_sort_helper_config_small_t
491 = std::conditional_t<std::is_same<DisabledWarpSortConfig, Config>::value,
494 Config::items_per_thread_small,
495 Config::block_size_small>>;
// Same selection for "medium" segments, using the *_medium members of Config;
// yields DisabledWarpSortHelperConfig when Config is DisabledWarpSortConfig.
497 template<
class Config>
498 using select_warp_sort_helper_config_medium_t
499 = std::conditional_t<std::is_same<DisabledWarpSortConfig, Config>::value,
500 DisabledWarpSortHelperConfig,
502 Config::items_per_thread_medium,
503 Config::block_size_medium>>;
// Fragment of a helper variant that handles zero items per warp and uses
// ::rocprim::empty_type as its storage.
// NOTE(review): presumably the warp-sort helper specialization selected when
// warp sort is disabled; the class header and the name/body of the variadic
// member function below are not visible in this listing - confirm.
514 static constexpr
unsigned int items_per_warp = 0;
515 using storage_type = ::rocprim::empty_type;
// Variadic member: accepts any argument list.
517 template<
class... Args>
518 ROCPRIM_DEVICE ROCPRIM_INLINE
// Warp-sort helper for configurations other than DisabledWarpSortHelperConfig
// (see the enable_if_t below): type aliases, the stable comparator factory
// and the storage members.
// NOTE(review): the class name/header and the aggregate wrapping the storage
// members are not visible in this listing.
524 template<
class Config,
class Key,
class Value,
bool Descending>
530 std::enable_if_t<!std::is_same<DisabledWarpSortHelperConfig, Config>::value>>
532 static constexpr
unsigned int logical_warp_size = Config::logical_warp_size;
533 static constexpr
unsigned int items_per_thread = Config::items_per_thread;
535 using key_type = Key;
536 using value_type = Value;
537 using key_codec = ::rocprim::detail::radix_key_codec<key_type, Descending>;
538 using bit_key_type =
typename key_codec::bit_key_type;
// Keys and values are loaded with the striped warp-load method.
540 using keys_load_type = ::rocprim::warp_load<key_type, items_per_thread, logical_warp_size, ::rocprim::warp_load_method::warp_load_striped>;
541 using values_load_type = ::rocprim::warp_load<value_type, items_per_thread, logical_warp_size, ::rocprim::warp_load_method::warp_load_striped>;
542 using keys_store_type = ::rocprim::warp_store<key_type, items_per_thread, logical_warp_size>;
543 using values_store_type = ::rocprim::warp_store<value_type, items_per_thread, logical_warp_size>;
// Key comparator; UseRadixMask selects the variant constructed from a bit range.
544 template<
bool UseRadixMask>
545 using radix_comparator_type = ::rocprim::detail::radix_merge_compare<Descending, UseRadixMask, key_type>;
// A key paired with an unsigned index that serves as a tie-breaker.
546 using stable_key_type = ::rocprim::tuple<key_type, unsigned int>;
547 using sort_type = ::rocprim::warp_sort<stable_key_type, logical_warp_size, value_type>;
// False for keys-only sorting, i.e. when Value is ::rocprim::empty_type.
549 static constexpr
bool with_values = !std::is_same<value_type, ::rocprim::empty_type>::value;
// Wraps a key comparator into one over stable_key_type: `a` precedes `b` if
// the keys compare that way, or - when neither key precedes the other - if
// a's index is smaller. This makes the ordering of equal keys deterministic.
551 template<
class ComparatorT>
552 ROCPRIM_DEVICE ROCPRIM_INLINE
553 decltype(
auto) make_stable_comparator(ComparatorT comparator)
555 return [comparator](
const stable_key_type& a,
const stable_key_type& b) ->
bool 557 const bool ab = comparator(rocprim::get<0>(a), rocprim::get<0>(b));
558 const bool ba = comparator(rocprim::get<0>(b), rocprim::get<0>(a));
559 return ab || (!ba && (rocprim::get<1>(a) < rocprim::get<1>(b)));
// Number of items one logical warp processes.
564 static constexpr
unsigned int items_per_warp = items_per_thread * logical_warp_size;
// One storage member per primitive used by sort().
// NOTE(review): whether these overlap (union) or not is not visible here.
568 typename keys_load_type::storage_type keys_load;
569 typename values_load_type::storage_type values_load;
570 typename keys_store_type::storage_type keys_store;
571 typename values_store_type::storage_type values_store;
572 typename sort_type::storage_type sort;
// sort (warp-sort worker): loads the items in [begin_offset, end_offset),
// sorts them with warp_sort using a stable comparator, and stores the first
// num_items results back.
// NOTE(review): the guards around the value load/store (presumably
// `with_values`) and the braces/else of the comparator selection are not
// visible in this listing.
576 class KeysInputIterator,
577 class KeysOutputIterator,
578 class ValuesInputIterator,
579 class ValuesOutputIterator
581 ROCPRIM_DEVICE ROCPRIM_INLINE
582 void sort(KeysInputIterator keys_input,
583 KeysOutputIterator keys_output,
584 ValuesInputIterator values_input,
585 ValuesOutputIterator values_output,
586 unsigned int begin_offset,
587 unsigned int end_offset,
588 unsigned int begin_bit,
589 unsigned int end_bit,
590 storage_type& storage)
592 const unsigned int num_items = end_offset - begin_offset;
// Default key for slots past num_items: the key whose bit representation is
// all ones, as decoded by key_codec.
593 const key_type out_of_bounds = key_codec::decode(bit_key_type(-1));
595 key_type keys[items_per_thread];
596 stable_key_type stable_keys[items_per_thread];
597 value_type values[items_per_thread];
598 keys_load_type().load(keys_input + begin_offset, keys, num_items, out_of_bounds, storage.keys_load);
// Pair every key with the index lane_id + logical_warp_size * i, which is
// used by the stable comparator to break ties between equal keys.
601 for(
unsigned int i = 0; i < items_per_thread; i++)
603 ::rocprim::get<0>(stable_keys[i]) = keys[i];
604 ::rocprim::get<1>(stable_keys[i]) =
605 ::rocprim::detail::logical_lane_id<logical_warp_size>() + logical_warp_size * i;
611 values_load_type().load(values_input + begin_offset, values, num_items, storage.values_load);
// When the whole key is compared, use the comparator variant without a radix
// mask; otherwise restrict comparison to end_bit - begin_bit bits starting at
// begin_bit.
615 if(begin_bit == 0 && end_bit == 8 *
sizeof(key_type))
617 sort_type().sort(stable_keys,
620 make_stable_comparator(radix_comparator_type<false>{}));
624 radix_comparator_type<true> comparator(begin_bit, end_bit - begin_bit);
625 sort_type().sort(stable_keys, values, storage.sort, make_stable_comparator(comparator));
// Drop the tie-break index again before storing.
629 for(
unsigned int i = 0; i < items_per_thread; i++)
631 keys[i] = ::rocprim::get<0>(stable_keys[i]);
634 keys_store_type().store(keys_output + begin_offset, keys, num_items, storage.keys_store);
639 values_store_type().store(values_output + begin_offset, values, num_items, storage.values_store);
// sort (warp-sort overload with temporary buffers): forwards to another call,
// writing either to the output buffers or to the temporary buffers.
// NOTE(review): the condition selecting between the two argument lists and
// the `keys_tmp` / `to_output` parameters are not visible in this listing -
// presumably the choice is made on `to_output`; confirm.
644 class KeysInputIterator,
645 class KeysOutputIterator,
646 class ValuesInputIterator,
647 class ValuesOutputIterator
649 ROCPRIM_DEVICE ROCPRIM_INLINE
650 void sort(KeysInputIterator keys_input,
652 KeysOutputIterator keys_output,
653 ValuesInputIterator values_input,
654 value_type * values_tmp,
655 ValuesOutputIterator values_output,
657 unsigned int begin_offset,
658 unsigned int end_offset,
659 unsigned int begin_bit,
660 unsigned int end_bit,
661 storage_type& storage)
// Variant A: input -> output.
666 keys_input, keys_output, values_input, values_output,
667 begin_offset, end_offset,
// Variant B: input -> tmp.
675 keys_input, keys_tmp, values_input, values_tmp,
676 begin_offset, end_offset,
// segmented_sort: sorts one segment per block; the segment index is the
// block id in dimension 0.
//
// Dispatch by segment length:
//   * longer than items_per_block: `long_iterations` passes of
//     long_radix_bits followed by `short_iterations` passes of
//     short_radix_bits, toggling `to_output` after every pass;
//   * otherwise, if warp sort is disabled or the segment is longer than
//     items_per_warp: the single-block helper;
//   * remaining case: the warp-sort helper.
// NOTE(review): some template parameters (Config, Descending, OffsetIterator),
// the `to_output` declaration, the helper alias names' template heads and the
// body of the empty-segment check are not visible in this listing.
687 class KeysInputIterator,
688 class KeysOutputIterator,
689 class ValuesInputIterator,
690 class ValuesOutputIterator,
693 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
694 void segmented_sort(KeysInputIterator keys_input,
695 typename std::iterator_traits<KeysInputIterator>::value_type * keys_tmp,
696 KeysOutputIterator keys_output,
697 ValuesInputIterator values_input,
698 typename std::iterator_traits<ValuesInputIterator>::value_type * values_tmp,
699 ValuesOutputIterator values_output,
701 OffsetIterator begin_offsets,
702 OffsetIterator end_offsets,
703 unsigned int long_iterations,
704 unsigned int short_iterations,
705 unsigned int begin_bit,
706 unsigned int end_bit)
// Tuning constants taken from Config.
708 constexpr
unsigned int long_radix_bits = Config::long_radix_bits;
709 constexpr
unsigned int short_radix_bits = Config::short_radix_bits;
710 constexpr
unsigned int block_size = Config::sort::block_size;
711 constexpr
unsigned int items_per_thread = Config::sort::items_per_thread;
712 constexpr
unsigned int items_per_block = block_size * items_per_thread;
713 constexpr
bool warp_sort_enabled = Config::warp_sort_config::enable_unpartitioned_warp_sort;
715 using key_type =
typename std::iterator_traits<KeysInputIterator>::value_type;
716 using value_type =
typename std::iterator_traits<ValuesInputIterator>::value_type;
// Fragments of the helper type aliases (single-block, long-radix, short-radix,
// warp-sort); their heads are not visible here.
719 key_type, value_type,
724 key_type, value_type,
726 long_radix_bits, Descending
729 key_type, value_type,
731 short_radix_bits, Descending
734 select_warp_sort_helper_config_small_t<typename Config::warp_sort_config>,
738 static constexpr
unsigned int items_per_warp = warp_sort_helper_type::items_per_warp;
// All helpers share one storage union, declared with ROCPRIM_SHARED_MEMORY;
// only one helper is used per segment.
740 ROCPRIM_SHARED_MEMORY
union 742 typename single_block_helper_type::storage_type single_block_helper;
743 typename long_radix_helper_type::storage_type long_radix_helper;
744 typename short_radix_helper_type::storage_type short_radix_helper;
745 typename warp_sort_helper_type::storage_type warp_sort_helper;
748 const unsigned int segment_id = ::rocprim::detail::block_id<0>();
750 const unsigned int begin_offset = begin_offsets[segment_id];
751 const unsigned int end_offset = end_offsets[segment_id];
// Empty (or inverted) segment. NOTE(review): body not visible - presumably an
// early return.
754 if(end_offset <= begin_offset)
// Large segment: multi-pass radix sort.
759 if(end_offset - begin_offset > items_per_block)
762 unsigned int bit = begin_bit;
763 for(
unsigned int i = 0; i < long_iterations; i++)
765 long_radix_helper_type().sort(
766 keys_input, keys_tmp, keys_output, values_input, values_tmp, values_output,
768 begin_offset, end_offset,
769 bit, begin_bit, end_bit,
770 storage.long_radix_helper
// Swap the roles of the tmp and output buffers for the next pass.
773 to_output = !to_output;
774 bit += long_radix_bits;
776 for(
unsigned int i = 0; i < short_iterations; i++)
778 short_radix_helper_type().sort(
779 keys_input, keys_tmp, keys_output, values_input, values_tmp, values_output,
781 begin_offset, end_offset,
782 bit, begin_bit, end_bit,
783 storage.short_radix_helper
786 to_output = !to_output;
787 bit += short_radix_bits;
// Segment fits into one block but is not handled by warp sort.
790 else if(!warp_sort_enabled || end_offset - begin_offset > items_per_warp)
// The destination flag passed here is `to_output` inverted when the total
// pass count (long + short) is even, and `to_output` unchanged when odd.
793 single_block_helper_type().sort(
794 keys_input, keys_tmp, keys_output, values_input, values_tmp, values_output,
795 ((long_iterations + short_iterations) % 2 == 0) != to_output,
796 begin_offset, end_offset,
798 storage.single_block_helper
// Remaining case: warp sort, with the same destination flag.
804 warp_sort_helper_type().sort(
805 keys_input, keys_tmp, keys_output,
806 values_input, values_tmp, values_output,
807 ((long_iterations + short_iterations) % 2 == 0) != to_output,
808 begin_offset, end_offset,
809 begin_bit, end_bit, storage.warp_sort_helper
// segmented_sort_large: sorts one segment per block, where the segment is
// looked up indirectly as segment_indices[block id].
//
// Segments longer than items_per_block get `long_iterations` passes of
// long_radix_bits followed by `short_iterations` passes of short_radix_bits,
// toggling `to_output` after every pass; the remaining visible call uses the
// single-block helper.
// NOTE(review): some template parameters (Config, Descending, OffsetIterator),
// the `to_output` declaration, the helper alias heads, the body of the
// empty-segment check and the `else` before the single-block call are not
// visible in this listing.
817 class KeysInputIterator,
818 class KeysOutputIterator,
819 class ValuesInputIterator,
820 class ValuesOutputIterator,
821 class SegmentIndexIterator,
824 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
825 void segmented_sort_large(KeysInputIterator keys_input,
826 typename std::iterator_traits<KeysInputIterator>::value_type * keys_tmp,
827 KeysOutputIterator keys_output,
828 ValuesInputIterator values_input,
829 typename std::iterator_traits<ValuesInputIterator>::value_type * values_tmp,
830 ValuesOutputIterator values_output,
832 SegmentIndexIterator segment_indices,
833 OffsetIterator begin_offsets,
834 OffsetIterator end_offsets,
835 unsigned int long_iterations,
836 unsigned int short_iterations,
837 unsigned int begin_bit,
838 unsigned int end_bit)
// Tuning constants taken from Config.
840 constexpr
unsigned int long_radix_bits = Config::long_radix_bits;
841 constexpr
unsigned int short_radix_bits = Config::short_radix_bits;
842 constexpr
unsigned int block_size = Config::sort::block_size;
843 constexpr
unsigned int items_per_thread = Config::sort::items_per_thread;
844 constexpr
unsigned int items_per_block = block_size * items_per_thread;
846 using key_type =
typename std::iterator_traits<KeysInputIterator>::value_type;
847 using value_type =
typename std::iterator_traits<ValuesInputIterator>::value_type;
// Fragments of the helper type aliases (single-block, long-radix,
// short-radix); their heads are not visible here.
850 key_type, value_type,
855 key_type, value_type,
857 long_radix_bits, Descending
860 key_type, value_type,
862 short_radix_bits, Descending
// All helpers share one storage union, declared with ROCPRIM_SHARED_MEMORY.
865 ROCPRIM_SHARED_MEMORY
union 867 typename single_block_helper_type::storage_type single_block_helper;
868 typename long_radix_helper_type::storage_type long_radix_helper;
869 typename short_radix_helper_type::storage_type short_radix_helper;
// Indirect segment lookup: block id -> segment id.
872 const unsigned int block_id = ::rocprim::detail::block_id<0>();
873 const unsigned int segment_id = segment_indices[
block_id];
874 const unsigned int begin_offset = begin_offsets[segment_id];
875 const unsigned int end_offset = end_offsets[segment_id];
// Empty (or inverted) segment. NOTE(review): body not visible - presumably an
// early return.
877 if(end_offset <= begin_offset)
// Large segment: multi-pass radix sort.
882 if(end_offset - begin_offset > items_per_block)
884 unsigned int bit = begin_bit;
885 for(
unsigned int i = 0; i < long_iterations; i++)
887 long_radix_helper_type().sort(
888 keys_input, keys_tmp, keys_output, values_input, values_tmp, values_output,
890 begin_offset, end_offset,
891 bit, begin_bit, end_bit,
892 storage.long_radix_helper
// Swap the roles of the tmp and output buffers for the next pass.
895 to_output = !to_output;
896 bit += long_radix_bits;
898 for(
unsigned int i = 0; i < short_iterations; i++)
900 short_radix_helper_type().sort(
901 keys_input, keys_tmp, keys_output, values_input, values_tmp, values_output,
903 begin_offset, end_offset,
904 bit, begin_bit, end_bit,
905 storage.short_radix_helper
908 to_output = !to_output;
909 bit += short_radix_bits;
// Single-block path. The destination flag passed here is `to_output` inverted
// when the total pass count (long + short) is even, and unchanged when odd.
914 single_block_helper_type().sort(
915 keys_input, keys_tmp, keys_output, values_input, values_tmp, values_output,
916 ((long_iterations + short_iterations) % 2 == 0) != to_output,
917 begin_offset, end_offset,
919 storage.single_block_helper
// segmented_sort_small: sorts one segment per logical warp with the warp-sort
// helper. A block hosts warps_per_block logical warps; the warp's global
// index selects an entry of segment_indices, which in turn selects the
// segment.
// NOTE(review): some template parameters (Config, Descending, OffsetIterator),
// the `to_output` parameter declaration, the head of the helper alias and the
// bodies of the two guard `if`s (presumably early returns) are not visible in
// this listing.
927 class KeysInputIterator,
928 class KeysOutputIterator,
929 class ValuesInputIterator,
930 class ValuesOutputIterator,
931 class SegmentIndexIterator,
934 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
935 void segmented_sort_small(KeysInputIterator keys_input,
936 typename std::iterator_traits<KeysInputIterator>::value_type * keys_tmp,
937 KeysOutputIterator keys_output,
938 ValuesInputIterator values_input,
939 typename std::iterator_traits<ValuesInputIterator>::value_type * values_tmp,
940 ValuesOutputIterator values_output,
942 unsigned int num_segments,
943 SegmentIndexIterator segment_indices,
944 OffsetIterator begin_offsets,
945 OffsetIterator end_offsets,
946 unsigned int begin_bit,
947 unsigned int end_bit)
949 static constexpr
unsigned int block_size = Config::block_size;
950 static constexpr
unsigned int logical_warp_size = Config::logical_warp_size;
// A block must be made of a whole number of logical warps.
951 static_assert(block_size % logical_warp_size == 0,
"logical_warp_size must be a divisor of block_size");
952 static constexpr
unsigned int warps_per_block = block_size / logical_warp_size;
954 using key_type =
typename std::iterator_traits<KeysInputIterator>::value_type;
955 using value_type =
typename std::iterator_traits<ValuesInputIterator>::value_type;
// Template arguments of the warp-sort helper alias (head not visible here).
958 Config, key_type, value_type, Descending
961 ROCPRIM_SHARED_MEMORY
typename warp_sort_helper_type::storage_type storage;
// Global index of this logical warp across the grid.
963 const unsigned int block_id = ::rocprim::detail::block_id<0>();
964 const unsigned int logical_warp_id = ::rocprim::detail::logical_warp_id<logical_warp_size>();
965 const unsigned int segment_index = block_id * warps_per_block + logical_warp_id;
// Warps beyond the number of segments have nothing to do.
966 if(segment_index >= num_segments)
971 const unsigned int segment_id = segment_indices[segment_index];
972 const unsigned int begin_offset = begin_offsets[segment_id];
973 const unsigned int end_offset = end_offsets[segment_id];
// Empty (or inverted) segment.
974 if(end_offset <= begin_offset)
978 warp_sort_helper_type().sort(
979 keys_input, keys_tmp, keys_output,
980 values_input, values_tmp, values_output,
981 to_output, begin_offset, end_offset,
982 begin_bit, end_bit, storage
988 END_ROCPRIM_NAMESPACE
990 #endif // ROCPRIM_DEVICE_DETAIL_DEVICE_SEGMENTED_RADIX_SORT_HPP_ ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int block_id()
Returns block identifier in a multidimensional grid by dimension.
Definition: thread.hpp:258
Definition: device_segmented_radix_sort.hpp:512
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int flat_block_thread_id()
Returns flat (linear, 1D) thread identifier in a multidimensional block (tile).
Definition: thread.hpp:106
Definition: device_radix_sort.hpp:312
Definition: device_segmented_radix_sort.hpp:482
ROCPRIM_DEVICE ROCPRIM_INLINE constexpr unsigned int device_warp_size()
Returns a number of threads in a hardware warp for the actual target.
Definition: thread.hpp:70
ROCPRIM_HOST_DEVICE constexpr T min(const T &a, const T &b)
Returns the minimum of its arguments.
Definition: functional.hpp:63
ROCPRIM_DEVICE ROCPRIM_INLINE void wave_barrier()
Synchronize all threads in the wavefront.
Definition: thread.hpp:235
Definition: device_radix_sort.hpp:335
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
ROCPRIM_DEVICE ROCPRIM_INLINE void syncthreads()
Synchronize all threads in a block (tile).
Definition: thread.hpp:216
Definition: device_segmented_radix_sort.hpp:59
Definition: device_segmented_radix_sort.hpp:74
Definition: radix_sort.hpp:241
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int block_size()
Returns block size in a multidimensional grid by dimension.
Definition: thread.hpp:268
Definition: device_radix_sort.hpp:106
Definition: device_segmented_radix_sort.hpp:272
Definition: device_segmented_radix_sort.hpp:475
Definition: device_radix_sort.hpp:97
Definition: device_segmented_radix_sort.hpp:297