rocPRIM
block_sort.hpp
1 // Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCPRIM_BLOCK_BLOCK_SORT_HPP_
22 #define ROCPRIM_BLOCK_BLOCK_SORT_HPP_
23 
24 #include <type_traits>
25 
26 #include "../config.hpp"
27 #include "../detail/various.hpp"
28 
29 #include "../intrinsics.hpp"
30 #include "../functional.hpp"
31 
32 #include "detail/block_sort_bitonic.hpp"
33 #include "detail/block_sort_merge.hpp"
34 
37 
38 BEGIN_ROCPRIM_NAMESPACE
39 
42 {
46  merge_sort,
51 };
52 
53 namespace detail
54 {
55 
56 // Selector for block_sort algorithm which gives block sort implementation
57 // type based on passed block_sort_algorithm enum
58 template<block_sort_algorithm Algorithm>
60 
61 template<>
63 {
64  template <class Key,
65  unsigned int BlockSizeX,
66  unsigned int BlockSizeY,
67  unsigned int BlockSizeZ,
68  unsigned int ItemsPerThread,
69  class Value>
71 };
72 
73 // Note: merge_sort and stable_merge_sort map to the same underlying algorithm.
74 template<>
76 {
77  template<class Key,
78  unsigned int BlockSizeX,
79  unsigned int BlockSizeY,
80  unsigned int BlockSizeZ,
81  unsigned int ItemsPerThread,
82  class Value>
84 };
85 
86 template<>
88 {
89  template<class Key,
90  unsigned int BlockSizeX,
91  unsigned int BlockSizeY,
92  unsigned int BlockSizeZ,
93  unsigned int ItemsPerThread,
94  class Value>
96 };
97 
98 } // end namespace detail
99 
142 template<
143  class Key,
144  unsigned int BlockSizeX,
145  unsigned int ItemsPerThread = 1,
146  class Value = empty_type,
148  unsigned int BlockSizeY = 1,
149  unsigned int BlockSizeZ = 1
150 >
152 #ifndef DOXYGEN_SHOULD_SKIP_THIS
153  : private detail::select_block_sort_impl<Algorithm>::template type<Key, BlockSizeX, BlockSizeY, BlockSizeZ, ItemsPerThread, Value>
154 #endif
155 {
156  using base_type = typename detail::select_block_sort_impl<Algorithm>::template type<Key, BlockSizeX, BlockSizeY, BlockSizeZ, ItemsPerThread, Value>;
157 public:
166  using storage_type = typename base_type::storage_type;
167 
179  template<class BinaryFunction = ::rocprim::less<Key>>
180  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
181  void sort(Key& thread_key,
182  BinaryFunction compare_function = BinaryFunction())
183  {
184  base_type::sort(thread_key, compare_function);
185  }
186 
189  template <class BinaryFunction = ::rocprim::less<Key>>
190  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
191  void sort(Key (&thread_keys)[ItemsPerThread],
192  BinaryFunction compare_function = BinaryFunction())
193  {
194  base_type::sort(thread_keys, compare_function);
195  }
196 
238  template<class BinaryFunction = ::rocprim::less<Key>>
239  ROCPRIM_DEVICE ROCPRIM_INLINE
240  void sort(Key& thread_key,
241  storage_type& storage,
242  BinaryFunction compare_function = BinaryFunction())
243  {
244  base_type::sort(thread_key, storage, compare_function);
245  }
246 
249  template<class BinaryFunction = ::rocprim::less<Key>>
250  ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key (&thread_keys)[ItemsPerThread],
251  storage_type& storage,
252  BinaryFunction compare_function = BinaryFunction())
253  {
254  base_type::sort(thread_keys, storage, compare_function);
255  }
256 
269  template<class BinaryFunction = ::rocprim::less<Key>>
270  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
271  void sort(Key& thread_key,
272  Value& thread_value,
273  BinaryFunction compare_function = BinaryFunction())
274  {
275  base_type::sort(thread_key, thread_value, compare_function);
276  }
277 
280  template<class BinaryFunction = ::rocprim::less<Key>>
281  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
282  void sort(Key (&thread_keys)[ItemsPerThread],
283  Value (&thread_values)[ItemsPerThread],
284  BinaryFunction compare_function = BinaryFunction())
285  {
286  base_type::sort(thread_keys, thread_values, compare_function);
287  }
288 
331  template<class BinaryFunction = ::rocprim::less<Key>>
332  ROCPRIM_DEVICE ROCPRIM_INLINE
333  void sort(Key& thread_key,
334  Value& thread_value,
335  storage_type& storage,
336  BinaryFunction compare_function = BinaryFunction())
337  {
338  base_type::sort(thread_key, thread_value, storage, compare_function);
339  }
340 
343  template<class BinaryFunction = ::rocprim::less<Key>>
344  ROCPRIM_DEVICE ROCPRIM_INLINE
345  void sort(Key (&thread_keys)[ItemsPerThread],
346  Value (&thread_values)[ItemsPerThread],
347  storage_type& storage,
348  BinaryFunction compare_function = BinaryFunction())
349  {
350  base_type::sort(thread_keys, thread_values, storage, compare_function);
351  }
352 
367  template<class BinaryFunction = ::rocprim::less<Key>>
368  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void sort(Key& thread_key,
369  storage_type& storage,
370  const unsigned int size,
371  BinaryFunction compare_function
372  = BinaryFunction())
373  {
374  base_type::sort(thread_key, storage, size, compare_function);
375  }
376 
391  template<class BinaryFunction = ::rocprim::less<Key>>
392  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void sort(Key (&thread_keys)[ItemsPerThread],
393  storage_type& storage,
394  const unsigned int size,
395  BinaryFunction compare_function
396  = BinaryFunction())
397  {
398  base_type::sort(thread_keys, storage, size, compare_function);
399  }
400 
416  template<class BinaryFunction = ::rocprim::less<Key>>
417  ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key& thread_key,
418  Value& thread_value,
419  storage_type& storage,
420  const unsigned int size,
421  BinaryFunction compare_function = BinaryFunction())
422  {
423  base_type::sort(thread_key, thread_value, storage, size, compare_function);
424  }
425 
441  template<class BinaryFunction = ::rocprim::less<Key>>
442  ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key (&thread_keys)[ItemsPerThread],
443  Value (&thread_values)[ItemsPerThread],
444  storage_type& storage,
445  const unsigned int size,
446  BinaryFunction compare_function = BinaryFunction())
447  {
448  base_type::sort(thread_keys, thread_values, storage, size, compare_function);
449  }
450 };
451 
452 END_ROCPRIM_NAMESPACE
453 
455 // end of group blockmodule
456 
457 #endif // ROCPRIM_BLOCK_BLOCK_SORT_HPP_
Empty type used as a placeholder, usually used to flag that given template parameter should not be us...
Definition: types.hpp:135
A bitonic sort based algorithm.
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void sort(Key &thread_key, Value &thread_value, BinaryFunction compare_function=BinaryFunction())
Block sort by key for any data type.
Definition: block_sort.hpp:271
A merged sort based algorithm which sorts stably.
ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key &thread_key, Value &thread_value, storage_type &storage, const unsigned int size, BinaryFunction compare_function=BinaryFunction())
Block sort by key for any data type.
Definition: block_sort.hpp:417
The block_sort class is a block level parallel primitive which provides methods sorting items (keys o...
Definition: block_sort.hpp:151
Definition: block_sort_merge.hpp:41
Definition: block_sort_bitonic.hpp:47
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void sort(Key &thread_key, BinaryFunction compare_function=BinaryFunction())
Block sort for any data type.
Definition: block_sort.hpp:181
ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key(&thread_keys)[ItemsPerThread], Value(&thread_values)[ItemsPerThread], storage_type &storage, BinaryFunction compare_function=BinaryFunction())
This overload allows an array of ItemsPerThread keys and values to be passed in so that each thread c...
Definition: block_sort.hpp:345
ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key(&thread_keys)[ItemsPerThread], storage_type &storage, BinaryFunction compare_function=BinaryFunction())
This overload allows arrays of ItemsPerThread keys to be passed in so that each thread can process mu...
Definition: block_sort.hpp:250
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Default block_sort algorithm.
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void sort(Key(&thread_keys)[ItemsPerThread], Value(&thread_values)[ItemsPerThread], BinaryFunction compare_function=BinaryFunction())
This overload allows an array of ItemsPerThread keys and values to be passed in so that each thread c...
Definition: block_sort.hpp:282
typename base_type::storage_type storage_type
Struct used to allocate a temporary memory that is required for thread communication during operation...
Definition: block_sort.hpp:166
ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key &thread_key, Value &thread_value, storage_type &storage, BinaryFunction compare_function=BinaryFunction())
Block sort by key for any data type.
Definition: block_sort.hpp:333
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void sort(Key(&thread_keys)[ItemsPerThread], storage_type &storage, const unsigned int size, BinaryFunction compare_function=BinaryFunction())
Block sort for any data type.
Definition: block_sort.hpp:392
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void sort(Key &thread_key, storage_type &storage, const unsigned int size, BinaryFunction compare_function=BinaryFunction())
Block sort for any data type.
Definition: block_sort.hpp:368
Default block_histogram algorithm.
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void sort(Key(&thread_keys)[ItemsPerThread], BinaryFunction compare_function=BinaryFunction())
This overload allows an array of ItemsPerThread keys to be passed in so that each thread can process ...
Definition: block_sort.hpp:191
block_sort_algorithm
Available algorithms for block_sort primitive.
Definition: block_sort.hpp:41
ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key &thread_key, storage_type &storage, BinaryFunction compare_function=BinaryFunction())
Block sort for any data type.
Definition: block_sort.hpp:240
Definition: block_sort.hpp:59
A merge sort based algorithm.
ROCPRIM_DEVICE ROCPRIM_INLINE void sort(Key(&thread_keys)[ItemsPerThread], Value(&thread_values)[ItemsPerThread], storage_type &storage, const unsigned int size, BinaryFunction compare_function=BinaryFunction())
Block sort by key for any data type.
Definition: block_sort.hpp:442