rocPRIM
device_select_config.hpp
1 // Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCPRIM_DEVICE_DEVICE_SELECT_CONFIG_HPP_
22 #define ROCPRIM_DEVICE_DEVICE_SELECT_CONFIG_HPP_
23 
24 #include <type_traits>
25 
26 #include "../config.hpp"
27 #include "../detail/various.hpp"
28 #include "../functional.hpp"
29 
30 #include "../block/block_load.hpp"
31 #include "../block/block_scan.hpp"
32 
33 #include "config_types.hpp"
34 
37 
38 BEGIN_ROCPRIM_NAMESPACE
39 
49 template<
50  unsigned int BlockSize,
51  unsigned int ItemsPerThread,
52  ::rocprim::block_load_method KeyBlockLoadMethod,
53  ::rocprim::block_load_method ValueBlockLoadMethod,
54  ::rocprim::block_load_method FlagBlockLoadMethod,
55  ::rocprim::block_scan_algorithm BlockScanMethod,
56  unsigned int SizeLimit = ROCPRIM_GRID_SIZE_LIMIT
57 >
59 {
61  static constexpr unsigned int block_size = BlockSize;
63  static constexpr unsigned int items_per_thread = ItemsPerThread;
65  static constexpr block_load_method key_block_load_method = KeyBlockLoadMethod;
67  static constexpr block_load_method value_block_load_method = ValueBlockLoadMethod;
69  static constexpr block_load_method flag_block_load_method = FlagBlockLoadMethod;
71  static constexpr block_scan_algorithm block_scan_method = BlockScanMethod;
73  static constexpr unsigned int size_limit = SizeLimit;
74 };
75 
76 namespace detail
77 {
78 
// Select tuning for one target architecture; the chosen select_config
// instantiation is published as the nested alias `type`.
// NOTE(review): the line declaring this struct (it would sit between
// `template<class Key>` and `{`) is absent from this listing. Going by the
// order of the select_arch_case entries further down it is presumably
// `struct select_config_803` -- confirm against the real header.
79 template<class Key>
81 {
// Size of Key measured in int-sized units. ceiling_div presumably rounds the
// quotient up, which would make this at least 1 for any Key -- TODO confirm.
82  static constexpr unsigned int item_scale =
83  ::rocprim::detail::ceiling_div<unsigned int>(sizeof(Key), sizeof(int));
84 
85  using type = select_config<
// NOTE(review): one template argument is absent from the listing at this
// point. select_config takes BlockSize first and the arguments below line up
// with ItemsPerThread onwards, so the missing one is presumably the block size.
// Items per thread: 13 divided by item_scale (integer division), never below 1.
87  ::rocprim::max(1u, 13u / item_scale),
// Keys, values and flags all use the transpose load method.
88  ::rocprim::block_load_method::block_load_transpose,
89  ::rocprim::block_load_method::block_load_transpose,
90  ::rocprim::block_load_method::block_load_transpose,
91  ::rocprim::block_scan_algorithm::using_warp_scan
// SizeLimit is not passed, so select_config's default applies.
92  >;
93 };
94 
// Select tuning for one target architecture; the chosen select_config
// instantiation is published as the nested alias `type`.
// NOTE(review): the line declaring this struct (it would sit between
// `template<class Key>` and `{`) is absent from this listing. Going by the
// order of the select_arch_case entries further down it is presumably
// `struct select_config_900` -- confirm against the real header.
95 template<class Key>
97 {
// Size of Key measured in int-sized units. ceiling_div presumably rounds the
// quotient up, which would make this at least 1 for any Key -- TODO confirm.
98  static constexpr unsigned int item_scale =
99  ::rocprim::detail::ceiling_div<unsigned int>(sizeof(Key), sizeof(int));
100 
101  using type = select_config<
// NOTE(review): one template argument is absent from the listing at this
// point. select_config takes BlockSize first and the arguments below line up
// with ItemsPerThread onwards, so the missing one is presumably the block size.
// Items per thread: 15 divided by item_scale (integer division), never below 1.
103  ::rocprim::max(1u, 15u / item_scale),
// Keys, values and flags all use the transpose load method.
104  ::rocprim::block_load_method::block_load_transpose,
105  ::rocprim::block_load_method::block_load_transpose,
106  ::rocprim::block_load_method::block_load_transpose,
107  ::rocprim::block_scan_algorithm::using_warp_scan
// SizeLimit is not passed, so select_config's default applies.
108  >;
109 };
110 
// Select tuning for one target architecture; the chosen select_config
// instantiation is published as the nested alias `type`.
// NOTE(review): the line declaring this struct (it would sit between
// `template<class Value>` and `{`) is absent from this listing. Going by the
// order of the select_arch_case entries further down it is presumably
// `struct select_config_90a` -- confirm against the real header.
// The parameter is named Value here, yet every select_arch_case entry below
// instantiates its config with Key.
111 template<class Value>
113 {
// Size of Value measured in int-sized units. ceiling_div presumably rounds the
// quotient up, which would make this at least 1 for any Value -- TODO confirm.
114  static constexpr unsigned int item_scale =
115  ::rocprim::detail::ceiling_div<unsigned int>(sizeof(Value), sizeof(int));
116 
117  using type = select_config<
// NOTE(review): one template argument is absent from the listing at this
// point. select_config takes BlockSize first and the arguments below line up
// with ItemsPerThread onwards, so the missing one is presumably the block size.
// Items per thread: 15 divided by item_scale (integer division), never below 1.
119  ::rocprim::max(1u, 15u / item_scale),
// Keys, values and flags all use the transpose load method.
120  ::rocprim::block_load_method::block_load_transpose,
121  ::rocprim::block_load_method::block_load_transpose,
122  ::rocprim::block_load_method::block_load_transpose,
123  ::rocprim::block_scan_algorithm::using_warp_scan
// SizeLimit is not passed, so select_config's default applies.
124  >;
125 };
126 
// Select tuning for one target architecture; the chosen select_config
// instantiation is published as the nested alias `type`.
// NOTE(review): the line declaring this struct (it would sit between
// `template<class Value>` and `{`) is absent from this listing. Going by the
// order of the select_arch_case entries further down it is presumably
// `struct select_config_1030` -- confirm against the real header.
// The parameter is named Value here, yet every select_arch_case entry below
// instantiates its config with Key.
127 template<class Value>
129 {
// Size of Value measured in int-sized units. ceiling_div presumably rounds the
// quotient up, which would make this at least 1 for any Value -- TODO confirm.
130  static constexpr unsigned int item_scale =
131  ::rocprim::detail::ceiling_div<unsigned int>(sizeof(Value), sizeof(int));
132 
133  using type = select_config<
// NOTE(review): one template argument is absent from the listing at this
// point. select_config takes BlockSize first and the arguments below line up
// with ItemsPerThread onwards, so the missing one is presumably the block size.
// Items per thread: 15 divided by item_scale (integer division), never below 1.
135  ::rocprim::max(1u, 15u / item_scale),
// Keys, values and flags all use the transpose load method.
136  ::rocprim::block_load_method::block_load_transpose,
137  ::rocprim::block_load_method::block_load_transpose,
138  ::rocprim::block_load_method::block_load_transpose,
139  ::rocprim::block_scan_algorithm::using_warp_scan
// SizeLimit is not passed, so select_config's default applies.
140  >;
141 };
142 
143 
// Picks a select configuration for TargetArch by deriving from select_arch,
// handing it a list of (architecture id, config) cases plus one trailing config.
// NOTE(review): the line declaring this struct (it would sit between the
// template header and `: select_arch<`) is absent from this listing, so its
// name cannot be read from here -- confirm against the real header.
// The third template parameter is unnamed, so only Key reaches the configs.
144 template<unsigned int TargetArch, class Key, class /*Value*/>
146  : select_arch<
147  TargetArch,
// select_arch's matching rules are defined in another header; presumably the
// case whose id equals TargetArch supplies the configuration -- TODO confirm.
148  select_arch_case<803, select_config_803<Key>>,
149  select_arch_case<900, select_config_900<Key>>,
150  select_arch_case<ROCPRIM_ARCH_90a, select_config_90a<Key>>,
151  select_arch_case<1030, select_config_1030<Key>>,
// Trailing argument with no architecture id: presumably the fallback used when
// none of the cases above matches.
152  select_config_803<Key>
153  > { };
154 
155 } // end namespace detail
156 
157 END_ROCPRIM_NAMESPACE
158 
160 // end of group primitivesmodule_deviceconfigs
161 
162 #endif // ROCPRIM_DEVICE_DEVICE_SELECT_CONFIG_HPP_
ROCPRIM_HOST_DEVICE constexpr T max(const T &a, const T &b)
Returns the maximum of its arguments.
Definition: functional.hpp:55
Definition: device_select_config.hpp:145
Definition: config_types.hpp:109
static constexpr block_load_method value_block_load_method
Method for loading input values.
Definition: device_select_config.hpp:67
Configuration of device-level select operation.
Definition: device_select_config.hpp:58
static constexpr unsigned int size_limit
Limit on the number of items for a single select kernel launch.
Definition: device_select_config.hpp:73
static constexpr unsigned int block_size
Number of threads in a block.
Definition: device_select_config.hpp:61
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
static constexpr block_load_method key_block_load_method
Method for loading input keys.
Definition: device_select_config.hpp:65
block_load_method
block_load_method enumerates the methods available to load data from contiguous memory into a blocked...
Definition: block_load.hpp:41
Definition: device_select_config.hpp:112
Definition: config_types.hpp:140
static constexpr unsigned int items_per_thread
Number of items processed by each thread.
Definition: device_select_config.hpp:63
Definition: device_select_config.hpp:80
static constexpr block_scan_algorithm block_scan_method
Algorithm for block scan.
Definition: device_select_config.hpp:71
Definition: device_select_config.hpp:96
static constexpr block_load_method flag_block_load_method
Method for loading flag values.
Definition: device_select_config.hpp:69
Definition: device_select_config.hpp:128
block_scan_algorithm
Available algorithms for block_scan primitive.
Definition: block_scan.hpp:41