rocPRIM
various.hpp
1 // Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCPRIM_DETAIL_VARIOUS_HPP_
22 #define ROCPRIM_DETAIL_VARIOUS_HPP_
23 
24 #include <type_traits>
25 
26 #include "../config.hpp"
27 #include "../types.hpp"
28 #include "../type_traits.hpp"
29 
30 #include <hip/hip_runtime.h>
31 
32 // Check for c++ standard library features, in a backwards compatible manner
33 #ifndef __has_include
34  #define __has_include(x) 0
35 #endif
36 
37 #if __has_include(<version>) // version is only mandated in c++20
38  #include <version>
39  #if __cpp_lib_as_const >= 201510L
40  #include <utility>
41  #endif
42 #else
43  #include <utility>
44 #endif
45 
46 // TODO: Refactor when it gets crowded
47 
48 BEGIN_ROCPRIM_NAMESPACE
49 namespace detail
50 {
51 
53 {
54 
55 };
56 
57 template<class T>
58 ROCPRIM_HOST_DEVICE inline
59 constexpr bool is_power_of_two(const T x)
60 {
61  static_assert(::rocprim::is_integral<T>::value, "T must be integer type");
62  return (x > 0) && ((x & (x - 1)) == 0);
63 }
64 
65 template<class T>
66 ROCPRIM_HOST_DEVICE inline
67 constexpr T next_power_of_two(const T x, const T acc = 1)
68 {
69  static_assert(::rocprim::is_unsigned<T>::value, "T must be unsigned type");
70  return acc >= x ? acc : next_power_of_two(x, 2 * acc);
71 }
72 
73 template <
74  typename T,
75  typename U,
76  std::enable_if_t<::rocprim::is_integral<T>::value && ::rocprim::is_unsigned<U>::value, int> = 0>
77 ROCPRIM_HOST_DEVICE inline constexpr auto ceiling_div(const T a, const U b)
78 {
79  return a / b + (a % b > 0 ? 1 : 0);
80 }
81 
82 ROCPRIM_HOST_DEVICE inline
83 size_t align_size(size_t size, size_t alignment = 256)
84 {
85  return ceiling_div(size, alignment) * alignment;
86 }
87 
88 // TOOD: Put the block algorithms with warp size variables at device side with macro.
89 // Temporary workaround
90 template<class T>
91 ROCPRIM_HOST_DEVICE inline
92 constexpr T warp_size_in_class(const T warp_size)
93 {
94  return warp_size;
95 }
96 
97 // Select the minimal warp size for block of size block_size, it's
98 // useful for blocks smaller than maximal warp size.
99 template<class T>
100 ROCPRIM_HOST_DEVICE inline
101 constexpr T get_min_warp_size(const T block_size, const T max_warp_size)
102 {
103  static_assert(::rocprim::is_unsigned<T>::value, "T must be unsigned type");
104  return block_size >= max_warp_size ? max_warp_size : next_power_of_two(block_size);
105 }
106 
107 template<unsigned int WarpSize>
109  static const bool value = detail::is_power_of_two(WarpSize);
110 };
111 
112 // Selects an appropriate vector_type based on the input T and size N.
113 // The byte size is calculated and used to select an appropriate vector_type.
114 template<class T, unsigned int N>
116 {
117  static constexpr unsigned int size = sizeof(T) * N;
118  using vector_base_type =
119  typename std::conditional<
120  sizeof(T) >= 4,
121  int,
122  typename std::conditional<
123  sizeof(T) >= 2,
124  short,
125  char
126  >::type
127  >::type;
128 
129  using vector_4 = typename make_vector_type<vector_base_type, 4>::type;
130  using vector_2 = typename make_vector_type<vector_base_type, 2>::type;
131  using vector_1 = typename make_vector_type<vector_base_type, 1>::type;
132 
133  using type =
134  typename std::conditional<
135  size % sizeof(vector_4) == 0,
136  vector_4,
137  typename std::conditional<
138  size % sizeof(vector_2) == 0,
139  vector_2,
140  vector_1
141  >::type
142  >::type;
143 };
144 
145 // Checks if Items is odd and ensures that size of T is smaller than vector_type.
146 template<class T, unsigned int Items>
147 struct is_vectorizable : std::integral_constant<bool, (Items % 2 == 0) &&(sizeof(T) < sizeof(typename match_vector_type<T, Items>::type))> {};
148 
149 // Returns the number of LDS (local data share) banks.
150 ROCPRIM_HOST_DEVICE
151 constexpr unsigned int get_lds_banks_no()
152 {
153  // Currently all devices supported by ROCm have 32 banks (4 bytes each)
154  return 32;
155 }
156 
157 // Finds biggest fundamental type for type T that sizeof(T) is
158 // a multiple of that type's size.
159 template<class T>
160 struct match_fundamental_type
161 {
162  using type =
163  typename std::conditional<
164  sizeof(T)%8 == 0,
165  unsigned long long,
166  typename std::conditional<
167  sizeof(T)%4 == 0,
168  unsigned int,
169  typename std::conditional<
170  sizeof(T)%2 == 0,
171  unsigned short,
172  unsigned char
173  >::type
174  >::type
175  >::type;
176 };
177 
178 // A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions
179 template <typename T>
180 struct raw_storage
181 {
182  // Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T
183  typedef typename detail::match_fundamental_type<T>::type device_word;
184 
185  // Backing storage
186  device_word storage[sizeof(T) / sizeof(device_word)];
187 
188  // Alias
189  ROCPRIM_HOST_DEVICE T& get()
190  {
191  return reinterpret_cast<T&>(*this);
192  }
193 
194  ROCPRIM_HOST_DEVICE const T& get() const
195  {
196  return reinterpret_cast<const T&>(*this);
197  }
198 };
199 
200 // Checks if two iterators can possibly alias
201 template<class Iterator1, class Iterator2>
202 inline bool can_iterators_alias(Iterator1, Iterator2, const size_t size)
203 {
204  (void)size;
205  return true;
206 }
207 
208 template<typename Value1, typename Value2>
209 inline bool can_iterators_alias(Value1* iter1, Value2* iter2, const size_t size)
210 {
211  const uintptr_t start1 = reinterpret_cast<uintptr_t>(iter1);
212  const uintptr_t start2 = reinterpret_cast<uintptr_t>(iter2);
213  const uintptr_t end1 = reinterpret_cast<uintptr_t>(iter1 + size);
214  const uintptr_t end2 = reinterpret_cast<uintptr_t>(iter2 + size);
215  return start1 < end2 && start2 < end1;
216 }
217 
218 template<class...>
219 using void_t = void;
220 
221 template<typename T>
222 struct type_identity {
223  using type = T;
224 };
225 
226 template<class T, class = void>
227 struct extract_type_impl : type_identity<T> { };
228 
229 template<class T>
230 struct extract_type_impl<T, void_t<typename T::type> > : extract_type_impl<typename T::type> { };
231 
232 template <typename T>
233 using extract_type = typename extract_type_impl<T>::type;
234 
235 template<bool Value, class T>
236 struct select_type_case
237 {
238  static constexpr bool value = Value;
239  using type = T;
240 };
241 
242 template<class Case, class... OtherCases>
243 struct select_type_impl
244  : std::conditional<
245  Case::value,
246  type_identity<extract_type<typename Case::type>>,
247  select_type_impl<OtherCases...>
248  >::type { };
249 
250 template<class T>
251 struct select_type_impl<select_type_case<true, T>> : type_identity<extract_type<T>> { };
252 
253 template<class T>
254 struct select_type_impl<select_type_case<false, T>>
255 {
256  static_assert(
257  sizeof(T) == 0,
258  "Cannot select any case. "
259  "The last case must have true condition or be a fallback type."
260  );
261 };
262 
263 template<class Fallback>
264 struct select_type_impl<Fallback> : type_identity<extract_type<Fallback>> { };
265 
266 template <typename... Cases>
267 using select_type = typename select_type_impl<Cases...>::type;
268 
269 template <bool Value>
270 using bool_constant = std::integral_constant<bool, Value>;
271 
286 inline hipError_t memcpy_and_sync(
287  void* dst, const void* src, size_t size_bytes, hipMemcpyKind kind, hipStream_t stream)
288 {
289  // hipMemcpyWithStream is only supported on rocm 3.1 and above
290 #if(HIP_VERSION_MAJOR == 3 && HIP_VERSION_MINOR >= 1) || HIP_VERSION_MAJOR > 3
291  return hipMemcpyWithStream(dst, src, size_bytes, kind, stream);
292 #else
293  const hipError_t result = hipMemcpyAsync(dst src, size_bytes, kind, stream);
294  if(hipSuccess != result)
295  {
296  return result;
297  }
298  return hipStreamSynchronize(stream);
299 #endif
300 }
301 
302 #if __cpp_lib_as_const >= 201510L
303 using ::std::as_const;
304 #else
305 template<typename T>
306 constexpr std::add_const_t<T>& as_const(T& t) noexcept
307 {
308  return t;
309 }
310 template<typename T>
311 void as_const(const T&& t) = delete;
312 #endif
313 
320 template<typename T>
321 constexpr std::add_const_t<T>* as_const_ptr(T* ptr)
322 {
323  return ptr;
324 }
325 
326 template<class... Types, class Function, size_t... Indices>
327 ROCPRIM_HOST_DEVICE inline void for_each_in_tuple_impl(::rocprim::tuple<Types...>& t,
328  Function f,
329  ::rocprim::index_sequence<Indices...>)
330 {
331  auto swallow = {(f(::rocprim::get<Indices>(t)), 0)...};
332  (void)swallow;
333 }
334 
335 template<class... Types, class Function>
336 ROCPRIM_HOST_DEVICE inline void for_each_in_tuple(::rocprim::tuple<Types...>& t, Function f)
337 {
338  for_each_in_tuple_impl(t, f, ::rocprim::index_sequence_for<Types...>());
339 }
340 
341 } // end namespace detail
342 END_ROCPRIM_NAMESPACE
343 
344 #endif // ROCPRIM_DETAIL_VARIOUS_HPP_
Definition: various.hpp:147
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
ROCPRIM_HOST_DEVICE constexpr unsigned int warp_size()
[DEPRECATED] Returns a number of threads in a hardware warp.
Definition: thread.hpp:42
Definition: various.hpp:52
Definition: various.hpp:115
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int block_size()
Returns block size in a multidimensional grid by dimension.
Definition: thread.hpp:268
constexpr std::add_const_t< T > * as_const_ptr(T *ptr)
Add const to the top level pointed to object type.
Definition: various.hpp:321
Definition: various.hpp:108