21 #ifndef ROCPRIM_WARP_WARP_LOAD_HPP_ 22 #define ROCPRIM_WARP_WARP_LOAD_HPP_ 24 #include "../config.hpp" 25 #include "../intrinsics.hpp" 26 #include "../detail/various.hpp" 28 #include "warp_exchange.hpp" 29 #include "../block/block_load_func.hpp" 34 BEGIN_ROCPRIM_NAMESPACE
118 unsigned int ItemsPerThread,
124 static_assert(::rocprim::detail::is_power_of_two(WarpSize),
125 "Logical warp size must be a power of two.");
127 "Logical warp size cannot be larger than physical warp size.");
130 using storage_type_ = typename ::rocprim::detail::empty_storage_type;
141 #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen 160 template<
class InputIterator>
161 ROCPRIM_DEVICE ROCPRIM_INLINE
163 T (&items)[ItemsPerThread],
166 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
167 static_assert(std::is_convertible<value_type, T>::value,
168 "The type T must be such that an object of type InputIterator " 169 "can be dereferenced and then implicitly converted to T.");
170 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
188 template<
class InputIterator>
189 ROCPRIM_DEVICE ROCPRIM_INLINE
191 T (&items)[ItemsPerThread],
195 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
196 static_assert(std::is_convertible<value_type, T>::value,
197 "The type T must be such that an object of type InputIterator " 198 "can be dereferenced and then implicitly converted to T.");
199 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
222 ROCPRIM_DEVICE ROCPRIM_INLINE
224 T (&items)[ItemsPerThread],
226 Default out_of_bounds,
229 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
230 static_assert(std::is_convertible<value_type, T>::value,
231 "The type T must be such that an object of type InputIterator " 232 "can be dereferenced and then implicitly converted to T.");
233 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
239 #ifndef DOXYGEN_SHOULD_SKIP_THIS 243 unsigned int ItemsPerThread,
244 unsigned int WarpSize
248 static_assert(::rocprim::detail::is_power_of_two(WarpSize),
249 "Logical warp size must be a power of two.");
251 "Logical warp size cannot be larger than physical warp size.");
254 using storage_type = typename ::rocprim::detail::empty_storage_type;
256 template<
class InputIterator>
257 ROCPRIM_DEVICE ROCPRIM_INLINE
258 void load(InputIterator input,
259 T (&items)[ItemsPerThread],
262 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
263 static_assert(std::is_convertible<value_type, T>::value,
264 "The type T must be such that an object of type InputIterator " 265 "can be dereferenced and then implicitly converted to T.");
266 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
267 block_load_direct_warp_striped<WarpSize>(flat_id, input, items);
270 template<
class InputIterator>
271 ROCPRIM_DEVICE ROCPRIM_INLINE
272 void load(InputIterator input,
273 T (&items)[ItemsPerThread],
277 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
278 static_assert(std::is_convertible<value_type, T>::value,
279 "The type T must be such that an object of type InputIterator " 280 "can be dereferenced and then implicitly converted to T.");
281 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
282 block_load_direct_warp_striped<WarpSize>(flat_id, input, items, valid);
289 ROCPRIM_DEVICE ROCPRIM_INLINE
290 void load(InputIterator input,
291 T (&items)[ItemsPerThread],
293 Default out_of_bounds,
296 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
297 static_assert(std::is_convertible<value_type, T>::value,
298 "The type T must be such that an object of type InputIterator " 299 "can be dereferenced and then implicitly converted to T.");
300 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
301 block_load_direct_warp_striped<WarpSize>(flat_id, input, items, valid,
308 unsigned int ItemsPerThread,
309 unsigned int WarpSize
313 static_assert(::rocprim::detail::is_power_of_two(WarpSize),
314 "Logical warp size must be a power of two.");
316 "Logical warp size cannot be larger than physical warp size.");
319 using storage_type = typename ::rocprim::detail::empty_storage_type;
321 ROCPRIM_DEVICE ROCPRIM_INLINE
323 T (&items)[ItemsPerThread],
326 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
330 template<
class InputIterator>
331 ROCPRIM_DEVICE ROCPRIM_INLINE
332 void load(InputIterator input,
333 T (&items)[ItemsPerThread],
336 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
337 static_assert(std::is_convertible<value_type, T>::value,
338 "The type T must be such that an object of type InputIterator " 339 "can be dereferenced and then implicitly converted to T.");
340 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
344 template<
class InputIterator>
345 ROCPRIM_DEVICE ROCPRIM_INLINE
346 void load(InputIterator input,
347 T (&items)[ItemsPerThread],
351 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
352 static_assert(std::is_convertible<value_type, T>::value,
353 "The type T must be such that an object of type InputIterator " 354 "can be dereferenced and then implicitly converted to T.");
355 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
363 ROCPRIM_DEVICE ROCPRIM_INLINE
364 void load(InputIterator input,
365 T (&items)[ItemsPerThread],
367 Default out_of_bounds,
370 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
371 static_assert(std::is_convertible<value_type, T>::value,
372 "The type T must be such that an object of type InputIterator " 373 "can be dereferenced and then implicitly converted to T.");
374 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
382 unsigned int ItemsPerThread,
383 unsigned int WarpSize
387 static_assert(::rocprim::detail::is_power_of_two(WarpSize),
388 "Logical warp size must be a power of two.");
390 "Logical warp size cannot be larger than physical warp size.");
393 using exchange_type = ::rocprim::warp_exchange<T, ItemsPerThread, WarpSize>;
396 using storage_type =
typename exchange_type::storage_type;
/// \brief Loads items for the calling lane and rearranges them through \p exchange_type.
///
/// Reads from \p input with block_load_direct_warp_striped and then calls
/// exchange_type::striped_to_blocked on \p items, using \p storage.
///
/// \tparam InputIterator iterator type whose \p value_type must be implicitly
/// convertible to \p T (enforced by the static_assert below).
/// \param [in] input iterator forwarded to block_load_direct_warp_striped.
/// \param [out] items per-thread array of \p ItemsPerThread elements that receives the result.
/// \param [in] storage temporary storage forwarded to striped_to_blocked.
398 template<
class InputIterator>
399 ROCPRIM_DEVICE ROCPRIM_INLINE
400 void load(InputIterator input,
401 T (&items)[ItemsPerThread],
402 storage_type& storage)
404 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
// Compile-time check: the iterator's element type must convert implicitly to T.
405 static_assert(std::is_convertible<value_type, T>::value,
406 "The type T must be such that an object of type InputIterator " 407 "can be dereferenced and then implicitly converted to T.");
// Lane id reported by logical_lane_id for the WarpSize template argument.
408 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
409 block_load_direct_warp_striped<WarpSize>(flat_id, input, items);
// The same array is passed as both source and destination of the exchange.
410 exchange_type().striped_to_blocked(items, items, storage);
413 template<
class InputIterator>
414 ROCPRIM_DEVICE ROCPRIM_INLINE
415 void load(InputIterator input,
416 T (&items)[ItemsPerThread],
418 storage_type& storage)
420 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
421 static_assert(std::is_convertible<value_type, T>::value,
422 "The type T must be such that an object of type InputIterator " 423 "can be dereferenced and then implicitly converted to T.");
424 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
425 block_load_direct_warp_striped<WarpSize>(flat_id, input, items, valid);
426 exchange_type().striped_to_blocked(items, items, storage);
433 ROCPRIM_DEVICE ROCPRIM_INLINE
434 void load(InputIterator input,
435 T (&items)[ItemsPerThread],
437 Default out_of_bounds,
438 storage_type& storage)
440 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
441 static_assert(std::is_convertible<value_type, T>::value,
442 "The type T must be such that an object of type InputIterator " 443 "can be dereferenced and then implicitly converted to T.");
444 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
445 block_load_direct_warp_striped<WarpSize>(flat_id, input, items, valid,
447 exchange_type().striped_to_blocked(items, items, storage);
451 #endif // DOXYGEN_SHOULD_SKIP_THIS 453 END_ROCPRIM_NAMESPACE
458 #endif // ROCPRIM_WARP_WARP_LOAD_HPP_ ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator input, T(&items)[ItemsPerThread], unsigned int valid, Default out_of_bounds, storage_type &)
Loads data from continuous memory into an arrangement of items across the warp.
Definition: warp_load.hpp:223
ROCPRIM_DEVICE ROCPRIM_INLINE constexpr unsigned int device_warp_size()
Returns a number of threads in a hardware warp for the actual target.
Definition: thread.hpp:70
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator input, T(&items)[ItemsPerThread], unsigned int valid, storage_type &)
Loads data from continuous memory into an arrangement of items across the warp.
Definition: warp_load.hpp:190
Data from continuous memory is loaded into a blocked arrangement of items.
A striped arrangement of data is read directly from memory.
Defaults to block_load_direct.
ROCPRIM_DEVICE ROCPRIM_INLINE auto block_load_direct_blocked_vectorized(unsigned int flat_id, T *block_input, U(&items)[ItemsPerThread]) -> typename std::enable_if< detail::is_vectorizable< T, ItemsPerThread >::value >::type
Loads data from continuous memory into a blocked arrangement of items across the thread block...
Definition: block_load_func.hpp:186
typename ::rocprim::detail::empty_storage_type storage_type
Struct used to allocate a temporary memory that is required for thread communication during operation...
Definition: warp_load.hpp:142
warp_load_method
warp_load_method enumerates the methods available to load data from continuous memory into a blocked/striped arrangement of items across the warp.
Definition: warp_load.hpp:38
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator input, T(&items)[ItemsPerThread], storage_type &)
Loads data from continuous memory into an arrangement of items across the warp.
Definition: warp_load.hpp:162
Data from continuous memory is loaded into a blocked arrangement of items using vectorization as an optimization.
A striped arrangement of data from continuous memory is locally transposed into a blocked arrangement of items.
BEGIN_ROCPRIM_NAMESPACE ROCPRIM_DEVICE ROCPRIM_INLINE void block_load_direct_blocked(unsigned int flat_id, InputIterator block_input, T(&items)[ItemsPerThread])
Loads data from continuous memory into a blocked arrangement of items across the thread block...
Definition: block_load_func.hpp:58
The warp_load class is a warp level parallel primitive which provides methods for loading data from continuous memory into a blocked/striped arrangement of items across the warp.
Definition: warp_load.hpp:122