21 #ifndef ROCPRIM_WARP_WARP_LOAD_HPP_    22 #define ROCPRIM_WARP_WARP_LOAD_HPP_    24 #include "../config.hpp"    25 #include "../intrinsics.hpp"    26 #include "../detail/various.hpp"    28 #include "warp_exchange.hpp"    29 #include "../block/block_load_func.hpp"    34 BEGIN_ROCPRIM_NAMESPACE
   118     unsigned int ItemsPerThread,
   124     static_assert(::rocprim::detail::is_power_of_two(WarpSize),
   125                   "Logical warp size must be a power of two.");
   127                   "Logical warp size cannot be larger than physical warp size.");
   130     using storage_type_ = typename ::rocprim::detail::empty_storage_type;
   141     #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen   160     template<
class InputIterator>
   161     ROCPRIM_DEVICE ROCPRIM_INLINE
   163               T (&items)[ItemsPerThread],
   166         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   167         static_assert(std::is_convertible<value_type, T>::value,
   168                       "The type T must be such that an object of type InputIterator "   169                       "can be dereferenced and then implicitly converted to T.");
   170         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   188     template<
class InputIterator>
   189     ROCPRIM_DEVICE ROCPRIM_INLINE
   191               T (&items)[ItemsPerThread],
   195         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   196         static_assert(std::is_convertible<value_type, T>::value,
   197                       "The type T must be such that an object of type InputIterator "   198                       "can be dereferenced and then implicitly converted to T.");
   199         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   222     ROCPRIM_DEVICE ROCPRIM_INLINE
   224               T (&items)[ItemsPerThread],
   226               Default out_of_bounds,
   229         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   230         static_assert(std::is_convertible<value_type, T>::value,
   231                       "The type T must be such that an object of type InputIterator "   232                       "can be dereferenced and then implicitly converted to T.");
   233         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   239 #ifndef DOXYGEN_SHOULD_SKIP_THIS   243     unsigned int ItemsPerThread,
   244     unsigned int WarpSize
   248     static_assert(::rocprim::detail::is_power_of_two(WarpSize),
   249                   "Logical warp size must be a power of two.");
   251                   "Logical warp size cannot be larger than physical warp size.");
   254     using storage_type = typename ::rocprim::detail::empty_storage_type;
   256     template<
class InputIterator>
   257     ROCPRIM_DEVICE ROCPRIM_INLINE
   258     void load(InputIterator input,
   259               T (&items)[ItemsPerThread],
   262         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   263         static_assert(std::is_convertible<value_type, T>::value,
   264                       "The type T must be such that an object of type InputIterator "   265                       "can be dereferenced and then implicitly converted to T.");
   266         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   267         block_load_direct_warp_striped<WarpSize>(flat_id, input, items);
   270     template<
class InputIterator>
   271     ROCPRIM_DEVICE ROCPRIM_INLINE
   272     void load(InputIterator input,
   273               T (&items)[ItemsPerThread],
   277         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   278         static_assert(std::is_convertible<value_type, T>::value,
   279                       "The type T must be such that an object of type InputIterator "   280                       "can be dereferenced and then implicitly converted to T.");
   281         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   282         block_load_direct_warp_striped<WarpSize>(flat_id, input, items, valid);
   289     ROCPRIM_DEVICE ROCPRIM_INLINE
   290     void load(InputIterator input,
   291               T (&items)[ItemsPerThread],
   293               Default out_of_bounds,
   296         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   297         static_assert(std::is_convertible<value_type, T>::value,
   298                       "The type T must be such that an object of type InputIterator "   299                       "can be dereferenced and then implicitly converted to T.");
   300         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   301         block_load_direct_warp_striped<WarpSize>(flat_id, input, items, valid,
   308     unsigned int ItemsPerThread,
   309     unsigned int WarpSize
   313     static_assert(::rocprim::detail::is_power_of_two(WarpSize),
   314                   "Logical warp size must be a power of two.");
   316                   "Logical warp size cannot be larger than physical warp size.");
   319     using storage_type = typename ::rocprim::detail::empty_storage_type;
   321     ROCPRIM_DEVICE ROCPRIM_INLINE
   323               T (&items)[ItemsPerThread],
   326         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   330     template<
class InputIterator>
   331     ROCPRIM_DEVICE ROCPRIM_INLINE
   332     void load(InputIterator input,
   333               T (&items)[ItemsPerThread],
   336         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   337         static_assert(std::is_convertible<value_type, T>::value,
   338                       "The type T must be such that an object of type InputIterator "   339                       "can be dereferenced and then implicitly converted to T.");
   340         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   344     template<
class InputIterator>
   345     ROCPRIM_DEVICE ROCPRIM_INLINE
   346     void load(InputIterator input,
   347               T (&items)[ItemsPerThread],
   351         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   352         static_assert(std::is_convertible<value_type, T>::value,
   353                       "The type T must be such that an object of type InputIterator "   354                       "can be dereferenced and then implicitly converted to T.");
   355         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   363     ROCPRIM_DEVICE ROCPRIM_INLINE
   364     void load(InputIterator input,
   365               T (&items)[ItemsPerThread],
   367               Default out_of_bounds,
   370         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   371         static_assert(std::is_convertible<value_type, T>::value,
   372                       "The type T must be such that an object of type InputIterator "   373                       "can be dereferenced and then implicitly converted to T.");
   374         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   382     unsigned int ItemsPerThread,
   383     unsigned int WarpSize
   387     static_assert(::rocprim::detail::is_power_of_two(WarpSize),
   388                   "Logical warp size must be a power of two.");
   390                   "Logical warp size cannot be larger than physical warp size.");
   393     using exchange_type = ::rocprim::warp_exchange<T, ItemsPerThread, WarpSize>;
   396     using storage_type = 
typename exchange_type::storage_type;
   398     template<
class InputIterator>
   399     ROCPRIM_DEVICE ROCPRIM_INLINE
   400     void load(InputIterator input,
   401               T (&items)[ItemsPerThread],
   402               storage_type& storage)
   404         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   405         static_assert(std::is_convertible<value_type, T>::value,
   406                       "The type T must be such that an object of type InputIterator "   407                       "can be dereferenced and then implicitly converted to T.");
   408         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   409         block_load_direct_warp_striped<WarpSize>(flat_id, input, items);
   410         exchange_type().striped_to_blocked(items, items, storage);
   413     template<
class InputIterator>
   414     ROCPRIM_DEVICE ROCPRIM_INLINE
   415     void load(InputIterator input,
   416               T (&items)[ItemsPerThread],
   418               storage_type& storage)
   420         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   421         static_assert(std::is_convertible<value_type, T>::value,
   422                       "The type T must be such that an object of type InputIterator "   423                       "can be dereferenced and then implicitly converted to T.");
   424         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   425         block_load_direct_warp_striped<WarpSize>(flat_id, input, items, valid);
   426         exchange_type().striped_to_blocked(items, items, storage);
   433     ROCPRIM_DEVICE ROCPRIM_INLINE
   434     void load(InputIterator input,
   435               T (&items)[ItemsPerThread],
   437               Default out_of_bounds,
   438               storage_type& storage)
   440         using value_type = 
typename std::iterator_traits<InputIterator>::value_type;
   441         static_assert(std::is_convertible<value_type, T>::value,
   442                       "The type T must be such that an object of type InputIterator "   443                       "can be dereferenced and then implicitly converted to T.");
   444         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   445         block_load_direct_warp_striped<WarpSize>(flat_id, input, items, valid,
   447         exchange_type().striped_to_blocked(items, items, storage);
   451 #endif // DOXYGEN_SHOULD_SKIP_THIS   453 END_ROCPRIM_NAMESPACE
   458 #endif // ROCPRIM_WARP_WARP_LOAD_HPP_ ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator input, T(&items)[ItemsPerThread], unsigned int valid, Default out_of_bounds, storage_type &)
Loads data from contiguous memory into an arrangement of items across the warp. 
Definition: warp_load.hpp:223
ROCPRIM_DEVICE ROCPRIM_INLINE constexpr unsigned int device_warp_size()
Returns the number of threads in a hardware warp for the current target. 
Definition: thread.hpp:70
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator input, T(&items)[ItemsPerThread], unsigned int valid, storage_type &)
Loads data from contiguous memory into an arrangement of items across the warp. 
Definition: warp_load.hpp:190
Data from contiguous memory is loaded into a blocked arrangement of items. 
A striped arrangement of data is read directly from memory. 
Defaults to block_load_direct. 
ROCPRIM_DEVICE ROCPRIM_INLINE auto block_load_direct_blocked_vectorized(unsigned int flat_id, T *block_input, U(&items)[ItemsPerThread]) -> typename std::enable_if< detail::is_vectorizable< T, ItemsPerThread >::value >::type
Loads data from contiguous memory into a blocked arrangement of items across the thread block...
Definition: block_load_func.hpp:186
typename ::rocprim::detail::empty_storage_type storage_type
Struct used to allocate the temporary memory required for thread communication during operation...
Definition: warp_load.hpp:142
warp_load_method
warp_load_method enumerates the methods available to load data from contiguous memory into a blocked/...
Definition: warp_load.hpp:38
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator input, T(&items)[ItemsPerThread], storage_type &)
Loads data from contiguous memory into an arrangement of items across the warp. 
Definition: warp_load.hpp:162
Data from contiguous memory is loaded into a blocked arrangement of items using vectorization as an o...
A striped arrangement of data from contiguous memory is locally transposed into a blocked arrangement...
BEGIN_ROCPRIM_NAMESPACE ROCPRIM_DEVICE ROCPRIM_INLINE void block_load_direct_blocked(unsigned int flat_id, InputIterator block_input, T(&items)[ItemsPerThread])
Loads data from contiguous memory into a blocked arrangement of items across the thread block...
Definition: block_load_func.hpp:58
The warp_load class is a warp-level parallel primitive that provides methods for loading data from c...
Definition: warp_load.hpp:122