21 #ifndef ROCPRIM_WARP_WARP_STORE_HPP_    22 #define ROCPRIM_WARP_WARP_STORE_HPP_    24 #include "../config.hpp"    25 #include "../intrinsics.hpp"    26 #include "../detail/various.hpp"    28 #include "warp_exchange.hpp"    29 #include "../block/block_store_func.hpp"    34 BEGIN_ROCPRIM_NAMESPACE
   120     unsigned int ItemsPerThread,
   126     static_assert(::rocprim::detail::is_power_of_two(WarpSize),
   127                   "Logical warp size must be a power of two.");
   129                   "Logical warp size cannot be larger than physical warp size.");
   132     using storage_type_ = typename ::rocprim::detail::empty_storage_type;
   143     #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen   165     template<
class OutputIterator>
   166     ROCPRIM_DEVICE ROCPRIM_INLINE
   168                T (&items)[ItemsPerThread],
   171         using value_type = 
typename std::iterator_traits<OutputIterator>::value_type;
   172         static_assert(std::is_convertible<T, value_type>::value,
   173                       "The type T must be such that an object of type OutputIterator "   174                       "can be dereferenced and then implicitly assigned from T.");
   175         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   197     template<
class OutputIterator>
   198     ROCPRIM_DEVICE ROCPRIM_INLINE
   200                T (&items)[ItemsPerThread],
   204         using value_type = 
typename std::iterator_traits<OutputIterator>::value_type;
   205         static_assert(std::is_convertible<T, value_type>::value,
   206                       "The type T must be such that an object of type OutputIterator "   207                       "can be dereferenced and then implicitly assigned from T.");
   208         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   213 #ifndef DOXYGEN_SHOULD_SKIP_THIS   217     unsigned int ItemsPerThread,
   218     unsigned int WarpSize
   222     static_assert(::rocprim::detail::is_power_of_two(WarpSize),
   223                   "Logical warp size must be a power of two.");
   225                   "Logical warp size cannot be larger than physical warp size.");
   228     using storage_type = typename ::rocprim::detail::empty_storage_type;
   230     template<
class OutputIterator>
   231     ROCPRIM_DEVICE ROCPRIM_INLINE
   232     void store(OutputIterator output,
   233                T (&items)[ItemsPerThread],
   236         using value_type = 
typename std::iterator_traits<OutputIterator>::value_type;
   237         static_assert(std::is_convertible<T, value_type>::value,
   238                       "The type T must be such that an object of type OutputIterator "   239                       "can be dereferenced and then implicitly assigned from T.");
   240         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   241         block_store_direct_warp_striped<WarpSize>(flat_id, output, items);
   244     template<
class OutputIterator>
   245     ROCPRIM_DEVICE ROCPRIM_INLINE
   246     void store(OutputIterator output,
   247                T (&items)[ItemsPerThread],
   251         using value_type = 
typename std::iterator_traits<OutputIterator>::value_type;
   252         static_assert(std::is_convertible<T, value_type>::value,
   253                       "The type T must be such that an object of type OutputIterator "   254                       "can be dereferenced and then implicitly assigned from T.");
   255         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   256         block_store_direct_warp_striped<WarpSize>(flat_id, output, items, valid);
   262     unsigned int ItemsPerThread,
   263     unsigned int WarpSize
   267     static_assert(::rocprim::detail::is_power_of_two(WarpSize),
   268                   "Logical warp size must be a power of two.");
   270                   "Logical warp size cannot be larger than physical warp size.");
   273     using storage_type = typename ::rocprim::detail::empty_storage_type;
   275     ROCPRIM_DEVICE ROCPRIM_INLINE
   276     void store(T* output,
   277                T (&items)[ItemsPerThread],
   280         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   284     template<
class OutputIterator>
   285     ROCPRIM_DEVICE ROCPRIM_INLINE
   286     void store(OutputIterator output,
   287                T (&items)[ItemsPerThread],
   290         using value_type = 
typename std::iterator_traits<OutputIterator>::value_type;
   291         static_assert(std::is_convertible<T, value_type>::value,
   292                       "The type T must be such that an object of type OutputIterator "   293                       "can be dereferenced and then implicitly assigned from T.");
   294         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   298     template<
class OutputIterator>
   299     ROCPRIM_DEVICE ROCPRIM_INLINE
   300     void store(OutputIterator output,
   301                T (&items)[ItemsPerThread],
   305         using value_type = 
typename std::iterator_traits<OutputIterator>::value_type;
   306         static_assert(std::is_convertible<T, value_type>::value,
   307                       "The type T must be such that an object of type OutputIterator "   308                       "can be dereferenced and then implicitly assigned from T.");
   309         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   316     unsigned int ItemsPerThread,
   317     unsigned int WarpSize
   321     static_assert(::rocprim::detail::is_power_of_two(WarpSize),
   322                   "Logical warp size must be a power of two.");
   324                   "Logical warp size cannot be larger than physical warp size.");
   327     using exchange_type = ::rocprim::warp_exchange<T, ItemsPerThread, WarpSize>;
   330     using storage_type = 
typename exchange_type::storage_type;
   332     template<
class OutputIterator>
   333     ROCPRIM_DEVICE ROCPRIM_INLINE
   334     void store(OutputIterator output,
   335                T (&items)[ItemsPerThread],
   336                storage_type& storage)
   338         using value_type = 
typename std::iterator_traits<OutputIterator>::value_type;
   339         static_assert(std::is_convertible<T, value_type>::value,
   340                       "The type T must be such that an object of type OutputIterator "   341                       "can be dereferenced and then implicitly assigned from T.");
   342         exchange_type().blocked_to_striped(items, items, storage);
   343         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   344         block_store_direct_warp_striped<WarpSize>(flat_id, output, items);
   347     template<
class OutputIterator>
   348     ROCPRIM_DEVICE ROCPRIM_INLINE
   349     void store(OutputIterator output,
   350                T (&items)[ItemsPerThread],
   352                storage_type& storage)
   354         using value_type = 
typename std::iterator_traits<OutputIterator>::value_type;
   355         static_assert(std::is_convertible<T, value_type>::value,
   356                       "The type T must be such that an object of type OutputIterator "   357                       "can be dereferenced and then implicitly assigned from T.");
   358         exchange_type().blocked_to_striped(items, items, storage);
   359         const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
   360         block_store_direct_warp_striped<WarpSize>(flat_id, output, items, valid);
   364 #endif // DOXYGEN_SHOULD_SKIP_THIS   366 END_ROCPRIM_NAMESPACE
   371 #endif // ROCPRIM_WARP_WARP_STORE_HPP_ typename ::rocprim::detail::empty_storage_type storage_type
Struct used to allocate the temporary memory required for thread communication during operation...
Definition: warp_store.hpp:144
ROCPRIM_DEVICE ROCPRIM_INLINE auto block_store_direct_blocked_vectorized(unsigned int flat_id, T *block_output, U(&items)[ItemsPerThread]) -> typename std::enable_if< detail::is_vectorizable< T, ItemsPerThread >::value >::type
Stores a blocked arrangement of items from across the thread block into a blocked arrangement on cont...
Definition: block_store_func.hpp:151
ROCPRIM_DEVICE ROCPRIM_INLINE constexpr unsigned int device_warp_size()
Returns the number of threads in a hardware warp for the current target.
Definition: thread.hpp:70
warp_store_method
warp_store_method enumerates the methods available to store a blocked/striped arrangement of items in...
Definition: warp_store.hpp:38
Defaults to warp_store_direct.
ROCPRIM_DEVICE ROCPRIM_INLINE void store(OutputIterator output, T(&items)[ItemsPerThread], storage_type &)
Stores an arrangement of items from across the warp into an arrangement on contiguous memory...
Definition: warp_store.hpp:167
A blocked arrangement of items is locally transposed and stored as a striped arrangement of data on c...
ROCPRIM_DEVICE ROCPRIM_INLINE void store(OutputIterator output, T(&items)[ItemsPerThread], unsigned int valid, storage_type &)
Stores an arrangement of items from across the warp into an arrangement on contiguous memory...
Definition: warp_store.hpp:199
BEGIN_ROCPRIM_NAMESPACE ROCPRIM_DEVICE ROCPRIM_INLINE void block_store_direct_blocked(unsigned int flat_id, OutputIterator block_output, T(&items)[ItemsPerThread])
Stores a blocked arrangement of items from across the thread block into a blocked arrangement on cont...
Definition: block_store_func.hpp:58
A blocked arrangement of items is stored into a blocked arrangement on contiguous memory using vector...
A blocked arrangement of items is stored into a blocked arrangement on contiguous memory...
The warp_store class is a warp-level parallel primitive which provides methods for storing an arrange...
Definition: warp_store.hpp:124
A striped arrangement of items is stored into a blocked arrangement on contiguous memory...