21 #ifndef ROCPRIM_WARP_WARP_STORE_HPP_ 22 #define ROCPRIM_WARP_WARP_STORE_HPP_ 24 #include "../config.hpp" 25 #include "../intrinsics.hpp" 26 #include "../detail/various.hpp" 28 #include "warp_exchange.hpp" 29 #include "../block/block_store_func.hpp" 34 BEGIN_ROCPRIM_NAMESPACE
120 unsigned int ItemsPerThread,
126 static_assert(::rocprim::detail::is_power_of_two(WarpSize),
127 "Logical warp size must be a power of two.");
129 "Logical warp size cannot be larger than physical warp size.");
132 using storage_type_ = typename ::rocprim::detail::empty_storage_type;
143 #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen 165 template<
class OutputIterator>
166 ROCPRIM_DEVICE ROCPRIM_INLINE
168 T (&items)[ItemsPerThread],
171 using value_type =
typename std::iterator_traits<OutputIterator>::value_type;
172 static_assert(std::is_convertible<T, value_type>::value,
173 "The type T must be such that an object of type OutputIterator " 174 "can be dereferenced and then implicitly assigned from T.");
175 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
197 template<
class OutputIterator>
198 ROCPRIM_DEVICE ROCPRIM_INLINE
200 T (&items)[ItemsPerThread],
204 using value_type =
typename std::iterator_traits<OutputIterator>::value_type;
205 static_assert(std::is_convertible<T, value_type>::value,
206 "The type T must be such that an object of type OutputIterator " 207 "can be dereferenced and then implicitly assigned from T.");
208 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
213 #ifndef DOXYGEN_SHOULD_SKIP_THIS 217 unsigned int ItemsPerThread,
218 unsigned int WarpSize
222 static_assert(::rocprim::detail::is_power_of_two(WarpSize),
223 "Logical warp size must be a power of two.");
225 "Logical warp size cannot be larger than physical warp size.");
228 using storage_type = typename ::rocprim::detail::empty_storage_type;
230 template<
class OutputIterator>
231 ROCPRIM_DEVICE ROCPRIM_INLINE
232 void store(OutputIterator output,
233 T (&items)[ItemsPerThread],
236 using value_type =
typename std::iterator_traits<OutputIterator>::value_type;
237 static_assert(std::is_convertible<T, value_type>::value,
238 "The type T must be such that an object of type OutputIterator " 239 "can be dereferenced and then implicitly assigned from T.");
240 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
241 block_store_direct_warp_striped<WarpSize>(flat_id, output, items);
244 template<
class OutputIterator>
245 ROCPRIM_DEVICE ROCPRIM_INLINE
246 void store(OutputIterator output,
247 T (&items)[ItemsPerThread],
251 using value_type =
typename std::iterator_traits<OutputIterator>::value_type;
252 static_assert(std::is_convertible<T, value_type>::value,
253 "The type T must be such that an object of type OutputIterator " 254 "can be dereferenced and then implicitly assigned from T.");
255 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
256 block_store_direct_warp_striped<WarpSize>(flat_id, output, items, valid);
262 unsigned int ItemsPerThread,
263 unsigned int WarpSize
267 static_assert(::rocprim::detail::is_power_of_two(WarpSize),
268 "Logical warp size must be a power of two.");
270 "Logical warp size cannot be larger than physical warp size.");
273 using storage_type = typename ::rocprim::detail::empty_storage_type;
275 ROCPRIM_DEVICE ROCPRIM_INLINE
276 void store(T* output,
277 T (&items)[ItemsPerThread],
280 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
284 template<
class OutputIterator>
285 ROCPRIM_DEVICE ROCPRIM_INLINE
286 void store(OutputIterator output,
287 T (&items)[ItemsPerThread],
290 using value_type =
typename std::iterator_traits<OutputIterator>::value_type;
291 static_assert(std::is_convertible<T, value_type>::value,
292 "The type T must be such that an object of type OutputIterator " 293 "can be dereferenced and then implicitly assigned from T.");
294 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
298 template<
class OutputIterator>
299 ROCPRIM_DEVICE ROCPRIM_INLINE
300 void store(OutputIterator output,
301 T (&items)[ItemsPerThread],
305 using value_type =
typename std::iterator_traits<OutputIterator>::value_type;
306 static_assert(std::is_convertible<T, value_type>::value,
307 "The type T must be such that an object of type OutputIterator " 308 "can be dereferenced and then implicitly assigned from T.");
309 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
316 unsigned int ItemsPerThread,
317 unsigned int WarpSize
321 static_assert(::rocprim::detail::is_power_of_two(WarpSize),
322 "Logical warp size must be a power of two.");
324 "Logical warp size cannot be larger than physical warp size.");
327 using exchange_type = ::rocprim::warp_exchange<T, ItemsPerThread, WarpSize>;
330 using storage_type =
typename exchange_type::storage_type;
332 template<
class OutputIterator>
333 ROCPRIM_DEVICE ROCPRIM_INLINE
334 void store(OutputIterator output,
335 T (&items)[ItemsPerThread],
336 storage_type& storage)
338 using value_type =
typename std::iterator_traits<OutputIterator>::value_type;
339 static_assert(std::is_convertible<T, value_type>::value,
340 "The type T must be such that an object of type OutputIterator " 341 "can be dereferenced and then implicitly assigned from T.");
342 exchange_type().blocked_to_striped(items, items, storage);
343 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
344 block_store_direct_warp_striped<WarpSize>(flat_id, output, items);
347 template<
class OutputIterator>
348 ROCPRIM_DEVICE ROCPRIM_INLINE
349 void store(OutputIterator output,
350 T (&items)[ItemsPerThread],
352 storage_type& storage)
354 using value_type =
typename std::iterator_traits<OutputIterator>::value_type;
355 static_assert(std::is_convertible<T, value_type>::value,
356 "The type T must be such that an object of type OutputIterator " 357 "can be dereferenced and then implicitly assigned from T.");
358 exchange_type().blocked_to_striped(items, items, storage);
359 const unsigned int flat_id = ::rocprim::detail::logical_lane_id<WarpSize>();
360 block_store_direct_warp_striped<WarpSize>(flat_id, output, items, valid);
364 #endif // DOXYGEN_SHOULD_SKIP_THIS 366 END_ROCPRIM_NAMESPACE
371 #endif // ROCPRIM_WARP_WARP_STORE_HPP_ typename ::rocprim::detail::empty_storage_type storage_type
Struct used to allocate temporary memory that is required for thread communication during operation...
Definition: warp_store.hpp:144
ROCPRIM_DEVICE ROCPRIM_INLINE auto block_store_direct_blocked_vectorized(unsigned int flat_id, T *block_output, U(&items)[ItemsPerThread]) -> typename std::enable_if< detail::is_vectorizable< T, ItemsPerThread >::value >::type
Stores a blocked arrangement of items from across the thread block into a blocked arrangement on cont...
Definition: block_store_func.hpp:151
ROCPRIM_DEVICE ROCPRIM_INLINE constexpr unsigned int device_warp_size()
Returns a number of threads in a hardware warp for the actual target.
Definition: thread.hpp:70
warp_store_method
warp_store_method enumerates the methods available to store a blocked/striped arrangement of items in...
Definition: warp_store.hpp:38
Defaults to warp_store_direct.
ROCPRIM_DEVICE ROCPRIM_INLINE void store(OutputIterator output, T(&items)[ItemsPerThread], storage_type &)
Stores an arrangement of items from across the warp into an arrangement on continuous memory...
Definition: warp_store.hpp:167
A blocked arrangement of items is locally transposed and stored as a striped arrangement of data on c...
ROCPRIM_DEVICE ROCPRIM_INLINE void store(OutputIterator output, T(&items)[ItemsPerThread], unsigned int valid, storage_type &)
Stores an arrangement of items from across the warp into an arrangement on continuous memory...
Definition: warp_store.hpp:199
BEGIN_ROCPRIM_NAMESPACE ROCPRIM_DEVICE ROCPRIM_INLINE void block_store_direct_blocked(unsigned int flat_id, OutputIterator block_output, T(&items)[ItemsPerThread])
Stores a blocked arrangement of items from across the thread block into a blocked arrangement on cont...
Definition: block_store_func.hpp:58
A blocked arrangement of items is stored into a blocked arrangement on continuous memory using vector...
A blocked arrangement of items is stored into a blocked arrangement on continuous memory...
The warp_store class is a warp level parallel primitive which provides methods for storing an arrange...
Definition: warp_store.hpp:124
A striped arrangement of items is stored into a blocked arrangement on continuous memory...