21 #ifndef ROCPRIM_BLOCK_BLOCK_LOAD_HPP_ 22 #define ROCPRIM_BLOCK_BLOCK_LOAD_HPP_ 24 #include "../config.hpp" 25 #include "../detail/various.hpp" 27 #include "../intrinsics.hpp" 28 #include "../functional.hpp" 29 #include "../types.hpp" 31 #include "block_load_func.hpp" 32 #include "block_exchange.hpp" 37 BEGIN_ROCPRIM_NAMESPACE
127 unsigned int BlockSizeX,
128 unsigned int ItemsPerThread,
130 unsigned int BlockSizeY = 1,
131 unsigned int BlockSizeZ = 1
136 using storage_type_ = typename ::rocprim::detail::empty_storage_type;
147 #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen 165 template<
class InputIterator>
166 ROCPRIM_DEVICE ROCPRIM_INLINE
167 void load(InputIterator block_input,
168 T (&items)[ItemsPerThread])
170 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
171 static_assert(std::is_convertible<value_type, T>::value,
172 "The type T must be such that an object of type InputIterator " 173 "can be dereferenced and then implicitly converted to T.");
174 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
191 template<
class InputIterator>
192 ROCPRIM_DEVICE ROCPRIM_INLINE
193 void load(InputIterator block_input,
194 T (&items)[ItemsPerThread],
197 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
198 static_assert(std::is_convertible<value_type, T>::value,
199 "The type T must be such that an object of type InputIterator " 200 "can be dereferenced and then implicitly converted to T.");
201 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
225 ROCPRIM_DEVICE ROCPRIM_INLINE
226 void load(InputIterator block_input,
227 T (&items)[ItemsPerThread],
229 Default out_of_bounds)
231 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
232 static_assert(std::is_convertible<value_type, T>::value,
233 "The type T must be such that an object of type InputIterator " 234 "can be dereferenced and then implicitly converted to T.");
235 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
270 template<
class InputIterator>
271 ROCPRIM_DEVICE ROCPRIM_INLINE
272 void load(InputIterator block_input,
273 T (&items)[ItemsPerThread],
276 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
277 static_assert(std::is_convertible<value_type, T>::value,
278 "The type T must be such that an object of type InputIterator " 279 "can be dereferenced and then implicitly converted to T.");
281 load(block_input, items);
315 template<
class InputIterator>
316 ROCPRIM_DEVICE ROCPRIM_INLINE
317 void load(InputIterator block_input,
318 T (&items)[ItemsPerThread],
322 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
323 static_assert(std::is_convertible<value_type, T>::value,
324 "The type T must be such that an object of type InputIterator " 325 "can be dereferenced and then implicitly converted to T.");
327 load(block_input, items, valid);
368 ROCPRIM_DEVICE ROCPRIM_INLINE
369 void load(InputIterator block_input,
370 T (&items)[ItemsPerThread],
372 Default out_of_bounds,
375 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
376 static_assert(std::is_convertible<value_type, T>::value,
377 "The type T must be such that an object of type InputIterator " 378 "can be dereferenced and then implicitly converted to T.");
380 load(block_input, items, valid, out_of_bounds);
384 #ifndef DOXYGEN_SHOULD_SKIP_THIS 388 unsigned int BlockSizeX,
389 unsigned int ItemsPerThread,
390 unsigned int BlockSizeY,
391 unsigned int BlockSizeZ
395 static constexpr
unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
398 using storage_type_ = typename ::rocprim::detail::empty_storage_type;
401 #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen 402 using storage_type = typename ::rocprim::detail::empty_storage_type;
404 using storage_type = storage_type_;
407 template<
class InputIterator>
408 ROCPRIM_DEVICE
inline 409 void load(InputIterator block_input,
410 T (&items)[ItemsPerThread])
412 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
413 static_assert(std::is_convertible<value_type, T>::value,
414 "The type T must be such that an object of type InputIterator " 415 "can be dereferenced and then implicitly converted to T.");
416 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
417 block_load_direct_striped<BlockSize>(flat_id, block_input, items);
420 template<
class InputIterator>
421 ROCPRIM_DEVICE
inline 422 void load(InputIterator block_input,
423 T (&items)[ItemsPerThread],
426 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
427 static_assert(std::is_convertible<value_type, T>::value,
428 "The type T must be such that an object of type InputIterator " 429 "can be dereferenced and then implicitly converted to T.");
430 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
431 block_load_direct_striped<BlockSize>(flat_id, block_input, items, valid);
438 ROCPRIM_DEVICE
inline 439 void load(InputIterator block_input,
440 T (&items)[ItemsPerThread],
442 Default out_of_bounds)
444 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
445 static_assert(std::is_convertible<value_type, T>::value,
446 "The type T must be such that an object of type InputIterator " 447 "can be dereferenced and then implicitly converted to T.");
448 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
449 block_load_direct_striped<BlockSize>(flat_id, block_input, items, valid,
453 template<
class InputIterator>
454 ROCPRIM_DEVICE
inline 455 void load(InputIterator block_input,
456 T (&items)[ItemsPerThread],
457 storage_type& storage)
459 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
460 static_assert(std::is_convertible<value_type, T>::value,
461 "The type T must be such that an object of type InputIterator " 462 "can be dereferenced and then implicitly converted to T.");
464 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
465 block_load_direct_striped<BlockSize>(flat_id, block_input, items);
468 template<
class InputIterator>
469 ROCPRIM_DEVICE
inline 470 void load(InputIterator block_input,
471 T (&items)[ItemsPerThread],
473 storage_type& storage)
475 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
476 static_assert(std::is_convertible<value_type, T>::value,
477 "The type T must be such that an object of type InputIterator " 478 "can be dereferenced and then implicitly converted to T.");
480 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
481 block_load_direct_striped<BlockSize>(flat_id, block_input, items, valid);
488 ROCPRIM_DEVICE
inline 489 void load(InputIterator block_input,
490 T (&items)[ItemsPerThread],
492 Default out_of_bounds,
493 storage_type& storage)
495 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
496 static_assert(std::is_convertible<value_type, T>::value,
497 "The type T must be such that an object of type InputIterator " 498 "can be dereferenced and then implicitly converted to T.");
500 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
501 block_load_direct_striped<BlockSize>(flat_id, block_input, items, valid,
508 unsigned int BlockSizeX,
509 unsigned int ItemsPerThread,
510 unsigned int BlockSizeY,
511 unsigned int BlockSizeZ
516 using storage_type_ = typename ::rocprim::detail::empty_storage_type;
519 #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen 520 using storage_type = typename ::rocprim::detail::empty_storage_type;
522 using storage_type = storage_type_;
525 ROCPRIM_DEVICE ROCPRIM_INLINE
526 void load(T* block_input,
527 T (&_items)[ItemsPerThread])
529 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
533 template<
class InputIterator,
class U>
534 ROCPRIM_DEVICE ROCPRIM_INLINE
535 void load(InputIterator block_input,
536 U (&items)[ItemsPerThread])
538 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
539 static_assert(std::is_convertible<value_type, T>::value,
540 "The type T must be such that an object of type InputIterator " 541 "can be dereferenced and then implicitly converted to T.");
542 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
546 template<
class InputIterator>
547 ROCPRIM_DEVICE ROCPRIM_INLINE
548 void load(InputIterator block_input,
549 T (&items)[ItemsPerThread],
552 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
553 static_assert(std::is_convertible<value_type, T>::value,
554 "The type T must be such that an object of type InputIterator " 555 "can be dereferenced and then implicitly converted to T.");
556 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
564 ROCPRIM_DEVICE ROCPRIM_INLINE
565 void load(InputIterator block_input,
566 T (&items)[ItemsPerThread],
568 Default out_of_bounds)
570 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
571 static_assert(std::is_convertible<value_type, T>::value,
572 "The type T must be such that an object of type InputIterator " 573 "can be dereferenced and then implicitly converted to T.");
574 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
579 ROCPRIM_DEVICE ROCPRIM_INLINE
580 void load(T* block_input,
581 T (&items)[ItemsPerThread],
582 storage_type& storage)
585 load(block_input, items);
588 template<
class InputIterator,
class U>
589 ROCPRIM_DEVICE ROCPRIM_INLINE
590 void load(InputIterator block_input,
591 U (&items)[ItemsPerThread],
592 storage_type& storage)
594 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
595 static_assert(std::is_convertible<value_type, T>::value,
596 "The type T must be such that an object of type InputIterator " 597 "can be dereferenced and then implicitly converted to T.");
599 load(block_input, items);
602 template<
class InputIterator>
603 ROCPRIM_DEVICE ROCPRIM_INLINE
604 void load(InputIterator block_input,
605 T (&items)[ItemsPerThread],
607 storage_type& storage)
609 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
610 static_assert(std::is_convertible<value_type, T>::value,
611 "The type T must be such that an object of type InputIterator " 612 "can be dereferenced and then implicitly converted to T.");
614 load(block_input, items, valid);
621 ROCPRIM_DEVICE ROCPRIM_INLINE
622 void load(InputIterator block_input,
623 T (&items)[ItemsPerThread],
625 Default out_of_bounds,
626 storage_type& storage)
628 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
629 static_assert(std::is_convertible<value_type, T>::value,
630 "The type T must be such that an object of type InputIterator " 631 "can be dereferenced and then implicitly converted to T.");
633 load(block_input, items, valid, out_of_bounds);
639 unsigned int BlockSizeX,
640 unsigned int ItemsPerThread,
641 unsigned int BlockSizeY,
642 unsigned int BlockSizeZ
646 static constexpr
unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
654 template<
class InputIterator>
655 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
656 void load(InputIterator block_input,
657 T (&items)[ItemsPerThread])
659 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
660 static_assert(std::is_convertible<value_type, T>::value,
661 "The type T must be such that an object of type InputIterator " 662 "can be dereferenced and then implicitly converted to T.");
663 ROCPRIM_SHARED_MEMORY storage_type storage;
664 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
665 block_load_direct_striped<BlockSize>(flat_id, block_input, items);
669 template<
class InputIterator>
670 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
671 void load(InputIterator block_input,
672 T (&items)[ItemsPerThread],
675 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
676 static_assert(std::is_convertible<value_type, T>::value,
677 "The type T must be such that an object of type InputIterator " 678 "can be dereferenced and then implicitly converted to T.");
679 ROCPRIM_SHARED_MEMORY storage_type storage;
680 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
681 block_load_direct_striped<BlockSize>(flat_id, block_input, items, valid);
689 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
690 void load(InputIterator block_input,
691 T (&items)[ItemsPerThread],
693 Default out_of_bounds)
695 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
696 static_assert(std::is_convertible<value_type, T>::value,
697 "The type T must be such that an object of type InputIterator " 698 "can be dereferenced and then implicitly converted to T.");
699 ROCPRIM_SHARED_MEMORY storage_type storage;
700 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
701 block_load_direct_striped<BlockSize>(flat_id, block_input, items, valid,
706 template<
class InputIterator>
707 ROCPRIM_DEVICE ROCPRIM_INLINE
708 void load(InputIterator block_input,
709 T (&items)[ItemsPerThread],
710 storage_type& storage)
712 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
713 static_assert(std::is_convertible<value_type, T>::value,
714 "The type T must be such that an object of type InputIterator " 715 "can be dereferenced and then implicitly converted to T.");
716 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
717 block_load_direct_striped<BlockSize>(flat_id, block_input, items);
721 template<
class InputIterator>
722 ROCPRIM_DEVICE ROCPRIM_INLINE
723 void load(InputIterator block_input,
724 T (&items)[ItemsPerThread],
726 storage_type& storage)
728 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
729 static_assert(std::is_convertible<value_type, T>::value,
730 "The type T must be such that an object of type InputIterator " 731 "can be dereferenced and then implicitly converted to T.");
732 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
733 block_load_direct_striped<BlockSize>(flat_id, block_input, items, valid);
741 ROCPRIM_DEVICE ROCPRIM_INLINE
742 void load(InputIterator block_input,
743 T (&items)[ItemsPerThread],
745 Default out_of_bounds,
746 storage_type& storage)
748 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
749 static_assert(std::is_convertible<value_type, T>::value,
750 "The type T must be such that an object of type InputIterator " 751 "can be dereferenced and then implicitly converted to T.");
752 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
753 block_load_direct_striped<BlockSize>(flat_id, block_input, items, valid,
761 unsigned int BlockSizeX,
762 unsigned int ItemsPerThread,
763 unsigned int BlockSizeY,
764 unsigned int BlockSizeZ
768 static constexpr
unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
774 "BlockSize must be a multiple of hardware warpsize");
778 template<
class InputIterator>
779 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
780 void load(InputIterator block_input,
781 T (&items)[ItemsPerThread])
783 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
784 static_assert(std::is_convertible<value_type, T>::value,
785 "The type T must be such that an object of type InputIterator " 786 "can be dereferenced and then implicitly converted to T.");
787 ROCPRIM_SHARED_MEMORY storage_type storage;
788 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
793 template<
class InputIterator>
794 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
795 void load(InputIterator block_input,
796 T (&items)[ItemsPerThread],
799 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
800 static_assert(std::is_convertible<value_type, T>::value,
801 "The type T must be such that an object of type InputIterator " 802 "can be dereferenced and then implicitly converted to T.");
803 ROCPRIM_SHARED_MEMORY storage_type storage;
804 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
814 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
815 void load(InputIterator block_input,
816 T (&items)[ItemsPerThread],
818 Default out_of_bounds)
820 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
821 static_assert(std::is_convertible<value_type, T>::value,
822 "The type T must be such that an object of type InputIterator " 823 "can be dereferenced and then implicitly converted to T.");
824 ROCPRIM_SHARED_MEMORY storage_type storage;
825 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
831 template<
class InputIterator>
832 ROCPRIM_DEVICE ROCPRIM_INLINE
833 void load(InputIterator block_input,
834 T (&items)[ItemsPerThread],
835 storage_type& storage)
837 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
838 static_assert(std::is_convertible<value_type, T>::value,
839 "The type T must be such that an object of type InputIterator " 840 "can be dereferenced and then implicitly converted to T.");
841 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
846 template<
class InputIterator>
847 ROCPRIM_DEVICE ROCPRIM_INLINE
848 void load(InputIterator block_input,
849 T (&items)[ItemsPerThread],
851 storage_type& storage)
853 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
854 static_assert(std::is_convertible<value_type, T>::value,
855 "The type T must be such that an object of type InputIterator " 856 "can be dereferenced and then implicitly converted to T.");
857 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
866 ROCPRIM_DEVICE ROCPRIM_INLINE
867 void load(InputIterator block_input,
868 T (&items)[ItemsPerThread],
870 Default out_of_bounds,
871 storage_type& storage)
873 using value_type =
typename std::iterator_traits<InputIterator>::value_type;
874 static_assert(std::is_convertible<value_type, T>::value,
875 "The type T must be such that an object of type InputIterator " 876 "can be dereferenced and then implicitly converted to T.");
877 const unsigned int flat_id = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
884 #endif // DOXYGEN_SHOULD_SKIP_THIS 886 END_ROCPRIM_NAMESPACE
891 #endif // ROCPRIM_BLOCK_BLOCK_LOAD_HPP_ Data from continuous memory is loaded into a blocked arrangement of items using vectorization as an o...
ROCPRIM_DEVICE ROCPRIM_INLINE constexpr unsigned int device_warp_size()
Returns the number of threads in a hardware warp for the actual target.
Definition: thread.hpp:70
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator block_input, T(&items)[ItemsPerThread], unsigned int valid, storage_type &storage)
Loads data from continuous memory into an arrangement of items across the thread block, which is guarded by range valid, using temporary storage.
Definition: block_load.hpp:317
The block_exchange class is a block level parallel primitive which provides methods for rearranging i...
Definition: block_exchange.hpp:81
Defaults to block_load_direct.
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void striped_to_blocked(const T(&input)[ItemsPerThread], U(&output)[ItemsPerThread])
Transposes a striped arrangement of items to a blocked arrangement across the thread block...
Definition: block_exchange.hpp:196
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator block_input, T(&items)[ItemsPerThread])
Loads data from continuous memory into an arrangement of items across the thread block.
Definition: block_load.hpp:167
ROCPRIM_DEVICE ROCPRIM_INLINE auto block_load_direct_blocked_vectorized(unsigned int flat_id, T *block_input, U(&items)[ItemsPerThread]) -> typename std::enable_if< detail::is_vectorizable< T, ItemsPerThread >::value >::type
Loads data from continuous memory into a blocked arrangement of items across the thread block...
Definition: block_load_func.hpp:186
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator block_input, T(&items)[ItemsPerThread], unsigned int valid)
Loads data from continuous memory into an arrangement of items across the thread block, which is guarded by range valid.
Definition: block_load.hpp:193
A striped arrangement of data is read directly from memory.
block_load_method
block_load_method enumerates the methods available to load data from continuous memory into a blocked...
Definition: block_load.hpp:41
typename ::rocprim::detail::empty_storage_type storage_type
Struct used to allocate a temporary memory that is required for thread communication during operation...
Definition: block_load.hpp:148
A striped arrangement of data from continuous memory is locally transposed into a blocked arrangement...
ROCPRIM_DEVICE ROCPRIM_INLINE void block_load_direct_warp_striped(unsigned int flat_id, InputIterator block_input, T(&items)[ItemsPerThread])
Loads data from continuous memory into a warp-striped arrangement of items across the thread block...
Definition: block_load_func.hpp:378
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator block_input, T(&items)[ItemsPerThread], storage_type &storage)
Loads data from continuous memory into an arrangement of items across the thread block, using temporary storage.
Definition: block_load.hpp:272
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator block_input, T(&items)[ItemsPerThread], unsigned int valid, Default out_of_bounds, storage_type &storage)
Loads data from continuous memory into an arrangement of items across the thread block, which is guarded by range valid, with a fall-back value for out-of-bound elements, using temporary storage.
Definition: block_load.hpp:369
Data from continuous memory is loaded into a blocked arrangement of items.
BEGIN_ROCPRIM_NAMESPACE ROCPRIM_DEVICE ROCPRIM_INLINE void block_load_direct_blocked(unsigned int flat_id, InputIterator block_input, T(&items)[ItemsPerThread])
Loads data from continuous memory into a blocked arrangement of items across the thread block...
Definition: block_load_func.hpp:58
ROCPRIM_DEVICE ROCPRIM_INLINE void load(InputIterator block_input, T(&items)[ItemsPerThread], unsigned int valid, Default out_of_bounds)
Loads data from continuous memory into an arrangement of items across the thread block, which is guarded by range valid, with a fall-back value for out-of-bound elements.
Definition: block_load.hpp:226
A warp-striped arrangement of data from continuous memory is locally transposed into a blocked arrang...
The block_load class is a block level parallel primitive which provides methods for loading data from...
Definition: block_load.hpp:133
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void warp_striped_to_blocked(const T(&input)[ItemsPerThread], U(&output)[ItemsPerThread])
Transposes a warp-striped arrangement of items to a blocked arrangement across the thread block...
Definition: block_exchange.hpp:333