35 #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_RAKING_LAYOUT_HPP_ 36 #define HIPCUB_ROCPRIM_BLOCK_BLOCK_RAKING_LAYOUT_HPP_ 38 #include <type_traits> 40 #include "../../../config.hpp" 42 #include <rocprim/config.hpp> 43 #include <rocprim/detail/various.hpp> 45 BEGIN_HIPCUB_NAMESPACE
62 template<
typename T,
int BLOCK_THREADS,
int ARCH = HIPCUB_ARCH
77 static_cast<unsigned int>(BLOCK_THREADS), HIPCUB_DEVICE_WARP_THREADS),
95 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 114 unsigned int linear_tid)
117 unsigned int offset = linear_tid;
126 return temp_storage.
Alias().buff + offset;
134 unsigned int linear_tid)
142 #endif // HIPCUB_ROCPRIM_BLOCK_BLOCK_RAKING_LAYOUT_HPP_ HIPCUB_HOST_DEVICE __forceinline__ T & Alias()
Alias.
Definition: util_type.hpp:375
struct __align__(16) _TempStorage
Shared memory storage type.
Definition: block_raking_layout.hpp:99
The total number of elements that need to be cooperatively reduced.
Definition: block_raking_layout.hpp:73
Total number of elements in the raking grid.
Definition: block_raking_layout.hpp:89
static HIPCUB_DEVICE T * RakingPtr(TempStorage &temp_storage, unsigned int linear_tid)
Returns the location for the calling thread to begin sequential raking.
Definition: block_raking_layout.hpp:132
Whether or not we need bounds checking during raking (the number of reduction elements is not a multiple of the number of raking threads).
Definition: block_raking_layout.hpp:92
Alias wrapper allowing storage to be unioned.
Definition: block_raking_layout.hpp:107
Number of raking elements per warp-synchronous raking thread (rounded up)
Definition: block_raking_layout.hpp:80
static HIPCUB_DEVICE T * PlacementPtr(TempStorage &temp_storage, unsigned int linear_tid)
Returns the location for the calling thread to place data into the grid.
Definition: block_raking_layout.hpp:112
Never use a raking thread that will have no valid data (e.g., when BLOCK_THREADS is 62 and SEGMENT_LENGTH is 2, we should only use 31 raking threads).
Definition: block_raking_layout.hpp:83
BlockRakingLayout provides a conflict-free shared memory layout abstraction for 1D raking across thread block data.
Definition: block_raking_layout.hpp:64
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
Definition: util_type.hpp:362
Definition: benchmark_block_shuffle.cpp:41
Pad each segment length with one element if segment length is not relatively prime to warp size and can't be optimized as a vector load.
Definition: block_raking_layout.hpp:86
Maximum number of warp-synchronous raking threads.
Definition: block_raking_layout.hpp:76