|
template<typename RandomAccessIterator , typename Size , typename T > |
KAT_FD void | kat::linear_grid::collaborative::block::fill_n (RandomAccessIterator start, Size count, const T &value) |
|
template<typename RandomAccessIterator , typename T , typename Size = decltype(std::declval<RandomAccessIterator>() - std::declval<RandomAccessIterator>())> |
KAT_FD void | kat::linear_grid::collaborative::block::fill (RandomAccessIterator start, RandomAccessIterator end, const T &value) |
|
template<typename RandomAccessIterator , typename Size > |
KAT_FD void | kat::linear_grid::collaborative::block::memzero_n (RandomAccessIterator start, Size count) |
|
template<typename RandomAccessIterator , typename Size = decltype(std::declval<RandomAccessIterator>() - std::declval<RandomAccessIterator>())> |
KAT_FD void | kat::linear_grid::collaborative::block::memzero (RandomAccessIterator start, RandomAccessIterator end) |
|
template<typename T , typename S , typename UnaryOperation , typename Size > |
KAT_FD void | kat::linear_grid::collaborative::block::transform_n (const S *__restrict__ source, Size length, T *__restrict__ target, UnaryOperation unary_op) |
| apply a transformation to each element of an array, placing the results in another array. More...
|
|
template<typename S , typename T , typename UnaryOperation , typename Size = std::ptrdiff_t> |
KAT_FD void | kat::linear_grid::collaborative::block::transform (const S *__restrict__ source_start, const S *__restrict__ source_end, T *__restrict__ target, UnaryOperation unary_op) |
|
template<typename S , typename T , typename Size > |
KAT_FD void | kat::linear_grid::collaborative::block::cast_and_copy_n (const S *__restrict__ source, Size length, T *__restrict__ target) |
| Have all threads in the block collaborate in copying data between two memory locations (possibly not in the same memory space), while also converting types. More...
|
|
template<typename S , typename T , typename Size = std::ptrdiff_t> |
KAT_FD void | kat::linear_grid::collaborative::block::cast_and_copy (const S *__restrict__ source_start, const S *__restrict__ source_end, T *__restrict__ target) |
|
template<typename T , typename Size > |
KAT_FD void | kat::linear_grid::collaborative::block::copy_n (const T *__restrict__ source, Size length, T *__restrict__ target) |
| block-collaboratively copy data between stretches of memory More...
|
|
template<typename T , typename Size = std::ptrdiff_t> |
KAT_FD void | kat::linear_grid::collaborative::block::copy (const T *__restrict__ source_start, const T *__restrict__ source_end, T *__restrict__ target) |
| block-collaboratively copy data between stretches of memory More...
|
|
template<typename T , typename I , typename Size , typename U = T> |
KAT_FD void | kat::linear_grid::collaborative::block::lookup (T *__restrict__ target, const U *__restrict__ lookup_table, const I *__restrict__ indices, Size num_indices) |
| Use a lookup table to convert numeric indices to a sequence of values of any type.
|
|
template<typename T , typename AccumulationOp , bool AllThreadsObtainResult = false, T NeutralValue = T{}> |
KAT_DEV T | kat::linear_grid::collaborative::block::reduce (T value, AccumulationOp op) |
| Perform a reduction over a block's worth of data with a specific (asymmetric) accumulation operation, and maintaining the input element type. More...
|
|
template<typename T , bool AllThreadsObtainResult = false> |
KAT_DEV T | kat::linear_grid::collaborative::block::sum (T value) |
|
template<typename T , typename AccumulationOp , bool Inclusivity = inclusivity_t::Inclusive, T NeutralValue = T{}> |
KAT_DEV T | kat::linear_grid::collaborative::block::scan (T value, AccumulationOp op, T *__restrict__ scratch) |
|
template<typename T , typename AccumulationOp , bool Inclusivity = inclusivity_t::Inclusive, T NeutralValue = T{}> |
KAT_DEV T | kat::linear_grid::collaborative::block::scan (T value, AccumulationOp op) |
|
template<typename T , typename AccumulationOp , bool Inclusivity = inclusivity_t::Inclusive, T NeutralValue = T{}> |
KAT_DEV void | kat::linear_grid::collaborative::block::scan_and_reduce (T *__restrict__ scratch, T value, AccumulationOp op, T &scan_result, T &reduction_result) |
| Perform both a block-level scan and a block-level reduction, with each thread having the results of both. More...
|
|
template<typename T , typename AccumulationOp , bool Inclusivity = inclusivity_t::Inclusive, T NeutralValue = T{}> |
KAT_DEV void | kat::linear_grid::collaborative::block::scan_and_reduce (T value, AccumulationOp op, T &scan_result, T &reduction_result) |
|
template<typename D , typename RandomAccessIterator , typename AccumulatingOperation , typename Size > |
KAT_FD void | kat::linear_grid::collaborative::block::elementwise_accumulate_n (AccumulatingOperation op, D *__restrict__ destination, RandomAccessIterator __restrict__ source, Size length) |
| Perform an accumulation operation (e.g. More...
|
|
template<typename D , typename RandomAccessIterator , typename AccumulatingOperation , typename Size = std::ptrdiff_t> |
KAT_FD void | kat::linear_grid::collaborative::block::elementwise_accumulate (AccumulatingOperation op, D *__restrict__ destination, RandomAccessIterator __restrict__ source_start, RandomAccessIterator __restrict__ source_end) |
|
template<typename Operation , typename Size , typename ResultDatum , typename... Args> |
KAT_FD void | kat::linear_grid::collaborative::block::elementwise_apply (ResultDatum *__restrict__ results, Size length, Operation op, const Args *__restrict__ ... arguments) |
|
GPU device-side versions of std::algorithm
-like functions, with block-level collaboration, i.e.
different CUDA blocks act independently, but all threads in each block collaborate on the same task.
- Note
- Most functions actually in
std::algorithm
are still missing; see the algorithm
page on cppreference.com for a full list of those.
-
Some functions here are not actually in
std::algorithm
but might as well have been, e.g. memzero(),
which is like std::memset()
with 0.