|
template<typename T , typename AccumulationOp > |
KAT_FD T | kat::collaborative::warp::reduce (T value, AccumulationOp op) |
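| Performs a reduction (e.g. a summation) of the values passed in by the lanes of a warp, using the given accumulation operation. More...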
|
|
template<typename T > |
KAT_FD T | kat::collaborative::warp::sum (T value) |
|
template<typename T , typename AccumulationOp , inclusivity_t Inclusivity = inclusivity_t::Inclusive, T NeutralValue = T{}> |
KAT_FD T | kat::collaborative::warp::scan (T value, AccumulationOp op) |
|
template<typename T , inclusivity_t Inclusivity = inclusivity_t::Inclusive, T NeutralValue = T{}> |
KAT_FD T | kat::collaborative::warp::prefix_sum (T value) |
|
template<typename T , T NeutralValue = T{}> |
KAT_FD T | kat::collaborative::warp::exclusive_prefix_sum (T value) |
|
template<typename RandomAccessIterator , typename Size , typename T > |
KAT_FD void | kat::collaborative::warp::fill_n (RandomAccessIterator start, Size count, const T &value) |
|
template<typename RandomAccessIterator , typename T , typename Size = decltype(std::declval<RandomAccessIterator>() - std::declval<RandomAccessIterator>())> |
KAT_FD void | kat::collaborative::warp::fill (RandomAccessIterator start, RandomAccessIterator end, const T &value) |
|
template<typename RandomAccessIterator , typename Size > |
KAT_FD void | kat::collaborative::warp::memzero_n (RandomAccessIterator start, Size count) |
|
template<typename RandomAccessIterator , typename Size = decltype(std::declval<RandomAccessIterator>() - std::declval<RandomAccessIterator>())> |
KAT_FD void | kat::collaborative::warp::memzero (RandomAccessIterator start, RandomAccessIterator end) |
|
template<typename T , typename S , typename UnaryOperation , typename Size > |
KAT_FD void | kat::collaborative::warp::transform_n (const S *__restrict__ source, Size length, T *__restrict__ target, UnaryOperation unary_op) |
| Apply a transformation to each element of an array, placing the results in another array. More...
|
|
template<typename S , typename T , typename UnaryOperation , typename Size = std::ptrdiff_t> |
KAT_FD void | kat::collaborative::warp::transform (const S *__restrict__ source_start, const S *__restrict__ source_end, T *__restrict__ target, UnaryOperation unary_op) |
|
template<typename S , typename T , typename Size > |
KAT_FD void | kat::collaborative::warp::cast_and_copy_n (const S *__restrict__ source, Size length, T *__restrict__ target) |
| Have all warp threads collaborate in copying data between two memory locations (possibly not in the same memory space), while also converting types. More...
|
|
template<typename T , typename U , typename Size = std::ptrdiff_t> |
KAT_FD void | kat::collaborative::warp::cast_and_copy (const U *__restrict__ source_start, const U *__restrict__ source_end, T *__restrict__ target) |
|
template<typename T , typename Size > |
KAT_FD void | kat::collaborative::warp::detail::naive_copy (const T *__restrict__ source, Size length, T *__restrict__ target) |
| A version of kat::copy() which ignores pointer alignment and the memory transaction size, simply making coalesced writes of warp_size elements at a time (except for the last range). More...
|
|
template<typename T > |
constexpr KAT_FHD T | kat::collaborative::warp::detail::clear_lower_bits (T x, unsigned k) |
|
template<typename T , typename Size , bool MayHaveSlack = true> |
KAT_FD void | kat::collaborative::warp::copy_n (const T *__restrict__ source, Size length, T *__restrict__ target) |
| Has the warp copy data from one place to another. More...
|
|
template<typename T , bool MayHaveSlack = true, typename Size = std::ptrdiff_t> |
KAT_FD void | kat::collaborative::warp::copy (const T *__restrict__ source_start, const T *__restrict__ source_end, T *__restrict__ target_start) |
|
template<typename T , typename I , typename Size , typename U = T> |
KAT_FD void | kat::collaborative::warp::lookup (T *__restrict__ target, const U *__restrict__ lookup_table, const I *__restrict__ indices, Size num_indices) |
| Use a lookup table to convert numeric indices to a sequence of values of any type.
|
|
template<typename D , typename RandomAccessIterator , typename AccumulatingOperation , typename Size > |
KAT_FD void | kat::collaborative::warp::elementwise_accumulate_n (AccumulatingOperation op, D *__restrict__ destination, RandomAccessIterator __restrict__ source, Size length) |
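| Perform an accumulation operation (e.g. addition) element by element, accumulating the source elements into the corresponding destination elements. More...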
|
|
template<typename D , typename RandomAccessIterator , typename AccumulatingOperation , typename Size = std::ptrdiff_t> |
KAT_FD void | kat::collaborative::warp::elementwise_accumulate (AccumulatingOperation op, D *__restrict__ destination, RandomAccessIterator __restrict__ source_start, RandomAccessIterator __restrict__ source_end) |
|
GPU device-side versions of std::algorithm-like functions, with warp-level collaboration, i.e. different CUDA warps act independently, but all lanes in each warp collaborate on the same task.
- Note
- Most functions actually in std::algorithm are still missing; see the cppreference page for <algorithm> for a full list of those.
-
Some functions here are not actually in std::algorithm but might as well have been, e.g. memzero(), which is like std::memset() with 0.
-
This is the most-divergent version of std-algorithm-like functions, i.e. don't go looking for thread-level implementations (which would, in fact, be the same as a straightforward CPU-side implementation of std::algorithm); if you find yourself needing them, it's possible - perhaps likely - that you're doing something wrong.
- Todo:
- Some inclusions in the warp-primitives might only be relevant to the functions here; double-check.