cuda-kat
CUDA kernel author's tools
|
Miscellaneous functions provided by cuda-kat which are not a good fit in any other header. More...
#include "common.cuh"
#include <kat/detail/pointers.cuh>
#include <type_traits>
#include <limits>
#include <cassert>
Typedefs | |
template<std::size_t NumBits> | |
using | kat::detail::int_t = typename detail::integer_type_struct< true, NumBits >::type |
The signed integer type of a given size, selected by the NumBits template parameter. | |
template<std::size_t NumBits> | |
using | kat::detail::uint_t = typename detail::integer_type_struct< false, NumBits >::type |
The unsigned integer type of a given size, selected by the NumBits template parameter. | |
Functions | |
KAT_FD void | kat::detail::copy (uint32_t *__restrict__ destination, const uint32_t *__restrict__ source, std::size_t num_elements_to_copy) |
KAT_FD void | kat::detail::copy (uint16_t *__restrict__ destination, const uint16_t *__restrict__ source, std::size_t num_elements_to_copy) |
KAT_FD void | kat::detail::copy (uint8_t *__restrict__ destination, const uint8_t *__restrict__ source, std::size_t num_elements_to_copy) |
template<typename T , bool AssumeSameAlignmentWithinWord = false> | |
KAT_FD T * | kat::copy (T *__restrict__ destination, const T *__restrict__ source, std::size_t num_elements_to_copy) |
Copies some data from one location to another - using the native register size for individual elements on CUDA GPUs, i.e. sizeof(int) = 4. More... | |
template<typename I > | |
constexpr KAT_FHD I | kat::num_warp_sizes_to_cover (I number_of_threads) |
Return the number of full warps in a linear grid which would, overall, contain at least a given number of threads. More... | |
Miscellaneous functions provided by cuda-kat which are not a good fit in any other header.
KAT_FD void kat::detail::copy | ( | uint32_t *__restrict__ | destination, |
const uint32_t *__restrict__ | source, | ||
std::size_t | num_elements_to_copy | ||
) |
KAT_FD void kat::detail::copy | ( | uint16_t *__restrict__ | destination, |
const uint16_t *__restrict__ | source, | ||
std::size_t | num_elements_to_copy | ||
) |
KAT_FD void kat::detail::copy | ( | uint8_t *__restrict__ | destination, |
const uint8_t *__restrict__ | source, | ||
std::size_t | num_elements_to_copy | ||
) |
KAT_FD T* kat::copy | ( | T *__restrict__ | destination, |
const T *__restrict__ | source, | ||
std::size_t | num_elements_to_copy | ||
) |
Copies some data from one location to another - using the native register size for individual elements on CUDA GPUs, i.e. sizeof(int) = 4.
destination | Destination of the copy. Must have at least sizeof(T) * {num_elements_to_copy} bytes allocated. Data must be self-aligned, i.e. the numeric value of this parameter must be divisible by sizeof(T). |
source | The beginning of the memory region from which to copy. There must be sizeof(T) * {num_elements_to_copy} bytes readable starting with this address. Data must be self-aligned, i.e. the numeric value of this parameter must be divisible by sizeof(T). |
num_elements_to_copy | the number of elements of data to copy - not their total size in bytes! |
constexpr KAT_FHD I kat::num_warp_sizes_to_cover | ( | I | number_of_threads | ) |
Return the number of full warps in a linear grid which would, overall, contain at least a given number of threads.