|
cuda-kat
CUDA kernel author's tools
|
Miscellaneous functions provided by cuda-kat which are not a good fit in any other header. More...
#include "common.cuh"
#include <kat/detail/pointers.cuh>
#include <type_traits>
#include <limits>
#include <cassert>
Typedefs | |
| template<std::size_t NumBits> | |
| using | kat::detail::int_t = typename detail::integer_type_struct< true, NumBits >::type |
| A templating by size of the signed integer types. | |
| template<std::size_t NumBits> | |
| using | kat::detail::uint_t = typename detail::integer_type_struct< false, NumBits >::type |
| A templating by size of the unsigned integer types. | |
Functions | |
| KAT_FD void | kat::detail::copy (uint32_t *__restrict__ destination, const uint32_t *__restrict__ source, std::size_t num_elements_to_copy) |
| KAT_FD void | kat::detail::copy (uint16_t *__restrict__ destination, const uint16_t *__restrict__ source, std::size_t num_elements_to_copy) |
| KAT_FD void | kat::detail::copy (uint8_t *__restrict__ destination, const uint8_t *__restrict__ source, std::size_t num_elements_to_copy) |
| template<typename T , bool AssumeSameAlignmentWithinWord = false> | |
| KAT_FD T * | kat::copy (T *__restrict__ destination, const T *__restrict__ source, std::size_t num_elements_to_copy) |
| Copies some data from one location to another - using the native register size for individual elements on CUDA GPUs, i.e. sizeof(int) = 4 bytes. More... | |
| template<typename I > | |
| constexpr KAT_FHD I | kat::num_warp_sizes_to_cover (I number_of_threads) |
| Return the number of full warps in a linear grid which would, overall, contain at least a given number of threads. More... | |
Miscellaneous functions provided by cuda-kat which are not a good fit in any other header.
| KAT_FD void kat::detail::copy | ( | uint32_t *__restrict__ | destination, |
| const uint32_t *__restrict__ | source, | ||
| std::size_t | num_elements_to_copy | ||
| ) |
| KAT_FD void kat::detail::copy | ( | uint16_t *__restrict__ | destination, |
| const uint16_t *__restrict__ | source, | ||
| std::size_t | num_elements_to_copy | ||
| ) |
| KAT_FD void kat::detail::copy | ( | uint8_t *__restrict__ | destination, |
| const uint8_t *__restrict__ | source, | ||
| std::size_t | num_elements_to_copy | ||
| ) |
| KAT_FD T* kat::copy | ( | T *__restrict__ | destination, |
| const T *__restrict__ | source, | ||
| std::size_t | num_elements_to_copy | ||
| ) |
Copies some data from one location to another - using the native register size for individual elements on CUDA GPUs, i.e. sizeof(int) = 4 bytes.
| destination | Destination of the copy. Must have at least sizeof(T) * {num_elements_to_copy} bytes allocated. Data must be self-aligned, i.e. the numeric value of this parameter must be divisible by sizeof(T). |
| source | The beginning of the memory region from which to copy. There must be sizeof(T) * {num_elements_to_copy} bytes readable starting with this address. Data must be self-aligned, i.e. the numeric value of this parameter must be divisible by sizeof(T). |
| num_elements_to_copy | The number of elements of data to copy (not their total size in bytes). |
| constexpr KAT_FHD I kat::num_warp_sizes_to_cover | ( | I | number_of_threads | ) |
Return the number of full warps in a linear grid which would, overall, contain at least a given number of threads.
1.8.12