cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
|
Implementations requiring the definitions of multiple CUDA entity proxy classes, in the cuda::memory
namespace.
More...
#include "context.hpp"
#include "ipc.hpp"
#include <cuda_runtime_api.h>
#include "../memory.hpp"
#include "../array.hpp"
#include "../device.hpp"
#include "../event.hpp"
#include "../pointer.hpp"
#include "../stream.hpp"
#include "../primary_context.hpp"
#include "../kernel.hpp"
#include "../virtual_memory.hpp"
#include "../memory_pool.hpp"
#include "../current_device.hpp"
Go to the source code of this file.
Namespaces | |
cuda | |
Definitions and functionality wrapping CUDA APIs. | |
cuda::memory | |
Representation, allocation and manipulation of CUDA-related memory, of different kinds. | |
cuda::memory::device | |
CUDA-Device-global memory on a single device (not accessible from the host) | |
cuda::memory::managed | |
Paged memory accessible in both device-side and host-side code by triggering transfers of pages between physical system memory and physical device memory. | |
cuda::memory::mapped | |
Memory regions appearing in both the host-side and device-side address spaces, with the regions in both spaces mapped to each other (i.e. writes to one region are reflected in the other). | |
cuda::memory::host | |
Host-side (= system) memory which is "pinned", i.e. locked to a fixed physical address and not pageable by the operating system. | |
Functions | |
template<typename T , dimensionality_t NumDimensions> | |
void | cuda::memory::copy (array_t< T, NumDimensions > &destination, span< T const > source, optional_ref< const stream_t > stream) |
template<typename T , dimensionality_t NumDimensions> | |
void | cuda::memory::copy (T *destination, const array_t< T, NumDimensions > &source, optional_ref< const stream_t > stream={}) |
Synchronously copies data from a CUDA array into non-array memory. More... | |
template<dimensionality_t NumDimensions> | |
void | cuda::memory::copy (copy_parameters_t< NumDimensions > params, optional_ref< const stream_t > stream={}) |
An almost-generalized-case memory copy, taking a rather complex structure of copy parameters - wrapping the CUDA driver's own most-generalized-case copy. More... | |
template<typename T > | |
void | cuda::memory::copy_single (T *destination, const T *source, optional_ref< const stream_t > stream={}) |
Synchronously copies a single (typed) value between two memory locations. More... | |
template<typename T , dimensionality_t NumDimensions> | |
void | cuda::memory::copy (array_t< T, NumDimensions > &destination, const T *source, optional_ref< const stream_t > stream={}) |
Synchronously copies data into a CUDA array from non-array memory. More... | |
void | cuda::memory::copy (void *destination, void const *source, size_t num_bytes, optional_ref< const stream_t > stream={}) |
Asynchronously copies data between memory spaces or within a memory space. More... | |
region_t | cuda::memory::device::allocate (const context_t &context, size_t size_in_bytes) |
Allocate device-side memory on a CUDA device context. More... | |
region_t | cuda::memory::device::allocate (const device_t &device, size_t size_in_bytes) |
Allocate device-side memory on a CUDA device. More... | |
void | cuda::memory::device::free (void *ptr) |
Free a region of device-side memory (regardless of how it was allocated) | |
void | cuda::memory::inter_context::copy (void *destination_address, const context_t &destination_context, const void *source_address, const context_t &source_context, size_t num_bytes, optional_ref< const stream_t > stream) |
Asynchronously copy a region of memory defined in one context into a region defined in another. | |
void | cuda::memory::managed::advise_expected_access_by (const_region_t region, device_t &device) |
Advise the CUDA driver that device is expected to access region. | |
void | cuda::memory::managed::advise_no_access_expected_by (const_region_t region, device_t &device) |
Advise the CUDA driver that device is not expected to access region. | |
template<typename Allocator > | |
::std::vector< device_t, Allocator > | cuda::memory::managed::expected_accessors (const_region_t region, const Allocator &allocator) |
void | cuda::memory::managed::prefetch (const_region_t region, const cuda::device_t &destination, const stream_t &stream) |
Prefetches a region of managed memory to a specific device, so it can later be used there without waiting for I/O from the host or other devices. | |
void | cuda::memory::managed::prefetch_to_host (const_region_t region, const stream_t &stream) |
Prefetches a region of managed memory into host memory. More... | |
region_t | cuda::memory::managed::allocate (const context_t &context, size_t num_bytes, initial_visibility_t initial_visibility=initial_visibility_t::to_all_devices) |
Allocate a region of managed memory, accessible with the same address on the host and on CUDA devices. More... | |
region_t | cuda::memory::managed::allocate (const device_t &device, size_t num_bytes, initial_visibility_t initial_visibility=initial_visibility_t::to_all_devices) |
Allocate a region of managed memory, accessible with the same address on the host and on CUDA devices. More... | |
region_t | cuda::memory::managed::allocate (size_t num_bytes) |
Allocate a region of managed memory, accessible with the same address on the host and on all CUDA devices. More... | |
region_pair_t | cuda::memory::mapped::allocate (cuda::device_t &device, size_t size_in_bytes, allocation_options options=allocation_options{}) |
Allocate a memory region on the host, which is also mapped to a memory region in the global memory of a CUDA device - so that changes to one will be reflected in the other. More... | |
region_pair_t | cuda::memory::mapped::allocate (cuda::context_t &context, size_t size_in_bytes, allocation_options options) |
Allocate a memory region on the host, which is also mapped to a memory region in a context of some CUDA device - so that changes to one will be reflected in the other. More... | |
region_t | cuda::memory::host::allocate (size_t size_in_bytes, allocation_options options) |
Allocates pinned host memory. More... | |
template<typename T > | |
void | cuda::memory::device::typed_set (T *start, const T &value, size_t num_elements, optional_ref< const stream_t > stream={}) |
Sets consecutive elements of a region of memory to a fixed value of some width. More... | |
void | cuda::memory::set (void *ptr, int byte_value, size_t num_bytes, optional_ref< const stream_t > stream={}) |
Sets a number of bytes in memory to a fixed value. More... | |
Implementations requiring the definitions of multiple CUDA entity proxy classes, in the cuda::memory
namespace.