cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
memory.hpp File Reference

Freestanding wrapper functions for working with CUDA's various kinds of memory spaces, arranged into a relevant namespace hierarchy. More...

#include <cuda/api/array.hpp>
#include <cuda/api/constants.hpp>
#include <cuda/api/current_device.hpp>
#include <cuda/api/error.hpp>
#include <cuda/api/pointer.hpp>
#include <cuda/api/current_context.hpp>
#include <cuda_runtime.h>
#include <cuda.h>
#include <memory>
#include <cstring>
#include <vector>

Classes

struct  cuda::memory::allocation_options
Options accepted by CUDA's allocator of memory with a host-side aspect (host-only or managed memory). More...
 
struct  cuda::memory::mapped::region_pair
 A pair of memory regions, one in system (=host) memory and one on a CUDA device's memory - mapped to each other. More...
 
struct  cuda::memory::managed::region_t
 
struct  cuda::memory::managed::const_region_t
 

Namespaces

 cuda
 All definitions and functionality wrapping the CUDA Runtime API.
 
 mapped
 Memory regions appearing in both the host-side and device-side address spaces, with the regions in both spaces mapped to each other.
 
 cuda::memory::device
 CUDA-Device-global memory on a single device (not accessible from the host)
 
 host
 Host-side (= system) memory which is "pinned", i.e. page-locked.
 
 cuda::memory::managed
 This type of memory, also known as unified memory, appears within a unified, all-system address space - and is used with the same address range on the host and on all relevant CUDA devices on a system.
 

Enumerations

enum  cuda::memory::portability_across_contexts : bool {
  cuda::memory::portability_across_contexts::is_portable = true,
  cuda::memory::portability_across_contexts::isnt_portable = false
}
 A memory allocation setting: Can the allocated memory be used in other CUDA driver contexts (in addition to the implicit default context we have with the Runtime API)?
 
enum  cuda::memory::cpu_write_combining : bool {
  with_wc = true,
  without_wc = false
}
 A memory allocation setting: Should the allocated memory be configured as write-combined? More...
 
enum  cuda::memory::host::mapped_io_space : bool {
  is_mapped_io_space = true,
  is_not_mapped_io_space = false
}
 Whether the host-side memory region being registered is memory-mapped I/O space (e.g. belonging to a third-party device). More...
 
enum  cuda::memory::host::map_into_device_memory : bool {
  map_into_device_memory = true,
  do_not_map_into_device_memory = false
}
 Whether or not the registration of the host-side pointer should map it into the CUDA address space for access on the device. More...
 
enum  cuda::memory::host::accessibility_on_all_devices : bool {
  cuda::memory::host::is_accessible_on_all_devices = true,
  cuda::memory::host::is_not_accessible_on_all_devices = false
}
 Whether the allocated host-side memory should be recognized as pinned memory by all CUDA contexts, not just the (implicit Runtime API) context that performed the allocation. More...
 
enum  attachment_t : unsigned {
  global = CU_MEM_ATTACH_GLOBAL,
  host = CU_MEM_ATTACH_HOST,
  single_stream = CU_MEM_ATTACH_SINGLE
}
 
enum  kind_t {
  read_mostly = CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
  preferred_location = CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION,
  accessor = CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY
}
 

Functions

region_t cuda::memory::device::allocate (const context_t &context, size_t size_in_bytes)
 Allocate device-side memory on a CUDA device context. More...
 
region_t cuda::memory::device::allocate (const device_t &device, size_t size_in_bytes)
 Allocate device-side memory on a CUDA device. More...
 
template<typename T >
void cuda::memory::device::typed_set (T *start, const T &value, size_t num_elements)
 Sets consecutive elements of a region of memory to a fixed value of some width. More...
 
template<typename T >
void cuda::memory::device::zero (T *ptr)
 Sets all bytes of a single pointed-to value to 0. More...
 
void cuda::memory::set (void *ptr, int byte_value, size_t num_bytes)
 Sets a number of bytes in memory to a fixed value. More...
 
void cuda::memory::set (region_t region, int byte_value)
 Sets all bytes in a region of memory to a fixed value. More...
 
void cuda::memory::zero (region_t region)
 Sets all bytes in a region of memory to 0 (zero) More...
 
void cuda::memory::zero (void *ptr, size_t num_bytes)
 Sets a number of bytes starting at a given address of memory to 0 (zero) More...
 
template<typename T >
void cuda::memory::zero (T *ptr)
 Sets all bytes of a single pointed-to value to 0. More...
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::copy (const array_t< T, NumDimensions > &destination, const T *source)
 Synchronously copies data into a CUDA array from non-array memory. More...
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::copy (T *destination, const array_t< T, NumDimensions > &source)
 Synchronously copies data from a CUDA array into non-array memory. More...
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::copy (array_t< T, NumDimensions > destination, array_t< T, NumDimensions > source)
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::copy (region_t destination, const array_t< T, NumDimensions > &source)
 
template<typename T >
void cuda::memory::copy_single (T *destination, const T *source)
 Synchronously copies a single (typed) value between two memory locations. More...
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::async::copy (array_t< T, NumDimensions > &destination, const T *source, const stream_t &stream)
 Asynchronously copies data from memory spaces into CUDA arrays. More...
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::async::copy (array_t< T, NumDimensions > &destination, const_region_t source, const stream_t &stream)
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::async::copy (T *destination, const array_t< T, NumDimensions > &source, const stream_t &stream)
 Asynchronously copies data from CUDA arrays into memory spaces. More...
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::async::copy (region_t destination, const array_t< T, NumDimensions > &source, const stream_t &stream)
 
template<typename T , size_t N>
void cuda::memory::async::copy (T(&destination)[N], T *source, const stream_t &stream)
 
template<typename T , size_t N>
void cuda::memory::async::copy (T(&destination)[N], const_region_t source, const stream_t &stream)
 
template<typename T >
void cuda::memory::async::copy_single (T &destination, const T &source, const stream_t &stream)
 Asynchronously copies a single (typed) value between memory spaces or within a memory space. More...
 
template<typename T >
void cuda::memory::device::async::typed_set (T *start, const T &value, size_t num_elements, const stream_t &stream)
 Sets consecutive elements of a region of memory to a fixed value of some width. More...
 
void cuda::memory::device::async::set (void *start, int byte_value, size_t num_bytes, const stream_t &stream)
 Asynchronously sets all bytes in a stretch of memory to a single value. More...
 
void cuda::memory::device::async::zero (void *start, size_t num_bytes, const stream_t &stream)
 Similar to set(), but sets the memory to zero rather than an arbitrary value.
 
template<typename T >
void cuda::memory::device::async::zero (T *ptr, const stream_t &stream)
 Asynchronously sets all bytes of a single pointed-to value to 0 (zero). More...
 
void cuda::memory::inter_context::copy (void *destination, const context_t &destination_context, const void *source_address, const context_t &source_context, size_t num_bytes)
 
void cuda::memory::inter_context::copy (void *destination, const context_t &destination_context, const_region_t source, const context_t &source_context)
 
void cuda::memory::inter_context::copy (region_t destination, const context_t &destination_context, const_region_t source, const context_t &source_context)
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::inter_context::copy (array_t< T, NumDimensions > destination, array_t< T, NumDimensions > source)
 
void cuda::memory::inter_context::async::copy (void *destination_address, context_t destination_context, const void *source_address, context_t source_context, size_t num_bytes, stream_t stream)
 
void cuda::memory::inter_context::async::copy (void *destination, context_t destination_context, const_region_t source, context_t source_context, stream_t stream)
 
void cuda::memory::inter_context::async::copy (region_t destination, context_t destination_context, const_region_t source, context_t source_context, stream_t stream)
 
template<typename T , dimensionality_t NumDimensions>
void cuda::memory::inter_context::async::copy (array_t< T, NumDimensions > destination, array_t< T, NumDimensions > source, const stream_t &stream)
 
void * cuda::memory::host::allocate (size_t size_in_bytes, allocation_options options)
 Allocate pinned host memory. More...
 
void * cuda::memory::host::allocate (size_t size_in_bytes, portability_across_contexts portability=portability_across_contexts(false), cpu_write_combining cpu_wc=cpu_write_combining(false))
 
void * cuda::memory::host::allocate (size_t size_in_bytes, cpu_write_combining cpu_wc)
 
void cuda::memory::host::free (void *host_ptr)
 Free a region of pinned host memory which was allocated with allocate. More...
 
void cuda::memory::host::free (region_t region)
 
void cuda::memory::host::register_ (const void *ptr, size_t size, bool register_mapped_io_space, bool map_into_device_space, bool make_device_side_accesible_to_all)
 
void cuda::memory::host::register_ (const_region_t region, bool register_mapped_io_space, bool map_into_device_space, bool make_device_side_accesible_to_all)
 
void cuda::memory::host::register_ (void const *ptr, size_t size)
 
void cuda::memory::host::register_ (const_region_t region)
 
void cuda::memory::host::deregister (const void *ptr)
 
void cuda::memory::host::deregister (const_region_t region)
 
void cuda::memory::host::set (void *start, int byte_value, size_t num_bytes)
 Sets all bytes in a stretch of host-side memory to a single value. More...
 
void cuda::memory::host::zero (void *start, size_t num_bytes)
 
template<typename T >
void cuda::memory::host::zero (T *ptr)
 
void cuda::memory::managed::advise_expected_access_by (managed::const_region_t region, device_t &device)
 
void cuda::memory::managed::advise_no_access_expected_by (managed::const_region_t region, device_t &device)
 
template<typename Allocator = ::std::allocator<cuda::device_t>>
typename ::std::vector< device_t, Allocator > cuda::memory::managed::accessors (managed::const_region_t region, const Allocator &allocator=Allocator())
 
region_t cuda::memory::managed::allocate (const context_t &context, size_t num_bytes, initial_visibility_t initial_visibility=initial_visibility_t::to_all_devices)
 Allocate a region of managed memory, accessible with the same address on the host and on CUDA devices. More...
 
region_t cuda::memory::managed::allocate (device_t device, size_t num_bytes, initial_visibility_t initial_visibility=initial_visibility_t::to_all_devices)
 Allocate a region of managed memory, accessible with the same address on the host and on CUDA devices. More...
 
region_t cuda::memory::managed::allocate (size_t num_bytes)
 Allocate a region of managed memory, accessible with the same address on the host and on all CUDA devices. More...
 
void cuda::memory::managed::free (void *managed_ptr)
 Free a managed memory region (host-side and device-side regions on all devices where it was allocated, all with the same address) which was allocated with allocate.
 
void cuda::memory::managed::free (region_t region)
 
void cuda::memory::managed::advice::set (const_region_t region, kind_t advice, const device_t &device)
 
void cuda::memory::managed::async::prefetch (const_region_t region, const cuda::device_t &destination, const stream_t &stream)
 Prefetches a region of managed memory to a specific device, so it can later be used there without waiting for I/O from the host or other devices.
 
void cuda::memory::managed::async::prefetch_to_host (const_region_t region, const stream_t &stream)
 Prefetches a region of managed memory into host memory. More...
 
template<typename T >
T * cuda::memory::mapped::device_side_pointer_for (T *host_memory_ptr)
 Obtain a pointer in the device-side memory space (= address range) for the device-side memory mapped to the host-side pointer host_memory_ptr.
 
region_pair cuda::memory::mapped::allocate (cuda::context_t &context, size_t size_in_bytes, allocation_options options)
 Allocate a memory region on the host, which is also mapped to a memory region in a context of some CUDA device - so that changes to one will be reflected in the other. More...
 
region_pair cuda::memory::mapped::allocate (cuda::device_t &device, size_t size_in_bytes, allocation_options options=allocation_options{})
 Allocate a memory region on the host, which is also mapped to a memory region in the global memory of a CUDA device - so that changes to one will be reflected in the other. More...
 
void cuda::memory::mapped::free (region_pair pair)
 Free a pair of mapped memory regions. More...
 
void cuda::memory::mapped::free_region_pair_of (void *ptr)
 Free a pair of mapped memory regions using just one of them. More...
 
bool cuda::memory::mapped::is_part_of_a_region_pair (const void *ptr)
 Determine whether a given stretch of memory was allocated as part of a mapped pair of host and device memory regions. More...
 
template<typename T >
memory::region_t cuda::symbol::locate (T &&symbol)
 Locates a CUDA symbol in global or constant device memory. More...
 
void cuda::memory::device::free (void *ptr)
 Free a region of device-side memory (regardless of how it was allocated)
 
void cuda::memory::device::free (region_t region)
 
void cuda::memory::device::set (void *start, int byte_value, size_t num_bytes)
 Sets all bytes in a region of memory to a fixed value. More...
 
void cuda::memory::device::set (region_t region, int byte_value)
 
void cuda::memory::device::zero (void *start, size_t num_bytes)
 Sets all bytes in a region of memory to 0 (zero) More...
 
void cuda::memory::device::zero (region_t region)
 
void cuda::memory::copy (void *destination, const void *source, size_t num_bytes)
 Synchronously copies data between memory spaces or within a memory space. More...
 
void cuda::memory::copy (void *destination, const_region_t source)
 
void cuda::memory::copy (region_t destination, const_region_t source)
 
template<typename T , size_t N>
void cuda::memory::copy (region_t destination, const T(&source)[N])
 
template<typename T , size_t N>
void cuda::memory::copy (T(&destination)[N], const_region_t source)
 
template<typename T , size_t N>
void cuda::memory::copy (void *destination, T(&source)[N])
 
template<typename T , size_t N>
void cuda::memory::copy (T(&destination)[N], T *source)
 
void cuda::memory::copy (region_t destination, void *source, size_t num_bytes)
 
void cuda::memory::copy (region_t destination, void *source)
 
void cuda::memory::async::copy (void *destination, void const *source, size_t num_bytes, const stream_t &stream)
 Asynchronously copies data between memory spaces or within a memory space. More...
 
void cuda::memory::async::copy (void *destination, const_region_t source, size_t num_bytes, const stream_t &stream)
 
void cuda::memory::async::copy (region_t destination, const_region_t source, size_t num_bytes, const stream_t &stream)
 
void cuda::memory::async::copy (void *destination, const_region_t source, const stream_t &stream)
 
void cuda::memory::async::copy (region_t destination, const_region_t source, const stream_t &stream)
 
void cuda::memory::async::copy (region_t destination, void *source, const stream_t &stream)
 
template<typename T , size_t N>
void cuda::memory::async::copy (region_t destination, const T(&source)[N], const stream_t &stream)
 
void cuda::memory::async::copy (region_t destination, void *source, size_t num_bytes, const stream_t &stream)
 

Detailed Description

Freestanding wrapper functions for working with CUDA's various kinds of memory spaces, arranged into a relevant namespace hierarchy.

Note
Some of the CUDA API for allocating and copying memory involves the concept of "pitch" and "pitched pointers". To better understand what that means, consider the following two-dimensional representation of an array (which is in fact embedded in linear memory):

X X X X * * *
X X X X * * *
X X X X * * *

The pitch in the example above is 7 * sizeof(T); the width is 4 * sizeof(T); the height is 3 rows.

See also https://stackoverflow.com/questions/16119943/how-and-when-should-i-use-pitched-pointer-with-the-cuda-api

Enumeration Type Documentation

◆ accessibility_on_all_devices

Whether the allocated host-side memory should be recognized as pinned memory by all CUDA contexts, not just the (implicit Runtime API) context that performed the allocation.

Enumerator
is_accessible_on_all_devices
is_not_accessible_on_all_devices

◆ cpu_write_combining

A memory allocation setting: Should the allocated memory be configured as write-combined, i.e.

a write may not be immediately applied to the allocated region, nor immediately propagated (e.g. to caches, or over the PCIe bus); instead, writes may be batched and applied when convenient.

Write-combining memory frees up the host's L1 and L2 cache resources, making more cache available to the rest of the application. In addition, write-combining memory is not snooped during transfers across the PCI Express bus, which can improve transfer performance.

Reading from write-combining memory from the host is prohibitively slow, so write-combining memory should in general be used for memory that the host only writes to.

◆ map_into_device_memory

Whether or not the registration of the host-side pointer should map it into the CUDA address space for access on the device.

When true, one can then obtain the device-space pointer using cudaHostGetDevicePointer().

◆ mapped_io_space

Whether the host-side memory region being registered is memory-mapped I/O space (e.g. belonging to a third-party device), rather than ordinary system RAM.

Function Documentation

◆ allocate() [1/3]

void * cuda::memory::host::allocate ( size_t  size_in_bytes,
allocation_options  options 
)
inline

Allocate pinned host memory.

Note
"Pinned" memory is allocated in contiguous physical RAM addresses, making it possible to copy to and from the GPU using DMA, without assistance from the CPU. This improves copying bandwidth significantly over naively-allocated host memory and reduces CPU overhead.
Exceptions
cuda::runtime_error: if allocation fails for any reason
Todo:
Consider a variant of this supporting the cudaHostAlloc flags
Parameters
size_in_bytes: the amount of memory to allocate, in bytes
options: options to pass to the CUDA host-side memory allocator; see memory::allocation_options.
Returns
a pointer to the allocated stretch of memory
Note
The allocation does not keep any device context alive/active; that is the caller's responsibility. However, if there is no current context, it will trigger the creation of a primary context on the default device, and "leak" a refcount unit for it.
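A minimal usage sketch (not from the library's own documentation, and requiring an actual CUDA device to run):

```cpp
#include <cuda/api.hpp>  // umbrella header of the cuda-api-wrappers library

// Sketch only: assumes the library's umbrella header and at least one CUDA device.
void pinned_allocation_example()
{
    constexpr size_t size_in_bytes = 1024;
    // Uses the overload with defaulted options: not portable across contexts,
    // no write-combining
    void* buffer = cuda::memory::host::allocate(size_in_bytes);
    // ... fill buffer, copy to a device, etc. ...
    cuda::memory::host::free(buffer); // not cuMemFreeHost(), per the note on free()
}
```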

◆ allocate() [2/3]

region_pair cuda::memory::mapped::allocate ( cuda::context_t context,
size_t  size_in_bytes,
allocation_options  options 
)
inline

Allocate a memory region on the host, which is also mapped to a memory region in a context of some CUDA device - so that changes to one will be reflected in the other.

Parameters
context: The device context in which the device-side region in the pair will be allocated.
size_in_bytes: amount of memory to allocate (in each of the regions)
options: see allocation_options

◆ allocate() [3/3]

region_pair cuda::memory::mapped::allocate ( cuda::device_t &  device,
size_t  size_in_bytes,
allocation_options  options = allocation_options{} 
)
inline

Allocate a memory region on the host, which is also mapped to a memory region in the global memory of a CUDA device - so that changes to one will be reflected in the other.

Parameters
device: The device on which the device-side region in the pair will be allocated
size_in_bytes: amount of memory to allocate (in each of the regions)
options: see allocation_options

◆ copy() [1/15]

void cuda::memory::copy ( void *  destination,
const void *  source,
size_t  num_bytes 
)
inline

Synchronously copies data between memory spaces or within a memory space.

Note
Since we assume Compute Capability >= 2.0, all devices support the Unified Virtual Address Space, so the CUDA driver can determine, for each pointer, where the data is located, and one does not have to specify this.
The sources and destinations may be in any memory space addressable in the unified virtual address space: host-side memory, device global memory, device constant memory, etc.
Parameters
destination: A pointer to a memory region of size num_bytes.
source: A pointer to a memory region of size num_bytes.
num_bytes: The number of bytes to copy from source to destination
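Because of the unified virtual address space, a single call covers host-to-device, device-to-host and device-to-device copies. A sketch under the assumption that device 0 exists and that `region_t` exposes a `start()` accessor:

```cpp
#include <cuda/api.hpp>
#include <vector>

// Sketch only: assumes a CUDA device with id 0 is present.
void copy_example()
{
    auto device = cuda::device::get(0);
    constexpr size_t n = 256;
    std::vector<float> host_data(n, 1.0f);
    auto device_region = cuda::memory::device::allocate(device, n * sizeof(float));
    // The driver infers the copy direction from the pointers themselves
    cuda::memory::copy(device_region.start(), host_data.data(), n * sizeof(float));
    cuda::memory::device::free(device_region);
}
```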

◆ copy() [2/15]

void cuda::memory::copy ( void *  destination,
const_region_t  source 
)
inline
Parameters
destination: A memory region of the same size as source.
source: A region whose contents are to be copied.

◆ copy() [3/15]

void cuda::memory::copy ( region_t  destination,
const_region_t  source 
)
inline
Parameters
destination: A region of memory into which to copy the data, of size at least that of source, either in host memory or on any CUDA device's global memory.
source: A region whose contents are to be copied, either in host memory or on any CUDA device's global memory

◆ copy() [4/15]

template<typename T , size_t N>
void cuda::memory::copy ( region_t  destination,
const T(&)  source[N] 
)
inline
Parameters
destination: A region of memory into which to copy the data in source, of size at least that of source.
source: A plain array whose contents are to be copied.

◆ copy() [5/15]

template<typename T , size_t N>
void cuda::memory::copy ( T(&)  destination[N],
const_region_t  source 
)
inline
Parameters
destination: A plain array into which to copy the data in source.
source: A region of at least sizeof(T)*N bytes whose contents are to be copied.

◆ copy() [6/15]

template<typename T , size_t N>
void cuda::memory::copy ( T(&)  destination[N],
T *  source 
)
inline
Parameters
destination: A plain array into which to copy the data in source.
source: The starting address of the elements to copy
Template Parameters
N: the number of elements to copy

◆ copy() [7/15]

void cuda::memory::copy ( region_t  destination,
void *  source,
size_t  num_bytes 
)
inline
Parameters
destination: A region of memory into which to copy the data in source, of size at least num_bytes.
source: A pointer to a memory region of size num_bytes.
num_bytes: The number of bytes to copy from source to destination

◆ copy() [8/15]

template<typename T , dimensionality_t NumDimensions>
void cuda::memory::copy ( const array_t< T, NumDimensions > &  destination,
const T *  source 
)

Synchronously copies data into a CUDA array from non-array memory.

Template Parameters
NumDimensions: the number of array dimensions; only 2 and 3 are supported values
T: array element type
Parameters
destination: A NumDimensions-dimensional CUDA array
source: A pointer to a region of contiguous memory holding destination.size() values of type T. The memory may be located either on a CUDA device or in host memory.

◆ copy() [9/15]

template<typename T , dimensionality_t NumDimensions>
void cuda::memory::copy ( T *  destination,
const array_t< T, NumDimensions > &  source 
)

Synchronously copies data from a CUDA array into non-array memory.

Template Parameters
NumDimensions: the number of array dimensions; only 2 and 3 are supported values
T: array element type
Parameters
destination: A pointer to a region of contiguous memory holding source.size() values of type T. The memory may be located either on a CUDA device or in host memory.
source: A NumDimensions-dimensional CUDA array

◆ copy() [10/15]

void cuda::memory::async::copy ( void *  destination,
void const *  source,
size_t  num_bytes,
const stream_t stream 
)
inline

Asynchronously copies data between memory spaces or within a memory space.

Note
Since we assume Compute Capability >= 2.0, all devices support the Unified Virtual Address Space, so the CUDA driver can determine, for each pointer, where the data is located, and one does not have to specify this.
asynchronous version of memory::copy
Parameters
destination: A (pointer to) a memory region of size num_bytes, either in host memory or on any CUDA device's global memory. Must be defined in the same context as the stream.
source: A (pointer to) a memory region of size num_bytes, either in host memory or on any CUDA device's global memory. Must be defined in the same context as the stream.
num_bytes: The number of bytes to copy from source to destination
stream: A stream on which to enqueue the copy operation

◆ copy() [11/15]

template<typename T , size_t N>
void cuda::memory::async::copy ( region_t  destination,
const T(&)  source[N],
const stream_t stream 
)
inline
Parameters
source: A plain array whose contents are to be copied.

◆ copy() [12/15]

template<typename T , dimensionality_t NumDimensions>
void cuda::memory::async::copy ( array_t< T, NumDimensions > &  destination,
const T *  source,
const stream_t stream 
)
inline

Asynchronously copies data from memory spaces into CUDA arrays.

Note
asynchronous version of memory::copy
Parameters
destination: A CUDA array (cuda::array_t)
source: A pointer to a memory region of size destination.size() * sizeof(T)
stream: schedule the copy operation on this CUDA stream

◆ copy() [13/15]

template<typename T , dimensionality_t NumDimensions>
void cuda::memory::async::copy ( T *  destination,
const array_t< T, NumDimensions > &  source,
const stream_t stream 
)
inline

Asynchronously copies data from CUDA arrays into memory spaces.

Note
asynchronous version of memory::copy
Parameters
destination: A pointer to a memory region of size source.size() * sizeof(T)
source: A CUDA array (cuda::array_t)
stream: schedule the copy operation on this CUDA stream

◆ copy() [14/15]

template<typename T , size_t N>
void cuda::memory::async::copy ( T(&)  destination[N],
T *  source,
const stream_t stream 
)
inline
Parameters
destination: A plain array into which to copy the data in source.
source: The starting address of the elements to copy
Template Parameters
N: the number of elements to copy

◆ copy() [15/15]

template<typename T , size_t N>
void cuda::memory::async::copy ( T(&)  destination[N],
const_region_t  source,
const stream_t stream 
)
inline
Parameters
destination: A plain array into which to copy the data in source.
source: A region of at least sizeof(T)*N bytes whose contents are to be copied.

◆ copy_single() [1/2]

template<typename T >
void cuda::memory::copy_single ( T *  destination,
const T *  source 
)

Synchronously copies a single (typed) value between two memory locations.

Parameters
destination: a value residing either in host memory or on any CUDA device's global memory
source: a value residing either in host memory or on any CUDA device's global memory

◆ copy_single() [2/2]

template<typename T >
void cuda::memory::async::copy_single ( T &  destination,
const T &  source,
const stream_t stream 
)
inline

Asynchronously copies a single (typed) value between memory spaces or within a memory space.

Note
asynchronous version of memory::copy_single
Parameters
destination: a value residing either in host memory or on any CUDA device's global memory
source: a value residing either in host memory or on any CUDA device's global memory
stream: The CUDA command queue on which this copying will be enqueued

◆ free() [1/2]

void cuda::memory::host::free ( void *  host_ptr)
inline

Free a region of pinned host memory which was allocated with allocate.

Note
You can't just use cuMemFreeHost - or you'll leak a primary context reference unit.

◆ free() [2/2]

void cuda::memory::mapped::free ( region_pair  pair)
inline

Free a pair of mapped memory regions.

Parameters
pair: a pair of regions allocated with allocate (or with the C-style CUDA runtime API directly)

◆ free_region_pair_of()

void cuda::memory::mapped::free_region_pair_of ( void *  ptr)
inline

Free a pair of mapped memory regions using just one of them.

Parameters
ptr: a pointer to one of the mapped regions (can be either the device-side or the host-side)

◆ is_part_of_a_region_pair()

bool cuda::memory::mapped::is_part_of_a_region_pair ( const void *  ptr)
inline

Determine whether a given stretch of memory was allocated as part of a mapped pair of host and device memory regions.

Todo:
What if it's a managed pointer?
Parameters
ptr: the beginning of a memory region, in either host or device memory, to check
Returns
true iff the region was allocated as one side of a mapped memory region pair
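A sketch of allocating, probing and freeing a mapped region pair; the member names `host_side` and `device_side` and the `start()` accessor are assumptions about `region_pair`, and a device supporting mapped (zero-copy) host memory is required:

```cpp
#include <cuda/api.hpp>
#include <cassert>

// Sketch only: assumes device 0 supports mapped host memory,
// and that region_pair exposes host_side / device_side regions.
void mapped_pair_example()
{
    auto device = cuda::device::get(0);
    auto pair = cuda::memory::mapped::allocate(device, 4096);
    // Writes through the host-side region are visible through the device-side one
    assert(cuda::memory::mapped::is_part_of_a_region_pair(pair.host_side.start()));
    cuda::memory::mapped::free(pair);
}
```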

◆ locate()

template<typename T >
memory::region_t cuda::symbol::locate ( T &&  symbol)
inline

Locates a CUDA symbol in global or constant device memory.

Note
symbol_t symbols are associated with the primary context
Returns
The region of memory CUDA associates with the symbol

◆ prefetch_to_host()

void cuda::memory::managed::async::prefetch_to_host ( const_region_t  region,
const stream_t stream 
)
inline

Prefetches a region of managed memory into host memory.

It can later be used there without waiting for I/O from any of the CUDA devices.
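A sketch combining managed allocation with both prefetch directions; `device.create_stream(cuda::stream::async)` follows the library's stream-creation convention but is an assumption here, and a device supporting managed memory is required:

```cpp
#include <cuda/api.hpp>

// Sketch only: assumes device 0 supports managed (unified) memory.
void managed_prefetch_example()
{
    auto device = cuda::device::get(0);
    auto stream = device.create_stream(cuda::stream::async);
    // The same address is valid on the host and on all devices
    auto region = cuda::memory::managed::allocate(4096);
    cuda::memory::managed::async::prefetch(region, device, stream);  // migrate pages to the device
    // ... launch kernels using the region on `stream` ...
    cuda::memory::managed::async::prefetch_to_host(region, stream);  // migrate them back
    stream.synchronize();
    cuda::memory::managed::free(region);
}
```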

◆ set() [1/4]

void cuda::memory::set ( void *  ptr,
int  byte_value,
size_t  num_bytes 
)
inline

Sets a number of bytes in memory to a fixed value.

Note
The equivalent of ::std::memset - for any and all CUDA-related memory spaces
Parameters
ptr: Address of the first byte in memory to set. May be in host-side memory, global CUDA-device-side memory or CUDA-managed memory.
byte_value: value to set the memory region to
num_bytes: The amount of memory to set to byte_value

◆ set() [2/4]

void cuda::memory::set ( region_t  region,
int  byte_value 
)
inline

Sets all bytes in a region of memory to a fixed value.

Note
The equivalent of ::std::memset - for any and all CUDA-related memory spaces
Parameters
region: the memory region to set; may be in host-side memory, global CUDA-device-side memory or CUDA-managed memory.
byte_value: value to set the memory region to

◆ set() [3/4]

void cuda::memory::device::async::set ( void *  start,
int  byte_value,
size_t  num_bytes,
const stream_t stream 
)
inline

Asynchronously sets all bytes in a stretch of memory to a single value.

Note
asynchronous version of memory::set
Parameters
start: starting address of the memory region to set, in a CUDA device's global memory
byte_value: value to set the memory region to
num_bytes: size of the memory region in bytes
stream: stream on which to schedule this action

◆ set() [4/4]

void cuda::memory::host::set ( void *  start,
int  byte_value,
size_t  num_bytes 
)
inline

Sets all bytes in a stretch of host-side memory to a single value.

Note
a wrapper for ::std::memset
Parameters
start: starting address of the memory region to set, in host memory; can be either CUDA-allocated or otherwise
byte_value: value to set the memory region to
num_bytes: size of the memory region in bytes

◆ typed_set()

template<typename T >
void cuda::memory::device::async::typed_set ( T *  start,
const T &  value,
size_t  num_elements,
const stream_t stream 
)
inline

Sets consecutive elements of a region of memory to a fixed value of some width.

Note
A generalization of async::set(), for different-size units.
Template Parameters
T: An unsigned integer type of size 1, 2, 4 or 8
Parameters
start: The first location to set to value; must be properly aligned.
value: A (properly aligned) value to set T-elements to.
num_elements: The number of type-T elements (i.e. not necessarily the number of bytes).
stream: The stream on which to enqueue the operation.
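A sketch of setting every 32-bit element of a device buffer to one value; the `region_t::start()` accessor and the stream-creation call are assumptions:

```cpp
#include <cuda/api.hpp>
#include <cstdint>

// Sketch only: assumes a CUDA device with id 0 is present.
void typed_set_example()
{
    auto device = cuda::device::get(0);
    auto stream = device.create_stream(cuda::stream::async);
    constexpr size_t n = 1000;
    auto region = cuda::memory::device::allocate(device, n * sizeof(uint32_t));
    auto* ptr = static_cast<uint32_t*>(region.start());
    // Set each 32-bit element (not each byte) to the same value, asynchronously
    cuda::memory::device::async::typed_set(ptr, uint32_t{0xDEADBEEF}, n, stream);
    stream.synchronize();
    cuda::memory::device::free(region);
}
```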

◆ zero() [1/4]

void cuda::memory::zero ( region_t  region)
inline

Sets all bytes in a region of memory to 0 (zero)

Parameters
region: the memory region to zero-out; may be in host-side memory, global CUDA-device-side memory or CUDA-managed memory.

◆ zero() [2/4]

void cuda::memory::zero ( void *  ptr,
size_t  num_bytes 
)
inline

Sets a number of bytes starting at a given address of memory to 0 (zero)

Parameters
ptr: the start of the memory region to zero-out; may be in host-side memory, global CUDA-device-side memory or CUDA-managed memory.
num_bytes: the number of bytes to set to zero

◆ zero() [3/4]

template<typename T >
void cuda::memory::zero ( T *  ptr)
inline

Sets all bytes of a single pointed-to value to 0.

Parameters
ptr: pointer to a single element of a certain type, which may be in host-side memory, global CUDA-device-side memory or CUDA-managed memory

◆ zero() [4/4]

template<typename T >
void cuda::memory::device::async::zero ( T *  ptr,
const stream_t stream 
)
inline

Asynchronously sets all bytes of a single pointed-to value to 0 (zero).

Note
asynchronous version of memory::zero
Parameters
ptr: a pointer to the value to be set to zero; must be valid in the CUDA context of stream
stream: stream on which to schedule this action