6 #ifndef CUDA_API_WRAPPERS_COPY_PARAMETERS_HPP 7 #define CUDA_API_WRAPPERS_COPY_PARAMETERS_HPP 22 template<dimensionality_t NumDimensions>
29 template<dimensionality_t NumDimensions>
30 struct base_copy_params;
33 struct base_copy_params<2> {
34 using intra_context_type = CUDA_MEMCPY2D;
35 using type = intra_context_type;
39 struct base_copy_params<3> {
40 using type = CUDA_MEMCPY3D_PEER;
41 using intra_context_type = CUDA_MEMCPY3D;
45 template<dimensionality_t NumDimensions>
46 using base_copy_params_t =
typename base_copy_params<NumDimensions>::type;
48 template<
size_t NumDimensions>
67 template<dimensionality_t NumDimensions>
69 using parent = detail_::base_copy_params_t<NumDimensions>;
75 using intra_context_type =
typename detail_::base_copy_params<NumDimensions>::intra_context_type;
82 bool is_intra_context() const noexcept {
return parent::srcContext == parent::dstContext; }
90 set_context(endpoint_t::source, context);
91 set_context(endpoint_t::destination, context);
144 return set_endpoint(endpoint, span.data(),
dimensions_type(span.size()));
155 return set_endpoint(endpoint_t::source, array);
167 return set_endpoint_untyped(endpoint_t::source, context_handle, ptr, dimensions);
172 return set_endpoint_ptr(endpoint_t::source, context_handle, ptr);
185 return set_endpoint(endpoint_t::source, ptr, dimensions);
191 return set_endpoint(endpoint_t::source, context_handle, ptr, dimensions);
215 return set_endpoint(endpoint_t::destination, array);
227 set_endpoint_untyped(endpoint_t::destination, context_handle, ptr, dimensions);
232 set_endpoint_untyped(endpoint_t::destination, context_handle, ptr);
245 return set_endpoint(endpoint_t::destination, ptr, dimensions);
251 return set_endpoint(endpoint_t::destination, context_handle, ptr, dimensions);
264 return set_destination(span.data(), {span.size(), 1, 1});
286 clear_offset(endpoint_t::source);
287 clear_offset(endpoint_t::destination);
296 (endpoint == endpoint_t::source ? parent::srcPitch : parent::dstPitch) = pitch_in_bytes;
306 return set_bytes_pitch(endpoint, pitch_in_elements *
sizeof(T));
315 auto uniform_pitch_in_bytes { uniform_pitch_in_elements *
sizeof(T) };
316 set_pitch<T>(endpoint_t::source, uniform_pitch_in_bytes);
317 set_pitch<T>(endpoint_t::destination, uniform_pitch_in_bytes);
324 return set_bytes_pitch(endpoint, parent::WidthInBytes);
328 this_type& set_default_pitches() noexcept
330 set_default_pitch(endpoint_t::source);
331 set_default_pitch(endpoint_t::destination);
363 template <
typename T>
366 auto extent_ = bytes_extent();
368 if (extent_.width %
sizeof(T) != 0) {
369 throw ::std::invalid_argument(
370 "Attempt to get the copy extent with assumed type of size " 371 + ::std::to_string(
sizeof(T)) +
" while the byte extent's " 372 +
"minor dimension is not a multiple of this size");
375 extent_.width /=
sizeof(T);
379 this_type& set_pitches(dimension_type uniform_pitch_in_bytes) noexcept
381 set_pitch(endpoint_t::source, uniform_pitch_in_bytes);
382 set_pitch(endpoint_t::destination, uniform_pitch_in_bytes);
427 return set_endpoint_untyped(endpoint, context_handle, ptr, untyped_dims);
440 auto context_handle = context::detail_::none;
441 return set_endpoint<T>(endpoint, context_handle, ptr, dimensions);
448 (endpoint == endpoint_t::source ? srcMemoryType : dstMemoryType) = CU_MEMORYTYPE_ARRAY;
449 (endpoint == endpoint_t::source ? srcArray : dstArray) = array.get();
450 (endpoint == endpoint_t::source ? srcDevice : dstDevice) = array.device_id();
461 return (endpoint == endpoint_t::source) ?
462 dims_type{ params.WidthInBytes, params.Height } :
463 dims_type{ params.WidthInBytes, params.Height };
470 return (endpoint == endpoint_t::source) ?
471 dims_type{ params.srcPitch, params.Height, params.Depth } :
472 dims_type{ params.WidthInBytes, params.Height, params.Depth };
481 (endpoint == endpoint_t::source ? srcMemoryType : dstMemoryType) = CU_MEMORYTYPE_ARRAY;
482 (endpoint == endpoint_t::source ? srcArray : dstArray) = array.get();
483 (endpoint == endpoint_t::source ? srcContext : dstContext) = array.context_handle();
495 (endpoint == endpoint_t::source ? srcContext : dstContext) = context.handle();
508 return set_endpoint_untyped(endpoint, context_handle, ptr, untyped_dims);
518 return set_endpoint<T>(endpoint, context::current::detail_::get_handle(), ptr, dimensions);
539 WidthInBytes = extent_in_elements.width *
sizeof(T);
540 Height = extent_in_elements.height;
547 WidthInBytes = extent_in_elements.width;
548 Height = extent_in_elements.height;
555 WidthInBytes = extent_in_elements.width;
556 Height = extent_in_elements.height;
557 Depth = extent_in_elements.depth;
564 return { WidthInBytes, Height };
570 return { WidthInBytes, Height, Depth };
579 auto memory_type =
type_of(ptr);
580 if (memory_type == array) {
581 throw ::std::invalid_argument(
"Attempt to use the non-array endpoint setter with array memory at " + cuda::detail_::ptr_as_hex(ptr));
583 if (memory_type == unified_ or memory_type == device_)
585 (endpoint == endpoint_t::source ? srcDevice : dstDevice) =
device::address(ptr);
589 if (endpoint == endpoint_t::source) { srcHost = ptr; }
590 else { dstHost = ptr; }
592 (endpoint == endpoint_t::source ? srcMemoryType : dstMemoryType) =
593 static_cast<CUmemorytype>(memory_type == non_cuda ? host_ : memory_type);
606 set_endpoint_ptr(endpoint, context_handle, ptr);
607 set_bytes_pitch(endpoint, dimensions.
width);
608 set_bytes_extent(dimensions);
618 auto memory_type =
type_of(ptr);
619 if (memory_type == array) {
620 throw ::std::invalid_argument(
"Attempt to use the non-array endpoint setter with array memory at " + cuda::detail_::ptr_as_hex(ptr));
622 if (memory_type == unified_ or memory_type == device_)
624 (endpoint == endpoint_t::source ? srcDevice : dstDevice) =
device::address(ptr);
628 if (endpoint == endpoint_t::source) { srcHost = ptr; }
629 else { dstHost = ptr; }
631 (endpoint == endpoint_t::source ? srcMemoryType : dstMemoryType) =
632 static_cast<CUmemorytype> (memory_type == non_cuda ? host_ : memory_type);
633 (endpoint == endpoint_t::source ? srcContext : dstContext) = context_handle;
644 set_endpoint_ptr(endpoint, context_handle, ptr);
645 (endpoint == endpoint_t::source ? srcHeight : dstHeight) = dimensions.height;
646 set_bytes_pitch(endpoint, dimensions.
width);
647 set_bytes_extent(dimensions);
656 extent_in_elements.width *
sizeof(T),
657 extent_in_elements.height,
658 extent_in_elements.depth
660 return set_bytes_extent(extent_in_bytes);
667 (endpoint == endpoint_t::source ? srcXInBytes : dstXInBytes) = offset.width;
668 (endpoint == endpoint_t::source ? srcY : dstY) = offset.height;
669 (endpoint == endpoint_t::source ? srcZ : dstZ) = offset.depth;
677 (endpoint == endpoint_t::source ? srcXInBytes : dstXInBytes) = offset.width;
678 (endpoint == endpoint_t::source ? srcY : dstY) = offset.height;
686 dimensions_type offset_in_bytes{offset.width *
sizeof(T), offset.height, offset.depth};
687 return set_bytes_offset(endpoint, offset_in_bytes);
694 dimensions_type offset_in_bytes{offset.width *
sizeof(T), offset.height};
695 return set_bytes_offset(endpoint, offset_in_bytes);
701 if (params.srcDevice != params.dstDevice) {
702 throw ::std::invalid_argument(
"Attempt to use inter-device copy parameters for an intra-context copy");
704 if (params.srcContext != params.dstContext) {
705 throw ::std::invalid_argument(
"Attempt to use inter-context copy parameters for an intra-context copy");
711 result.srcXInBytes = params.srcXInBytes;
712 result.srcY = params.srcY;
713 result.srcZ = params.srcZ;
714 result.srcLOD = params.srcLOD;
715 result.srcMemoryType = params.srcMemoryType;
716 result.srcHost = params.srcHost;
717 result.srcDevice = params.srcDevice;
718 result.srcArray = params.srcArray;
719 result.reserved0 =
nullptr;
720 result.srcPitch = params.srcPitch;
721 result.srcHeight = params.srcHeight;
723 result.dstXInBytes = params.dstXInBytes;
724 result.dstY = params.dstY;
725 result.dstZ = params.dstZ;
726 result.dstLOD = params.dstLOD;
727 result.dstMemoryType = params.dstMemoryType;
728 result.dstHost = params.dstHost;
729 result.dstDevice = params.dstDevice;
730 result.dstArray = params.dstArray;
731 result.reserved1 =
nullptr;
732 result.dstPitch = params.dstPitch;
733 result.dstHeight = params.dstHeight;
735 result.WidthInBytes = params.WidthInBytes;
736 result.Height = params.Height;
737 result.Depth = params.Depth;
745 #endif //CUDA_API_WRAPPERS_COPY_PARAMETERS_HPP this_type & set_destination(const cuda::array_t< T, NumDimensions > &array) noexcept
Set the destination endpoint of the copy operation to be a CUDA array.
Definition: copy_parameters.hpp:213
this_type & set_endpoint(endpoint_t endpoint, span< T > span) noexcept
Set one of the copy endpoints to a range of multi-dimensional elements, starting at the beginning of a span of...
Definition: copy_parameters.hpp:142
endpoint_t
Type for choosing between endpoints of copy operations.
Definition: copy_parameters.hpp:19
this_type & set_single_context(const context_t &context) noexcept
Set the same context for both endpoints of the copy operation.
Definition: copy_parameters.hpp:88
Wrapper class for a CUDA context.
Definition: context.hpp:249
this_type & set_source(const cuda::array_t< T, NumDimensions > &array) noexcept
Set the source endpoint of the copy operation to be a CUDA array.
Definition: copy_parameters.hpp:153
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
this_type & set_endpoint_untyped(endpoint_t endpoint, context::handle_t context_handle, void *ptr, dimensions_type dimensions)
Set one of the copy endpoints to a range of multi-dimensional elements, with dimensions specified in bytes rat...
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
this_type & clear_offsets() noexcept
Clear the offsets into both the source and the destination endpoint regions.
Definition: copy_parameters.hpp:284
dimension_t width
The three constituent individual dimensions, named.
Definition: types.hpp:109
Owning wrapper for CUDA 2D and 3D arrays.
Definition: array.hpp:29
this_type & set_context(endpoint_t endpoint, const context_t &context) noexcept
Set the context for one end of the copy operation.
Dimensions for 2D CUDA arrays.
Definition: types.hpp:156
memory::type_t type_of(const void *ptr)
Determine the type of memory at a given address vis-a-vis the CUDA ecosystem: Was it allocated by the...
Definition: pointer.hpp:112
Dimensions for 3D CUDA arrays.
Definition: types.hpp:106
void set_destination_untyped(context::handle_t context_handle, void *ptr, dimensions_type dimensions) noexcept
Set the destination of the copy operation to be a sequence of multi-dimensional elements, with dimensions specified in bytes rather than actual elements, starting somewhere in memory (in any CUDA memory space)
Definition: copy_parameters.hpp:225
dimension_t width
The two constituent individual dimensions, named; no "depth" for the 2D case.
Definition: types.hpp:159
dimensions_type extent() const noexcept
Definition: copy_parameters.hpp:364
dimensions_type bytes_extent() const noexcept
this_type & set_destination(T *ptr, dimensions_type dimensions) noexcept
Set the destination of the copy operation to a range of multi-dimensional elements, starting somewhere in memory (in any C...
Definition: copy_parameters.hpp:243
this_type & set_source(T *ptr, dimensions_type dimensions) noexcept
Set the source of the copy operation to a range of multi-dimensional elements, starting somewhere in memory (in any C...
Definition: copy_parameters.hpp:183
Contains a proxy class for CUDA arrays - GPU memory with 2-D or 3-D locality and hardware support for...
this_type & set_extent(dimensions_type extent_in_elements) noexcept
Set how much is to be copied in each dimension - in elements.
this_type & set_offset(endpoint_t endpoint, dimensions_type offset) noexcept
Set the (multi-dimensional) offset, in elements, into multidimensional range of elements at one of th...
this_type & clear_offset(endpoint_t endpoint) noexcept
Set the copy operation to use the multi-dimensional region of the specified endpoint without skipping...
Definition: copy_parameters.hpp:278
A builder-ish subclass template around the basic 2D or 3D copy parameters which CUDA's complex copyin...
Definition: copy_parameters.hpp:68
this_type & set_bytes_offset(endpoint_t endpoint, dimensions_type offset) noexcept
Set the (multi-dimensional) offset, in bytes, into multidimensional range of elements at one of the e...
typename detail_::base_copy_params< NumDimensions >::intra_context_type intra_context_type
A Raw CUDA Driver API type punning the general copy parameters, which is used for copy operations wit...
Definition: copy_parameters.hpp:75
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
address_t address(const void *device_ptr) noexcept
Definition: types.hpp:684
size_t dimension_t
An individual dimension extent for an array.
Definition: types.hpp:91
A wrapper class for host and/or device pointers, allowing easy access to CUDA's pointer attributes...
this_type & set_destination(span< T > span) noexcept
Set the destination of the copy operation to a range of multi-dimensional elements, starting at the beginning of a span of memory (in any CUDA memory space)
Definition: copy_parameters.hpp:262
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...
bool is_intra_context() const noexcept
Definition: copy_parameters.hpp:82
this_type & set_endpoint(endpoint_t endpoint, const cuda::array_t< T, NumDimensions > &array) noexcept
Set one of the copy endpoints to a CUDA array.
this_type & set_source(span< T > span) noexcept
Set the source of the copy operation to a range of multi-dimensional elements, starting at the beginning of a span of...
Definition: copy_parameters.hpp:202
this_type & set_pitches(dimension_type uniform_pitch_in_elements) noexcept
Set the difference, in elements, between the beginning of sequences of the minor-most dimension...
Definition: copy_parameters.hpp:313
this_type & set_source_untyped(context::handle_t context_handle, void *ptr, dimensions_type dimensions)
Set the source of the copy operation to be a sequence of multi-dimensional elements, with dimensions specified in bytes rather than actual elements, starting somewhere in memory (in any CUDA memory space)
Definition: copy_parameters.hpp:165
this_type & set_bytes_pitch(endpoint_t endpoint, dimension_type pitch_in_bytes) noexcept
Set the difference, in bytes, between the beginning of sequences of the minor-most dimension...
Definition: copy_parameters.hpp:294
this_type & set_pitch(endpoint_t endpoint, dimension_type pitch_in_elements) noexcept
Set the difference, in elements, between the beginning of sequences of the minor-most dimension...
Definition: copy_parameters.hpp:304
CUDA's array memory-objects are multi-dimensional; but their dimensions, or extents, are not the same as cuda::grid::dimensions_t; they may be much larger in each axis.
Definition: types.hpp:102
void zero(void *start, size_t num_bytes, optional_ref< const stream_t > stream={})
Sets all bytes in a region of memory to 0 (zero)
Definition: memory.hpp:418
this_type & set_bytes_extent(dimensions_type extent_in_bytes) noexcept
Set how much is to be copied in each dimension - in bytes.