6 #ifndef CUDA_API_WRAPPERS_COPY_PARAMETERS_HPP 7 #define CUDA_API_WRAPPERS_COPY_PARAMETERS_HPP 22 template<dimensionality_t NumDimensions>
29 template<dimensionality_t NumDimensions>
30 struct base_copy_params;
33 struct base_copy_params<2> {
34 using intra_context_type = CUDA_MEMCPY2D;
35 using type = intra_context_type;
39 struct base_copy_params<3> {
40 using type = CUDA_MEMCPY3D_PEER;
41 using intra_context_type = CUDA_MEMCPY3D;
45 template<dimensionality_t NumDimensions>
46 using base_copy_params_t =
typename base_copy_params<NumDimensions>::type;
48 template<
size_t NumDimensions>
67 template<dimensionality_t NumDimensions>
69 using parent = detail_::base_copy_params_t<NumDimensions>;
75 using intra_context_type =
typename detail_::base_copy_params<NumDimensions>::intra_context_type;
82 bool is_intra_context() const noexcept {
return parent::srcContext == parent::dstContext; }
90 set_context(endpoint_t::source, context);
91 set_context(endpoint_t::destination, context);
142 return set_endpoint(endpoint, span.data(),
dimensions_type(span.size()));
153 return set_endpoint(endpoint_t::source, array);
165 return set_endpoint_untyped(endpoint_t::source, context_handle, ptr, dimensions);
178 return set_endpoint(endpoint_t::source, ptr, dimensions);
184 return set_endpoint(endpoint_t::source, context_handle, ptr, dimensions);
208 return set_endpoint(endpoint_t::destination, array);
223 set_endpoint_untyped(endpoint_t::destination, context_handle, ptr, dimensions);
236 return set_endpoint(endpoint_t::destination, ptr, dimensions);
242 return set_endpoint(endpoint_t::destination, context_handle, ptr, dimensions);
255 return set_destination(span.data(), {span.size(), 1, 1});
277 clear_offset(endpoint_t::source);
278 clear_offset(endpoint_t::destination);
287 (endpoint == endpoint_t::source ? parent::srcPitch : parent::dstPitch) = pitch_in_bytes;
297 return set_bytes_pitch(endpoint, pitch_in_elements *
sizeof(T));
306 auto uniform_pitch_in_bytes { uniform_pitch_in_elements *
sizeof(T) };
307 set_pitch<T>(endpoint_t::source, uniform_pitch_in_bytes);
308 set_pitch<T>(endpoint_t::destination, uniform_pitch_in_bytes);
315 return set_bytes_pitch(endpoint, parent::WidthInBytes);
319 this_type& set_default_pitches() noexcept
321 set_default_pitch(endpoint_t::source);
322 set_default_pitch(endpoint_t::destination);
354 template <
typename T>
357 auto extent_ = bytes_extent();
359 if (extent_.width %
sizeof(T) != 0) {
360 throw ::std::invalid_argument(
361 "Attempt to get the copy extent with assumed type of size " 362 + ::std::to_string(
sizeof(T)) +
" while the byte extent's " 363 +
"minor dimension is not a multiple of this size");
366 extent_.width /=
sizeof(T);
370 this_type& set_pitches(dimension_type uniform_pitch_in_bytes) noexcept
372 set_pitch(endpoint_t::source, uniform_pitch_in_bytes);
373 set_pitch(endpoint_t::destination, uniform_pitch_in_bytes);
406 return set_endpoint_untyped(endpoint, context_handle, ptr, untyped_dims);
419 auto context_handle = context::detail_::none;
420 return set_endpoint<T>(endpoint, context_handle, ptr, dimensions);
427 (endpoint == endpoint_t::source ? srcMemoryType : dstMemoryType) = CU_MEMORYTYPE_ARRAY;
428 (endpoint == endpoint_t::source ? srcArray : dstArray) = array.get();
429 (endpoint == endpoint_t::source ? srcDevice : dstDevice) = array.device_id();
440 return (endpoint == endpoint_t::source) ?
441 dims_type{ params.WidthInBytes, params.Height } :
442 dims_type{ params.WidthInBytes, params.Height };
449 return (endpoint == endpoint_t::source) ?
450 dims_type{ params.srcPitch, params.Height, params.Depth } :
451 dims_type{ params.WidthInBytes, params.Height, params.Depth };
460 (endpoint == endpoint_t::source ? srcMemoryType : dstMemoryType) = CU_MEMORYTYPE_ARRAY;
461 (endpoint == endpoint_t::source ? srcArray : dstArray) = array.get();
462 (endpoint == endpoint_t::source ? srcContext : dstContext) = array.context_handle();
474 (endpoint == endpoint_t::source ? srcContext : dstContext) = context.handle();
487 return set_endpoint_untyped(endpoint, context_handle, ptr, untyped_dims);
497 return set_endpoint<T>(endpoint, context::current::detail_::get_handle(), ptr, dimensions);
518 WidthInBytes = extent_in_elements.width *
sizeof(T);
519 Height = extent_in_elements.height;
526 WidthInBytes = extent_in_elements.width;
527 Height = extent_in_elements.height;
534 WidthInBytes = extent_in_elements.width;
535 Height = extent_in_elements.height;
536 Depth = extent_in_elements.depth;
560 if (memory_type == memory::type_t::array) {
561 throw ::std::invalid_argument(
"Attempt to use the non-array endpoint setter with array memory at " + cuda::detail_::ptr_as_hex(ptr));
563 if (memory_type == memory::type_t::unified_ or memory_type == type_t::device_)
565 (endpoint == endpoint_t::source ? srcDevice : dstDevice) =
device::address(ptr);
569 if (endpoint == endpoint_t::source) { srcHost = ptr; }
570 else { dstHost = ptr; }
572 set_bytes_pitch(endpoint, dimensions.
width);
573 (endpoint == endpoint_t::source ? srcMemoryType : dstMemoryType) = static_cast<CUmemorytype>
574 (memory_type == memory::type_t::non_cuda ? memory::type_t::host_ : memory_type);
578 if (bytes_extent().area() == 0) {
579 set_bytes_extent(dimensions);
592 if (memory_type == memory::type_t::array) {
593 throw ::std::invalid_argument(
"Attempt to use the non-array endpoint setter with array memory at " + cuda::detail_::ptr_as_hex(ptr));
595 if (memory_type == memory::type_t::unified_ or memory_type == type_t::device_)
597 (endpoint == endpoint_t::source ? srcDevice : dstDevice) =
device::address(ptr);
601 if (endpoint == endpoint_t::source) { srcHost = ptr; }
602 else { dstHost = ptr; }
604 set_bytes_pitch(endpoint, dimensions.
width);
605 (endpoint == endpoint_t::source ? srcHeight : dstHeight) = dimensions.height;
606 (endpoint == endpoint_t::source ? srcMemoryType : dstMemoryType) =
static_cast<CUmemorytype
> 607 (memory_type == memory::type_t::non_cuda ? memory::type_t::host_ : memory_type);
608 (endpoint == endpoint_t::source ? srcContext : dstContext) = context_handle;
610 if (bytes_extent().volume() == 0) {
611 set_bytes_extent(dimensions);
623 extent_in_elements.width *
sizeof(T),
624 extent_in_elements.height,
625 extent_in_elements.depth
627 return set_bytes_extent(extent_in_bytes);
634 (endpoint == endpoint_t::source ? srcXInBytes : dstXInBytes) = offset.width;
635 (endpoint == endpoint_t::source ? srcY : dstY) = offset.height;
636 (endpoint == endpoint_t::source ? srcZ : dstZ) = offset.depth;
644 (endpoint == endpoint_t::source ? srcXInBytes : dstXInBytes) = offset.width;
645 (endpoint == endpoint_t::source ? srcY : dstY) = offset.height;
653 dimensions_type offset_in_bytes{offset.width *
sizeof(T), offset.height, offset.depth};
654 return set_bytes_offset(endpoint, offset_in_bytes);
661 dimensions_type offset_in_bytes{offset.width *
sizeof(T), offset.height};
662 return set_bytes_offset(endpoint, offset_in_bytes);
668 if (params.srcDevice != params.dstDevice) {
669 throw ::std::invalid_argument(
"Attempt to use inter-device copy parameters for an intra-context copy");
671 if (params.srcContext != params.dstContext) {
672 throw ::std::invalid_argument(
"Attempt to use inter-context copy parameters for an intra-context copy");
678 result.srcXInBytes = params.srcXInBytes;
679 result.srcY = params.srcY;
680 result.srcZ = params.srcZ;
681 result.srcLOD = params.srcLOD;
682 result.srcMemoryType = params.srcMemoryType;
683 result.srcHost = params.srcHost;
684 result.srcDevice = params.srcDevice;
685 result.srcArray = params.srcArray;
686 result.reserved0 =
nullptr;
687 result.srcPitch = params.srcPitch;
688 result.srcHeight = params.srcHeight;
690 result.dstXInBytes = params.dstXInBytes;
691 result.dstY = params.dstY;
692 result.dstZ = params.dstZ;
693 result.dstLOD = params.dstLOD;
694 result.dstMemoryType = params.dstMemoryType;
695 result.dstHost = params.dstHost;
696 result.dstDevice = params.dstDevice;
697 result.dstArray = params.dstArray;
698 result.reserved1 =
nullptr;
699 result.dstPitch = params.dstPitch;
700 result.dstHeight = params.dstHeight;
702 result.WidthInBytes = params.WidthInBytes;
703 result.Height = params.Height;
704 result.Depth = params.Depth;
712 #endif //CUDA_API_WRAPPERS_COPY_PARAMETERS_HPP this_type & set_destination(const cuda::array_t< T, NumDimensions > &array) noexcept
Set the destination endpoint of the copy operation to be a CUDA array.
Definition: copy_parameters.hpp:206
this_type & set_endpoint(endpoint_t endpoint, span< T > span) noexcept
Set one of the copy endpoints to a range of multi-dimensional elements, starting at the beginning of a span of...
Definition: copy_parameters.hpp:140
endpoint_t
Type for choosing between endpoints of copy operations.
Definition: copy_parameters.hpp:19
this_type & set_single_context(const context_t &context) noexcept
Set the same context for both endpoints of the copy operation.
Definition: copy_parameters.hpp:88
Wrapper class for a CUDA context.
Definition: context.hpp:244
this_type & set_source(const cuda::array_t< T, NumDimensions > &array) noexcept
Set the source endpoint of the copy operation to be a CUDA array.
Definition: copy_parameters.hpp:151
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
this_type & set_endpoint_untyped(endpoint_t endpoint, context::handle_t context_handle, void *ptr, dimensions_type dimensions)
Set one of the copy endpoints to a range of multi-dimensional elements, with dimensions specified in bytes rat...
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:878
this_type & clear_offsets() noexcept
Clear the offsets into both the source and the destination endpoint regions.
Definition: copy_parameters.hpp:275
dimension_t width
The three constituent individual dimensions, named.
Definition: types.hpp:112
Owning wrapper for CUDA 2D and 3D arrays.
Definition: array.hpp:29
this_type & set_context(endpoint_t endpoint, const context_t &context) noexcept
Set the context for one end of the copy operation.
Dimensions for 2D CUDA arrays.
Definition: types.hpp:159
memory::type_t type_of(const void *ptr)
Determine the type of memory at a given address vis-a-vis the CUDA ecosystem: Was it allocated by the...
Definition: pointer.hpp:112
Dimensions for 3D CUDA arrays.
Definition: types.hpp:109
void set_destination_untyped(context::handle_t context_handle, void *ptr, dimensions_type dimensions) noexcept
Set the destination of the copy operation to be a sequence of multi-dimensional elements, with dimensions specified in bytes rather than actual elements, starting somewhere in memory (in any CUDA memory space)
Definition: copy_parameters.hpp:218
dimension_t width
The two constituent individual dimensions, named; no "depth" for the 2D case.
Definition: types.hpp:162
dimensions_type extent() const noexcept
Definition: copy_parameters.hpp:355
dimensions_type bytes_extent() const noexcept
this_type & set_destination(T *ptr, dimensions_type dimensions) noexcept
Set one of the copy endpoints to a range of multi-dimensional elements, starting somewhere in memory (in any C...
Definition: copy_parameters.hpp:234
this_type & set_source(T *ptr, dimensions_type dimensions) noexcept
Set one of the copy endpoints to a range of multi-dimensional elements, starting somewhere in memory (in any C...
Definition: copy_parameters.hpp:176
Contains a proxy class for CUDA arrays - GPU memory with 2-D or 3-D locality and hardware support for...
this_type & set_extent(dimensions_type extent_in_elements) noexcept
Set how much is to be copied in each dimension - in elements.
this_type & set_offset(endpoint_t endpoint, dimensions_type offset) noexcept
Set the (multi-dimensional) offset, in elements, into multidimensional range of elements at one of th...
this_type & clear_offset(endpoint_t endpoint) noexcept
Set the copy operation to use the multi-dimensional region of the specified endpoint without skipping...
Definition: copy_parameters.hpp:269
A builder-ish subclass template around the basic 2D or 3D copy parameters which CUDA's complex copyin...
Definition: copy_parameters.hpp:68
this_type & set_bytes_offset(endpoint_t endpoint, dimensions_type offset) noexcept
Set the (multi-dimensional) offset, in bytes, into multidimensional range of elements at one of the e...
typename detail_::base_copy_params< NumDimensions >::intra_context_type intra_context_type
A Raw CUDA Driver API type punning the general copy parameters, which is used for copy operations wit...
Definition: copy_parameters.hpp:75
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
address_t address(const void *device_ptr) noexcept
Definition: types.hpp:682
size_t dimension_t
An individual dimension extent for an array.
Definition: types.hpp:94
A wrapper class for host and/or device pointers, allowing easy access to CUDA's pointer attributes...
this_type & set_destination(span< T > span) noexcept
Set the destination of the copy operation to a range of multi-dimensional elements, starting at the beginning of a span of memory (in any CUDA memory space)
Definition: copy_parameters.hpp:253
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...
bool is_intra_context() const noexcept
Definition: copy_parameters.hpp:82
this_type & set_endpoint(endpoint_t endpoint, const cuda::array_t< T, NumDimensions > &array) noexcept
Set one of the copy endpoints to a CUDA array.
this_type & set_source(span< T > span) noexcept
Set one of the copy endpoints to a range of multi-dimensional elements, starting at the beginning of a span of...
Definition: copy_parameters.hpp:195
this_type & set_pitches(dimension_type uniform_pitch_in_elements) noexcept
Set the difference, in elements, between the beginning of sequences of the minor-most dimension...
Definition: copy_parameters.hpp:304
this_type & set_source_untyped(context::handle_t context_handle, void *ptr, dimensions_type dimensions)
Set the source of the copy operation to be a sequence of multi-dimensional elements, with dimensions specified in bytes rather than actual elements, starting somewhere in memory (in any CUDA memory space)
Definition: copy_parameters.hpp:163
this_type & set_bytes_pitch(endpoint_t endpoint, dimension_type pitch_in_bytes) noexcept
Set the difference, in bytes, between the beginning of sequences of the minor-most dimension...
Definition: copy_parameters.hpp:285
this_type & set_pitch(endpoint_t endpoint, dimension_type pitch_in_elements) noexcept
Set the difference, in elements, between the beginning of sequences of the minor-most dimension...
Definition: copy_parameters.hpp:295
CUDA's array memory-objects are multi-dimensional; but their dimensions, or extents, are not the same as cuda::grid::dimensions_t ; they may be much larger in each axis.
Definition: types.hpp:105
void zero(void *start, size_t num_bytes, optional_ref< const stream_t > stream={})
Sets all bytes in a region of memory to 0 (zero)
Definition: memory.hpp:416
this_type & set_bytes_extent(dimensions_type extent_in_bytes) noexcept
Set how much is to be copied in each dimension - in bytes.