cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
device.hpp
Go to the documentation of this file.
1 
10 #pragma once
11 #ifndef MULTI_WRAPPER_IMPLS_DEVICE_HPP_
12 #define MULTI_WRAPPER_IMPLS_DEVICE_HPP_
13 
14 #include "../device.hpp"
15 #include "../error.hpp"
16 #include "../event.hpp"
17 #include "../kernel_launch.hpp"
18 #include "../stream.hpp"
19 #include "../primary_context.hpp"
20 #include "../current_context.hpp"
21 #include "../current_device.hpp"
22 #include "../peer_to_peer.hpp"
23 
24 #include "../types.hpp"
25 
26 #include <string>
27 
28 namespace cuda {
29 
30 namespace device {
31 
32 namespace primary_context {
33 
34 inline bool is_active(const device_t& device)
35 {
36  return detail_::is_active(device.id());
37 }
38 
39 inline void destroy(const device_t& device)
40 {
41  auto status = cuDevicePrimaryCtxReset(device.id());
42  throw_if_error_lazy(status, "Failed destroying/resetting the primary context of device " + ::std::to_string(device.id()));
43 }
44 
45 inline primary_context_t get(const device_t& device)
46 {
47  auto pc_handle = detail_::get_handle(device.id(), true);
48  return detail_::wrap( device.id(), pc_handle, true);
49 }
50 
51 namespace detail_ {
52 
53 // Use this when you need a PC, you don't have a device_t to hang it on,
54 // and you don't want it to get deactivated/destroyed right after you use it.
55 inline primary_context_t leaky_get(cuda::device::id_t device_id)
56 {
57  bool need_to_activate_and_leak = not cuda::device::primary_context::detail_::is_active(device_id);
58  auto pc_handle = cuda::device::primary_context::detail_::get_handle(device_id, true);
59  return cuda::device::primary_context::detail_::wrap(device_id, pc_handle, not need_to_activate_and_leak);
60 }
61 
62 } // namespace detail_
63 
64 } // namespace primary_context
65 
66 namespace peer_to_peer {
67 
68 inline bool can_access(const device_t& accessor, const device_t& peer)
69 {
70  return detail_::can_access(accessor.id(), peer.id());
71 }
72 
73 inline void enable_access(const device_t& accessor, const device_t& peer)
74 {
76 }
77 
78 inline void disable_access(const device_t& accessor, const device_t& peer)
79 {
80 #ifndef NDEBUG
81  if (accessor == peer) {
82  throw ::std::invalid_argument("A device cannot be used as its own peer");
83  }
84 #endif
86 }
87 
88 inline bool can_access_each_other(const device_t& first, const device_t& second)
89 {
90  return can_access(first, second) and can_access(second, first);
91 }
92 
93 inline void enable_bidirectional_access(const device_t& first, const device_t& second)
94 {
95 #ifndef NDEBUG
96  if (first == second) {
97  throw ::std::invalid_argument("A device cannot be used as its own peer");
98  }
99 #endif
101 }
102 
103 inline void disable_bidirectional_access(const device_t& first, const device_t& second)
104 {
105 #ifndef NDEBUG
106  if (first == second) {
107  throw ::std::invalid_argument("A device cannot be used as its own peer");
108  }
109 #endif
111 }
112 
113 inline attribute_value_t get_attribute(attribute_t attribute, const device_t& first, const device_t& second)
114 {
115 #ifndef NDEBUG
116  if (first == second) {
117  throw ::std::invalid_argument("A device cannot be used as its own peer");
118  }
119 #endif
120  return detail_::get_attribute(attribute, first.id(), second.id());
121 }
122 
123 } // namespace peer_to_peer
124 
126 {
127  return stream::wrap(device_id_, handle_, stream::default_stream_handle);
128 }
129 
130 } // namespace device
131 
132 // device_t methods
133 
134 inline stream_t device_t::default_stream(bool hold_primary_context_refcount_unit) const
135 {
136  auto pc = primary_context();
137  if (hold_primary_context_refcount_unit) {
138  device::primary_context::detail_::increase_refcount(id_);
139  }
140  return stream::wrap(
141  id(), pc.handle(), stream::default_stream_handle,
142  do_not_take_ownership, hold_primary_context_refcount_unit);
143 }
144 
146  bool will_synchronize_with_default_stream,
147  stream::priority_t priority) const
148 {
149  return stream::create(*this, will_synchronize_with_default_stream, priority);
150 }
151 
152 inline device::primary_context_t device_t::primary_context(bool hold_pc_refcount_unit) const
153 {
154  auto pc_handle = primary_context_handle();
155  if (hold_pc_refcount_unit) {
156  device::primary_context::detail_::increase_refcount(id_);
157  // Q: Why increase the refcount here, when `primary_context_handle()`
158  // ensured this has already happened for this object?
159  // A: Because an unscoped primary_context_t needs its own refcount
160  // unit (e.g. in case this object gets destructed but the
161  // primary_context_t is still alive).
162  }
163  return device::primary_context::detail_::wrap(id_, pc_handle, hold_pc_refcount_unit);
164 }
165 
166 inline void synchronize(const device_t& device)
167 {
168  auto pc = device.primary_context();
169  CAW_SET_SCOPE_CONTEXT(pc.handle());
170  context::current::detail_::synchronize(device.id(), pc.handle());
171 }
172 
173 template <typename Kernel, typename ... KernelParameters>
175  Kernel kernel,
176  launch_configuration_t launch_configuration,
177  KernelParameters... parameters) const
178 {
179  auto pc = primary_context();
180  pc.launch(kernel, launch_configuration, parameters...);
181 }
182 
184  context::host_thread_sync_scheduling_policy_t sync_scheduling_policy,
185  bool keep_larger_local_mem_after_resize) const
186 {
187  return context::create(*this, sync_scheduling_policy, keep_larger_local_mem_after_resize);
188 }
189 
191  bool uses_blocking_sync,
192  bool records_timing,
193  bool interprocess)
194 {
195  // The current implementation of event::create is not super-smart,
196  // but it's probably not worth it trying to improve just this function
197  return event::create(*this, uses_blocking_sync, records_timing, interprocess);
198 }
199 
200 } // namespace cuda
201 
202 #endif // MULTI_WRAPPER_IMPLS_DEVICE_HPP_
203 
int attribute_value_t
All CUDA device attributes (cuda::device::attribute_t) have a value of this type. ...
Definition: types.hpp:860
stream_t create(const device_t &device, bool synchronizes_with_default_stream, priority_t priority)
Create a new stream (= queue) in the primary execution context of a CUDA device.
Definition: stream.hpp:57
Proxy class for a CUDA stream.
Definition: stream.hpp:246
Wrapper class for a CUDA context.
Definition: context.hpp:244
bool can_access_each_other(const device_t &first, const device_t &second)
Determine whether two CUDA devices can currently access each other.
Definition: device.hpp:88
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:246
bool can_access(const device_t &accessor, const device_t &peer)
Determine whether one CUDA device can access the global memory of another CUDA device.
Definition: device.hpp:68
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69
context_t create_context(context::host_thread_sync_scheduling_policy_t sync_scheduling_policy=context::heuristic, bool keep_larger_local_mem_after_resize=false) const
See cuda::context::create()
Definition: device.hpp:183
Wrapper class for a CUDA event.
Definition: event.hpp:133
A class for holding the primary context of a CUDA device.
Definition: primary_context.hpp:112
stream_t default_stream(bool hold_primary_context_refcount_unit=false) const
Obtain a wrapper for the (always-existing) default stream within the device's primary context...
Definition: device.hpp:134
attribute_value_t get_attribute(attribute_t attribute, const device_t &first, const device_t &second)
Get one of the numeric attributes for a(n ordered) pair of devices, relating to their interaction...
Definition: device.hpp:113
void enable_bidirectional_access(context_t first, context_t second)
Enable access both by the first to the second context and the other way around.
Definition: context.hpp:215
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
stream_t create_stream(bool will_synchronize_with_default_stream, stream::priority_t priority=cuda::stream::default_priority) const
See cuda::stream::create()
Definition: device.hpp:145
device::id_t id() const noexcept
Return the proxied device's ID.
Definition: device.hpp:594
stream_t default_stream() const noexcept
Definition: device.hpp:125
host_thread_sync_scheduling_policy_t
Scheduling policies the CUDA driver may use when the host-side thread it is running in needs to wait ...
Definition: types.hpp:884
void launch(Kernel kernel, launch_configuration_t launch_configuration, KernelParameters... arguments) const
Launch a kernel on the default stream of the device's primary context.
Definition: device.hpp:174
void synchronize(const device_t &device)
Waits for all previously-scheduled tasks on all streams (= queues) on a specified device to conclude...
Definition: device.hpp:166
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:968
event_t create(const device_t &device, bool uses_blocking_sync=sync_by_busy_waiting, bool records_timing=do_record_timings, bool interprocess=not_interprocess)
creates a new event on (the primary execution context of) a device.
Definition: event.hpp:45
device::primary_context_t primary_context(bool hold_pc_refcount_unit=false) const
Produce a proxy for the device's primary context - the one used by runtime API calls.
Definition: device.hpp:152
stream_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t stream_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing stream in a stream_t instance.
Definition: stream.hpp:1006
event_t create_event(bool uses_blocking_sync=event::sync_by_busy_waiting, bool records_timing=event::do_record_timings, bool interprocess=event::not_interprocess)
See cuda::event::create()
Definition: device.hpp:190
CUdevice_P2PAttribute attribute_t
While individual CUDA devices have individual "attributes" (attribute_t), there are also attributes c...
Definition: types.hpp:869
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
void disable_access(const device_t &accessor, const device_t &peer)
Disable access by one CUDA device to the global memory of another.
Definition: device.hpp:78
void enable_access(const device_t &accessor, const device_t &peer)
Enable access by one CUDA device to the global memory of another.
Definition: device.hpp:73
void disable_access(context_t accessor, context_t peer)
Disable access by one CUDA device to the global memory of another.
Definition: context.hpp:210
void enable_access(context_t accessor, context_t peer)
Enable access by one CUDA device to the global memory of another.
Definition: context.hpp:205
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
device_t wrap(id_t id) NOEXCEPT_IF_NDEBUG
Returns a wrapper for the CUDA device with a given id.
Definition: device.hpp:825
void enable_bidirectional_access(const device_t &first, const device_t &second)
Enable access both by the first to the second device and the other way around.
Definition: device.hpp:93
const stream::handle_t default_stream_handle
The CUDA runtime provides a default stream on which work is scheduled when no stream is specified; fo...
Definition: constants.hpp:42
void disable_bidirectional_access(context_t first, context_t second)
Disable access both by the first to the second context and the other way around.
Definition: context.hpp:222
bool is_active(const device_t &device)
Definition: device.hpp:34
Wrapper class for a CUDA device.
Definition: device.hpp:135
void disable_bidirectional_access(const device_t &first, const device_t &second)
Disable access both by the first to the second device and the other way around.
Definition: device.hpp:103