cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
device.hpp
#pragma once
#ifndef MULTI_WRAPPER_IMPLS_DEVICE_HPP_
#define MULTI_WRAPPER_IMPLS_DEVICE_HPP_

#include "../device.hpp"
#include "../event.hpp"
#include "../kernel_launch.hpp"
#include "../stream.hpp"
#include "../primary_context.hpp"
#include "../kernel.hpp"
#include "../apriori_compiled_kernel.hpp"
#include "../current_context.hpp"
#include "../current_device.hpp"
#include "../peer_to_peer.hpp"

namespace cuda {

namespace device {

namespace primary_context {

inline bool is_active(const device_t& device)
{
    return detail_::is_active(device.id());
}

inline void destroy(const device_t& device)
{
    auto status = cuDevicePrimaryCtxReset(device.id());
    throw_if_error(status, "Failed destroying/resetting the primary context of device " + ::std::to_string(device.id()));
}

inline primary_context_t get(const device_t& device)
{
    auto pc_handle = detail_::get_handle(device.id(), true);
    return detail_::wrap(device.id(), pc_handle, true);
}

namespace detail_ {

// Use this when you need a primary context, don't have a device_t to hang it on,
// and don't want it to get deactivated/destroyed right after you use it.
inline primary_context_t leaky_get(cuda::device::id_t device_id)
{
    bool need_to_activate_and_leak = not cuda::device::primary_context::detail_::is_active(device_id);
    auto pc_handle = cuda::device::primary_context::detail_::get_handle(device_id, true);
    return cuda::device::primary_context::detail_::wrap(device_id, pc_handle, not need_to_activate_and_leak);
}

} // namespace detail_

} // namespace primary_context
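
// A usage sketch for the primary-context free functions above (illustrative only, not
// part of this header; cuda::device::get(0) is assumed to return a device_t proxy for
// device 0):
//
//     auto device = cuda::device::get(0);
//     if (not cuda::device::primary_context::is_active(device)) {
//         // get() obtains the device's primary context and keeps it alive
//         // for the lifetime of the returned proxy object
//         auto pc = cuda::device::primary_context::get(device);
//         // ... use pc, e.g. pc.default_stream() ...
//     }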

namespace peer_to_peer {

inline bool can_access(device_t accessor, device_t peer)
{
    return detail_::can_access(accessor.id(), peer.id());
}

inline void enable_access(device_t accessor, device_t peer)
{
    return context::peer_to_peer::enable_access(accessor.primary_context(), peer.primary_context());
}

inline void disable_access(device_t accessor, device_t peer)
{
#ifndef NDEBUG
    if (accessor == peer) {
        throw ::std::invalid_argument("A device cannot be used as its own peer");
    }
#endif
    context::peer_to_peer::disable_access(accessor.primary_context(), peer.primary_context());
}

inline bool can_access_each_other(device_t first, device_t second)
{
    return can_access(first, second) and can_access(second, first);
}

inline void enable_bidirectional_access(device_t first, device_t second)
{
#ifndef NDEBUG
    if (first == second) {
        throw ::std::invalid_argument("A device cannot be used as its own peer");
    }
#endif
    context::peer_to_peer::enable_bidirectional_access(first.primary_context(), second.primary_context());
}

inline void disable_bidirectional_access(device_t first, device_t second)
{
#ifndef NDEBUG
    if (first == second) {
        throw ::std::invalid_argument("A device cannot be used as its own peer");
    }
#endif
    context::peer_to_peer::disable_bidirectional_access(first.primary_context(), second.primary_context());
}

inline attribute_value_t get_attribute(attribute_t attribute, device_t first, device_t second)
{
#ifndef NDEBUG
    if (first == second) {
        throw ::std::invalid_argument("A device cannot be used as its own peer");
    }
#endif
    return detail_::get_attribute(attribute, first.id(), second.id());
}

} // namespace peer_to_peer
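
// A usage sketch for the peer-to-peer functions above (illustrative only;
// cuda::device::get(...) is assumed to return device_t proxies for two distinct devices):
//
//     auto gpu_0 = cuda::device::get(0);
//     auto gpu_1 = cuda::device::get(1);
//     if (cuda::device::peer_to_peer::can_access_each_other(gpu_0, gpu_1)) {
//         cuda::device::peer_to_peer::enable_bidirectional_access(gpu_0, gpu_1);
//     }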

inline stream_t primary_context_t::default_stream() const noexcept
{
    return stream::wrap(device_id_, handle_, stream::default_stream_handle);
}

} // namespace device

// device_t methods

inline stream_t device_t::default_stream(bool hold_primary_context_refcount_unit) const
{
    auto pc = primary_context();
    if (hold_primary_context_refcount_unit) {
        device::primary_context::detail_::increase_refcount(id_);
    }
    return stream::wrap(
        id(), pc.handle(), stream::default_stream_handle,
        do_not_take_ownership, hold_primary_context_refcount_unit);
}

inline stream_t device_t::create_stream(
    bool will_synchronize_with_default_stream,
    stream::priority_t priority) const
{
    return stream::create(*this, will_synchronize_with_default_stream, priority);
}

inline device::primary_context_t device_t::primary_context(bool hold_pc_refcount_unit) const
{
    auto pc_handle = primary_context_handle();
    if (hold_pc_refcount_unit) {
        device::primary_context::detail_::increase_refcount(id_);
        // Q: Why increase the refcount here, when `primary_context_handle()`
        //    ensured this has already happened for this object?
        // A: Because an unscoped primary_context_t needs its own refcount
        //    unit (e.g. in case this object gets destructed but the
        //    primary_context_t is still alive).
    }
    return device::primary_context::detail_::wrap(id_, pc_handle, hold_pc_refcount_unit);
}
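
// A usage sketch for primary_context(bool) above (illustrative only; cuda::device::get(0)
// is an assumed way of obtaining a device_t): requesting an extra refcount unit lets the
// returned proxy outlive the device_t that produced it:
//
//     cuda::device::primary_context_t pc = cuda::device::get(0).primary_context(true);
//     // The temporary device_t is gone, but pc holds its own refcount unit,
//     // keeping the primary context alive until pc itself is destructed.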

inline void synchronize(const device_t& device)
{
    auto pc = device.primary_context();
    context::current::detail_::scoped_override_t set_device_for_this_scope(pc.handle());
    context::current::detail_::synchronize(device.id(), pc.handle());
}

template <typename KernelFunction, typename ... KernelParameters>
void device_t::launch(
    KernelFunction kernel_function, launch_configuration_t launch_configuration,
    KernelParameters ... parameters) const
{
    auto pc = primary_context();
    pc.default_stream().enqueue.kernel_launch(
        kernel_function, launch_configuration, parameters...);
}
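
// A usage sketch for device_t::launch() and cuda::synchronize() above (illustrative only;
// my_kernel, num_blocks, threads_per_block and the arguments are hypothetical, and the
// launch configuration is assumed to come from cuda::make_launch_config or an equivalent
// helper):
//
//     auto device = cuda::device::current::get();
//     auto config = cuda::make_launch_config(num_blocks, threads_per_block);
//     device.launch(my_kernel, config, arg1, arg2); // enqueued on the primary context's default stream
//     cuda::synchronize(device);                    // block until all work on the device completes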

inline context_t device_t::create_context(
    context::host_thread_synch_scheduling_policy_t synch_scheduling_policy,
    bool keep_larger_local_mem_after_resize) const
{
    return context::create(*this, synch_scheduling_policy, keep_larger_local_mem_after_resize);
}

inline event_t device_t::create_event(
    bool uses_blocking_sync,
    bool records_timing,
    bool interprocess)
{
    // The current implementation of event::create is not particularly smart,
    // but it's probably not worth trying to improve it just for this function
    return event::create(*this, uses_blocking_sync, records_timing, interprocess);
}
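
// A usage sketch for the stream- and event-creation methods above (illustrative only;
// the argument values shown are example choices, and any omitted trailing arguments are
// assumed to get defaults from the in-class declarations in device.hpp):
//
//     auto device = cuda::device::current::get();
//     auto stream = device.create_stream(false);             // does not synchronize with the default stream
//     auto event  = device.create_event(false, true, false); // busy-wait sync, record timings, not interprocess
//     stream.enqueue.event(event);                           // record the event on the new stream
//     event.synchronize();                                   // have the host wait for the event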

namespace detail_ {

template<typename Kernel>
device::primary_context_t get_implicit_primary_context(Kernel)
{
    return device::current::get().primary_context();
}

template<>
inline device::primary_context_t get_implicit_primary_context<kernel_t>(kernel_t kernel)
{
    auto context = kernel.context();
    auto device = context.device();
    auto primary_context = device.primary_context();
    if (context != primary_context) {
        throw ::std::logic_error("Attempt to launch a kernel associated with a non-primary context without specifying a stream associated with that context.");
    }
    return primary_context;
}

template<>
inline device::primary_context_t get_implicit_primary_context<apriori_compiled_kernel_t>(apriori_compiled_kernel_t kernel)
{
    const kernel_t& kernel_ = kernel;
    return get_implicit_primary_context(kernel_);
}

} // namespace detail_

} // namespace cuda

#endif // MULTI_WRAPPER_IMPLS_DEVICE_HPP_