cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
context.hpp
Go to the documentation of this file.
1 
10 #pragma once
11 #ifndef MULTI_WRAPPER_IMPLS_CONTEXT_HPP_
12 #define MULTI_WRAPPER_IMPLS_CONTEXT_HPP_
13 
14 #include "../device.hpp"
15 #include "../stream.hpp"
16 #include "../event.hpp"
17 #include "../current_context.hpp"
18 #include "../current_device.hpp"
19 #include "../peer_to_peer.hpp"
20 #include "../memory.hpp"
21 #include "../context.hpp"
22 
23 
24 namespace cuda {
25 
26 namespace context {
27 
28 namespace detail_ {
29 
30 inline handle_t get_primary_for_same_device(handle_t handle, bool increase_refcount)
31 {
32  auto device_id = get_device_id(handle);
33  return device::primary_context::detail_::get_handle(device_id, increase_refcount);
34 }
35 
36 inline bool is_primary_for_device(handle_t handle, device::id_t device_id)
37 {
38  auto context_device_id = context::detail_::get_device_id(handle);
39  if (context_device_id != device_id) {
40  return false;
41  }
42  static constexpr const bool dont_increase_refcount { false };
43  auto pc_handle = device::primary_context::detail_::get_handle(device_id, dont_increase_refcount);
44  return handle == pc_handle;
45 }
46 
47 } // namespace detail_
48 
49 inline bool is_primary(const context_t& context)
50 {
51  return context::detail_::is_primary_for_device(context.handle(), context.device_id());
52 }
53 
54 inline void synchronize(const context_t& context)
55 {
56  return detail_::synchronize(context.device_id(), context.handle());
57 }
58 
59 namespace current {
60 
61 namespace detail_ {
62 
63 inline bool is_primary(handle_t cc_handle, device::id_t current_context_device_id)
64 {
65  // Note we assume current_context_device_id really is the device ID for cc_handle;
66  // otherwise we could just use is_primary_for_device()
67  return cc_handle == device::primary_context::detail_::get_handle(current_context_device_id);
68 }
69 
70 } // namespace detail_
71 
72 inline bool is_primary()
73 {
74  auto current_context = get();
75  return detail_::is_primary(current_context.handle(), current_context.device_id());
76 }
77 
78 namespace detail_ {
79 
// Temporarily make `context_handle` current on this thread; when
// `hold_primary_context_ref_unit` is set, also keep the device's primary
// context alive for the override's duration via a reference-count unit.
inline scoped_override_t::scoped_override_t(bool hold_primary_context_ref_unit, device::id_t device_id, handle_t context_handle)
: hold_primary_context_ref_unit_(hold_primary_context_ref_unit), device_id_or_0_(device_id)
{
 // Take the refcount unit _before_ pushing, so the context cannot be torn
 // down underneath us while it is current.
 if (hold_primary_context_ref_unit) { device::primary_context::detail_::increase_refcount(device_id); }
 push(context_handle);
}
86 
// Undo the override: pop the pushed context and, if we took one, release the
// primary-context reference-count unit. When exceptions are disallowed in
// destructors, the nothrow variants are used and failures are discarded.
inline scoped_override_t::~scoped_override_t() DESTRUCTOR_EXCEPTION_SPEC
{
#if THROW_IN_DESTRUCTORS
 pop();
#else
 pop_and_discard_nothrow();
#endif
 if (hold_primary_context_ref_unit_) {
#if THROW_IN_DESTRUCTORS
 device::primary_context::detail_::decrease_refcount(device_id_or_0_);
#else
 // Best-effort release; any error is swallowed rather than thrown.
 device::primary_context::detail_::decrease_refcount_nothrow(device_id_or_0_);
#endif
 }
}
102 
103 
107 inline handle_t push_default_if_missing()
108 {
109  auto handle = detail_::get_handle();
110  if (handle != context::detail_::none) {
111  return handle;
112  }
113  // TODO: consider using cudaSetDevice here instead
114  auto current_device_id = device::current::detail_::get_id();
115  auto pc_handle = device::primary_context::detail_::obtain_and_increase_refcount(current_device_id);
116  push(pc_handle);
117  return pc_handle;
118 }
119 
// RAII helper guaranteeing that _some_ context is current on this thread for
// its lifetime. If no context is current on construction, the current
// device's primary context is obtained (with a refcount unit) and pushed;
// the destructor then pops it and releases the unit. If a context was
// already current, this object does nothing at all.
class scoped_existence_ensurer_t {
public:
 context::handle_t context_handle;             // the context current while this object lives
 device::id_t device_id_;                      // only meaningful when we pushed a primary context
 bool decrease_pc_refcount_on_destruct_;       // whether the dtor must release a refcount unit

 explicit scoped_existence_ensurer_t(bool avoid_pc_refcount_increase = true)
 {
 auto status_and_handle = get_with_status();
 if (status_and_handle.status == cuda::status::not_yet_initialized) {
 // Driver not initialized yet => certainly no current context.
 context_handle = context::detail_::none;
 initialize_driver(); // and the handle
 }
 else {
 context_handle = status_and_handle.handle;
 }
 if (context_handle == context::detail_::none) {
 // No current context: fall back to the current device's primary context.
 device_id_ = device::current::detail_::get_id();
 context_handle = device::primary_context::detail_::obtain_and_increase_refcount(device_id_);
 context::current::detail_::push(context_handle);
 // When asked to avoid a _lasting_ refcount increase, undo it on destruction.
 decrease_pc_refcount_on_destruct_ = avoid_pc_refcount_increase;
 }
 else {
 // Some compilers fail to detect that device_id is never used
 // unless it's initialized, and thus warns us of maybe-uninitialized
 // use, so...
 device_id_ = 0;
 decrease_pc_refcount_on_destruct_ = false;
 }
 }

 ~scoped_existence_ensurer_t()
 {
 // Only clean up if we were the ones who pushed a primary context.
 if (context_handle != context::detail_::none and decrease_pc_refcount_on_destruct_) {
#if THROW_IN_DESTRUCTORS
 context::current::detail_::pop();
 device::primary_context::detail_::decrease_refcount(device_id_);
#else
 // Nothrow variants: errors during teardown are discarded.
 context::current::detail_::pop_and_discard_nothrow();
 device::primary_context::detail_::decrease_refcount_nothrow(device_id_);
#endif
 }
 }
};
181 
182 } // namespace detail_
183 
// Overriding with a primary context: forward whether the proxy owns a
// reference-count unit, so the override holds one for its duration too.
inline scoped_override_t::scoped_override_t(device::primary_context_t&& primary_context)
 : parent(primary_context.is_owning(), primary_context.device_id(), primary_context.handle()) {}
// Overriding with a regular context: no primary-context refcount involved.
inline scoped_override_t::scoped_override_t(const context_t& context) : parent(context.handle()) {}
inline scoped_override_t::scoped_override_t(context_t&& context) : parent(context.handle()) {}
188 
189 } // namespace current
190 
191 inline context_t create_and_push(
192  const device_t& device,
193  host_thread_sync_scheduling_policy_t sync_scheduling_policy,
194  bool keep_larger_local_mem_after_resize)
195 {
196  auto handle = detail_::create_and_push(device.id(), sync_scheduling_policy, keep_larger_local_mem_after_resize);
197  bool take_ownership = true;
198  return context::wrap(device.id(), handle, take_ownership);
199 }
200 
201 inline context_t create(
202  const device_t& device,
203  host_thread_sync_scheduling_policy_t sync_scheduling_policy,
204  bool keep_larger_local_mem_after_resize)
205 {
206  auto created = create_and_push(device, sync_scheduling_policy, keep_larger_local_mem_after_resize);
207  current::pop();
208  return created;
209 }
210 
211 namespace peer_to_peer {
212 
213 inline bool can_access(context_t accessor, context_t peer)
214 {
215  return device::peer_to_peer::detail_::can_access(accessor.device_id(), peer.device_id());
216 }
217 
218 inline void enable_access(context_t accessor, context_t peer)
219 {
220  detail_::enable_access(accessor.handle(), peer.handle());
221 }
222 
223 inline void disable_access(context_t accessor, context_t peer)
224 {
225  detail_::disable_access(accessor.handle(), peer.handle());
226 }
227 
229 {
230  // Note: What happens when first and second are the same context? Or on the same device?
231  enable_access(first, second);
232  enable_access(second, first );
233 }
234 
236 {
237  // Note: What happens when first and second are the same context? Or on the same device?
238  disable_access(first, second);
239  disable_access(second, first );
240 }
241 
242 
243 } // namespace peer_to_peer
244 
245 namespace current {
246 
247 namespace peer_to_peer {
248 
249 inline void enable_access_to(const context_t &peer_context)
250 {
251  context::peer_to_peer::detail_::enable_access_to(peer_context.handle());
252 }
253 
254 inline void disable_access_to(const context_t &peer_context)
255 {
256  context::peer_to_peer::detail_::disable_access_to(peer_context.handle());
257 }
258 
259 } // namespace peer_to_peer
260 
261 } // namespace current
262 
263 } // namespace context
264 
266 {
267  return memory::device::detail_::allocate(context_handle_, size_in_bytes);
268 }
269 
271  size_t size_in_bytes, memory::managed::initial_visibility_t initial_visibility) const
272 {
273  return memory::managed::detail_::allocate(context_handle_, size_in_bytes, initial_visibility);
274 }
275 
276 
278 {
279  return cuda::device::get(device_id_);
280 }
281 
283 {
284  static constexpr const bool non_owning { false };
285  return cuda::context::wrap(device_id_, context_handle_, non_owning);
286 }
287 
288 inline bool context_t::is_primary() const
289 {
290  return context::current::detail_::is_primary(handle(), device_id());
291 }
292 
293 // Note: The context_t::create_module() member functions are defined in module.hpp,
294 // for better separation of runtime-origination and driver-originating headers; see
295 // issue #320 on the issue tracker.
296 
297 inline void context_t::enable_access_to(const context_t& peer) const
298 {
300 }
301 
302 inline void context_t::disable_access_to(const context_t& peer) const
303 {
305 }
306 
307 inline device_t context_t::device() const
308 {
309  return device::wrap(device_id_);
310 }
311 
313  bool will_synchronize_with_default_stream,
314  stream::priority_t priority) const
315 {
316  return stream::detail_::create(device_id_, handle_, will_synchronize_with_default_stream, priority);
317 }
318 
320  bool uses_blocking_sync,
321  bool records_timing,
322  bool interprocess) const
323 {
324  return cuda::event::detail_::create(
325  device_id_, handle_, do_not_hold_primary_context_refcount_unit,
326  uses_blocking_sync, records_timing, interprocess);
327 }
328 
329 inline stream_t context_t::default_stream() const
330 {
331  return stream::wrap(device_id_, handle_, stream::default_stream_handle, do_not_take_ownership);
332 }
333 
334 template <typename Kernel, typename ... KernelParameters>
335 void context_t::launch(
336  Kernel kernel,
337  launch_configuration_t launch_configuration,
338  KernelParameters... parameters) const
339 {
340  default_stream().enqueue.kernel_launch(kernel, launch_configuration, parameters...);
341 }
342 
343 } // namespace cuda
344 
345 #endif // MULTI_WRAPPER_IMPLS_CONTEXT_HPP_
346 
event_t create_event(bool uses_blocking_sync=event::sync_by_busy_waiting, bool records_timing=event::do_record_timings, bool interprocess=event::not_interprocess) const
Create a new event within this context; see cuda::event::create() for details regarding the parameter...
Definition: context.hpp:319
Proxy class for a CUDA stream.
Definition: stream.hpp:258
Wrapper class for a CUDA context.
Definition: context.hpp:249
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:243
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
memory::region_t allocate_managed(size_t size_in_bytes, cuda::memory::managed::initial_visibility_t initial_visibility=cuda::memory::managed::initial_visibility_t::to_supporters_of_concurrent_managed_access) const
Allocates memory on the device whose pointer is also visible on the host, and possibly on other devic...
Definition: context.hpp:270
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
Wrapper class for a CUDA event.
Definition: event.hpp:147
A class for holding the primary context of a CUDA device.
Definition: primary_context.hpp:122
void enable_bidirectional_access(context_t first, context_t second)
Enable access both by the first to the second context and the other way around.
Definition: context.hpp:228
void disable_access_to(const context_t &peer) const
Prevent kernels and memory operations within this context from involving memory allocated in a peer c...
Definition: context.hpp:302
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
device::id_t id() const noexcept
Return the proxied device's ID.
Definition: device.hpp:594
bool is_primary() const
Definition: context.hpp:288
device_t associated_device() const
Device on which the memory managed with this object is allocated.
Definition: context.hpp:277
host_thread_sync_scheduling_policy_t
Scheduling policies the CUDA driver may use when the host-side thread it is running in needs to wait ...
Definition: types.hpp:886
void initialize_driver()
Obtains the CUDA Runtime version.
Definition: miscellany.hpp:26
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:980
stream_t create_stream(bool will_synchronize_with_default_stream, stream::priority_t priority=cuda::stream::default_priority) const
Create a new stream within this context; see cuda::stream::create() for details regarding the paramete...
Definition: context.hpp:312
device_t get(id_t id)
Returns a proxy for the CUDA device with a given id.
Definition: device.hpp:832
memory::region_t allocate(size_t size_in_bytes) const
Allocate a region of memory on the device.
Definition: context.hpp:265
stream_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t stream_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing stream in a stream_t instance.
Definition: stream.hpp:1020
void enable_access_to(const context_t &peer_context)
Allows subsequently-executed memory operations and kernels to access the memory associated with the s...
Definition: context.hpp:249
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
void disable_access_to(const context_t &peer_context)
Prevents subsequently-executed memory operations and kernels from accessing the memory associated wit...
Definition: context.hpp:254
void enable_access_to(const context_t &peer) const
Allow kernels and memory operations within this context to involve memory allocated in a peer context...
Definition: context.hpp:297
void disable_access(context_t accessor, context_t peer)
Disable access by one CUDA device to the global memory of another.
Definition: context.hpp:223
void enable_access(context_t accessor, context_t peer)
Enable access by one CUDA device to the global memory of another.
Definition: context.hpp:218
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
device_t wrap(id_t id) NOEXCEPT_IF_NDEBUG
Returns a wrapper for the CUDA device with a given id.
Definition: device.hpp:820
bool can_access(context_t accessor, context_t peer)
Check if a CUDA context can access the global memory of another CUDA context.
Definition: context.hpp:213
bool is_primary(const context_t &context)
Definition: context.hpp:49
const stream::handle_t default_stream_handle
The CUDA runtime provides a default stream on which work is scheduled when no stream is specified; fo...
Definition: constants.hpp:42
context_t associated_context() const
Context in which the memory managed with this object is recognized / usable.
Definition: context.hpp:282
void disable_bidirectional_access(context_t first, context_t second)
Disable access both by the first to the second context and the other way around.
Definition: context.hpp:235
Wrapper class for a CUDA device.
Definition: device.hpp:135
initial_visibility_t
The choices of which categories CUDA devices must a managed memory region be visible to...
Definition: types.hpp:755