cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
cuda::device_t Class Reference

Wrapper class for a CUDA device. More...

#include <device.hpp>

Public Types

using properties_t = device::properties_t
 
using attribute_value_t = device::attribute_value_t
 

Public Member Functions

context_t::global_memory_type memory () const
 
bool can_access (const device_t &peer) const
 Determine whether this device can access the global memory of another CUDA device. More...
 
void enable_access_to (const device_t &peer) const
 Enable access by this device to the global memory of another device. More...
 
void disable_access_to (const device_t &peer) const
 Disable access by this device to the global memory of another device. More...
 
device::primary_context_t primary_context (bool hold_pc_refcount_unit=false) const
 Produce a proxy for the device's primary context - the one used by runtime API calls. More...
 
properties_t properties () const
 Obtains the (mostly) non-numeric properties for this device. More...
 
::std::string name () const
 Obtains this device's human-readable name, e.g. More...
 
attribute_value_t get_attribute (device::attribute_t attribute) const
 Obtain a numeric-value attribute of the device. More...
 
grid::block_dimension_t maximum_threads_per_block () const
 
device::pci_location_t pci_id () const
 Obtains this device's location on the PCI express bus in terms of domain, bus and device id, e.g. More...
 
device::multiprocessor_count_t multiprocessor_count () const
 
device::compute_architecture_t architecture () const
 Obtains the device's hardware architecture generation's numeric designator; see cuda::device::compute_architecture_t.
 
device::compute_capability_t compute_capability () const
 Obtains the device's compute capability; see cuda::device::compute_capability_t.
 
bool supports_concurrent_managed_access () const
 Determine whether this device can coherently access managed memory concurrently with the CPU.
 
bool supports_block_cooperation () const
 True if this device supports executing kernels in which blocks can directly cooperate beyond the use of global-memory atomics.
 
device::limit_value_t get_limit (device::limit_t limit) const
 Obtains the upper limit on the amount of a certain kind of resource this device offers. More...
 
void set_limit (device::limit_t limit, device::limit_value_t new_value) const
 Set the upper limit of one of the named numeric resources on this device.
 
const device_t & synchronize () const
 Waits for all previously-scheduled tasks on all streams (= queues) on this device to conclude. More...
 
device_t & synchronize ()
 
const device_t & make_current () const
 
device_t & make_current ()
 
void reset () const
 Invalidates all memory allocations and resets all state regarding this CUDA device on the current operating system process. More...
 
void set_cache_preference (multiprocessor_cache_preference_t preference) const
 Controls the balance between L1 space and shared memory space for kernels executing on this device. More...
 
multiprocessor_cache_preference_t cache_preference () const
 Determines the balance between L1 space and shared memory space set for kernels executing on this device.
 
void set_shared_memory_bank_size (device::shared_memory_bank_size_t new_bank_size) const
 Sets the shared memory bank size, described in this Parallel-for-all blog entry More...
 
device::shared_memory_bank_size_t shared_memory_bank_size () const
 Returns the shared memory bank size, as described in this Parallel-for-all blog entry More...
 
device::id_t id () const noexcept
 Return the proxied device's ID. More...
 
stream_t default_stream (bool hold_primary_context_refcount_unit=false) const
 Obtain a wrapper for the (always-existing) default stream within the device's primary context. More...
 
stream_t create_stream (bool will_synchronize_with_default_stream, stream::priority_t priority=cuda::stream::default_priority) const
 See cuda::stream::create()
 
event_t create_event (bool uses_blocking_sync=event::sync_by_busy_waiting, bool records_timing=event::do_record_timings, bool interprocess=event::not_interprocess)
 See cuda::event::create()
 
context_t create_context (context::host_thread_sync_scheduling_policy_t sync_scheduling_policy=context::heuristic, bool keep_larger_local_mem_after_resize=false) const
 See cuda::context::create()
 
template<typename Kernel , typename ... KernelParameters>
void launch (Kernel kernel, launch_configuration_t launch_configuration, KernelParameters... arguments) const
 Launch a kernel on the default stream of the device's primary context. More...
 
device::stream_priority_range_t stream_priority_range () const
 Determines the range of possible priorities for streams on this device. More...
 
context::host_thread_sync_scheduling_policy_t sync_scheduling_policy () const
 
void set_sync_scheduling_policy (context::host_thread_sync_scheduling_policy_t new_policy)
 
bool keeping_larger_local_mem_after_resize () const
 
void keep_larger_local_mem_after_resize (bool keep=true)
 Instructs the (primary context of the) device to keep larger amounts of global device memory allocated for use as local memory, after a kernel was executed which required a larger-than-usual allocation.
 
void dont_keep_larger_local_mem_after_resize ()
 Instructs the (primary context of the) device to discard allocations of larger amounts of global device memory which were used by a kernel requiring a larger amount of local memory, once it has concluded execution. More...
 
 device_t (device_t &&other) noexcept
 
 device_t (const device_t &other) noexcept
 
device_t & operator= (const device_t &other) noexcept
 
device_t & operator= (device_t &&other) noexcept
 

Static Public Member Functions

static device_t choose_best_match (const properties_t &properties)
 

Friends

void swap (device_t &lhs, device_t &rhs) noexcept
 

Detailed Description

Wrapper class for a CUDA device.

Use this class - built around a device ID, or for the current device - to perform almost all, if not all, device-related operations, rather than passing the device ID around all the time.

Note
this is one of the three main classes in the Runtime API wrapper library, together with cuda::stream_t and cuda::event_t
obtaining device LUIDs is not supported (those are too graphics-specific)
This class is a "reference type", not a "value type". Therefore, making changes to properties of the device is a const-respecting operation on this class.
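
A minimal usage sketch follows. The header name and the cuda::device::get accessor are assumptions and may differ between library versions:

```cpp
#include <cuda/runtime_api.hpp>
#include <iostream>

int main()
{
    // Obtain a proxy for device 0 - no raw device ID needs to be
    // passed around afterwards; the wrapper carries it
    auto device = cuda::device::get(0);
    std::cout << "Device " << device.id() << ": " << device.name() << '\n';
}
```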

Member Function Documentation

◆ can_access()

bool cuda::device_t::can_access ( const device_t &  peer ) const
inline

Determine whether this device can access the global memory of another CUDA device.

Parameters
peer  the device whose memory is to be accessed
Returns
true if and only if access is possible
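
A sketch of checking for, then enabling, peer-to-peer access between two devices (cuda::device::get is an assumed accessor):

```cpp
auto dev0 = cuda::device::get(0);
auto dev1 = cuda::device::get(1);
if (dev0.can_access(dev1)) {
    // After this call, kernels running on dev0 may dereference
    // pointers into dev1's global memory
    dev0.enable_access_to(dev1);
}
```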

◆ default_stream()

stream_t cuda::device_t::default_stream ( bool  hold_primary_context_refcount_unit = false) const
inline

Obtain a wrapper for the (always-existing) default stream within the device's primary context.

Parameters
hold_primary_context_refcount_unit  when true, the returned stream wrapper will keep the device's primary context in existence during its lifetime.
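
For example (a sketch; cuda::device::get is an assumed accessor):

```cpp
auto device = cuda::device::get(0);
// Pass true so the primary context is guaranteed to outlive the stream wrapper
auto stream = device.default_stream(true);
stream.synchronize(); // wait for work previously enqueued on the default stream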

◆ disable_access_to()

void cuda::device_t::disable_access_to ( const device_t &  peer ) const
inline

Disable access by this device to the global memory of another device.

Parameters
peer  the device to which to disable access

◆ dont_keep_larger_local_mem_after_resize()

void cuda::device_t::dont_keep_larger_local_mem_after_resize ( )
inline

Instructs the (primary context of the) device to discard allocations of larger amounts of global device memory which were used by a kernel requiring a larger amount of local memory, once it has concluded execution.

◆ enable_access_to()

void cuda::device_t::enable_access_to ( const device_t &  peer ) const
inline

Enable access by this device to the global memory of another device.

Parameters
peer  the device to which to enable access

◆ get_attribute()

attribute_value_t cuda::device_t::get_attribute ( device::attribute_t  attribute) const
inline

Obtain a numeric-value attribute of the device.

Note
See device::attribute_t for explanation about attributes, properties and flags.
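
A sketch of querying an attribute, assuming device::attribute_t corresponds to the raw runtime-API cudaDeviceAttr enumeration (and cuda::device::get is an assumed accessor):

```cpp
auto device = cuda::device::get(0);
// cudaDevAttrMaxThreadsPerBlock is a raw CUDA runtime attribute enumerator
auto max_threads = device.get_attribute(cudaDevAttrMaxThreadsPerBlock);
```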

◆ get_limit()

device::limit_value_t cuda::device_t::get_limit ( device::limit_t  limit) const
inline

Obtains the upper limit on the amount of a certain kind of resource this device offers.

Parameters
limit  which resource's limit to obtain
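
A sketch pairing get_limit() with set_limit(), assuming device::limit_t corresponds to the runtime API's cudaLimit enumeration:

```cpp
auto device = cuda::device::get(0);
auto stack_size = device.get_limit(cudaLimitStackSize);
// Double the per-thread stack size, e.g. for deeply recursive kernels
device.set_limit(cudaLimitStackSize, stack_size * 2);
```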

◆ id()

device::id_t cuda::device_t::id ( ) const
inlinenoexcept

Return the proxied device's ID.

◆ keeping_larger_local_mem_after_resize()

bool cuda::device_t::keeping_larger_local_mem_after_resize ( ) const
inline
Returns
true if the device will keep larger amounts of global device memory allocated for use as local memory, after a kernel was executed which required a larger-than-usual allocation

◆ launch()

template<typename Kernel , typename ... KernelParameters>
void cuda::device_t::launch ( Kernel  kernel,
launch_configuration_t  launch_configuration,
KernelParameters...  arguments 
) const

Launch a kernel on the default stream of the device' primary context.

Template Parameters
Kernel  may be either a plain function type (for a __global__ function accessible to the translation unit), or (a reference to) any subclass of cuda::kernel_t.
Parameters
kernel  the kernel to launch; may be either a (__global__) function pointer, or a kernel proxy class.
launch_configuration  the configuration with which to launch the kernel.
arguments  the arguments with which to launch kernel (but note that references are not maintained).
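
A sketch of launching a plain __global__ function through this method. The cuda::make_launch_config helper is an assumption; launch-configuration construction differs between versions of the library:

```cpp
__global__ void scale(float* data, float factor, std::size_t n)
{
    auto i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) { data[i] *= factor; }
}

void scale_on(cuda::device_t device, float* device_buffer, std::size_t n)
{
    auto threads_per_block = 256u;
    auto num_blocks = (n + threads_per_block - 1) / threads_per_block;
    // make_launch_config is an assumed helper constructing a launch_configuration_t
    auto config = cuda::make_launch_config(num_blocks, threads_per_block);
    // Launches on the default stream of the device's primary context
    device.launch(scale, config, device_buffer, 2.0f, n);
}
```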

◆ memory()

context_t::global_memory_type cuda::device_t::memory ( ) const
inline
Note
The memory proxy regards the device's primary context.
Todo:
Consider a scoped/unscoped dichotomy.

◆ name()

::std::string cuda::device_t::name ( ) const
inline

Obtains this device's human-readable name, e.g.

"GeForce GTX 650 Ti BOOST".

◆ pci_id()

device::pci_location_t cuda::device_t::pci_id ( ) const
inline

Obtains this device's location on the PCI express bus in terms of domain, bus and device id, e.g.

(0, 1, 0)

◆ primary_context()

device::primary_context_t cuda::device_t::primary_context ( bool  hold_pc_refcount_unit = false) const
inline

Produce a proxy for the device's primary context - the one used by runtime API calls.

Parameters
hold_pc_refcount_unit  when false, the returned primary context proxy object will not perform its own reference accounting, and will assume the primary context remains active for as long as this device object exists; when true, the proxy will hold its own reference-count unit, and can outlive this device object.

◆ properties()

properties_t cuda::device_t::properties ( ) const
inline

Obtains the (mostly) non-numeric properties for this device.

Todo:
get rid of this in favor of individual properties only.

◆ reset()

void cuda::device_t::reset ( ) const
inline

Invalidates all memory allocations and resets all state regarding this CUDA device on the current operating system process.

Todo:
Determine whether this actually performs a hardware reset or not

◆ set_cache_preference()

void cuda::device_t::set_cache_preference ( multiprocessor_cache_preference_t  preference) const
inline

Controls the balance between L1 space and shared memory space for kernels executing on this device.

Parameters
preference  the preferred balance between L1 and shared memory
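
For example, favoring shared memory over L1 for kernels with heavy shared-memory use. The enumerator name below is an assumption and may differ by library version:

```cpp
auto device = cuda::device::get(0); // assumed accessor
device.set_cache_preference(
    cuda::multiprocessor_cache_preference_t::prefer_shared_memory_over_l1);
```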

◆ set_shared_memory_bank_size()

void cuda::device_t::set_shared_memory_bank_size ( device::shared_memory_bank_size_t  new_bank_size) const
inline

Sets the shared memory bank size, described in this Parallel-for-all blog entry

Parameters
new_bank_size  the shared memory bank size to set, in bytes

◆ shared_memory_bank_size()

device::shared_memory_bank_size_t cuda::device_t::shared_memory_bank_size ( ) const
inline

Returns the shared memory bank size, as described in this Parallel-for-all blog entry

Returns
the shared memory bank size in bytes

◆ stream_priority_range()

device::stream_priority_range_t cuda::device_t::stream_priority_range ( ) const
inline

Determines the range of possible priorities for streams on this device.

Returns
a priority range, whose semantics are a bit confusing; see priority_range_t . If the device does not support stream priorities, a 'trivial' range of priority values will be returned.
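
A sketch of using the range to create a maximally-prioritized stream. The range's field name and the cuda::stream::async constant are assumptions about the interface:

```cpp
auto device = cuda::device::get(0);
auto range = device.stream_priority_range();
// In CUDA, numerically lower priority values mean higher priority
auto stream = device.create_stream(
    cuda::stream::async, // do not synchronize with the default stream
    range.greatest);     // the highest priority this device supports
```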

◆ synchronize()

const device_t& cuda::device_t::synchronize ( ) const
inline

Waits for all previously-scheduled tasks on all streams (= queues) on this device to conclude.

Depending on the host_thread_sync_scheduling_policy_t set for this device, the thread calling this method will either yield, spin or block until all tasks previously scheduled on this device have concluded.
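
A sketch of device-wide synchronization after enqueuing asynchronous work (cuda::device::get and cuda::stream::async are assumed names):

```cpp
auto device = cuda::device::get(0);
auto stream = device.create_stream(cuda::stream::async);
// ... enqueue kernels and copies on `stream`, and possibly on other streams ...
device.synchronize(); // returns only when every stream on this device is idle
```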


The documentation for this class was generated from the following file: