#ifndef MULTI_WRAPPER_IMPLS_APRIORI_COMPILED_KERNEL_HPP_
#define MULTI_WRAPPER_IMPLS_APRIORI_COMPILED_KERNEL_HPP_

#include "../kernels/apriori_compiled.hpp"

#if ! CAW_CAN_GET_APRIORI_KERNEL_HANDLE
#if defined(__CUDACC__)

// Before CUDA 11.0, no driver handle exists for an apriori-compiled kernel, so these
// methods fall back on the CUDA Runtime API, using the kernel's function pointer (ptr_).

inline apriori_compiled::attributes_t apriori_compiled_t::attributes() const
{
	CAW_SET_SCOPE_CONTEXT(context_handle_);
	apriori_compiled::attributes_t function_attributes;
	auto status = cudaFuncGetAttributes(&function_attributes, ptr_);
	throw_if_error_lazy(status, "Failed obtaining attributes of a CUDA device function");
	return function_attributes;
}
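/* Usage sketch (illustrative, not part of this header; `my_kernel` is a placeholder,
 * and cuda::kernel::get() is assumed to be this file's getter as exposed by the
 * library's public namespace):
 *
 *   __global__ void my_kernel(int* data);
 *
 *   auto device = cuda::device::current::get();
 *   auto kernel = cuda::kernel::get(device, my_kernel);
 *   auto attrs  = kernel.attributes();
 *   // attrs carries the cudaFuncAttributes fields - e.g. attrs.maxThreadsPerBlock,
 *   // attrs.numRegs, attrs.sharedSizeBytes - which get_attribute() below relies on.
 */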
inline void apriori_compiled_t::set_cache_preference(multiprocessor_cache_preference_t preference) const
{
	CAW_SET_SCOPE_CONTEXT(context_handle_);
	auto result = cudaFuncSetCacheConfig(ptr_, (cudaFuncCache) preference);
	throw_if_error_lazy(result,
		"Setting the multiprocessor L1/Shared Memory cache distribution preference for a "
		"CUDA device function");
}
inline void apriori_compiled_t::set_shared_memory_bank_size(multiprocessor_shared_memory_bank_size_option_t config) const
{
	CAW_SET_SCOPE_CONTEXT(context_handle_);
	auto result = cudaFuncSetSharedMemConfig(ptr_, (cudaSharedMemConfig) config);
	throw_if_error_lazy(result, "Failed setting shared memory bank size to " + ::std::to_string(config));
}
inline void apriori_compiled_t::set_attribute(attribute_t attribute, attribute_value_t value) const
{
	CAW_SET_SCOPE_CONTEXT(context_handle_);
	// Translate the driver-style attribute identifier into its Runtime API counterpart;
	// only the two settable attributes are supported.
	cudaFuncAttribute runtime_attribute = [attribute]() {
		switch (attribute) {
			case CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES:
				return cudaFuncAttributeMaxDynamicSharedMemorySize;
			case CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT:
				return cudaFuncAttributePreferredSharedMemoryCarveout;
			default:
				throw cuda::runtime_error(cuda::status::not_supported,
					"Kernel attribute " + ::std::to_string(attribute) + " not supported (with CUDA version "
					+ ::std::to_string(CUDA_VERSION));
		}
	}();
	auto result = cudaFuncSetAttribute(ptr_, runtime_attribute, value);
	throw_if_error_lazy(result,
		"Setting CUDA device function attribute " + ::std::to_string(attribute)
		+ " to value " + ::std::to_string(value));
}
inline attribute_value_t apriori_compiled_t::get_attribute(attribute_t attribute) const
{
	apriori_compiled::attributes_t attrs = attributes();
	switch (attribute) {
		case CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK:
			return attrs.maxThreadsPerBlock;
		case CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES:
			return attrs.sharedSizeBytes;
		case CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES:
			return attrs.constSizeBytes;
		case CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES:
			return attrs.localSizeBytes;
		case CU_FUNC_ATTRIBUTE_NUM_REGS:
			return attrs.numRegs;
		case CU_FUNC_ATTRIBUTE_PTX_VERSION:
			return attrs.ptxVersion;
		case CU_FUNC_ATTRIBUTE_BINARY_VERSION:
			return attrs.binaryVersion;
		case CU_FUNC_ATTRIBUTE_CACHE_MODE_CA:
			return attrs.cacheModeCA;
		case CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES:
			return attrs.maxDynamicSharedSizeBytes;
		case CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT:
			return attrs.preferredShmemCarveout;
		default:
			throw cuda::runtime_error(cuda::status::not_supported,
				::std::string("Attribute ") +
#ifdef NDEBUG
				::std::to_string(static_cast<::std::underlying_type<attribute_t>::type>(attribute))
#else
				detail_::attribute_name(attribute)
#endif
				+ " cannot be obtained for apriori-compiled kernels before CUDA version 11.0");
	}
}

#endif // defined(__CUDACC__)
#endif // ! CAW_CAN_GET_APRIORI_KERNEL_HANDLE

namespace apriori_compiled {

namespace detail_ {
template<typename KernelFunctionPtr>
apriori_compiled_t get(
	device::id_t       device_id,
	context::handle_t  primary_context_handle,
	KernelFunctionPtr  function_ptr)
{
	static_assert(
		::std::is_pointer<KernelFunctionPtr>::value
		and ::std::is_function<typename ::std::remove_pointer<KernelFunctionPtr>::type>::value,
		"function must be a bona fide pointer to a kernel (__global__) function");

	auto ptr_ = reinterpret_cast<const void *>(function_ptr);
#if CAW_CAN_GET_APRIORI_KERNEL_HANDLE
	auto handle = detail_::get_handle(ptr_);
#else
	auto handle = nullptr; // no per-kernel handle is obtainable before CUDA 11.0
#endif
	return wrap(device_id, primary_context_handle, handle, ptr_, do_hold_primary_context_refcount_unit);
}

} // namespace detail_

} // namespace apriori_compiled
// User-facing getter: wrap an apriori-compiled kernel for a given device, holding a
// reference-count unit of that device's primary context.
template<typename KernelFunctionPtr>
apriori_compiled_t get(const device_t& device, KernelFunctionPtr function_ptr)
{
	auto primary_context_handle = device::primary_context::detail_::obtain_and_increase_refcount(device.id());
	return apriori_compiled::detail_::get(device.id(), primary_context_handle, function_ptr);
}
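/* Usage sketch (illustrative; assumes this getter is reachable as cuda::kernel::get,
 * per the library's public API, and that vector_add is a user-defined __global__ function):
 *
 *   __global__ void vector_add(float* c, const float* a, const float* b, size_t n);
 *
 *   auto device = cuda::device::current::get();
 *   auto kernel = cuda::kernel::get(device, vector_add);
 *   // `kernel` is an apriori_compiled_t, ready for attribute queries, the setters
 *   // above, or the library's launch facilities.
 */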
inline ::cuda::device::primary_context_t
get_implicit_primary_context(apriori_compiled_t kernel)
{
	const kernel_t& kernel_ = kernel;
	return get_implicit_primary_context(kernel_);
}

#endif // MULTI_WRAPPER_IMPLS_APRIORI_COMPILED_KERNEL_HPP_