7 #ifndef CUDA_API_WRAPPERS_FATBIN_BUILDER_HPP_ 8 #define CUDA_API_WRAPPERS_FATBIN_BUILDER_HPP_ 10 #if CUDA_VERSION >= 12040 12 #include "../api/detail/region.hpp" 21 class fatbin_builder_t;
24 namespace fatbin_builder {
26 inline fatbin_builder_t
wrap(
handle_t handle,
bool take_ownership =
false) noexcept;
28 inline fatbin_builder_t create(const options_t & options);
32 inline ::std::string identify(
handle_t handle)
34 return "Fatbin builder with handle " + cuda::detail_::ptr_as_hex(handle);
37 inline ::std::string identify(
const fatbin_builder_t&);
44 class fatbin_builder_t {
49 void operator()(
void * data) {
operator delete(data); }
59 bool is_owning() const noexcept
66 auto status = nvFatbinGet(handle_, target_region.data());
68 cuda::detail_::ptr_as_hex(target_region.data()));
72 size_type size()
const 75 auto status = nvFatbinSize(handle_, &result);
76 throw_if_error_lazy(status,
"Failed determining prospective fatbin size for " + fatbin_builder::detail_::identify(*
this));
82 auto required_size = size();
83 if (target_region.size() < required_size) {
84 throw ::std::invalid_argument(
"Provided region for fatbin creation is of size " 85 + ::std::to_string(target_region.size()) +
" bytes, while the fatbin requires " + ::std::to_string(required_size));
87 return build_without_size_check_in(target_region);
90 memory::unique_region<deleter_type> build()
const 93 auto ptr =
operator new(size_);
95 build_in(target_region);
96 return memory::unique_region<deleter_type>(target_region);
100 const char* identifier,
101 span<char> nul_terminated_ptx_source,
102 device::compute_capability_t target_compute_capability)
const 105 if (nul_terminated_ptx_source.empty()) {
106 throw ::std::invalid_argument(
"Empty PTX source code passed for addition into fatbin");
108 if (nul_terminated_ptx_source[nul_terminated_ptx_source.size() - 1] !=
'\0') {
109 throw ::std::invalid_argument(
"PTX source code passed for addition into fatbin was not nul-character-terminated");
112 auto compute_capability_str = ::std::to_string(target_compute_capability.as_combined_number());
113 auto empty_cmdline =
"";
114 auto status = nvFatbinAddPTX(handle_,
115 nul_terminated_ptx_source.data(),
116 nul_terminated_ptx_source.size(),
117 compute_capability_str.c_str(),
121 + ::std::string(identifier) +
" at " + detail_::ptr_as_hex(nul_terminated_ptx_source.data())
122 +
" to a fat binary for target compute capability " + compute_capability_str);
126 const char* identifier,
128 device::compute_capability_t target_compute_capability)
const 130 auto compute_capability_str = ::std::to_string(target_compute_capability.as_combined_number());
131 auto empty_cmdline =
"";
132 auto status = nvFatbinAddLTOIR(
133 handle_, lto_ir.data(), lto_ir.size(), compute_capability_str.c_str(), identifier, empty_cmdline);
135 + ::std::string(identifier) +
" at " + detail_::ptr_as_hex(lto_ir.data())
136 +
" to a fat binary for target compute capability " + compute_capability_str);
140 const char* identifier,
142 device::compute_capability_t target_compute_capability)
const 144 auto compute_capability_str = ::std::to_string(target_compute_capability.as_combined_number());
145 auto status = nvFatbinAddCubin(
146 handle_, cubin.data(), cubin.size(), compute_capability_str.c_str(), identifier);
148 + ::std::string(identifier) +
" at " + detail_::ptr_as_hex(cubin.data())
149 +
" to a fat binary for target compute capability " + compute_capability_str);
152 #if CUDA_VERSION >= 12050 162 auto status = nvFatbinAddReloc(handle_, ptx_code.data(), ptx_code.size());
163 throw_if_error_lazy(status,
"Failed adding relocatable PTX code at " + detail_::ptr_as_hex(ptx_code.data())
164 +
"to fatbin builder " + fatbin_builder::detail_::identify(*
this) );
170 auto status = nvFatbinAddIndex(handle_, index.data(), index.size(), identifier);
172 + detail_::ptr_as_hex(index.data()) +
" to a fat binary");
174 #endif // CUDA_VERSION >= 12050 181 bool take_ownership) noexcept
182 : handle_(handle), owning(take_ownership)
191 fatbin_builder_t(
const fatbin_builder_t &) =
delete;
193 fatbin_builder_t(fatbin_builder_t &&other) noexcept:
194 fatbin_builder_t(other.handle_, other.owning)
196 other.owning =
false;
199 ~fatbin_builder_t() noexcept(false)
202 auto status = nvFatbinDestroy(&handle_);
204 ::std::string(
"Failed destroying fatbin builder ") + detail_::ptr_as_hex(handle_) +
205 " in " + fatbin_builder::detail_::identify(handle_));
211 fatbin_builder_t &operator=(
const fatbin_builder_t &) =
delete;
213 fatbin_builder_t &operator=(fatbin_builder_t &&other) noexcept
215 ::std::swap(handle_, other.handle_);
216 ::std::swap(owning, owning);
227 namespace fatbin_builder {
230 inline fatbin_builder_t create(
const options_t & options)
233 auto marshalled_options = marshalling::marshal(options);
234 auto option_ptrs = marshalled_options.option_ptrs();
235 auto status = nvFatbinCreate(&new_handle, option_ptrs.data(), option_ptrs.size());
237 auto do_take_ownership =
true;
238 return wrap(new_handle, do_take_ownership);
241 inline fatbin_builder_t
wrap(
handle_t handle,
bool take_ownership) noexcept
243 return fatbin_builder_t{handle, take_ownership};
248 inline ::std::string identify(
const fatbin_builder_t& builder)
250 return identify(builder.handle());
260 #endif // CUDA_VERSION >= 12040 262 #endif // CUDA_API_WRAPPERS_FATBIN_BUILDER_HPP_ Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81
Contains fatbin_builder::options_t class and related definitions.
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unless an error has actually occurred.
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264