7 #ifndef CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_ 8 #define CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_ 35 template <source_kind_t Kind>
46 class primary_context_t;
55 template <source_kind_t Kind>
69 template <source_kind_t Kind>
70 inline ::std::string identify(
const char *name)
72 return ::std::string{detail_::kind_name(Kind)} +
" program" +
73 ((name ==
nullptr) ?
"" :
" '" + ::std::string{name} +
"'");
76 template <source_kind_t Kind>
79 return identify<Kind>(name) +
" at " + cuda::detail_::ptr_as_hex(handle);
82 template <source_kind_t Kind>
89 auto status = nvrtcGetProgramLogSize(program_handle, &size);
90 throw_if_error<cuda_cpp>(status,
"Failed obtaining compilation log size for " 91 + identify<cuda_cpp>(program_handle, program_name));
92 return (size > 0) ? size - 1 : 0;
95 #if CUDA_VERSION >= 11010 100 auto status = nvPTXCompilerGetErrorLogSize(program_handle, &size);
101 throw_if_error<ptx>(status,
"Failed obtaining compilation log size for " 102 + identify<ptx>(program_handle, program_name));
105 #endif // CUDA_VERSION >= 11010 107 template <source_kind_t Kind>
110 #if CUDA_VERSION >= 11010 114 auto status = nvPTXCompilerGetErrorLog(program_handle, buffer);
116 throw_if_error<ptx>(status,
"Failed obtaining compilation log for " 117 + identify<ptx>(program_handle, program_name));
124 auto status = nvrtcGetProgramLog(program_handle, buffer);
125 throw_if_error<cuda_cpp>(status,
"Failed obtaining compilation log for " 126 + identify<cuda_cpp>(program_handle, program_name));
129 #if CUDA_VERSION >= 11010 130 template <source_kind_t Kind>
134 inline size_t get_cubin_size_or_zero<ptx>(
program::handle_t<ptx> program_handle,
const char* program_name)
137 auto status = nvPTXCompilerGetCompiledProgramSize(program_handle, &size);
138 throw_if_error<ptx>(status,
"Failed obtaining program output CUBIN size for " 139 + identify<ptx>(program_handle, program_name));
147 auto status = nvrtcGetCUBINSize(program_handle, &size);
148 throw_if_error<cuda_cpp>(status,
"Failed obtaining program output CUBIN size for " 149 + identify<cuda_cpp>(program_handle, program_name));
153 template <source_kind_t Kind,
bool FailOnMissingCubin = true>
156 auto size = get_cubin_size_or_zero<Kind>(program_handle, program_name);
157 const bool have_failed = (FailOnMissingCubin and size == 0);
160 ::std::runtime_error(
"Output CUBIN requested for a compilation for a virtual architecture only of " 161 + identify<Kind>(program_handle, program_name)):
162 ::std::runtime_error(
"Empty output CUBIN for compilation of " 163 + identify<Kind>(program_handle, program_name));
168 template <source_kind_t Kind>
169 inline void get_cubin(
char* buffer,
program::handle_t<Kind> program_handle,
const char *program_name =
nullptr);
174 auto status = nvPTXCompilerGetCompiledProgram(program_handle, buffer);
175 throw_if_error<ptx>(status,
"Failed obtaining compilation output CUBIN for " 176 + identify<ptx>(program_handle, program_name));
182 auto status = nvrtcGetCUBIN(program_handle, buffer);
183 throw_if_error<cuda_cpp>(status,
"Failed obtaining compilation output CUBIN for " 184 + identify<cuda_cpp>(program_handle, program_name));
186 #endif // CUDA_VERSION >= 11010 191 auto status = nvrtcGetPTXSize(program_handle, &size);
192 throw_if_error<cuda_cpp>(status,
"Failed obtaining compilation output PTX size for compilation of " 193 + identify<cuda_cpp>(program_handle, program_name));
199 auto status = nvrtcGetPTX(program_handle, buffer);
201 + identify<cuda_cpp>(program_handle, program_name));
204 #if CUDA_VERSION >= 11040 209 #if CUDA_VERSION >= 12000 210 auto status = nvrtcGetLTOIRSize(program_handle, &size);
212 auto status = nvrtcGetNVVMSize(program_handle, &size);
215 + identify<cuda_cpp>(program_handle, program_name));
221 #if CUDA_VERSION >= 12000 222 auto status = nvrtcGetLTOIR(program_handle, buffer);
224 auto status = nvrtcGetNVVM(program_handle, buffer);
227 + identify<cuda_cpp>(program_handle, program_name));
229 #endif // CUDA_VERSION >= 11040 231 template <source_kind_t Kind>
234 #if CUDA_VERSION >= 11010 237 return nvPTXCompilerDestroy(&handle);
242 return nvrtcDestroyProgram(&handle);
250 namespace compilation_output {
254 template <source_kind_t Kind>
257 template <source_kind_t Kind>
260 ::std::string program_name,
278 template <source_kind_t Kind>
291 bool failed()
const {
return not succeeded_; }
294 operator bool()
const {
return succeeded_; }
295 const ::std::string& program_name()
const {
return program_name_; }
296 handle_type program_handle()
const {
return program_handle_; }
310 span<char>
log(span<char> buffer)
const 312 size_t size = program::detail_::get_log_size<source_kind>(program_handle_, program_name_.c_str());
313 if (buffer.size() < size) {
314 throw ::std::invalid_argument(
315 "Provided buffer size is insufficient for the program compilation log (" 316 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 317 + compilation_output::detail_::identify(*
this));
319 program::detail_::get_log(buffer.data(), program_handle_, program_name_.c_str());
320 return { buffer.data(), size };
330 unique_span<char>
log()
const 332 size_t size = program::detail_::get_log_size<source_kind>(program_handle_, program_name_.c_str());
333 auto result = make_unique_span<char>(size+1);
338 program::detail_::get_log<source_kind>(result.data(), program_handle_, program_name_.c_str());
343 #if CUDA_VERSION >= 11010 355 virtual span<char> cubin(span<char> buffer)
const = 0;
366 virtual unique_span<char> cubin()
const = 0;
370 virtual bool has_cubin()
const = 0;
375 : program_handle_(handle), program_name_(::std::move(name)), succeeded_(succeeded), owns_handle_(owning) { }
379 program_handle_(other.program_handle_),
380 program_name_(::std::move(other.program_name_)),
381 succeeded_(other.succeeded_),
382 owns_handle_(other.owns_handle_)
384 other.owns_handle_ =
false;
390 auto status = program::detail_::destroy_and_return_status<Kind>(program_handle_);
391 throw_if_error<Kind>(status,
"Destroying " + program::detail_::identify<Kind>(program_handle_, program_name_.c_str()));
402 ::std::string program_name_;
413 using parent::parent;
416 handle_type program_handle,
417 ::std::string program_name,
443 span<char>
ptx(span<char> buffer)
const 445 size_t size = program::detail_::get_ptx_size(parent::program_handle_, program_name_.c_str());
446 if (buffer.size() < size) {
447 throw ::std::invalid_argument(
"Provided buffer size is insufficient for the compiled program's PTX (" 448 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 449 + compilation_output::detail_::identify(*
this));
451 program::detail_::get_ptx(buffer.data(), program_handle_, program_name_.c_str());
452 return { buffer.data(), size };
462 unique_span<char>
ptx()
const 464 size_t size = program::detail_::get_ptx_size(program_handle_, program_name_.c_str());
465 auto result = make_unique_span<char>(size+1);
470 program::detail_::get_ptx(result.data(), program_handle_, program_name_.c_str());
480 status_type status = nvrtcGetPTXSize(program_handle_, &size);
481 if (status == NVRTC_ERROR_INVALID_PROGRAM) {
return false; }
482 throw_if_rtc_error_lazy(source_kind, status,
"Failed determining whether compilation resulted in PTX code for " 483 + compilation_output::detail_::identify<source_kind>(*
this));
485 throw ::std::logic_error(
"PTX size reported as 0 by " 486 + compilation_output::detail_::identify<source_kind>(*
this));
491 #if CUDA_VERSION >= 11010 492 span<char> cubin(span<char> buffer)
const override 494 size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
495 if (buffer.size() < size) {
496 throw ::std::invalid_argument(
"Provided buffer size is insufficient for the compiled program's cubin (" 497 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 498 + compilation_output::detail_::identify(*
this));
500 program::detail_::get_cubin<source_kind>(buffer.data(), program_handle_, program_name_.c_str());
501 return { buffer.data(), size };
504 unique_span<char> cubin()
const override 506 size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
507 auto result = make_unique_span<char>(size);
508 if (size == 0) {
return result; }
509 program::detail_::get_cubin<source_kind>(result.data(), program_handle_, program_name_.c_str());
513 bool has_cubin()
const override 516 auto status = nvrtcGetCUBINSize(program_handle_, &size);
517 if (status == NVRTC_ERROR_INVALID_PROGRAM) {
return false; }
519 + compilation_output::detail_::identify(*
this));
524 #if CUDA_VERSION >= 11040 539 span<char> lto_ir(span<char> buffer)
const 541 size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str());
542 if (buffer.size() < size) {
543 throw ::std::invalid_argument(
"Provided buffer size is insufficient for the compiled program's LTO IR (" 544 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 545 + compilation_output::detail_::identify(*
this));
547 program::detail_::get_lto_ir(buffer.data(), program_handle_, program_name_.c_str());
548 return { buffer.data(), size };
560 unique_span<char> lto_ir()
const 562 size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str());
563 auto result = make_unique_span<char>(size+1);
568 program::detail_::get_lto_ir(result.data(), program_handle_, program_name_.c_str());
575 bool has_lto_ir()
const 578 #if CUDA_VERSION >= 12000 579 auto status = nvrtcGetLTOIRSize(program_handle_, &size);
581 auto status = nvrtcGetNVVMSize(program_handle_, &size);
583 if (status == NVRTC_ERROR_INVALID_PROGRAM) {
return false; }
585 + compilation_output::detail_::identify(*
this));
587 throw ::std::logic_error(
"LTO IR size reported as 0 by NVRTC for program: " 588 + compilation_output::detail_::identify(*
this));
606 auto status = nvrtcGetLoweredName(program_handle_, unmangled_name, &result);
607 throw_if_error<source_kind>(status, ::std::string(
"Failed obtaining the mangled form of name \"")
608 + unmangled_name +
"\" in dynamically-compiled program \"" + program_name_ +
'\"');
615 return get_mangling_of(unmangled_name.c_str());
619 #if CUDA_VERSION >= 11010 625 using parent::parent;
628 handle_type program_handle,
629 ::std::string program_name,
634 span<char> cubin(span<char> buffer)
const override 636 size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
637 if (buffer.size() < size) {
638 throw ::std::invalid_argument(
"Provided buffer size is insufficient for the compiled program's cubin (" 639 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 640 + compilation_output::detail_::identify<source_kind>(*this));
642 program::detail_::get_cubin<source_kind>(buffer.data(), program_handle_, program_name_.c_str());
643 return { buffer.data(), size };
646 unique_span<char> cubin()
const override 648 size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
649 auto result = make_unique_span<char>(size+1);
654 program::detail_::get_cubin<source_kind>(result.data(), program_handle_, program_name_.c_str());
659 bool has_cubin()
const override 662 auto status = nvPTXCompilerGetCompiledProgramSize(program_handle_, &size);
663 if (status == NVPTXCOMPILE_ERROR_INVALID_INPUT) {
return false; }
664 throw_if_error<source_kind>(status,
"Failed determining whether the program has a compiled CUBIN result: " 665 + compilation_output::detail_::identify(*
this));
670 #endif // CUDA_VERSION >= 11010 672 namespace compilation_output {
676 template <source_kind_t Kind>
679 return "Compilation output of " + program::detail_::identify<Kind>(
680 compilation_output.program_handle(),
681 compilation_output.program_name().c_str());
684 template <source_kind_t Kind>
687 ::std::string program_name,
702 template<>
inline module_t create<cuda_cpp>(
707 if (not compilation_output.succeeded()) {
708 throw ::std::invalid_argument(
"Attempt to create a module after compilation failure of " 709 + cuda::rtc::program::detail_::identify<cuda_cpp>(compilation_output.program_handle()));
711 #if CUDA_VERSION >= 11010 712 auto program_handle = compilation_output.program_handle();
713 auto program_name = compilation_output.program_name().c_str();
714 static const bool dont_fail_on_missing_cubin {
false };
715 auto cubin_size = rtc::program::detail_::get_cubin_size<cuda_cpp, dont_fail_on_missing_cubin>(program_handle, program_name);
717 bool has_cubin = (cubin_size > 0);
719 auto cubin = make_unique_span<char>(cubin_size);
720 rtc::program::detail_::get_cubin<cuda_cpp>(cubin.data(), program_handle, program_name);
721 return module::create(context, cubin.get(), options);
726 auto ptx = compilation_output.ptx();
727 return module::create(context, ptx.get(), options);
730 #if CUDA_VERSION >= 11010 731 template<>
inline module_t create<source_kind_t::ptx>(
736 if (not compilation_output.succeeded()) {
737 throw ::std::invalid_argument(
"Attempt to create a module after compilation failure of " 738 + cuda::rtc::program::detail_::identify<source_kind_t::ptx>(compilation_output.program_handle()));
740 auto cubin = compilation_output.cubin();
741 return module::create(context, cubin.get(), options);
743 #endif // CUDA_VERSION >= 11010 748 template <source_kind_t Kind>
761 #endif // CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_ A convenience class for holding, setting and inspecting options for a CUDA binary code linking proces...
Definition: link_options.hpp:130
bool failed() const
Definition: compilation_output.hpp:291
The CUDA variant of C++, accepted by the NVRTC library.
Definition: types.hpp:41
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
typename cuda::rtc::detail_::types< Kind >::handle_type handle_t
Raw program handle used by the NVIDIA run-time compilation libraries' API calls: // The NVRTC librar...
Definition: types.hpp:124
source_kind_t
The API wrappers support different kinds of source code, accepted by different NVIDIA run-time compil...
Definition: types.hpp:39
typename detail_::types< Kind >::status_type status_t
Status values returned by the NVIDIA run-time compilation libraries' API calls: The NVRTC library fo...
Definition: types.hpp:131
Definitions and utility functions relating to run-time compilation (RTC) of CUDA code using the NVRTC...
Wrapper class for a CUDA code module.
Definition: module.hpp:123
bool has_ptx() const
Definition: compilation_output.hpp:477
The result of the compilation of an {rtc::program_t}, whether successful or failed, with any related byproducts.
Definition: compilation_output.hpp:279
Type definitions used in the CUDA run-time compilation wrappers.
span< char > log(span< char > buffer) const
Write a copy of the program compilation log into a user-provided buffer.
Definition: compilation_output.hpp:310
The output produced by a compilation process by one of the CUDA libraries, including any byproducts...
Definition: compilation_output.hpp:36
Output of CUDA C++ code JIT-compilation.
Definition: compilation_output.hpp:410
device::primary_context_t primary_context(bool hold_pc_refcount_unit=false) const
Produce a proxy for the device's primary context - the one used by runtime API calls.
Definition: device.hpp:152
bool succeeded() const
Definition: compilation_output.hpp:288
const char * get_mangling_of(const ::std::string &unmangled_name) const
Obtain the mangled/lowered form of an expression registered earlier, after the compilation.
Definition: compilation_output.hpp:613
unique_span< char > log() const
Obtain a copy of the compilation log.
Definition: compilation_output.hpp:330
NVIDIA's architecture-independent intermediate program representation language, known as PTX or Parall...
Definition: types.hpp:44
const char * get_mangling_of(const char *unmangled_name) const
Obtain the mangled/lowered form of an expression registered earlier, after the compilation.
Definition: compilation_output.hpp:603
#define throw_if_rtc_error_lazy(Kind, status__,...)
Throws a cuda::rtc::runtime_error exception if the status is not success.
Definition: error.hpp:201
unique_span< char > ptx() const
Obtain a copy of the PTX resulting from the program compilation.
Definition: compilation_output.hpp:462
Wrapper class for a CUDA device.
Definition: device.hpp:135
span< char > ptx(span< char > buffer) const
Obtain a (nul-terminated) copy of the PTX result of the last compilation.
Definition: compilation_output.hpp:443
Facilities for exception-based handling of errors originating from the NVRTC library, including a basic exception class wrapping ::std::runtime_error.