7 #ifndef CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_ 8 #define CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_ 35 template <source_kind_t Kind>
46 class primary_context_t;
55 template <source_kind_t Kind>
69 template <source_kind_t Kind>
70 inline ::std::string identify(
const char *name)
72 return ::std::string{detail_::kind_name(Kind)} +
" program" +
73 ((name ==
nullptr) ?
"" :
" '" + ::std::string{name} +
"'");
76 template <source_kind_t Kind>
79 return identify<Kind>(name) +
" at " + cuda::detail_::ptr_as_hex(handle);
82 template <source_kind_t Kind>
89 auto status = nvrtcGetProgramLogSize(program_handle, &size);
90 throw_if_error<cuda_cpp>(status,
"Failed obtaining compilation log size for " 91 + identify<cuda_cpp>(program_handle, program_name));
92 return (size > 0) ? size - 1 : 0;
95 #if CUDA_VERSION >= 11010 100 auto status = nvPTXCompilerGetErrorLogSize(program_handle, &size);
101 throw_if_error<ptx>(status,
"Failed obtaining compilation log size for " 102 + identify<ptx>(program_handle, program_name));
105 #endif // CUDA_VERSION >= 11010 107 template <source_kind_t Kind>
110 #if CUDA_VERSION >= 11010 114 auto status = nvPTXCompilerGetErrorLog(program_handle, buffer);
116 throw_if_error<ptx>(status,
"Failed obtaining compilation log for " 117 + identify<ptx>(program_handle, program_name));
124 auto status = nvrtcGetProgramLog(program_handle, buffer);
125 throw_if_error<cuda_cpp>(status,
"Failed obtaining compilation log for " 126 + identify<cuda_cpp>(program_handle, program_name));
129 #if CUDA_VERSION >= 11010 130 template <source_kind_t Kind>
134 inline size_t get_cubin_size_or_zero<ptx>(
program::handle_t<ptx> program_handle,
const char* program_name)
137 auto status = nvPTXCompilerGetCompiledProgramSize(program_handle, &size);
138 throw_if_error<ptx>(status,
"Failed obtaining program output CUBIN size for " 139 + identify<ptx>(program_handle, program_name));
147 auto status = nvrtcGetCUBINSize(program_handle, &size);
148 throw_if_error<cuda_cpp>(status,
"Failed obtaining program output CUBIN size for " 149 + identify<cuda_cpp>(program_handle, program_name));
153 template <source_kind_t Kind,
bool FailOnMissingCubin = true>
156 auto size = get_cubin_size_or_zero<Kind>(program_handle, program_name);
157 const bool have_failed = (FailOnMissingCubin and size == 0);
160 ::std::runtime_error(
"Output CUBIN requested for a compilation for a virtual architecture only of " 161 + identify<Kind>(program_handle, program_name)):
162 ::std::runtime_error(
"Empty output CUBIN for compilation of " 163 + identify<Kind>(program_handle, program_name));
/**
 * Declaration of the kind-generic getter of a compilation's output CUBIN.
 *
 * NOTE(review): no generic definition is visible here; the definitions are
 * presumably provided per source kind - confirm.
 *
 * @tparam Kind the kind of source code which was compiled
 * @param buffer destination for the CUBIN data; presumably must already be
 *     large enough to hold it - TODO confirm against the callers
 * @param program_handle raw handle of the compiled program
 * @param program_name optional name of the program; defaults to nullptr
 */
168 template <source_kind_t Kind>
169 inline void get_cubin(
char* buffer,
program::handle_t<Kind> program_handle,
const char *program_name =
nullptr);
174 auto status = nvPTXCompilerGetCompiledProgram(program_handle, buffer);
175 throw_if_error<ptx>(status,
"Failed obtaining compilation output CUBIN for " 176 + identify<ptx>(program_handle, program_name));
182 auto status = nvrtcGetCUBIN(program_handle, buffer);
183 throw_if_error<cuda_cpp>(status,
"Failed obtaining compilation output CUBIN for " 184 + identify<cuda_cpp>(program_handle, program_name));
186 #endif // CUDA_VERSION >= 11010 191 auto status = nvrtcGetPTXSize(program_handle, &size);
192 throw_if_error<cuda_cpp>(status,
"Failed obtaining compilation output PTX size for compilation of " 193 + identify<cuda_cpp>(program_handle, program_name));
199 auto status = nvrtcGetPTX(program_handle, buffer);
201 + identify<cuda_cpp>(program_handle, program_name));
204 #if CUDA_VERSION >= 11040 209 #if CUDA_VERSION >= 12000 210 auto status = nvrtcGetLTOIRSize(program_handle, &size);
212 auto status = nvrtcGetNVVMSize(program_handle, &size);
215 + identify<cuda_cpp>(program_handle, program_name));
221 #if CUDA_VERSION >= 12000 222 auto status = nvrtcGetLTOIR(program_handle, buffer);
224 auto status = nvrtcGetNVVM(program_handle, buffer);
227 + identify<cuda_cpp>(program_handle, program_name));
229 #endif // CUDA_VERSION >= 11040 231 template <source_kind_t Kind>
234 #if CUDA_VERSION >= 11010 237 return nvPTXCompilerDestroy(&handle);
242 return nvrtcDestroyProgram(&handle);
250 namespace compilation_output {
254 template <source_kind_t Kind>
257 template <source_kind_t Kind>
260 ::std::string program_name,
278 template <source_kind_t Kind>
291 bool failed()
const {
return not succeeded_; }
294 operator bool()
const {
return succeeded_; }
295 const ::std::string& program_name()
const {
return program_name_; }
296 handle_type program_handle()
const {
return program_handle_; }
310 span<char>
log(span<char> buffer)
const 312 size_t size = program::detail_::get_log_size<source_kind>(program_handle_, program_name_.c_str());
313 if (buffer.size() < size) {
314 throw ::std::invalid_argument(
315 "Provided buffer size is insufficient for the program compilation log (" 316 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 317 + compilation_output::detail_::identify(*
this));
319 program::detail_::get_log(buffer.data(), program_handle_, program_name_.c_str());
320 return { buffer.data(), size };
330 unique_span<char>
log()
const 332 size_t size = program::detail_::get_log_size<source_kind>(program_handle_, program_name_.c_str());
333 auto result = make_unique_span<char>(size+1);
338 program::detail_::get_log<source_kind>(result.data(), program_handle_, program_name_.c_str());
343 #if CUDA_VERSION >= 11010 355 virtual span<char> cubin(span<char> buffer)
const = 0;
/// Pure virtual: each concrete compilation-output class supplies its own way
/// of producing an owning buffer with the compilation's CUBIN.
366 virtual unique_span<char> cubin()
const = 0;
/// Pure virtual: each concrete compilation-output class supplies its own
/// check; presumably reports whether a CUBIN is available - TODO confirm.
370 virtual bool has_cubin()
const = 0;
375 : program_handle_(handle), program_name_(::std::move(name)), succeeded_(succeeded), owns_handle_(owning) { }
379 program_handle_(other.program_handle_),
380 program_name_(::std::move(other.program_name_)),
381 succeeded_(other.succeeded_),
382 owns_handle_(other.owns_handle_)
384 other.owns_handle_ =
false;
389 if (not owns_handle_) {
return; }
390 auto status = program::detail_::destroy_and_return_status<Kind>(program_handle_);
391 #ifndef THROW_IN_DESTRUCTORS 392 throw_if_error<Kind>(status,
"Destroying " + program::detail_::identify<Kind>(program_handle_, program_name_.c_str()));
403 ::std::string program_name_;
414 using parent::parent;
417 handle_type program_handle,
418 ::std::string program_name,
444 span<char>
ptx(span<char> buffer)
const 446 size_t size = program::detail_::get_ptx_size(parent::program_handle_, program_name_.c_str());
447 if (buffer.size() < size) {
448 throw ::std::invalid_argument(
"Provided buffer size is insufficient for the compiled program's PTX (" 449 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 450 + compilation_output::detail_::identify(*
this));
452 program::detail_::get_ptx(buffer.data(), program_handle_, program_name_.c_str());
453 return { buffer.data(), size };
463 unique_span<char>
ptx()
const 465 size_t size = program::detail_::get_ptx_size(program_handle_, program_name_.c_str());
466 auto result = make_unique_span<char>(size+1);
471 program::detail_::get_ptx(result.data(), program_handle_, program_name_.c_str());
481 status_type status = nvrtcGetPTXSize(program_handle_, &size);
482 if (status == NVRTC_ERROR_INVALID_PROGRAM) {
return false; }
483 throw_if_rtc_error_lazy(source_kind, status,
"Failed determining whether compilation resulted in PTX code for " 484 + compilation_output::detail_::identify<source_kind>(*
this));
486 throw ::std::logic_error(
"PTX size reported as 0 by " 487 + compilation_output::detail_::identify<source_kind>(*
this));
492 #if CUDA_VERSION >= 11010 493 span<char> cubin(span<char> buffer)
const override 495 size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
496 if (buffer.size() < size) {
497 throw ::std::invalid_argument(
"Provided buffer size is insufficient for the compiled program's cubin (" 498 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 499 + compilation_output::detail_::identify(*
this));
501 program::detail_::get_cubin<source_kind>(buffer.data(), program_handle_, program_name_.c_str());
502 return { buffer.data(), size };
505 unique_span<char> cubin()
const override 507 size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
508 auto result = make_unique_span<char>(size);
509 if (size == 0) {
return result; }
510 program::detail_::get_cubin<source_kind>(result.data(), program_handle_, program_name_.c_str());
514 bool has_cubin()
const override 517 auto status = nvrtcGetCUBINSize(program_handle_, &size);
518 if (status == NVRTC_ERROR_INVALID_PROGRAM) {
return false; }
520 + compilation_output::detail_::identify(*
this));
525 #if CUDA_VERSION >= 11040 540 span<char> lto_ir(span<char> buffer)
const 542 size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str());
543 if (buffer.size() < size) {
544 throw ::std::invalid_argument(
"Provided buffer size is insufficient for the compiled program's LTO IR (" 545 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 546 + compilation_output::detail_::identify(*
this));
548 program::detail_::get_lto_ir(buffer.data(), program_handle_, program_name_.c_str());
549 return { buffer.data(), size };
561 unique_span<char> lto_ir()
const 563 size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str());
564 auto result = make_unique_span<char>(size+1);
569 program::detail_::get_lto_ir(result.data(), program_handle_, program_name_.c_str());
576 bool has_lto_ir()
const 579 #if CUDA_VERSION >= 12000 580 auto status = nvrtcGetLTOIRSize(program_handle_, &size);
582 auto status = nvrtcGetNVVMSize(program_handle_, &size);
584 if (status == NVRTC_ERROR_INVALID_PROGRAM) {
return false; }
586 + compilation_output::detail_::identify(*
this));
588 throw ::std::logic_error(
"LTO IR size reported as 0 by NVRTC for program: " 589 + compilation_output::detail_::identify(*
this));
607 auto status = nvrtcGetLoweredName(program_handle_, unmangled_name, &result);
608 throw_if_error<source_kind>(status, ::std::string(
"Failed obtaining the mangled form of name \"")
609 + unmangled_name +
"\" in dynamically-compiled program \"" + program_name_ +
'\"');
616 return get_mangling_of(unmangled_name.c_str());
620 #if CUDA_VERSION >= 11010 626 using parent::parent;
629 handle_type program_handle,
630 ::std::string program_name,
635 span<char> cubin(span<char> buffer)
const override 637 size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
638 if (buffer.size() < size) {
639 throw ::std::invalid_argument(
"Provided buffer size is insufficient for the compiled program's cubin (" 640 + ::std::to_string(buffer.size()) +
" < " + ::std::to_string(size) +
": " 641 + compilation_output::detail_::identify<source_kind>(*this));
643 program::detail_::get_cubin<source_kind>(buffer.data(), program_handle_, program_name_.c_str());
644 return { buffer.data(), size };
647 unique_span<char> cubin()
const override 649 size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
650 auto result = make_unique_span<char>(size+1);
655 program::detail_::get_cubin<source_kind>(result.data(), program_handle_, program_name_.c_str());
660 bool has_cubin()
const override 663 auto status = nvPTXCompilerGetCompiledProgramSize(program_handle_, &size);
664 if (status == NVPTXCOMPILE_ERROR_INVALID_INPUT) {
return false; }
665 throw_if_error<source_kind>(status,
"Failed determining whether the program has a compiled CUBIN result: " 666 + compilation_output::detail_::identify(*
this));
671 #endif // CUDA_VERSION >= 11010 673 namespace compilation_output {
677 template <source_kind_t Kind>
680 return "Compilation output of " + program::detail_::identify<Kind>(
681 compilation_output.program_handle(),
682 compilation_output.program_name().c_str());
685 template <source_kind_t Kind>
688 ::std::string program_name,
703 template<>
inline module_t create<cuda_cpp>(
708 if (not compilation_output.succeeded()) {
709 throw ::std::invalid_argument(
"Attempt to create a module after compilation failure of " 710 + cuda::rtc::program::detail_::identify<cuda_cpp>(compilation_output.program_handle()));
712 #if CUDA_VERSION >= 11010 713 auto program_handle = compilation_output.program_handle();
714 auto program_name = compilation_output.program_name().c_str();
715 static const bool dont_fail_on_missing_cubin {
false };
716 auto cubin_size = rtc::program::detail_::get_cubin_size<cuda_cpp, dont_fail_on_missing_cubin>(program_handle, program_name);
718 bool has_cubin = (cubin_size > 0);
720 auto cubin = make_unique_span<char>(cubin_size);
721 rtc::program::detail_::get_cubin<cuda_cpp>(cubin.data(), program_handle, program_name);
722 return module::create(context, cubin.get(), options);
727 auto ptx = compilation_output.ptx();
728 return module::create(context, ptx.get(), options);
731 #if CUDA_VERSION >= 11010 732 template<>
inline module_t create<source_kind_t::ptx>(
737 if (not compilation_output.succeeded()) {
738 throw ::std::invalid_argument(
"Attempt to create a module after compilation failure of " 739 + cuda::rtc::program::detail_::identify<source_kind_t::ptx>(compilation_output.program_handle()));
741 auto cubin = compilation_output.cubin();
742 return module::create(context, cubin.get(), options);
744 #endif // CUDA_VERSION >= 11010 749 template <source_kind_t Kind>
762 #endif // CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_ A convenience class for holding, setting and inspecting options for a CUDA binary code linking proces...
Definition: link_options.hpp:130
bool failed() const
Definition: compilation_output.hpp:291
The CUDA variant of C++, accepted by the NVRTC library.
Definition: types.hpp:41
Wrapper class for a CUDA context.
Definition: context.hpp:249
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
typename cuda::rtc::detail_::types< Kind >::handle_type handle_t
Raw program handle used by the NVIDIA run-time compilation libraries' API calls: // The NVRTC librar...
Definition: types.hpp:124
source_kind_t
The API wrappers support different kinds of source code, accepted by different NVIDIA run-time compil...
Definition: types.hpp:39
typename detail_::types< Kind >::status_type status_t
Status values returned by the NVIDIA run-time compilation libraries' API calls: The NVRTC library fo...
Definition: types.hpp:131
Definitions and utility functions relating to run-time compilation (RTC) of CUDA code using the NVRTC...
Wrapper class for a CUDA code module.
Definition: module.hpp:126
bool has_ptx() const
Definition: compilation_output.hpp:478
The result of the compilation of an rtc::program_t, whether successful or failed, with any related byproducts.
Definition: compilation_output.hpp:279
Type definitions used in the wrappers for CUDA run-time compilation.
span< char > log(span< char > buffer) const
Write a copy of the program compilation log into a user-provided buffer.
Definition: compilation_output.hpp:310
The output produced by a compilation process by one of the CUDA libraries, including any byproducts...
Definition: compilation_output.hpp:36
Output of CUDA C++ code JIT-compilation.
Definition: compilation_output.hpp:411
device::primary_context_t primary_context(bool hold_pc_refcount_unit=false) const
Produce a proxy for the device's primary context - the one used by runtime API calls.
Definition: device.hpp:152
bool succeeded() const
Definition: compilation_output.hpp:288
const char * get_mangling_of(const ::std::string &unmangled_name) const
Obtain the mangled/lowered form of an expression registered earlier, after the compilation.
Definition: compilation_output.hpp:614
unique_span< char > log() const
Obtain a copy of the compilation log.
Definition: compilation_output.hpp:330
NVIDIA's architecture-independent intermediate program representation language, known as PTX or Parall...
Definition: types.hpp:44
const char * get_mangling_of(const char *unmangled_name) const
Obtain the mangled/lowered form of an expression registered earlier, after the compilation.
Definition: compilation_output.hpp:604
#define throw_if_rtc_error_lazy(Kind, status__,...)
Throws a cuda::rtc::runtime_error exception if the status is not success.
Definition: error.hpp:201
unique_span< char > ptx() const
Obtain a copy of the PTX resulting from the program compilation.
Definition: compilation_output.hpp:463
Wrapper class for a CUDA device.
Definition: device.hpp:135
span< char > ptx(span< char > buffer) const
Obtain a (nul-terminated) copy of the PTX result of the last compilation.
Definition: compilation_output.hpp:444
Facilities for exception-based handling of errors originating in the NVRTC library, including a basic exception class wrapping ::std::runtime_error.