cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
compilation_output.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_
8 #define CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_
9 
10 #include "compilation_options.hpp"
11 #include "error.hpp"
12 #include "types.hpp"
13 #include "../api.hpp"
14 
15 #include <vector>
16 #include <iostream>
17 
18 namespace cuda {
19 
21 class device_t;
22 class context_t;
24 
25 namespace rtc {
26 
35 template <source_kind_t Kind>
37 
38 } // namespace rtc
39 
41 namespace link {
42 struct options_t;
43 } // namespace link
44 
45 namespace device {
46 class primary_context_t;
47 } // namespace device
48 
49 class module_t;
51 
52 namespace module {
53 
55 template <source_kind_t Kind>
56 inline module_t create(
57  const context_t& context,
58  const rtc::compilation_output_t<Kind>& compilation_output,
59  const link::options_t& options = {});
60 
61 } // namespace module
62 
63 namespace rtc {
64 
65 namespace program {
66 
67 namespace detail_ {
68 
69 template <source_kind_t Kind>
70 inline ::std::string identify(const char *name)
71 {
72  return ::std::string{detail_::kind_name(Kind)} + " program" +
73  ((name == nullptr) ? "" : " '" + ::std::string{name} + "'");
74 }
75 
76 template <source_kind_t Kind>
77 inline ::std::string identify(program::handle_t<Kind> handle, const char *name = nullptr)
78 {
79  return identify<Kind>(name) + " at " + cuda::detail_::ptr_as_hex(handle);
80 }
81 
82 template <source_kind_t Kind>
83 inline size_t get_log_size(program::handle_t<Kind> program_handle, const char* program_name);
84 
85 template <>
86 inline size_t get_log_size<cuda_cpp>(program::handle_t<cuda_cpp> program_handle, const char* program_name)
87 {
88  size_t size;
89  auto status = nvrtcGetProgramLogSize(program_handle, &size);
90  throw_if_error<cuda_cpp>(status, "Failed obtaining compilation log size for "
91  + identify<cuda_cpp>(program_handle, program_name));
92  return (size > 0) ? size - 1 : 0;
93 }
94 
95 #if CUDA_VERSION >= 11010
96 template <>
97 inline size_t get_log_size<ptx>(program::handle_t<ptx> program_handle, const char* program_name)
98 {
99  size_t size;
100  auto status = nvPTXCompilerGetErrorLogSize(program_handle, &size);
101  throw_if_error<ptx>(status, "Failed obtaining compilation log size for "
102  + identify<ptx>(program_handle, program_name));
103  return size;
104 }
105 #endif // CUDA_VERSION >= 11010
106 
107 template <source_kind_t Kind>
108 inline void get_log(char* buffer, program::handle_t<Kind> program_handle, const char *program_name = nullptr);
109 
110 #if CUDA_VERSION >= 11010
111 template <>
112 inline void get_log<ptx>(char* buffer, program::handle_t<ptx> program_handle, const char *program_name)
113 {
114  auto status = nvPTXCompilerGetErrorLog(program_handle, buffer);
115 // (status_t<Kind>) nvrtcGetProgramLog((handle_t<cuda_cpp>)program_handle, buffer);
116  throw_if_error<ptx>(status, "Failed obtaining compilation log for "
117  + identify<ptx>(program_handle, program_name));
118 }
119 #endif
120 
121 template <>
122 inline void get_log<cuda_cpp>(char* buffer, program::handle_t<cuda_cpp> program_handle, const char *program_name)
123 {
124  auto status = nvrtcGetProgramLog(program_handle, buffer);
125  throw_if_error<cuda_cpp>(status, "Failed obtaining compilation log for "
126  + identify<cuda_cpp>(program_handle, program_name));
127 }
128 
129 #if CUDA_VERSION >= 11010
130 template <source_kind_t Kind>
131 inline size_t get_cubin_size_or_zero(program::handle_t<Kind> program_handle, const char* program_name);
132 
133 template <>
134 inline size_t get_cubin_size_or_zero<ptx>(program::handle_t<ptx> program_handle, const char* program_name)
135 {
136  size_t size;
137  auto status = nvPTXCompilerGetCompiledProgramSize(program_handle, &size);
138  throw_if_error<ptx>(status, "Failed obtaining program output CUBIN size for "
139  + identify<ptx>(program_handle, program_name));
140  return size;
141 }
142 
143 template <>
144 inline size_t get_cubin_size_or_zero<cuda_cpp>(program::handle_t<cuda_cpp> program_handle, const char* program_name)
145 {
146  size_t size;
147  auto status = nvrtcGetCUBINSize(program_handle, &size);
148  throw_if_error<cuda_cpp>(status, "Failed obtaining program output CUBIN size for "
149  + identify<cuda_cpp>(program_handle, program_name));
150  return size;
151 }
152 
153 template <source_kind_t Kind, bool FailOnMissingCubin = true>
154 inline size_t get_cubin_size(program::handle_t<Kind> program_handle, const char* program_name)
155 {
156  auto size = get_cubin_size_or_zero<Kind>(program_handle, program_name);
157  const bool have_failed = (FailOnMissingCubin and size == 0);
158  if (have_failed) {
159  throw (Kind == cuda_cpp) ?
160  ::std::runtime_error("Output CUBIN requested for a compilation for a virtual architecture only of "
161  + identify<Kind>(program_handle, program_name)):
162  ::std::runtime_error("Empty output CUBIN for compilation of "
163  + identify<Kind>(program_handle, program_name));
164  }
165  return size;
166 }
167 
168 template <source_kind_t Kind>
169 inline void get_cubin(char* buffer, program::handle_t<Kind> program_handle, const char *program_name = nullptr);
170 
171 template <>
172 inline void get_cubin<ptx>(char* buffer, program::handle_t<ptx> program_handle, const char *program_name)
173 {
174  auto status = nvPTXCompilerGetCompiledProgram(program_handle, buffer);
175  throw_if_error<ptx>(status, "Failed obtaining compilation output CUBIN for "
176  + identify<ptx>(program_handle, program_name));
177 }
178 
179 template <>
180 inline void get_cubin<cuda_cpp>(char* buffer, program::handle_t<cuda_cpp> program_handle, const char *program_name)
181 {
182  auto status = nvrtcGetCUBIN(program_handle, buffer);
183  throw_if_error<cuda_cpp>(status, "Failed obtaining compilation output CUBIN for "
184  + identify<cuda_cpp>(program_handle, program_name));
185 }
186 #endif // CUDA_VERSION >= 11010
187 
188 inline size_t get_ptx_size(program::handle_t<cuda_cpp> program_handle, const char *program_name = nullptr)
189 {
190  size_t size;
191  auto status = nvrtcGetPTXSize(program_handle, &size);
192  throw_if_error<cuda_cpp>(status, "Failed obtaining compilation output PTX size for compilation of "
193  + identify<cuda_cpp>(program_handle, program_name));
194  return size;
195 }
196 
197 inline void get_ptx(char* buffer, program::handle_t<cuda_cpp> program_handle, const char *program_name = nullptr)
198 {
199  auto status = nvrtcGetPTX(program_handle, buffer);
200  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining compilation output PTX for compilation of "
201  + identify<cuda_cpp>(program_handle, program_name));
202 }
203 
204 #if CUDA_VERSION >= 11040
205 
206 inline size_t get_lto_ir_size(program::handle_t<cuda_cpp> program_handle, const char *program_name = nullptr)
207 {
208  size_t size;
209 #if CUDA_VERSION >= 12000
210  auto status = nvrtcGetLTOIRSize(program_handle, &size);
211 #else
212  auto status = nvrtcGetNVVMSize(program_handle, &size);
213 #endif
214  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining output LTO IR size for compilation of "
215  + identify<cuda_cpp>(program_handle, program_name));
216  return size;
217 }
218 
219 inline void get_lto_ir(char* buffer, program::handle_t<cuda_cpp> program_handle, const char *program_name = nullptr)
220 {
221 #if CUDA_VERSION >= 12000
222  auto status = nvrtcGetLTOIR(program_handle, buffer);
223 #else
224  auto status = nvrtcGetNVVM(program_handle, buffer);
225 #endif
226  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining output LTO IR code for compilation of "
227  + identify<cuda_cpp>(program_handle, program_name));
228 }
229 #endif // CUDA_VERSION >= 11040
230 
231 template <source_kind_t Kind>
232 status_t<Kind> destroy_and_return_status(handle_t<Kind> handle);
233 
234 #if CUDA_VERSION >= 11010
235 template <> inline status_t<ptx> destroy_and_return_status<ptx>(handle_t<ptx> handle)
236 {
237  return nvPTXCompilerDestroy(&handle);
238 }
239 #endif
240 template <> inline status_t<cuda_cpp> destroy_and_return_status<cuda_cpp>(handle_t<cuda_cpp> handle)
241 {
242  return nvrtcDestroyProgram(&handle);
243 }
244 
245 } // namespace detail_
246 
247 } // namespace program
248 
250 namespace compilation_output {
251 
252 namespace detail_ {
253 
254 template <source_kind_t Kind>
255 ::std::string identify(const compilation_output_t<Kind> &compilation_output);
256 
257 template <source_kind_t Kind>
258 inline compilation_output_t<Kind> wrap(
259  program::handle_t<Kind> program_handle,
260  ::std::string program_name,
261  bool succeeded,
262  bool own_handle);
263 
264 } // namespace detail_
265 
266 } // namespace compilation_output
267 
278 template <source_kind_t Kind>
280 public: // types and constants
281  constexpr static const source_kind_t source_kind { Kind };
282  using handle_type = program::handle_t<source_kind>;
283  using status_type = status_t<source_kind>;
284 
285 public: // getters
286 
288  bool succeeded() const { return succeeded_; }
289 
291  bool failed() const { return not succeeded_; }
292 
294  operator bool() const { return succeeded_; }
295  const ::std::string& program_name() const { return program_name_; }
296  handle_type program_handle() const { return program_handle_; }
297 
298 public: // non-mutators
299 
310  span<char> log(span<char> buffer) const
311  {
312  size_t size = program::detail_::get_log_size<source_kind>(program_handle_, program_name_.c_str());
313  if (buffer.size() < size) {
314  throw ::std::invalid_argument(
315  "Provided buffer size is insufficient for the program compilation log ("
316  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
317  + compilation_output::detail_::identify(*this));
318  }
319  program::detail_::get_log(buffer.data(), program_handle_, program_name_.c_str());
320  return { buffer.data(), size };
321  }
322 
330  unique_span<char> log() const
331  {
332  size_t size = program::detail_::get_log_size<source_kind>(program_handle_, program_name_.c_str());
333  auto result = make_unique_span<char>(size+1); // Let's append a trailing nul character, to be on the safe side
334  if (size == 0) {
335  result[size] = '\0';
336  return result;
337  }
338  program::detail_::get_log<source_kind>(result.data(), program_handle_, program_name_.c_str());
339  result[size] = '\0';
340  return result;
341  }
342 
343 #if CUDA_VERSION >= 11010
344 
355  virtual span<char> cubin(span<char> buffer) const = 0;
356 
366  virtual unique_span<char> cubin() const = 0;
367 
370  virtual bool has_cubin() const = 0;
371 #endif
372 
373 protected: // constructors
374  compilation_output_base_t(handle_type handle, ::std::string name, bool succeeded, bool owning = false)
375  : program_handle_(handle), program_name_(::std::move(name)), succeeded_(succeeded), owns_handle_(owning) { }
376 
377 public: // constructors & destructor
379  program_handle_(other.program_handle_),
380  program_name_(::std::move(other.program_name_)),
381  succeeded_(other.succeeded_),
382  owns_handle_(other.owns_handle_)
383  {
384  other.owns_handle_ = false;
385  };
386 
387  ~compilation_output_base_t() noexcept(false)
388  {
389  if (not owns_handle_) { return; }
390  auto status = program::detail_::destroy_and_return_status<Kind>(program_handle_);
391 #ifndef THROW_IN_DESTRUCTORS
392  throw_if_error<Kind>(status, "Destroying " + program::detail_::identify<Kind>(program_handle_, program_name_.c_str()));
393 #endif
394  }
395 
396 public: // operators
397 
398  compilation_output_base_t& operator=(const compilation_output_base_t& other) = delete;
399  compilation_output_base_t& operator=(compilation_output_base_t&& other) = delete;
400 
401 protected: // data members
402  program::handle_t<Kind> program_handle_;
403  ::std::string program_name_;
404  bool succeeded_;
405  bool owns_handle_;
406 
407 };
408 
410 template <>
412 public:
414  using parent::parent;
415 
416  friend compilation_output_t compilation_output::detail_::wrap<source_kind>(
417  handle_type program_handle,
418  ::std::string program_name,
419  bool succeeded,
420  bool own_handle);
421 
422 public: // non-mutators
430 
432 
444  span<char> ptx(span<char> buffer) const
445  {
446  size_t size = program::detail_::get_ptx_size(parent::program_handle_, program_name_.c_str());
447  if (buffer.size() < size) {
448  throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's PTX ("
449  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
450  + compilation_output::detail_::identify(*this));
451  }
452  program::detail_::get_ptx(buffer.data(), program_handle_, program_name_.c_str());
453  return { buffer.data(), size };
454  }
455 
463  unique_span<char> ptx() const
464  {
465  size_t size = program::detail_::get_ptx_size(program_handle_, program_name_.c_str());
466  auto result = make_unique_span<char>(size+1); // Let's append a trailing nul character, to be on the safe side
467  if (size == 0) {
468  result[size] = '\0';
469  return result;
470  }
471  program::detail_::get_ptx(result.data(), program_handle_, program_name_.c_str());
472  result[size] = '\0';
473  return result;
474  }
475 
478  bool has_ptx() const
479  {
480  size_t size;
481  status_type status = nvrtcGetPTXSize(program_handle_, &size);
482  if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; }
483  throw_if_rtc_error_lazy(source_kind, status, "Failed determining whether compilation resulted in PTX code for "
484  + compilation_output::detail_::identify<source_kind>(*this));
485  if (size == 0) {
486  throw ::std::logic_error("PTX size reported as 0 by "
487  + compilation_output::detail_::identify<source_kind>(*this));
488  }
489  return true;
490  }
491 
492 #if CUDA_VERSION >= 11010
493  span<char> cubin(span<char> buffer) const override
494  {
495  size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
496  if (buffer.size() < size) {
497  throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's cubin ("
498  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
499  + compilation_output::detail_::identify(*this));
500  }
501  program::detail_::get_cubin<source_kind>(buffer.data(), program_handle_, program_name_.c_str());
502  return { buffer.data(), size };
503  }
504 
505  unique_span<char> cubin() const override
506  {
507  size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
508  auto result = make_unique_span<char>(size);
509  if (size == 0) { return result; }
510  program::detail_::get_cubin<source_kind>(result.data(), program_handle_, program_name_.c_str());
511  return result;
512  }
513 
514  bool has_cubin() const override
515  {
516  size_t size;
517  auto status = nvrtcGetCUBINSize(program_handle_, &size);
518  if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; }
519  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed determining whether the program has a compiled CUBIN result: "
520  + compilation_output::detail_::identify(*this));
521  return (size > 0);
522  }
523 #endif
524 
525 #if CUDA_VERSION >= 11040
526 
540  span<char> lto_ir(span<char> buffer) const
541  {
542  size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str());
543  if (buffer.size() < size) {
544  throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's LTO IR ("
545  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
546  + compilation_output::detail_::identify(*this));
547  }
548  program::detail_::get_lto_ir(buffer.data(), program_handle_, program_name_.c_str());
549  return { buffer.data(), size };
550  }
551 
561  unique_span<char> lto_ir() const
562  {
563  size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str());
564  auto result = make_unique_span<char>(size+1); // Let's append a trailing nul character, to be on the safe side
565  if (size == 0) {
566  result[size] = '\0';
567  return result;
568  }
569  program::detail_::get_lto_ir(result.data(), program_handle_, program_name_.c_str());
570  result[size] = '\0';
571  return result;
572  }
573 
576  bool has_lto_ir() const
577  {
578  size_t size;
579 #if CUDA_VERSION >= 12000
580  auto status = nvrtcGetLTOIRSize(program_handle_, &size);
581 #else
582  auto status = nvrtcGetNVVMSize(program_handle_, &size);
583 #endif
584  if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; }
585  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed determining whether the NVRTC program has a compiled LTO IR result: "
586  + compilation_output::detail_::identify(*this));
587  if (size == 0) {
588  throw ::std::logic_error("LTO IR size reported as 0 by NVRTC for program: "
589  + compilation_output::detail_::identify(*this));
590  }
591  return true;
592  }
593 #endif
594 
604  const char* get_mangling_of(const char* unmangled_name) const
605  {
606  const char* result;
607  auto status = nvrtcGetLoweredName(program_handle_, unmangled_name, &result);
608  throw_if_error<source_kind>(status, ::std::string("Failed obtaining the mangled form of name \"")
609  + unmangled_name + "\" in dynamically-compiled program \"" + program_name_ + '\"');
610  return result;
611  }
612 
614  const char* get_mangling_of(const ::std::string& unmangled_name) const
615  {
616  return get_mangling_of(unmangled_name.c_str());
617  }
618 }; // class compilation_output_t<cuda_cpp>
619 
620 #if CUDA_VERSION >= 11010
621 
622 template <>
623 class compilation_output_t<ptx> : public compilation_output_base_t<ptx> {
624 public:
626  using parent::parent;
627 
628  friend compilation_output_t compilation_output::detail_::wrap<source_kind>(
629  handle_type program_handle,
630  ::std::string program_name,
631  bool succeeded,
632  bool own_handle);
633 
634 public: // non-mutators
635  span<char> cubin(span<char> buffer) const override
636  {
637  size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
638  if (buffer.size() < size) {
639  throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's cubin ("
640  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
641  + compilation_output::detail_::identify<source_kind>(*this));
642  }
643  program::detail_::get_cubin<source_kind>(buffer.data(), program_handle_, program_name_.c_str());
644  return { buffer.data(), size };
645  }
646 
647  unique_span<char> cubin() const override
648  {
649  size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
650  auto result = make_unique_span<char>(size+1); // Let's append a trailing nul character, to be on the safe side
651  if (size == 0) {
652  result[size] = '\0';
653  return result;
654  }
655  program::detail_::get_cubin<source_kind>(result.data(), program_handle_, program_name_.c_str());
656  result[size] = '\0';
657  return result;
658  }
659 
660  bool has_cubin() const override
661  {
662  size_t size;
663  auto status = nvPTXCompilerGetCompiledProgramSize(program_handle_, &size);
664  if (status == NVPTXCOMPILE_ERROR_INVALID_INPUT) { return false; }
665  throw_if_error<source_kind>(status, "Failed determining whether the program has a compiled CUBIN result: "
666  + compilation_output::detail_::identify(*this));
667  return (size > 0);
668  }
669 }; // class compilation_output_t<ptx>
670 
671 #endif // CUDA_VERSION >= 11010
672 
673 namespace compilation_output {
674 
675 namespace detail_ {
676 
677 template <source_kind_t Kind>
678 inline ::std::string identify(const compilation_output_t<Kind> &compilation_output)
679 {
680  return "Compilation output of " + program::detail_::identify<Kind>(
681  compilation_output.program_handle(),
682  compilation_output.program_name().c_str());
683 }
684 
685 template <source_kind_t Kind>
686 inline compilation_output_t<Kind> wrap(
687  program::handle_t<Kind> program_handle,
688  ::std::string program_name,
689  bool succeeded,
690  bool own_handle)
691 {
692  return compilation_output_t<Kind>{program_handle, ::std::move(program_name), succeeded, own_handle};
693 }
694 
695 } // namespace detail_
696 
697 } // namespace compilation_output
698 
699 } // namespace rtc
700 
701 namespace module {
702 
703 template<> inline module_t create<cuda_cpp>(
704  const context_t& context,
705  const rtc::compilation_output_t<cuda_cpp>& compilation_output,
706  const link::options_t& options)
707 {
708  if (not compilation_output.succeeded()) {
709  throw ::std::invalid_argument("Attempt to create a module after compilation failure of "
710  + cuda::rtc::program::detail_::identify<cuda_cpp>(compilation_output.program_handle()));
711  }
712 #if CUDA_VERSION >= 11010
713  auto program_handle = compilation_output.program_handle();
714  auto program_name = compilation_output.program_name().c_str();
715  static const bool dont_fail_on_missing_cubin { false };
716  auto cubin_size = rtc::program::detail_::get_cubin_size<cuda_cpp, dont_fail_on_missing_cubin>(program_handle, program_name);
717  // Note: The above won't fail even if no CUBIN was produced
718  bool has_cubin = (cubin_size > 0);
719  if (has_cubin) {
720  auto cubin = make_unique_span<char>(cubin_size);
721  rtc::program::detail_::get_cubin<cuda_cpp>(cubin.data(), program_handle, program_name);
722  return module::create(context, cubin.get(), options);
723  }
724  // Note: At this point, we must have PTX in the output, as otherwise the compilation could
725  // not have succeeded
726 #endif
727  auto ptx = compilation_output.ptx();
728  return module::create(context, ptx.get(), options);
729 }
730 
731 #if CUDA_VERSION >= 11010
732 template<> inline module_t create<source_kind_t::ptx>(
733  const context_t& context,
734  const rtc::compilation_output_t<source_kind_t::ptx>& compilation_output,
735  const link::options_t& options)
736 {
737  if (not compilation_output.succeeded()) {
738  throw ::std::invalid_argument("Attempt to create a module after compilation failure of "
739  + cuda::rtc::program::detail_::identify<source_kind_t::ptx>(compilation_output.program_handle()));
740  }
741  auto cubin = compilation_output.cubin();
742  return module::create(context, cubin.get(), options);
743 }
744 #endif // CUDA_VERSION >= 11010
745 
746 
749 template <source_kind_t Kind>
750 inline module_t create(
751  device_t& device,
752  const rtc::compilation_output_t<Kind>& compilation_output,
753  const link::options_t& options = {})
754 {
755  return create(device.primary_context(), compilation_output, options);
756 }
757 
758 } // namespace module
759 
760 } // namespace cuda
761 
762 #endif // CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_
bool failed() const
Definition: compilation_output.hpp:291
The CUDA variant of C++, accepted by the NVRTC library.
Definition: types.hpp:41
Wrapper class for a CUDA context.
Definition: context.hpp:249
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
typename cuda::rtc::detail_::types< Kind >::handle_type handle_t
Raw program handle used by the NVIDIA run-time compilation libraries' API calls: // The NVRTC librar...
Definition: types.hpp:124
source_kind_t
The API wrappers support different kinds of source code, accepted by different NVIDIA run-time compil...
Definition: types.hpp:39
typename detail_::types< Kind >::status_type status_t
Status values returned by the NVIDIA run-time compilation libraries' API calls: The NVRTC library fo...
Definition: types.hpp:131
Definitions and utility functions relating to run-time compilation (RTC) of CUDA code using the NVRTC...
Wrapper class for a CUDA code module.
Definition: module.hpp:126
bool has_ptx() const
Definition: compilation_output.hpp:478
The result of the compilation of an {rtc::program_t}, whether successful or failed, with any related byproducts.
Definition: compilation_output.hpp:279
Type definitions used in CUDA real-time compilation work wrappers.
span< char > log(span< char > buffer) const
Write a copy of the program compilation log into a user-provided buffer.
Definition: compilation_output.hpp:310
The output produced by a compilation process by one of the CUDA libraries, including any byproducts...
Definition: compilation_output.hpp:36
Output of CUDA C++ code JIT-compilation.
Definition: compilation_output.hpp:411
device::primary_context_t primary_context(bool hold_pc_refcount_unit=false) const
Produce a proxy for the device's primary context - the one used by runtime API calls.
Definition: device.hpp:152
bool succeeded() const
Definition: compilation_output.hpp:288
const char * get_mangling_of(const ::std::string &unmangled_name) const
Obtain the mangled/lowered form of an expression registered earlier, after the compilation.
Definition: compilation_output.hpp:614
unique_span< char > log() const
Obtain a copy of the compilation log.
Definition: compilation_output.hpp:330
NVIDIA&#39;s architecture-inspecific intermediate program representation language, known as PTX or Parall...
Definition: types.hpp:44
const char * get_mangling_of(const char *unmangled_name) const
Obtain the mangled/lowered form of an expression registered earlier, after the compilation.
Definition: compilation_output.hpp:604
#define throw_if_rtc_error_lazy(Kind, status__,...)
Throws a cuda::rtc::runtime_error exception if the status is not success.
Definition: error.hpp:201
unique_span< char > ptx() const
Obtain a copy of the PTX resulting from the program compilation.
Definition: compilation_output.hpp:463
Wrapper class for a CUDA device.
Definition: device.hpp:135
span< char > ptx(span< char > buffer) const
Obtain a (nul-terminated) copy of the PTX result of the last compilation.
Definition: compilation_output.hpp:444
Facilities for exception-based handling of errors originating from the NVRTC library, including a basic exception class wrapping ::std::runtime_error.