cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
compilation_output.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_
8 #define CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_
9 
#include "compilation_options.hpp"
#include "error.hpp"
#include "types.hpp"
#include "../api.hpp"

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
17 
18 namespace cuda {
19 
21 class device_t;
22 class context_t;
24 
25 namespace rtc {
26 
35 template <source_kind_t Kind>
37 
38 } // namespace rtc
39 
41 namespace link {
42 struct options_t;
43 } // namespace link
44 
45 namespace device {
46 class primary_context_t;
47 } // namespace device
48 
49 class module_t;
51 
52 namespace module {
53 
55 template <source_kind_t Kind>
56 inline module_t create(
57  const context_t& context,
58  const rtc::compilation_output_t<Kind>& compilation_output,
59  const link::options_t& options = {});
60 
61 } // namespace module
62 
63 namespace rtc {
64 
65 namespace program {
66 
67 namespace detail_ {
68 
69 template <source_kind_t Kind>
70 inline ::std::string identify(const char *name)
71 {
72  return ::std::string{detail_::kind_name(Kind)} + " program" +
73  ((name == nullptr) ? "" : " '" + ::std::string{name} + "'");
74 }
75 
76 template <source_kind_t Kind>
77 inline ::std::string identify(program::handle_t<Kind> handle, const char *name = nullptr)
78 {
79  return identify<Kind>(name) + " at " + cuda::detail_::ptr_as_hex(handle);
80 }
81 
82 template <source_kind_t Kind>
83 inline size_t get_log_size(program::handle_t<Kind> program_handle, const char* program_name);
84 
85 template <>
86 inline size_t get_log_size<cuda_cpp>(program::handle_t<cuda_cpp> program_handle, const char* program_name)
87 {
88  size_t size;
89  auto status = nvrtcGetProgramLogSize(program_handle, &size);
90  throw_if_error<cuda_cpp>(status, "Failed obtaining compilation log size for "
91  + identify<cuda_cpp>(program_handle, program_name));
92  return (size > 0) ? size - 1 : 0;
93 }
94 
95 #if CUDA_VERSION >= 11010
96 template <>
97 inline size_t get_log_size<ptx>(program::handle_t<ptx> program_handle, const char* program_name)
98 {
99  size_t size;
100  auto status = nvPTXCompilerGetErrorLogSize(program_handle, &size);
101  throw_if_error<ptx>(status, "Failed obtaining compilation log size for "
102  + identify<ptx>(program_handle, program_name));
103  return size;
104 }
105 #endif // CUDA_VERSION >= 11010
106 
107 template <source_kind_t Kind>
108 inline void get_log(char* buffer, program::handle_t<Kind> program_handle, const char *program_name = nullptr);
109 
110 #if CUDA_VERSION >= 11010
111 template <>
112 inline void get_log<ptx>(char* buffer, program::handle_t<ptx> program_handle, const char *program_name)
113 {
114  auto status = nvPTXCompilerGetErrorLog(program_handle, buffer);
115 // (status_t<Kind>) nvrtcGetProgramLog((handle_t<cuda_cpp>)program_handle, buffer);
116  throw_if_error<ptx>(status, "Failed obtaining compilation log for "
117  + identify<ptx>(program_handle, program_name));
118 }
119 #endif
120 
121 template <>
122 inline void get_log<cuda_cpp>(char* buffer, program::handle_t<cuda_cpp> program_handle, const char *program_name)
123 {
124  auto status = nvrtcGetProgramLog(program_handle, buffer);
125  throw_if_error<cuda_cpp>(status, "Failed obtaining compilation log for "
126  + identify<cuda_cpp>(program_handle, program_name));
127 }
128 
129 #if CUDA_VERSION >= 11010
130 template <source_kind_t Kind>
131 inline size_t get_cubin_size_or_zero(program::handle_t<Kind> program_handle, const char* program_name);
132 
133 template <>
134 inline size_t get_cubin_size_or_zero<ptx>(program::handle_t<ptx> program_handle, const char* program_name)
135 {
136  size_t size;
137  auto status = nvPTXCompilerGetCompiledProgramSize(program_handle, &size);
138  throw_if_error<ptx>(status, "Failed obtaining program output CUBIN size for "
139  + identify<ptx>(program_handle, program_name));
140  return size;
141 }
142 
143 template <>
144 inline size_t get_cubin_size_or_zero<cuda_cpp>(program::handle_t<cuda_cpp> program_handle, const char* program_name)
145 {
146  size_t size;
147  auto status = nvrtcGetCUBINSize(program_handle, &size);
148  throw_if_error<cuda_cpp>(status, "Failed obtaining program output CUBIN size for "
149  + identify<cuda_cpp>(program_handle, program_name));
150  return size;
151 }
152 
153 template <source_kind_t Kind, bool FailOnMissingCubin = true>
154 inline size_t get_cubin_size(program::handle_t<Kind> program_handle, const char* program_name)
155 {
156  auto size = get_cubin_size_or_zero<Kind>(program_handle, program_name);
157  const bool have_failed = (FailOnMissingCubin and size == 0);
158  if (have_failed) {
159  throw (Kind == cuda_cpp) ?
160  ::std::runtime_error("Output CUBIN requested for a compilation for a virtual architecture only of "
161  + identify<Kind>(program_handle, program_name)):
162  ::std::runtime_error("Empty output CUBIN for compilation of "
163  + identify<Kind>(program_handle, program_name));
164  }
165  return size;
166 }
167 
168 template <source_kind_t Kind>
169 inline void get_cubin(char* buffer, program::handle_t<Kind> program_handle, const char *program_name = nullptr);
170 
171 template <>
172 inline void get_cubin<ptx>(char* buffer, program::handle_t<ptx> program_handle, const char *program_name)
173 {
174  auto status = nvPTXCompilerGetCompiledProgram(program_handle, buffer);
175  throw_if_error<ptx>(status, "Failed obtaining compilation output CUBIN for "
176  + identify<ptx>(program_handle, program_name));
177 }
178 
179 template <>
180 inline void get_cubin<cuda_cpp>(char* buffer, program::handle_t<cuda_cpp> program_handle, const char *program_name)
181 {
182  auto status = nvrtcGetCUBIN(program_handle, buffer);
183  throw_if_error<cuda_cpp>(status, "Failed obtaining compilation output CUBIN for "
184  + identify<cuda_cpp>(program_handle, program_name));
185 }
186 #endif // CUDA_VERSION >= 11010
187 
188 inline size_t get_ptx_size(program::handle_t<cuda_cpp> program_handle, const char *program_name = nullptr)
189 {
190  size_t size;
191  auto status = nvrtcGetPTXSize(program_handle, &size);
192  throw_if_error<cuda_cpp>(status, "Failed obtaining compilation output PTX size for compilation of "
193  + identify<cuda_cpp>(program_handle, program_name));
194  return size;
195 }
196 
197 inline void get_ptx(char* buffer, program::handle_t<cuda_cpp> program_handle, const char *program_name = nullptr)
198 {
199  auto status = nvrtcGetPTX(program_handle, buffer);
200  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining compilation output PTX for compilation of "
201  + identify<cuda_cpp>(program_handle, program_name));
202 }
203 
204 #if CUDA_VERSION >= 11040
205 
206 inline size_t get_lto_ir_size(program::handle_t<cuda_cpp> program_handle, const char *program_name = nullptr)
207 {
208  size_t size;
209 #if CUDA_VERSION >= 12000
210  auto status = nvrtcGetLTOIRSize(program_handle, &size);
211 #else
212  auto status = nvrtcGetNVVMSize(program_handle, &size);
213 #endif
214  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining output LTO IR size for compilation of "
215  + identify<cuda_cpp>(program_handle, program_name));
216  return size;
217 }
218 
219 inline void get_lto_ir(char* buffer, program::handle_t<cuda_cpp> program_handle, const char *program_name = nullptr)
220 {
221 #if CUDA_VERSION >= 12000
222  auto status = nvrtcGetLTOIR(program_handle, buffer);
223 #else
224  auto status = nvrtcGetNVVM(program_handle, buffer);
225 #endif
226  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining output LTO IR code for compilation of "
227  + identify<cuda_cpp>(program_handle, program_name));
228 }
229 #endif // CUDA_VERSION >= 11040
230 
231 template <source_kind_t Kind>
232 status_t<Kind> destroy_and_return_status(handle_t<Kind> handle);
233 
234 #if CUDA_VERSION >= 11010
235 template <> inline status_t<ptx> destroy_and_return_status<ptx>(handle_t<ptx> handle)
236 {
237  return nvPTXCompilerDestroy(&handle);
238 }
239 #endif
240 template <> inline status_t<cuda_cpp> destroy_and_return_status<cuda_cpp>(handle_t<cuda_cpp> handle)
241 {
242  return nvrtcDestroyProgram(&handle);
243 }
244 
245 } // namespace detail_
246 
247 } // namespace program
248 
250 namespace compilation_output {
251 
252 namespace detail_ {
253 
254 template <source_kind_t Kind>
255 ::std::string identify(const compilation_output_t<Kind> &compilation_output);
256 
257 template <source_kind_t Kind>
258 inline compilation_output_t<Kind> wrap(
259  program::handle_t<Kind> program_handle,
260  ::std::string program_name,
261  bool succeeded,
262  bool own_handle);
263 
264 } // namespace detail_
265 
266 } // namespace compilation_output
267 
278 template <source_kind_t Kind>
280 public: // types and constants
281  constexpr static const source_kind_t source_kind { Kind };
282  using handle_type = program::handle_t<source_kind>;
283  using status_type = status_t<source_kind>;
284 
285 public: // getters
286 
288  bool succeeded() const { return succeeded_; }
289 
291  bool failed() const { return not succeeded_; }
292 
294  operator bool() const { return succeeded_; }
295  const ::std::string& program_name() const { return program_name_; }
296  handle_type program_handle() const { return program_handle_; }
297 
298 public: // non-mutators
299 
310  span<char> log(span<char> buffer) const
311  {
312  size_t size = program::detail_::get_log_size<source_kind>(program_handle_, program_name_.c_str());
313  if (buffer.size() < size) {
314  throw ::std::invalid_argument(
315  "Provided buffer size is insufficient for the program compilation log ("
316  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
317  + compilation_output::detail_::identify(*this));
318  }
319  program::detail_::get_log(buffer.data(), program_handle_, program_name_.c_str());
320  return { buffer.data(), size };
321  }
322 
330  unique_span<char> log() const
331  {
332  size_t size = program::detail_::get_log_size<source_kind>(program_handle_, program_name_.c_str());
333  auto result = make_unique_span<char>(size+1); // Let's append a trailing nul character, to be on the safe side
334  if (size == 0) {
335  result[size] = '\0';
336  return result;
337  }
338  program::detail_::get_log<source_kind>(result.data(), program_handle_, program_name_.c_str());
339  result[size] = '\0';
340  return result;
341  }
342 
343 #if CUDA_VERSION >= 11010
344 
/// Copy the compilation's output CUBIN into a caller-provided buffer and
/// return the part of it holding the CUBIN. Implemented separately by each
/// source-kind-specific compilation output class.
355  virtual span<char> cubin(span<char> buffer) const = 0;
356 
/// Obtain the compilation's output CUBIN as a unique_span<char>.
/// Implemented separately by each source-kind-specific compilation
/// output class.
366  virtual unique_span<char> cubin() const = 0;
367 
/// Check whether the compilation has a CUBIN among its outputs.
/// Implemented separately by each source-kind-specific compilation
/// output class.
370  virtual bool has_cubin() const = 0;
371 #endif
372 
373 protected: // constructors
374  compilation_output_base_t(handle_type handle, ::std::string name, bool succeeded, bool owning = false)
375  : program_handle_(handle), program_name_(::std::move(name)), succeeded_(succeeded), owns_handle_(owning) { }
376 
377 public: // constructors & destructor
379  program_handle_(other.program_handle_),
380  program_name_(::std::move(other.program_name_)),
381  succeeded_(other.succeeded_),
382  owns_handle_(other.owns_handle_)
383  {
384  other.owns_handle_ = false;
385  };
386 
387  ~compilation_output_base_t() noexcept(false)
388  {
389  if (owns_handle_) {
390  auto status = program::detail_::destroy_and_return_status<Kind>(program_handle_);
391  throw_if_error<Kind>(status, "Destroying " + program::detail_::identify<Kind>(program_handle_, program_name_.c_str()));
392  }
393  }
394 
395 public: // operators
396 
397  compilation_output_base_t& operator=(const compilation_output_base_t& other) = delete;
398  compilation_output_base_t& operator=(compilation_output_base_t&& other) = delete;
399 
400 protected: // data members
401  program::handle_t<Kind> program_handle_;
402  ::std::string program_name_;
403  bool succeeded_;
404  bool owns_handle_;
405 
406 };
407 
409 template <>
411 public:
413  using parent::parent;
414 
415  friend compilation_output_t compilation_output::detail_::wrap<source_kind>(
416  handle_type program_handle,
417  ::std::string program_name,
418  bool succeeded,
419  bool own_handle);
420 
421 public: // non-mutators
429 
431 
443  span<char> ptx(span<char> buffer) const
444  {
445  size_t size = program::detail_::get_ptx_size(parent::program_handle_, program_name_.c_str());
446  if (buffer.size() < size) {
447  throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's PTX ("
448  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
449  + compilation_output::detail_::identify(*this));
450  }
451  program::detail_::get_ptx(buffer.data(), program_handle_, program_name_.c_str());
452  return { buffer.data(), size };
453  }
454 
462  unique_span<char> ptx() const
463  {
464  size_t size = program::detail_::get_ptx_size(program_handle_, program_name_.c_str());
465  auto result = make_unique_span<char>(size+1); // Let's append a trailing nul character, to be on the safe side
466  if (size == 0) {
467  result[size] = '\0';
468  return result;
469  }
470  program::detail_::get_ptx(result.data(), program_handle_, program_name_.c_str());
471  result[size] = '\0';
472  return result;
473  }
474 
477  bool has_ptx() const
478  {
479  size_t size;
480  status_type status = nvrtcGetPTXSize(program_handle_, &size);
481  if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; }
482  throw_if_rtc_error_lazy(source_kind, status, "Failed determining whether compilation resulted in PTX code for "
483  + compilation_output::detail_::identify<source_kind>(*this));
484  if (size == 0) {
485  throw ::std::logic_error("PTX size reported as 0 by "
486  + compilation_output::detail_::identify<source_kind>(*this));
487  }
488  return true;
489  }
490 
491 #if CUDA_VERSION >= 11010
492  span<char> cubin(span<char> buffer) const override
493  {
494  size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
495  if (buffer.size() < size) {
496  throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's cubin ("
497  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
498  + compilation_output::detail_::identify(*this));
499  }
500  program::detail_::get_cubin<source_kind>(buffer.data(), program_handle_, program_name_.c_str());
501  return { buffer.data(), size };
502  }
503 
504  unique_span<char> cubin() const override
505  {
506  size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
507  auto result = make_unique_span<char>(size);
508  if (size == 0) { return result; }
509  program::detail_::get_cubin<source_kind>(result.data(), program_handle_, program_name_.c_str());
510  return result;
511  }
512 
513  bool has_cubin() const override
514  {
515  size_t size;
516  auto status = nvrtcGetCUBINSize(program_handle_, &size);
517  if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; }
518  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed determining whether the program has a compiled CUBIN result: "
519  + compilation_output::detail_::identify(*this));
520  return (size > 0);
521  }
522 #endif
523 
524 #if CUDA_VERSION >= 11040
525 
539  span<char> lto_ir(span<char> buffer) const
540  {
541  size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str());
542  if (buffer.size() < size) {
543  throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's LTO IR ("
544  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
545  + compilation_output::detail_::identify(*this));
546  }
547  program::detail_::get_lto_ir(buffer.data(), program_handle_, program_name_.c_str());
548  return { buffer.data(), size };
549  }
550 
560  unique_span<char> lto_ir() const
561  {
562  size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str());
563  auto result = make_unique_span<char>(size+1); // Let's append a trailing nul character, to be on the safe side
564  if (size == 0) {
565  result[size] = '\0';
566  return result;
567  }
568  program::detail_::get_lto_ir(result.data(), program_handle_, program_name_.c_str());
569  result[size] = '\0';
570  return result;
571  }
572 
575  bool has_lto_ir() const
576  {
577  size_t size;
578 #if CUDA_VERSION >= 12000
579  auto status = nvrtcGetLTOIRSize(program_handle_, &size);
580 #else
581  auto status = nvrtcGetNVVMSize(program_handle_, &size);
582 #endif
583  if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; }
584  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed determining whether the NVRTC program has a compiled LTO IR result: "
585  + compilation_output::detail_::identify(*this));
586  if (size == 0) {
587  throw ::std::logic_error("LTO IR size reported as 0 by NVRTC for program: "
588  + compilation_output::detail_::identify(*this));
589  }
590  return true;
591  }
592 #endif
593 
603  const char* get_mangling_of(const char* unmangled_name) const
604  {
605  const char* result;
606  auto status = nvrtcGetLoweredName(program_handle_, unmangled_name, &result);
607  throw_if_error<source_kind>(status, ::std::string("Failed obtaining the mangled form of name \"")
608  + unmangled_name + "\" in dynamically-compiled program \"" + program_name_ + '\"');
609  return result;
610  }
611 
613  const char* get_mangling_of(const ::std::string& unmangled_name) const
614  {
615  return get_mangling_of(unmangled_name.c_str());
616  }
617 }; // class compilation_output_t<cuda_cpp>
618 
619 #if CUDA_VERSION >= 11010
620 
621 template <>
622 class compilation_output_t<ptx> : public compilation_output_base_t<ptx> {
623 public:
625  using parent::parent;
626 
627  friend compilation_output_t compilation_output::detail_::wrap<source_kind>(
628  handle_type program_handle,
629  ::std::string program_name,
630  bool succeeded,
631  bool own_handle);
632 
633 public: // non-mutators
634  span<char> cubin(span<char> buffer) const override
635  {
636  size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
637  if (buffer.size() < size) {
638  throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's cubin ("
639  + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": "
640  + compilation_output::detail_::identify<source_kind>(*this));
641  }
642  program::detail_::get_cubin<source_kind>(buffer.data(), program_handle_, program_name_.c_str());
643  return { buffer.data(), size };
644  }
645 
646  unique_span<char> cubin() const override
647  {
648  size_t size = program::detail_::get_cubin_size<source_kind>(program_handle_, program_name_.c_str());
649  auto result = make_unique_span<char>(size+1); // Let's append a trailing nul character, to be on the safe side
650  if (size == 0) {
651  result[size] = '\0';
652  return result;
653  }
654  program::detail_::get_cubin<source_kind>(result.data(), program_handle_, program_name_.c_str());
655  result[size] = '\0';
656  return result;
657  }
658 
659  bool has_cubin() const override
660  {
661  size_t size;
662  auto status = nvPTXCompilerGetCompiledProgramSize(program_handle_, &size);
663  if (status == NVPTXCOMPILE_ERROR_INVALID_INPUT) { return false; }
664  throw_if_error<source_kind>(status, "Failed determining whether the program has a compiled CUBIN result: "
665  + compilation_output::detail_::identify(*this));
666  return (size > 0);
667  }
668 }; // class compilation_output_t<ptx>
669 
670 #endif // CUDA_VERSION >= 11010
671 
672 namespace compilation_output {
673 
674 namespace detail_ {
675 
676 template <source_kind_t Kind>
677 inline ::std::string identify(const compilation_output_t<Kind> &compilation_output)
678 {
679  return "Compilation output of " + program::detail_::identify<Kind>(
680  compilation_output.program_handle(),
681  compilation_output.program_name().c_str());
682 }
683 
684 template <source_kind_t Kind>
685 inline compilation_output_t<Kind> wrap(
686  program::handle_t<Kind> program_handle,
687  ::std::string program_name,
688  bool succeeded,
689  bool own_handle)
690 {
691  return compilation_output_t<Kind>{program_handle, ::std::move(program_name), succeeded, own_handle};
692 }
693 
694 } // namespace detail_
695 
696 } // namespace compilation_output
697 
698 } // namespace rtc
699 
700 namespace module {
701 
702 template<> inline module_t create<cuda_cpp>(
703  const context_t& context,
704  const rtc::compilation_output_t<cuda_cpp>& compilation_output,
705  const link::options_t& options)
706 {
707  if (not compilation_output.succeeded()) {
708  throw ::std::invalid_argument("Attempt to create a module after compilation failure of "
709  + cuda::rtc::program::detail_::identify<cuda_cpp>(compilation_output.program_handle()));
710  }
711 #if CUDA_VERSION >= 11010
712  auto program_handle = compilation_output.program_handle();
713  auto program_name = compilation_output.program_name().c_str();
714  static const bool dont_fail_on_missing_cubin { false };
715  auto cubin_size = rtc::program::detail_::get_cubin_size<cuda_cpp, dont_fail_on_missing_cubin>(program_handle, program_name);
716  // Note: The above won't fail even if no CUBIN was produced
717  bool has_cubin = (cubin_size > 0);
718  if (has_cubin) {
719  auto cubin = make_unique_span<char>(cubin_size);
720  rtc::program::detail_::get_cubin<cuda_cpp>(cubin.data(), program_handle, program_name);
721  return module::create(context, cubin.get(), options);
722  }
723  // Note: At this point, we must have PTX in the output, as otherwise the compilation could
724  // not have succeeded
725 #endif
726  auto ptx = compilation_output.ptx();
727  return module::create(context, ptx.get(), options);
728 }
729 
730 #if CUDA_VERSION >= 11010
731 template<> inline module_t create<source_kind_t::ptx>(
732  const context_t& context,
733  const rtc::compilation_output_t<source_kind_t::ptx>& compilation_output,
734  const link::options_t& options)
735 {
736  if (not compilation_output.succeeded()) {
737  throw ::std::invalid_argument("Attempt to create a module after compilation failure of "
738  + cuda::rtc::program::detail_::identify<source_kind_t::ptx>(compilation_output.program_handle()));
739  }
740  auto cubin = compilation_output.cubin();
741  return module::create(context, cubin.get(), options);
742 }
743 #endif // CUDA_VERSION >= 11010
744 
745 
748 template <source_kind_t Kind>
749 inline module_t create(
750  device_t& device,
751  const rtc::compilation_output_t<Kind>& compilation_output,
752  const link::options_t& options = {})
753 {
754  return create(device.primary_context(), compilation_output, options);
755 }
756 
757 } // namespace module
758 
759 } // namespace cuda
760 
761 #endif // CUDA_API_WRAPPERS_RTC_OUTPUT_HPP_
bool failed() const
Definition: compilation_output.hpp:291
The CUDA variant of C++, accepted by the NVRTC library.
Definition: types.hpp:41
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
typename cuda::rtc::detail_::types< Kind >::handle_type handle_t
Raw program handle used by the NVIDIA run-time compilation libraries' API calls: // The NVRTC librar...
Definition: types.hpp:124
source_kind_t
The API wrappers support different kinds of source code, accepted by different NVIDIA run-time compil...
Definition: types.hpp:39
typename detail_::types< Kind >::status_type status_t
Status values returned by the NVIDIA run-time compilation libraries' API calls: The NVRTC library fo...
Definition: types.hpp:131
Definitions and utility functions relating to run-time compilation (RTC) of CUDA code using the NVRTC...
Wrapper class for a CUDA code module.
Definition: module.hpp:123
bool has_ptx() const
Definition: compilation_output.hpp:477
The result of the compilation of an {rtc::program_t}, whether successful or failed, with any related byproducts.
Definition: compilation_output.hpp:279
Type definitions used in the CUDA run-time compilation wrappers.
span< char > log(span< char > buffer) const
Write a copy of the program compilation log into a user-provided buffer.
Definition: compilation_output.hpp:310
The output produced by a compilation process by one of the CUDA libraries, including any byproducts...
Definition: compilation_output.hpp:36
Output of CUDA C++ code JIT-compilation.
Definition: compilation_output.hpp:410
device::primary_context_t primary_context(bool hold_pc_refcount_unit=false) const
Produce a proxy for the device's primary context - the one used by runtime API calls.
Definition: device.hpp:152
bool succeeded() const
Definition: compilation_output.hpp:288
const char * get_mangling_of(const ::std::string &unmangled_name) const
Obtain the mangled/lowered form of an expression registered earlier, after the compilation.
Definition: compilation_output.hpp:613
unique_span< char > log() const
Obtain a copy of the compilation log.
Definition: compilation_output.hpp:330
NVIDIA's architecture-independent intermediate program representation language, known as PTX or Parall...
Definition: types.hpp:44
const char * get_mangling_of(const char *unmangled_name) const
Obtain the mangled/lowered form of an expression registered earlier, after the compilation.
Definition: compilation_output.hpp:603
#define throw_if_rtc_error_lazy(Kind, status__,...)
Throws a cuda::rtc::runtime_error exception if the status is not success.
Definition: error.hpp:201
unique_span< char > ptx() const
Obtain a copy of the PTX resulting from the program compilation.
Definition: compilation_output.hpp:462
Wrapper class for a CUDA device.
Definition: device.hpp:135
span< char > ptx(span< char > buffer) const
Obtain a (nul-terminated) copy of the PTX result of the last compilation.
Definition: compilation_output.hpp:443
Facilities for exception-based handling of errors originating from the NVRTC library, including a basic exception class wrapping ::std::runtime_error.