cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
program.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_RTC_PROGRAM_HPP_
8 #define CUDA_API_WRAPPERS_RTC_PROGRAM_HPP_
9 
10 #include "compilation_options.hpp"
11 #include "compilation_output.hpp"
12 #include "error.hpp"
13 #include "types.hpp"
14 #include "../api.hpp"
15 
16 #include <vector>
17 
18 namespace cuda {
19 
20 namespace rtc {
21 
22 namespace program {
23 
24 namespace detail_ {
25 
39 template <source_kind_t Kind = cuda_cpp>
40 inline program::handle_t<Kind> create(
41  const char *program_name,
42  string_view program_source,
43  int num_headers = 0,
44  const char *const *header_sources = nullptr,
45  const char *const *header_names = nullptr);
46 
47 template <> inline program::handle_t<cuda_cpp> create<cuda_cpp>(
48  const char *program_name, string_view program_source, int num_headers,
49  const char *const *header_sources, const char *const *header_names)
50 {
51  program::handle_t<cuda_cpp> program_handle;
52  auto status = nvrtcCreateProgram(&program_handle, program_source.data(), program_name, num_headers, header_sources, header_names);
53  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed creating " + detail_::identify<cuda_cpp>(program_name));
54  return program_handle;
55 }
56 
57 #if CUDA_VERSION >= 11010
58 template <> inline program::handle_t<ptx> create<ptx>(
59  const char *program_name, string_view program_source,
60  int, const char *const *, const char *const *)
61 {
62  program::handle_t<ptx> program_handle;
63  auto status = nvPTXCompilerCreate(&program_handle, program_source.size(), program_source.data());
64  throw_if_rtc_error_lazy(ptx, status, "Failed creating " + detail_::identify<ptx>(program_name));
65  return program_handle;
66 }
67 #endif // CUDA_VERSION >= 11010
68 
70 inline void register_global(handle_t<cuda_cpp> program_handle, const char *global_to_register)
71 {
72  auto status = nvrtcAddNameExpression(program_handle, global_to_register);
73  throw_if_rtc_error_lazy(cuda_cpp, status, "Failed registering global entity " + ::std::string(global_to_register)
74  + " with " + identify<cuda_cpp>(program_handle));
75 }
76 
79 inline ::std::string get_concatenated_options(const const_cstrings_span& raw_options)
80 {
81  static ::std::ostringstream oss;
82  oss.str("");
83  for (const auto option: raw_options) {
84  oss << " \"" << option << '\"';
85  }
86  return oss.str();
87 }
88 
89 template <source_kind_t Kind>
90 inline void maybe_handle_invalid_option(
91  status_t<Kind>,
92  const char *,
93  const const_cstrings_span&,
94  handle_t<Kind>)
95 { }
96 
97 template <>
98 inline void maybe_handle_invalid_option<cuda_cpp>(
99  status_t<cuda_cpp> status,
100  const char * program_name,
101  const const_cstrings_span& raw_options,
102  handle_t<cuda_cpp> program_handle)
103 {
104  if (status == static_cast<status_t<cuda_cpp>>(status::named_t<cuda_cpp>::invalid_option)) {
105  throw rtc::runtime_error<cuda_cpp>::with_message_override(status,
106  "Compilation options rejected when compiling " + identify<cuda_cpp>(program_handle, program_name) + ':'
107  + get_concatenated_options(raw_options));
108  }
109 }
110 
111 template <source_kind_t Kind>
112 inline status_t<Kind> compile_and_return_status(
113  handle_t<Kind> program_handle,
114  const const_cstrings_span& raw_options);
115 
116 #if CUDA_VERSION >= 11010
117 template <>
118 inline status_t<ptx> compile_and_return_status<ptx>(
119  handle_t<ptx> program_handle,
120  const const_cstrings_span& raw_options)
121 {
122  return nvPTXCompilerCompile(program_handle, static_cast<int>(raw_options.size()), raw_options.data());
123 }
124 #endif
125 
126 template <>
127 inline status_t<cuda_cpp> compile_and_return_status<cuda_cpp>(
128  handle_t<cuda_cpp> program_handle,
129  const const_cstrings_span& raw_options)
130 {
131  return nvrtcCompileProgram(program_handle, static_cast<int>(raw_options.size()), raw_options.data());
132 }
133 
134 
135 template <source_kind_t Kind>
136 inline compilation_output_t<Kind> compile(
137  const char * program_name,
138  const const_cstrings_span& raw_options,
139  handle_t<Kind> program_handle)
140 {
141  auto status = compile_and_return_status<Kind>(program_handle, raw_options);
142  bool succeeded = is_success<Kind>(status);
143  switch(status) {
144  case (rtc::status_t<Kind>) status::named_t<Kind>::success:
145  case (rtc::status_t<Kind>) status::named_t<Kind>::compilation_failure:
146  return compilation_output::detail_::wrap<Kind>(program_handle, program_name, succeeded, do_take_ownership);
147  default:
148  maybe_handle_invalid_option<Kind>(status, program_name, raw_options, program_handle);
149  throw rtc::runtime_error<Kind>(status, "Failed invoking compiler for " + identify<Kind>(program_handle, program_name));
150  }
151 }
152 
153 // Note: The program_source _cannot_ be nullptr; if all of your source code is preincluded headers,
154 // pas the address of an empty string.
155 inline compilation_output_t<cuda_cpp> compile(
156  const char *program_name,
157  const char *program_source,
158  const_cstrings_span header_sources,
159  const_cstrings_span header_names,
160  const_cstrings_span raw_options,
161  const_cstrings_span globals_to_register)
162 {
163  assert(header_names.size() <= ::std::numeric_limits<int>::max());
164  if (program_name == nullptr or *program_name == '\0') {
165  throw ::std::invalid_argument("Attempt to compile a CUDA program without specifying a name");
166  }
167  // Note: Not rejecting empty/missing source, because we may be pre-including source files
168  auto num_headers = static_cast<int>(header_names.size());
169  auto program_handle = create<cuda_cpp>(
170  program_name, program_source, num_headers, header_sources.data(), header_names.data());
171 
172  for (const auto global_to_register: globals_to_register) {
173  register_global(program_handle, global_to_register);
174  }
175 
176  // Note: compilation is outside of any context
177  return compile<cuda_cpp>(program_name, raw_options, program_handle);
178 }
179 
180 #if CUDA_VERSION >= 11010
181 inline compilation_output_t<ptx> compile_ptx(
182  const char *program_name,
183  const char *program_source,
184  const_cstrings_span raw_options)
185 {
186  if (program_name == nullptr or *program_name == '\0') {
187  throw ::std::invalid_argument("Attempt to compile a CUDA program without specifying a name");
188  }
189  // Note: Not rejecting empty/missing source, because we may be pre-including source files
190  auto program_handle = create<ptx>(program_name, program_source);
191 
192  // Note: compilation is outside of any context
193  return compile<ptx>(program_name, raw_options, program_handle);
194 }
195 #endif // CUDA_VERSION >= 11010
196 
197 template <source_kind_t Kind>
198 class base_t {
199 public: // types and constants
200  constexpr static const source_kind_t source_kind { Kind };
201  using handle_type = program::handle_t<source_kind>;
202  using status_type = status_t<source_kind>;
203 
204 public: // getters
207  const ::std::string& name() const { return name_; }
208 
210  const char* source() const { return source_; }
211 
213  const compilation_options_t<Kind>& options() const { return options_; }
214  // TODO: Think of a way to set compilation options without having
215  // to break the statement, e.g. if options had a reflected enum value
216  // or some such arrangement.
217 
219  compilation_options_t<Kind>& options() { return options_; }
221 
222 public: // constructors and destructor
223  explicit base_t(::std::string name) : name_(::std::move(name)) {};
224  base_t(const base_t&) = default;
225  base_t(base_t&&) noexcept = default;
226  ~base_t() = default;
227 
228 public: // operators
229 
230  base_t& operator=(const base_t& other) noexcept = default;
231  base_t& operator=(base_t&& other) noexcept = default;
232 
233 protected: // data members
234  const char* source_ { nullptr };
235  ::std::string name_;
236  compilation_options_t<Kind> options_;
237 }; // base_t
238 
239 } // namespace detail_
240 
241 } // namespace program
242 
243 template <source_kind_t Kind>
244 class program_t;
245 
256 template <>
257 class program_t<cuda_cpp> : public program::detail_::base_t<cuda_cpp> {
258 public: // types
259  using parent = base_t<source_kind>;
260 
261 public: // getters
262 
266  {
267  return { headers_.names.data(),headers_.names.size()};
268  }
269 
276  {
277  return { headers_.sources.data(), headers_.sources.size()};
278  }
279 
282  size_t num_headers() const { return headers_.sources.size(); }
283 
284 public: // setters - duplicated with PTX programs
285 
288  {
289  options_.set_target(target_compute_capability);
290  return *this;
291  }
292 
295  program_t& set_target(const device_t& device) { return set_target(device.compute_capability());}
296 
299  program_t& set_target(const context_t& context) { return set_target(context.device()); }
300 
303  program_t& clear_targets() { options_.targets_.clear(); return *this; }
304 
307  template <typename Container>
308  program_t& set_targets(Container target_compute_capabilities)
309  {
310  clear_targets();
311  for(const auto& compute_capability : target_compute_capabilities) {
312  options_.add_target(compute_capability);
313  }
314  return *this;
315  }
316 
320  {
321  options_.add_target(target_compute_capability);
322  return *this;
323  }
324 
327  void add_target(const device_t& device) { add_target(device.compute_capability()); }
328 
331  void add_target(const context_t& context) { add_target(context.device()); }
332 
333  program_t& set_source(const char* source) { source_ = source; return *this; }
334  program_t& set_source(const ::std::string& source) { source_ = source.c_str(); return *this; }
335  program_t& set_options(const compilation_options_t<source_kind>& options)
336  {
337  options_ = options;
338  return *this;
339  }
340  program_t& set_options(compilation_options_t<source_kind>&& options)
341  {
342  options_ = ::std::move(options);
343  return *this;
344  }
345 
346 protected:
/// Compile-time check that a type is acceptable for a header's name or source:
/// either a (possibly const, possibly reference-to) `char*` / `const char*`,
/// or an lvalue reference to an `::std::string`. Fails compilation otherwise.
template <typename String>
static void check_string_type()
{
    using unqualified_type =
        typename ::std::remove_const<typename ::std::remove_reference<String>::type>::type;
    constexpr bool is_raw_c_string =
        ::std::is_same<unqualified_type, const char*>::value or
        ::std::is_same<unqualified_type, char*>::value;
    constexpr bool is_lvalue_ref_to_string =
        ::std::is_same<String, const ::std::string&>::value or
        ::std::is_same<String, ::std::string&>::value;
    static_assert(is_raw_c_string or is_lvalue_ref_to_string,
        "Cannot use this type for a named header name or source; use char*, const char* or a "
        "reference to a string you own"
    );
}
360 
361  // Note: All methods involved in adding headers - which eventually call one of the
362  // three adders of each kind here - are written carefully to support both C-style strings
363  // and lvalue references to ::std::string's - but _not_ rvalue strings or rvalue string
364  // references, as the latter are not owned by the caller, and this class' code does not
365  // make a copy or take ownership. If you make any changes, you must be very careful not
366  // to _copy_ anything by mistake, but rather carry forward reference-types all the way
367  // to here.
368 
369  void add_header_name_ (const char* name) { headers_.names.emplace_back(name); }
370  void add_header_name_ (const ::std::string& name) { add_header_name_(name.c_str()); }
371  void add_header_name_ (::std::string&& name) = delete;
372 
373  void add_header_source_(const char* source) { headers_.sources.emplace_back(source); }
374  void add_header_source_(const ::std::string& source) { add_header_source_(source.c_str()); }
375  void add_header_source_(::std::string&& source) = delete;
376 
377 public: // mutators
387  template <typename String1, typename String2>
388  program_t& add_header(String1&& name, String2&& source)
389  {
390  add_header_name_(name);
391  add_header_source_(source);
392  return *this;
393  }
394 
404  template <typename String1, typename String2>
405  program_t& add_header(const ::std::pair<String1, String2>& name_and_source)
406  {
407  add_header_name_(name_and_source.first);
408  add_header_source_(name_and_source.second);
409  return *this;
410  }
411 
413  template <typename String1, typename String2>
414  program_t& add_header(::std::pair<String1, String2>&& name_and_source)
415  {
416  check_string_type<String1>();
417  check_string_type<String2>();
418  return add_header(name_and_source);
419  }
420 
430  template <typename RangeOfNames, typename RangeOfSources>
432  RangeOfNames header_names,
433  RangeOfSources header_sources)
434  {
435  check_string_type<typename RangeOfNames::const_reference>();
436  check_string_type<typename RangeOfSources::const_reference>();
437 #ifndef NDEBUG
438  if (header_names.size() != header_sources.size()) {
439  throw ::std::invalid_argument(
440  "Got a different number of header names (" + ::std::to_string(header_names.size())
441  + ") and header source (" + ::std::to_string(header_sources.size()) + ')');
442  }
443 #endif
444  auto new_num_headers = headers_.names.size() + header_names.size();
445 #ifndef NDEBUG
446  if (new_num_headers > ::std::numeric_limits<int>::max()) {
447  throw ::std::invalid_argument("Cannot use more than "
448  + ::std::to_string(::std::numeric_limits<int>::max()) + " headers.");
449  }
450 #endif
451  headers_.names.reserve(new_num_headers);
452  headers_.sources.reserve(new_num_headers);
453  // TODO: Use a zip iterator
454  for(auto name_it = header_names.begin(), source_it = header_sources.begin();
455  name_it < header_names.end();
456  name_it++, source_it++) {
457  add_header(*name_it, *source_it);
458  }
459  return *this;
460  }
461 
471  template <typename RangeOfNameAndSourcePairs>
472  program_t& add_headers(RangeOfNameAndSourcePairs&& named_header_pairs)
473  {
474  // TODO: Accept ranges without a size method and no iterator arithmetic
475  auto num_headers_to_add = named_header_pairs.size();
476  auto new_num_headers = headers_.names.size() + num_headers_to_add;
477 #ifndef NDEBUG
478  if (new_num_headers > ::std::numeric_limits<int>::max()) {
479  throw ::std::invalid_argument("Cannot use more than "
480  + ::std::to_string(::std::numeric_limits<int>::max()) + " headers.");
481  }
482 #endif
483  headers_.names.reserve(new_num_headers);
484  headers_.sources.reserve(new_num_headers);
485  // Using auto&& to notice the case of getting rvalue references (which we would like to reject)
486  for(auto&& pair : named_header_pairs) {
487  add_header(pair.first, pair.second);
488  }
489  return *this;
490  }
491 
501  template <typename RangeOfNames, typename RangeOfSources>
503  RangeOfNames&& names,
504  RangeOfSources&& sources)
505  {
506  clear_headers();
507  return add_headers(names, sources);
508  }
509 
519  template <typename RangeOfNameAndSourcePairs>
520  program_t& set_headers(RangeOfNameAndSourcePairs&& named_header_pairs)
521  {
522  clear_headers();
523  add_headers(named_header_pairs);
524  return *this;
525  }
526 
529  {
530  headers_.names.clear();
531  headers_.sources.clear();
532  return *this;
533  }
534 
537  program_t& clear_options() { options_ = {}; return *this; }
538 
539 public:
540 
541  // TODO: Support specifying all compilation option in a single string and parsing it
542 
551  {
552  if ((source_ == nullptr or *source_ == '\0') and options_.preinclude_files.empty()) {
553  throw ::std::invalid_argument("Attempt to compile a CUDA program without any source code");
554  }
555  auto marshalled_options = cuda::marshalling::marshal(options_);
556  ::std::vector<const char*> option_ptrs = marshalled_options.option_ptrs();
557  return program::detail_::compile(
558  name_.c_str(),
559  source_ == nullptr ? "" : source_,
560  {headers_.sources.data(), headers_.sources.size()},
561  {headers_.names.data(), headers_.names.size()},
562  {option_ptrs.data(), option_ptrs.size()},
563  {globals_to_register_.data(), globals_to_register_.size()});
564  }
565 
575  program_t& add_registered_global(const char* unmangled_name)
576  {
577  globals_to_register_.push_back(unmangled_name);
578  return *this;
579  }
580 
582  program_t& add_registered_global(const ::std::string& unmangled_name)
583  {
584  globals_to_register_.push_back(unmangled_name.c_str());
585  return *this;
586  }
587  // TODO: Accept string_view's with C++17
588 
600  template <typename Container>
601  program_t& add_registered_globals(const Container& globals_to_register)
602  {
603  globals_to_register_.reserve(globals_to_register_.size() + globals_to_register.size());
604  for(const auto& global_name : globals_to_register) {
605  add_registered_global(global_name);
606  }
607  return *this;
608  }
609 
611  template <typename Container>
612  program_t& add_registered_globals(Container&& globals_to_register)
613  {
614  static_assert(::std::is_same<typename Container::value_type, const char*>::value,
615  "For an rvalue container, we only accept raw C strings as the value type, to prevent"
616  "the possible passing of string-like objects at the end of their lifetime");
617  return add_registered_globals(static_cast<const Container&>(globals_to_register));
618  }
619 
620 public: // constructors and destructor
621  program_t(::std::string name) : base_t(::std::move(name)) {}
622  program_t(const program_t&) = default;
623  program_t(program_t&&) = default;
624  ~program_t() = default;
625 
626 public: // operators
628  program_t& operator=(const program_t& other) = default;
629  program_t& operator=(program_t&& other) = default;
631 
632 protected: // data members
633  struct {
634  ::std::vector<const char*> names;
635  ::std::vector<const char*> sources;
636  } headers_;
637  ::std::vector<const char*> globals_to_register_;
638 }; // class program_t<cuda_cpp>
639 
640 #if CUDA_VERSION >= 11010
641 
652 template <>
653 class program_t<ptx> : public program::detail_::base_t<ptx> {
654 public: // types
656  using parent = program::detail_::base_t<source_kind>;
658 
659 public: // setters - duplicated with CUDA-C++/NVRTC programs
660 
662  program_t& set_target(device::compute_capability_t target_compute_capability)
663  {
664  options_.set_target(target_compute_capability);
665  return *this;
666  }
667 
669  program_t& set_target(const device_t& device) { return set_target(device.compute_capability());}
670 
672  program_t& set_target(const context_t& context) { return set_target(context.device()); }
673 
675  program_t& clear_targets() { options_.targets_.clear(); return *this; }
676 
678  template <typename Container>
679  program_t& set_targets(Container target_compute_capabilities)
680  {
681  clear_targets();
682  for(const auto& compute_capability : target_compute_capabilities) {
683  options_.add_target(compute_capability);
684  }
685  return *this;
686  }
687 
689  program_t& add_target(device::compute_capability_t target_compute_capability)
690  {
691  options_.add_target(target_compute_capability);
692  return *this;
693  }
694 
696  void add_target(const device_t& device) { add_target(device.compute_capability()); }
697 
699  void add_target(const context_t& context) { add_target(context.device()); }
700 
702  program_t& set_source(char const* source) { source_ = source; return *this; }
703 
705  program_t& set_source(const ::std::string& source) { source_ = source.c_str(); return *this; }
706 
708  program_t& set_options(compilation_options_t<source_kind> options)
709  {
710  options_ = ::std::move(options);
711  return *this;
712  }
714  program_t& clear_options() { options_ = {}; return *this; }
715 
716 public:
717 
718  // TODO: Support specifying all compilation option in a single string and parsing it
719 
721  compilation_output_t<ptx> compile() const
722  {
723  if (source_ == nullptr or *source_ == '\0') {
724  throw ::std::invalid_argument("Attempt to compile a CUDA program without any source code");
725  }
726  auto marshalled_options = cuda::marshalling::marshal(options_);
727  ::std::vector<const char*> option_ptrs = marshalled_options.option_ptrs();
728  return program::detail_::compile_ptx(
729  name_.c_str(),
730  source_,
731  {option_ptrs.data(), option_ptrs.size()});
732  }
733 
734 public: // constructors and destructor
735  program_t(::std::string name) : parent(::std::move(name)) {}
736  program_t(const program_t&) = default;
737  program_t(program_t&&) = default;
738  ~program_t() = default;
739 
740 public: // operators
741 
743  program_t& operator=(const program_t& other) = default;
744  program_t& operator=(program_t&& other) = default;
746 }; // class program_t<ptx>
747 
748 #endif // CUDA_VERSION >= 11010
749 
750 namespace program {
751 
756 template <source_kind_t Kind>
757 inline program_t<Kind> create(const char* program_name)
758 {
759  return program_t<Kind>(program_name);
760 }
761 
763 template <source_kind_t Kind>
764 inline program_t<Kind> create(const ::std::string& program_name)
765 {
766  return program_t<Kind>(program_name);
767 }
768 
769 } // namespace program
770 
777 #if CUDA_VERSION >= 11020
778 inline unique_span<device::compute_capability_t>
779 supported_targets()
780 {
781  int num_supported_archs;
782  auto status = nvrtcGetNumSupportedArchs(&num_supported_archs);
783  throw_if_error<cuda_cpp>(status, "Failed obtaining the number of target NVRTC architectures");
784  auto raw_archs = ::std::unique_ptr<int[]>(new int[num_supported_archs]);
785  status = nvrtcGetSupportedArchs(raw_archs.get());
786  throw_if_error<cuda_cpp>(status, "Failed obtaining the architectures supported by NVRTC");
787  auto result = make_unique_span<device::compute_capability_t>(num_supported_archs);
788  ::std::transform(raw_archs.get(), raw_archs.get() + num_supported_archs, ::std::begin(result),
789  [](int raw_arch) { return device::compute_capability_t::from_combined_number(raw_arch); });
790  return result;
791 }
792 #endif
793 
794 } // namespace rtc
795 
796 } // namespace cuda
797 
798 #endif // CUDA_API_WRAPPERS_RTC_PROGRAM_HPP_
program_t & add_registered_globals(const Container &globals_to_register)
Register multiple pre-mangled names of global, to make available for use after compilation.
Definition: program.hpp:601
Contains the cuda::rtc::compilation_output_t class and related code.
The CUDA variant of C++, accepted by the NVRTC library.
Definition: types.hpp:41
program_t & clear_options()
Clears any forced values of compilation options, reverting the compilation to the default values...
Definition: program.hpp:537
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
void add_target(const context_t &context)
Have the compilation also produce code for devices with the same compute capability as the device of ...
Definition: program.hpp:331
program_t & set_target(const device_t &device)
Have the compilation produce code for devices with the same compute capability as a given device...
Definition: program.hpp:295
size_t num_headers() const
Definition: program.hpp:282
source_kind_t
The API wrappers support different kinds of source code, accepted by different NVIDIA run-time compil...
Definition: types.hpp:39
void add_target(const device_t &device)
Have the compilation also produce code for devices with the same compute capability as a given device...
Definition: program.hpp:327
program_t & add_target(device::compute_capability_t target_compute_capability)
Have the compilation also produce code for devices with a given compute capability.
Definition: program.hpp:319
STL namespace.
Definitions and utility functions relating to run-time compilation (RTC) of CUDA code using the NVRTC...
A numeric designator of the computational capabilities of a CUDA device.
Definition: device_properties.hpp:75
Options to be passed to one of the NVIDIA JIT compilers along with a program's source code...
Definition: compilation_options.hpp:164
program_t & set_targets(Container target_compute_capabilities)
Remove all compute capabilities which were chosen to have code produced for them by the compilation...
Definition: program.hpp:308
const program_t & add_headers(RangeOfNames header_names, RangeOfSources header_sources)
Adds multiple "memoized" headers to the program.
Definition: program.hpp:431
Type definitions used in CUDA real-time compilation work wrappers.
program_t & set_target(const context_t &context)
Have the compilation produce code for devices with the same compute capability as the device of a giv...
Definition: program.hpp:299
program_t & set_target(device::compute_capability_t target_compute_capability)
Have the compilation produce code for devices with a given compute capability.
Definition: program.hpp:287
program_t & clear_headers()
Removes all "memoized" headers to be used in the program's compilation.
Definition: program.hpp:528
The output produced by a compilation process by one of the CUDA libraries, including any byproducts...
Definition: compilation_output.hpp:36
static constexpr compute_capability_t from_combined_number(unsigned combined) noexcept
Converts a single-number representation of a compute capability into a proper structured instance of ...
Output of CUDA C++ code JIT-compilation.
Definition: compilation_output.hpp:410
program_t & add_headers(RangeOfNameAndSourcePairs &&named_header_pairs)
Adds multiple "memoized" headers to the program.
Definition: program.hpp:472
const program_t & set_headers(RangeOfNames &&names, RangeOfSources &&sources)
Replaces the set of "memoized" headers used in the program's compilation.
Definition: program.hpp:502
program_t & add_header(String1 &&name, String2 &&source)
Adds another "memoized" header to the program.
Definition: program.hpp:388
device::compute_capability_t compute_capability() const
Obtains the device's compute capability; see cuda::device::compute_capability_t.
Definition: device.hpp:415
program_t & add_registered_global(const ::std::string &unmangled_name)
Register a pre-mangled name of a global, to make available for use after compilation.
Definition: program.hpp:582
program_t & add_registered_globals(Container &&globals_to_register)
add_registered_globals(const Container&)
Definition: program.hpp:612
#define throw_if_rtc_error_lazy(Kind, status__,...)
Throws a cuda::rtc::runtime_error exception if the status is not success.
Definition: error.hpp:201
program_t & add_header(::std::pair< String1, String2 > &&name_and_source)
String2>(String1&&, String2&&)
Definition: program.hpp:414
program_t & add_registered_global(const char *unmangled_name)
Register a pre-mangled name of a global, to make available for use after compilation.
Definition: program.hpp:575
const_cstrings_span header_sources() const
Sources of the "memoized"/off-file-system headers made available to the program (and usable as identi...
Definition: program.hpp:275
program_t< Kind > create(const char *program_name)
Create a new (not-yet-compiled) program without setting most of its constituent fields.
Definition: program.hpp:757
Definition: program.hpp:244
program_t & add_header(const ::std::pair< String1, String2 > &name_and_source)
Adds another "memoized" header to the program.
Definition: program.hpp:405
program_t & set_headers(RangeOfNameAndSourcePairs &&named_header_pairs)
Replaces the set of "memoized" headers used in the program's compilation.
Definition: program.hpp:520
const_cstrings_span header_names() const
Names of the "memoized"/off-file-system headers made available to the program (and usable as identifi...
Definition: program.hpp:265
program_t & clear_targets()
Remove all compute capabilities which were chosen to have code produced for them by the compilation...
Definition: program.hpp:303
span< const char *const > const_cstrings_span
A span of C-style strings the contents of which must not be modified.
Definition: types.hpp:57
Wrapper class for a CUDA device.
Definition: device.hpp:135
Facilities for exception-based handling of errors originating in the NVRTC library, including a basic exception class wrapping ::std::runtime_error.
compilation_output_t< cuda_cpp > compile() const
Compiles the program represented by this object (which, until this point, is just a bunch of unrelate...
Definition: program.hpp:550