7 #ifndef CUDA_API_WRAPPERS_RTC_PROGRAM_HPP_ 8 #define CUDA_API_WRAPPERS_RTC_PROGRAM_HPP_ 39 template <source_kind_t Kind = cuda_cpp>
40 inline program::handle_t<Kind>
create(
41 const char *program_name,
42 string_view program_source,
44 const char *
const *header_sources =
nullptr,
45 const char *
const *header_names =
nullptr);
47 template <>
inline program::handle_t<cuda_cpp> create<cuda_cpp>(
48 const char *program_name, string_view program_source,
int num_headers,
49 const char *
const *header_sources,
const char *
const *header_names)
51 program::handle_t<cuda_cpp> program_handle;
52 auto status = nvrtcCreateProgram(&program_handle, program_source.data(), program_name, num_headers, header_sources, header_names);
54 return program_handle;
57 #if CUDA_VERSION >= 11010 58 template <>
inline program::handle_t<ptx> create<ptx>(
59 const char *program_name, string_view program_source,
60 int,
const char *
const *,
const char *
const *)
62 program::handle_t<ptx> program_handle;
63 auto status = nvPTXCompilerCreate(&program_handle, program_source.size(), program_source.data());
65 return program_handle;
67 #endif // CUDA_VERSION >= 11010 70 inline void register_global(handle_t<cuda_cpp> program_handle,
const char *global_to_register)
72 auto status = nvrtcAddNameExpression(program_handle, global_to_register);
74 +
" with " + identify<cuda_cpp>(program_handle));
81 static ::std::ostringstream oss;
83 for (
const auto option: raw_options) {
84 oss <<
" \"" << option <<
'\"';
89 template <source_kind_t Kind>
90 inline void maybe_handle_invalid_option(
98 inline void maybe_handle_invalid_option<cuda_cpp>(
99 status_t<cuda_cpp> status,
100 const char * program_name,
102 handle_t<cuda_cpp> program_handle)
104 if (status ==
static_cast<status_t<cuda_cpp>
>(status::named_t<cuda_cpp>::invalid_option)) {
105 throw rtc::runtime_error<cuda_cpp>::with_message_override(status,
106 "Compilation options rejected when compiling " + identify<cuda_cpp>(program_handle, program_name) +
':' 107 + get_concatenated_options(raw_options));
111 template <source_kind_t Kind>
112 inline status_t<Kind> compile_and_return_status(
113 handle_t<Kind> program_handle,
116 #if CUDA_VERSION >= 11010 118 inline status_t<ptx> compile_and_return_status<ptx>(
119 handle_t<ptx> program_handle,
122 return nvPTXCompilerCompile(program_handle, static_cast<int>(raw_options.size()), raw_options.data());
127 inline status_t<cuda_cpp> compile_and_return_status<cuda_cpp>(
128 handle_t<cuda_cpp> program_handle,
131 return nvrtcCompileProgram(program_handle, static_cast<int>(raw_options.size()), raw_options.data());
135 template <source_kind_t Kind>
136 inline compilation_output_t<Kind> compile(
137 const char * program_name,
139 handle_t<Kind> program_handle)
141 auto status = compile_and_return_status<Kind>(program_handle, raw_options);
142 bool succeeded = is_success<Kind>(status);
144 case (rtc::status_t<Kind>) status::named_t<Kind>::success:
145 case (rtc::status_t<Kind>) status::named_t<Kind>::compilation_failure:
146 return compilation_output::detail_::wrap<Kind>(program_handle, program_name, succeeded, do_take_ownership);
148 maybe_handle_invalid_option<Kind>(status, program_name, raw_options, program_handle);
149 throw rtc::runtime_error<Kind>(status,
"Failed invoking compiler for " + identify<Kind>(program_handle, program_name));
155 inline compilation_output_t<cuda_cpp> compile(
156 const char *program_name,
157 const char *program_source,
163 assert(header_names.size() <= ::std::numeric_limits<int>::max());
164 if (program_name ==
nullptr or *program_name ==
'\0') {
165 throw ::std::invalid_argument(
"Attempt to compile a CUDA program without specifying a name");
168 auto num_headers =
static_cast<int>(header_names.size());
169 auto program_handle = create<cuda_cpp>(
170 program_name, program_source, num_headers, header_sources.data(), header_names.data());
172 for (
const auto global_to_register: globals_to_register) {
173 register_global(program_handle, global_to_register);
177 return compile<cuda_cpp>(program_name, raw_options, program_handle);
180 #if CUDA_VERSION >= 11010 181 inline compilation_output_t<ptx> compile_ptx(
182 const char *program_name,
183 const char *program_source,
186 if (program_name ==
nullptr or *program_name ==
'\0') {
187 throw ::std::invalid_argument(
"Attempt to compile a CUDA program without specifying a name");
190 auto program_handle = create<ptx>(program_name, program_source);
193 return compile<ptx>(program_name, raw_options, program_handle);
195 #endif // CUDA_VERSION >= 11010 197 template <source_kind_t Kind>
201 using handle_type = program::handle_t<source_kind>;
202 using status_type = status_t<source_kind>;
207 const ::std::string& name()
const {
return name_; }
210 const char* source()
const {
return source_; }
213 const compilation_options_t<Kind>& options()
const {
return options_; }
219 compilation_options_t<Kind>& options() {
return options_; }
223 explicit base_t(::std::string name) : name_(::
std::move(name)) {};
224 base_t(
const base_t&) =
default;
225 base_t(base_t&&) noexcept = default;
230 base_t& operator=(const base_t& other) noexcept = default;
231 base_t& operator=(base_t&& other) noexcept = default;
234 const
char* source_ {
nullptr };
236 compilation_options_t<Kind> options_;
243 template <source_kind_t Kind>
259 using parent = base_t<source_kind>;
267 return { headers_.names.data(),headers_.names.size()};
277 return { headers_.sources.data(), headers_.sources.size()};
289 options_.set_target(target_compute_capability);
307 template <
typename Container>
311 for(
const auto& compute_capability : target_compute_capabilities) {
312 options_.add_target(compute_capability);
321 options_.add_target(target_compute_capability);
333 program_t& set_source(
const char* source) { source_ = source;
return *
this; }
334 program_t& set_source(const ::std::string& source) { source_ = source.c_str();
return *
this; }
342 options_ = ::std::move(options);
347 template <
typename String>
348 static void check_string_type()
350 using no_cref_string_type = typename ::std::remove_const<typename ::std::remove_reference<String>::type>::type;
352 ::std::is_same<no_cref_string_type, const char*>::value or
353 ::std::is_same<no_cref_string_type, char*>::value or
354 ::std::is_same<String, const ::std::string&>::value or
355 ::std::is_same<String, ::std::string&>::value,
356 "Cannot use this type for a named header name or source; use char*, const char* or a " 357 "reference to a string you own" 369 void add_header_name_ (
const char* name) { headers_.names.emplace_back(name); }
/// Registers a header name given as a string object, by forwarding the
/// string's character buffer to the C-string overload; the string itself is
/// not copied, so it must outlive its use by this object.
void add_header_name_(const ::std::string& name)
{
	const char* const raw_name = name.c_str();
	add_header_name_(raw_name);
}

/// Temporary strings are rejected at compile time: a pointer into their
/// buffer would dangle as soon as the temporary is destroyed.
void add_header_name_(::std::string&& name) = delete;
373 void add_header_source_(
const char* source) { headers_.sources.emplace_back(source); }
374 void add_header_source_(const ::std::string& source) { add_header_source_(source.c_str()); }
375 void add_header_source_(::std::string&& source) =
delete;
387 template <
typename String1,
typename String2>
390 add_header_name_(name);
391 add_header_source_(source);
404 template <
typename String1,
typename String2>
407 add_header_name_(name_and_source.first);
408 add_header_source_(name_and_source.second);
413 template <
typename String1,
typename String2>
416 check_string_type<String1>();
417 check_string_type<String2>();
418 return add_header(name_and_source);
430 template <
typename RangeOfNames,
typename RangeOfSources>
432 RangeOfNames header_names,
433 RangeOfSources header_sources)
435 check_string_type<typename RangeOfNames::const_reference>();
436 check_string_type<typename RangeOfSources::const_reference>();
438 if (header_names.size() != header_sources.size()) {
439 throw ::std::invalid_argument(
440 "Got a different number of header names (" + ::std::to_string(header_names.size())
441 +
") and header source (" + ::std::to_string(header_sources.size()) +
')');
444 auto new_num_headers = headers_.names.size() + header_names.size();
446 if (new_num_headers > ::std::numeric_limits<int>::max()) {
447 throw ::std::invalid_argument(
"Cannot use more than " 448 + ::std::to_string(::std::numeric_limits<int>::max()) +
" headers.");
451 headers_.names.reserve(new_num_headers);
452 headers_.sources.reserve(new_num_headers);
454 for(
auto name_it = header_names.begin(), source_it = header_sources.begin();
455 name_it < header_names.end();
456 name_it++, source_it++) {
457 add_header(*name_it, *source_it);
471 template <
typename RangeOfNameAndSourcePairs>
475 auto num_headers_to_add = named_header_pairs.size();
476 auto new_num_headers = headers_.names.size() + num_headers_to_add;
478 if (new_num_headers > ::std::numeric_limits<int>::max()) {
479 throw ::std::invalid_argument(
"Cannot use more than " 480 + ::std::to_string(::std::numeric_limits<int>::max()) +
" headers.");
483 headers_.names.reserve(new_num_headers);
484 headers_.sources.reserve(new_num_headers);
486 for(
auto&& pair : named_header_pairs) {
487 add_header(pair.first, pair.second);
501 template <
typename RangeOfNames,
typename RangeOfSources>
503 RangeOfNames&& names,
504 RangeOfSources&& sources)
507 return add_headers(names, sources);
519 template <
typename RangeOfNameAndSourcePairs>
523 add_headers(named_header_pairs);
530 headers_.names.clear();
531 headers_.sources.clear();
552 if ((source_ ==
nullptr or *source_ ==
'\0') and options_.preinclude_files.empty()) {
553 throw ::std::invalid_argument(
"Attempt to compile a CUDA program without any source code");
555 auto marshalled_options = cuda::marshalling::marshal(options_);
556 ::std::vector<const char*> option_ptrs = marshalled_options.option_ptrs();
557 return program::detail_::compile(
559 source_ ==
nullptr ?
"" : source_,
560 {headers_.sources.data(), headers_.sources.size()},
561 {headers_.names.data(), headers_.names.size()},
562 {option_ptrs.data(), option_ptrs.size()},
563 {globals_to_register_.data(), globals_to_register_.size()});
577 globals_to_register_.push_back(unmangled_name);
584 globals_to_register_.push_back(unmangled_name.c_str());
600 template <
typename Container>
603 globals_to_register_.reserve(globals_to_register_.size() + globals_to_register.size());
604 for(
const auto& global_name : globals_to_register) {
605 add_registered_global(global_name);
611 template <
typename Container>
614 static_assert(::std::is_same<typename Container::value_type, const char*>::value,
615 "For an rvalue container, we only accept raw C strings as the value type, to prevent" 616 "the possible passing of string-like objects at the end of their lifetime");
617 return add_registered_globals(static_cast<const Container&>(globals_to_register));
621 program_t(::std::string name) : base_t(::std::move(name)) {}
634 ::std::vector<const char*> names;
635 ::std::vector<const char*> sources;
637 ::std::vector<const char*> globals_to_register_;
640 #if CUDA_VERSION >= 11010 653 class program_t<ptx> :
public program::detail_::base_t<ptx> {
656 using parent = program::detail_::base_t<source_kind>;
664 options_.set_target(target_compute_capability);
672 program_t& set_target(
const context_t& context) {
return set_target(context.device()); }
675 program_t& clear_targets() { options_.targets_.clear();
return *
this; }
678 template <
typename Container>
679 program_t& set_targets(Container target_compute_capabilities)
682 for(
const auto& compute_capability : target_compute_capabilities) {
683 options_.add_target(compute_capability);
691 options_.add_target(target_compute_capability);
699 void add_target(
const context_t& context) { add_target(context.device()); }
702 program_t& set_source(
char const* source) { source_ = source;
return *
this; }
705 program_t& set_source(const ::std::string& source) { source_ = source.c_str();
return *
this; }
710 options_ = ::std::move(options);
714 program_t& clear_options() { options_ = {};
return *
this; }
723 if (source_ ==
nullptr or *source_ ==
'\0') {
724 throw ::std::invalid_argument(
"Attempt to compile a CUDA program without any source code");
726 auto marshalled_options = cuda::marshalling::marshal(options_);
727 ::std::vector<const char*> option_ptrs = marshalled_options.option_ptrs();
728 return program::detail_::compile_ptx(
731 {option_ptrs.data(), option_ptrs.size()});
735 program_t(::std::string name) : parent(::std::move(name)) {}
748 #endif // CUDA_VERSION >= 11010 756 template <source_kind_t Kind>
763 template <source_kind_t Kind>
777 #if CUDA_VERSION >= 11020 778 inline unique_span<device::compute_capability_t>
781 int num_supported_archs;
782 auto status = nvrtcGetNumSupportedArchs(&num_supported_archs);
783 throw_if_error<cuda_cpp>(status,
"Failed obtaining the number of target NVRTC architectures");
784 auto raw_archs = ::std::unique_ptr<int[]>(
new int[num_supported_archs]);
785 status = nvrtcGetSupportedArchs(raw_archs.get());
786 throw_if_error<cuda_cpp>(status,
"Failed obtaining the architectures supported by NVRTC");
787 auto result = make_unique_span<device::compute_capability_t>(num_supported_archs);
788 ::std::transform(raw_archs.get(), raw_archs.get() + num_supported_archs, ::std::begin(result),
798 #endif // CUDA_API_WRAPPERS_RTC_PROGRAM_HPP_ program_t & add_registered_globals(const Container &globals_to_register)
Register multiple pre-mangled names of globals, to make them available for use after compilation.
Definition: program.hpp:601
Contains the cuda::rtc::compilation_output_t class and related code.
The CUDA variant of C++, accepted by the NVRTC library.
Definition: types.hpp:41
program_t & clear_options()
Clears any forced values of compilation options, reverting the compilation to the default values...
Definition: program.hpp:537
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
void add_target(const context_t &context)
Have the compilation also produce code for devices with the same compute capability as the device of ...
Definition: program.hpp:331
program_t & set_target(const device_t &device)
Have the compilation produce code for devices with the same compute capability as a given device...
Definition: program.hpp:295
size_t num_headers() const
Definition: program.hpp:282
source_kind_t
The API wrappers support different kinds of source code, accepted by different NVIDIA run-time compil...
Definition: types.hpp:39
void add_target(const device_t &device)
Have the compilation also produce code for devices with the same compute capability as a given device...
Definition: program.hpp:327
program_t & add_target(device::compute_capability_t target_compute_capability)
Have the compilation also produce code for devices with a given compute capability.
Definition: program.hpp:319
Definitions and utility functions relating to run-time compilation (RTC) of CUDA code using the NVRTC...
A numeric designator of the computational capabilities of a CUDA device.
Definition: device_properties.hpp:75
Options to be passed to one of the NVIDIA JIT compilers along with a program's source code...
Definition: compilation_options.hpp:164
program_t & set_targets(Container target_compute_capabilities)
Have the compilation produce code for devices with each of the given compute capabilities.
Definition: program.hpp:308
const program_t & add_headers(RangeOfNames header_names, RangeOfSources header_sources)
Adds multiple "memoized" headers to the program.
Definition: program.hpp:431
Type definitions used in the CUDA run-time compilation wrappers.
program_t & set_target(const context_t &context)
Have the compilation produce code for devices with the same compute capability as the device of a giv...
Definition: program.hpp:299
program_t & set_target(device::compute_capability_t target_compute_capability)
Have the compilation produce code for devices with a given compute capability.
Definition: program.hpp:287
program_t & clear_headers()
Removes all "memoized" headers to be used in the program's compilation.
Definition: program.hpp:528
The output produced by a compilation process by one of the CUDA libraries, including any byproducts...
Definition: compilation_output.hpp:36
static constexpr compute_capability_t from_combined_number(unsigned combined) noexcept
Converts a single-number representation of a compute capability into a proper structured instance of ...
Output of CUDA C++ code JIT-compilation.
Definition: compilation_output.hpp:410
program_t & add_headers(RangeOfNameAndSourcePairs &&named_header_pairs)
Adds multiple "memoized" headers to the program.
Definition: program.hpp:472
const program_t & set_headers(RangeOfNames &&names, RangeOfSources &&sources)
Replaces the set of "memoized" headers used in the program's compilation.
Definition: program.hpp:502
program_t & add_header(String1 &&name, String2 &&source)
Adds another "memoized" header to the program.
Definition: program.hpp:388
device::compute_capability_t compute_capability() const
Obtains the device's compute capability; see cuda::device::compute_capability_t.
Definition: device.hpp:415
program_t & add_registered_global(const ::std::string &unmangled_name)
Register a pre-mangled name of a global, to make available for use after compilation.
Definition: program.hpp:582
program_t & add_registered_globals(Container &&globals_to_register)
See add_registered_globals(const Container&).
Definition: program.hpp:612
#define throw_if_rtc_error_lazy(Kind, status__,...)
Throws a cuda::rtc::runtime_error exception if the status is not success.
Definition: error.hpp:201
program_t & add_header(::std::pair< String1, String2 > &&name_and_source)
See add_header<String1, String2>(String1&&, String2&&).
Definition: program.hpp:414
program_t & add_registered_global(const char *unmangled_name)
Register a pre-mangled name of a global, to make available for use after compilation.
Definition: program.hpp:575
const_cstrings_span header_sources() const
Sources of the "memoized"/off-file-system headers made available to the program (and usable as identi...
Definition: program.hpp:275
program_t< Kind > create(const char *program_name)
Create a new (not-yet-compiled) program without setting most of its constituent fields.
Definition: program.hpp:757
Definition: program.hpp:244
program_t & add_header(const ::std::pair< String1, String2 > &name_and_source)
Adds another "memoized" header to the program.
Definition: program.hpp:405
program_t & set_headers(RangeOfNameAndSourcePairs &&named_header_pairs)
Replaces the set of "memoized" headers used in the program's compilation.
Definition: program.hpp:520
const_cstrings_span header_names() const
Names of the "memoized"/off-file-system headers made available to the program (and usable as identifi...
Definition: program.hpp:265
program_t & clear_targets()
Remove all compute capabilities which were chosen to have code produced for them by the compilation...
Definition: program.hpp:303
span< const char *const > const_cstrings_span
A span of C-style strings the contents of which must not be modified.
Definition: types.hpp:57
Wrapper class for a CUDA device.
Definition: device.hpp:135
Facilities for exception-based handling of errors originating in the NVRTC library, including a basic exception class wrapping ::std::runtime_error.
compilation_output_t< cuda_cpp > compile() const
Compiles the program represented by this object (which, until this point, is just a bunch of unrelate...
Definition: program.hpp:550