12 #include "cuda_runtime.h" 13 #include "cuda_runtime_api.h" 15 #define cuda_check(call) \ 18 if (status != cudaSuccess) { \ 19 std::cerr << "CUDA error: " << cudaGetErrorString(status) << " from " << #call << std::endl \ 20 << "from " << __FILE__ << ":" << __LINE__ << std::endl; \ 24 #define cuda_check_assert(call) \ 27 if (status != cudaSuccess) { \ 28 std::cerr << "CUDA error: " << cudaGetErrorString(status) << " from " << #call << std::endl \ 29 << "from " << __FILE__ << ":" << __LINE__ << std::endl; \ 51 cuda_memory() noexcept : memory(
nullptr), size(0) {}
56 cuda_memory(T* memory,
size_t size) noexcept : memory(memory), size(size) {}
61 cuda_memory([[maybe_unused]]
const cuda_memory& rhs) noexcept : memory(
nullptr), size(0) {
62 cpp_assert(!rhs.is_set(),
"copy of cuda_memory is only possible when not allocated");
68 cuda_memory& operator=(
const cuda_memory& rhs) noexcept {
70 cpp_assert(!is_set(),
"copy of cuda_memory is only possible when not allocated");
71 cpp_assert(!rhs.is_set(),
"copy of cuda_memory is only possible when not allocated");
// NOTE(review): move constructor — takes over rhs's device pointer and size.
// The body lines (which presumably reset rhs so its destructor does not
// release the transferred buffer — confirm) are missing from this extraction.
81 cuda_memory(cuda_memory&& rhs) noexcept : memory(rhs.memory), size(rhs.size) {
// NOTE(review): move assignment operator; its body is not visible in this
// extraction, so its exact semantics (self-assignment handling, resetting
// rhs) cannot be stated here — consult the original header before editing.
90 cuda_memory& operator=(cuda_memory&& rhs) noexcept {
// NOTE(review): assignment from a raw device pointer; the body is not visible
// in this extraction — how the previously held buffer and the size field are
// handled cannot be determined from here.
118 cuda_memory& operator=(T* new_memory) {
// NOTE(review): predicate telling whether the wrapper currently holds a
// device buffer; the body is missing from this extraction (presumably
// `return memory != nullptr;` — confirm against the original header).
137 bool is_set()
const {
// NOTE(review): fragment of the releasing path (destructor and/or reset) —
// hands the held buffer and its element count back to gpu_memory_allocator.
// Surrounding lines are missing from this extraction.
157 gpu_memory_allocator::release(memory, size);
167 template <
typename E>
168 auto cuda_allocate_only(
size_t size) -> cuda_memory<E> {
169 auto* memory = gpu_memory_allocator::allocate<E>(size);
170 return cuda_memory<E>{memory, size};
178 template <
typename E>
179 auto cuda_allocate(
const E& expr,
bool copy =
false) -> cuda_memory<value_t<E>> {
180 auto* memory = gpu_memory_allocator::allocate<value_t<E>>(
etl::size(expr));
183 cuda_check(cudaMemcpy(memory, expr.memory_start(),
etl::size(expr) *
sizeof(value_t<E>), cudaMemcpyHostToDevice));
186 return cuda_memory<value_t<E>>{memory,
etl::size(expr)};
193 template <
typename E>
194 auto cuda_allocate_copy(
const E& expr) -> cuda_memory<value_t<E>> {
195 return cuda_allocate(expr,
true);
205 template <
typename E>
206 auto cuda_allocate(E* ptr,
size_t n,
bool copy =
false) -> cuda_memory<E> {
207 auto* memory = gpu_memory_allocator::allocate<E>(n);
210 cuda_check(cudaMemcpy(memory, ptr, n *
sizeof(E), cudaMemcpyHostToDevice));
213 return cuda_memory<E>{memory};
222 template <
typename T>
223 auto cuda_allocate_copy(T* ptr,
size_t n) -> cuda_memory<T> {
224 return cuda_allocate(ptr, n,
true);
232 template <
typename T>
Wrapper for CUDA memory (when CUDA support is disabled)
Definition: cuda_memory.hpp:233
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
Definition: cuda_memory.hpp:36