12 #include "cuda_runtime.h" 13 #include "cuda_runtime_api.h" 15 #define cuda_check(call) \ 18 if (status != cudaSuccess) { \ 19 std::cerr << "CUDA error: " << cudaGetErrorString(status) << " from " << #call << std::endl \ 20 << "from " << __FILE__ << ":" << __LINE__ << std::endl; \ 24 #define cuda_check_assert(call) \ 27 if (status != cudaSuccess) { \ 28 std::cerr << "CUDA error: " << cudaGetErrorString(status) << " from " << #call << std::endl \ 29 << "from " << __FILE__ << ":" << __LINE__ << std::endl; \ 51 cuda_memory() noexcept : memory(
nullptr), size(0) {}
56 cuda_memory(T* memory,
size_t size) noexcept : memory(memory), size(size) {}
61 cuda_memory([[maybe_unused]]
const cuda_memory& rhs) noexcept : memory(
nullptr), size(0) {
62 cpp_assert(!rhs.is_set(),
"copy of cuda_memory is only possible when not allocated");
68 cuda_memory& operator=(
const cuda_memory& rhs) noexcept {
70 cpp_assert(!is_set(),
"copy of cuda_memory is only possible when not allocated");
71 cpp_assert(!rhs.is_set(),
"copy of cuda_memory is only possible when not allocated");
// NOTE(review): move constructor — takes over rhs's device pointer and size.
// The body lines (which presumably reset rhs so its destructor does not
// release the transferred buffer — confirm) are missing from this extraction.
81 cuda_memory(cuda_memory&& rhs) noexcept : memory(rhs.memory), size(rhs.size) {
// NOTE(review): move assignment operator; its body is not visible in this
// extraction, so its exact semantics (self-assignment handling, resetting
// rhs) cannot be stated here — consult the original header before editing.
90 cuda_memory& operator=(cuda_memory&& rhs) noexcept {
// NOTE(review): assignment from a raw device pointer; the body is not visible
// in this extraction — how the previously held buffer and the size field are
// handled cannot be determined from here.
118 cuda_memory& operator=(T* new_memory) {
// NOTE(review): predicate telling whether the wrapper currently holds a
// device buffer; the body is missing from this extraction (presumably
// `return memory != nullptr;` — confirm against the original header).
137 bool is_set()
const {
// NOTE(review): fragment of the releasing path (destructor and/or reset) —
// hands the held buffer and its element count back to gpu_memory_allocator.
// Surrounding lines are missing from this extraction.
157 gpu_memory_allocator::release(memory, size);
167 template <
typename E>
168 auto cuda_allocate_only(
size_t size) -> cuda_memory<E> {
169 auto* memory = gpu_memory_allocator::allocate<E>(size);
170 return cuda_memory<E>{memory, size};
178 template <
typename E>
179 auto cuda_allocate(
const E& expr,
bool copy =
false) -> cuda_memory<value_t<E>> {
180 auto* memory = gpu_memory_allocator::allocate<value_t<E>>(
etl::size(expr));
183 cuda_check(cudaMemcpy(memory, expr.memory_start(),
etl::size(expr) *
sizeof(value_t<E>), cudaMemcpyHostToDevice));
186 return cuda_memory<value_t<E>>{memory,
etl::size(expr)};
193 template <
typename E>
194 auto cuda_allocate_copy(
const E& expr) -> cuda_memory<value_t<E>> {
195 return cuda_allocate(expr,
true);
205 template <
typename E>
206 auto cuda_allocate(E* ptr,
size_t n,
bool copy =
false) -> cuda_memory<E> {
207 auto* memory = gpu_memory_allocator::allocate<E>(n);
210 cuda_check(cudaMemcpy(memory, ptr, n *
sizeof(E), cudaMemcpyHostToDevice));
213 return cuda_memory<E>{memory};
222 template <
typename T>
223 auto cuda_allocate_copy(T* ptr,
size_t n) -> cuda_memory<T> {
224 return cuda_allocate(ptr, n,
true);
232 template <
typename T>
Wrapper for CUDA memory (when CUDA support is disabled)
Definition: cuda_memory.hpp:233
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
Definition: cuda_memory.hpp:36