Expression Templates Library (ETL)
cuda_memory.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #ifdef ETL_CUDA
#include "cuda.h"
#include "cuda_runtime.h"
#include "cuda_runtime_api.h"

#include <cstdlib>
#include <iostream>
14 
/*!
 * \brief Evaluate a CUDA runtime call and report any error to std::cerr.
 *
 * The error string, the failing call and the source location are printed;
 * execution then continues (use cuda_check_assert to abort on failure).
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and is safe inside unbraced if/else (the bare { } form was not).
 */
#define cuda_check(call)                                                                                \
    do {                                                                                                \
        auto status = call;                                                                             \
        if (status != cudaSuccess) {                                                                    \
            std::cerr << "CUDA error: " << cudaGetErrorString(status) << " from " << #call << std::endl \
                      << "from " << __FILE__ << ":" << __LINE__ << std::endl;                           \
        }                                                                                               \
    } while (0)
23 
/*!
 * \brief Evaluate a CUDA runtime call and abort the process on any error.
 *
 * The error string, the failing call and the source location are printed
 * to std::cerr before std::abort() is called.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and is safe inside unbraced if/else (the bare { } form was not).
 */
#define cuda_check_assert(call)                                                                         \
    do {                                                                                                \
        auto status = call;                                                                             \
        if (status != cudaSuccess) {                                                                    \
            std::cerr << "CUDA error: " << cudaGetErrorString(status) << " from " << #call << std::endl \
                      << "from " << __FILE__ << ":" << __LINE__ << std::endl;                           \
            std::abort();                                                                               \
        }                                                                                               \
    } while (0)
33 
34 #endif
35 
36 namespace etl::impl::cuda {
37 
38 #ifdef ETL_CUDA
39 
43 template <typename T>
44 struct cuda_memory {
45  T* memory;
46  size_t size;
47 
51  cuda_memory() noexcept : memory(nullptr), size(0) {}
52 
56  cuda_memory(T* memory, size_t size) noexcept : memory(memory), size(size) {}
57 
61  cuda_memory([[maybe_unused]] const cuda_memory& rhs) noexcept : memory(nullptr), size(0) {
62  cpp_assert(!rhs.is_set(), "copy of cuda_memory is only possible when not allocated");
63  }
64 
68  cuda_memory& operator=(const cuda_memory& rhs) noexcept {
69  if (this != &rhs) {
70  cpp_assert(!is_set(), "copy of cuda_memory is only possible when not allocated");
71  cpp_assert(!rhs.is_set(), "copy of cuda_memory is only possible when not allocated");
72  }
73 
74  return *this;
75  }
76 
81  cuda_memory(cuda_memory&& rhs) noexcept : memory(rhs.memory), size(rhs.size) {
82  rhs.memory = nullptr;
83  rhs.size = 0;
84  }
85 
90  cuda_memory& operator=(cuda_memory&& rhs) noexcept {
91  if (this != &rhs) {
92  free_memory();
93 
94  memory = rhs.memory;
95  size = rhs.size;
96 
97  rhs.memory = nullptr;
98  rhs.size = 0;
99  }
100 
101  return *this;
102  }
103 
109  ~cuda_memory() {
110  free_memory();
111  }
112 
118  cuda_memory& operator=(T* new_memory) {
119  free_memory();
120 
121  memory = new_memory;
122 
123  return *this;
124  }
125 
129  T* get() const {
130  return memory;
131  }
132 
137  bool is_set() const {
138  return memory;
139  }
140 
146  void reset() {
147  free_memory();
148  memory = nullptr;
149  }
150 
151 private:
155  void free_memory() {
156  if (memory) {
157  gpu_memory_allocator::release(memory, size);
158  }
159  }
160 };
161 
167 template <typename E>
168 auto cuda_allocate_only(size_t size) -> cuda_memory<E> {
169  auto* memory = gpu_memory_allocator::allocate<E>(size);
170  return cuda_memory<E>{memory, size};
171 }
172 
178 template <typename E>
179 auto cuda_allocate(const E& expr, bool copy = false) -> cuda_memory<value_t<E>> {
180  auto* memory = gpu_memory_allocator::allocate<value_t<E>>(etl::size(expr));
181 
182  if (copy) {
183  cuda_check(cudaMemcpy(memory, expr.memory_start(), etl::size(expr) * sizeof(value_t<E>), cudaMemcpyHostToDevice));
184  }
185 
186  return cuda_memory<value_t<E>>{memory, etl::size(expr)};
187 }
188 
193 template <typename E>
194 auto cuda_allocate_copy(const E& expr) -> cuda_memory<value_t<E>> {
195  return cuda_allocate(expr, true);
196 }
197 
205 template <typename E>
206 auto cuda_allocate(E* ptr, size_t n, bool copy = false) -> cuda_memory<E> {
207  auto* memory = gpu_memory_allocator::allocate<E>(n);
208 
209  if (copy) {
210  cuda_check(cudaMemcpy(memory, ptr, n * sizeof(E), cudaMemcpyHostToDevice));
211  }
212 
213  return cuda_memory<E>{memory};
214 }
215 
222 template <typename T>
223 auto cuda_allocate_copy(T* ptr, size_t n) -> cuda_memory<T> {
224  return cuda_allocate(ptr, n, true);
225 }
226 
227 #else
228 
/*!
 * \brief Inert placeholder for cuda_memory when ETL is built without CUDA
 * support: it carries no members and owns nothing.
 */
template <typename T>
struct cuda_memory {
    // No members needed: an empty type is enough when CUDA is disabled.
};
236 
237 #endif
238 
239 } //end of namespace etl::impl::cuda
Wrapper for CUDA memory (when CUDA support is disabled)
Definition: cuda_memory.hpp:233
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
Definition: cuda_memory.hpp:36