Expression Templates Library (ETL)
cudnn.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
13 #pragma once
14 
15 #include "cudnn.h"
16 
/*!
 * \brief Check the result of a CUDNN call and print a diagnostic to stderr
 * (error string, call text, file and line) when it is not CUDNN_STATUS_SUCCESS.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe inside a brace-less if/else. Errors are reported but
 * not fatal: execution continues after the message.
 */
#define cudnn_check(call)                                                                                 \
    do {                                                                                                  \
        cudnnStatus_t status = call;                                                                      \
        if (status != CUDNN_STATUS_SUCCESS) {                                                             \
            std::cerr << "CUDNN error: " << cudnnGetErrorString(status) << " from " << #call << std::endl \
                      << "from " << __FILE__ << ":" << __LINE__ << std::endl;                             \
        }                                                                                                 \
    } while (0)
25 
26 namespace etl::impl::cudnn {
27 
31 struct cudnn_handle {
32  cudnnHandle_t handle;
33 
38  cudnn_check(cudnnCreate(&handle));
39  }
40 
45  explicit cudnn_handle(cudnnHandle_t handle) : handle(handle) {}
46 
47  cudnn_handle(const cudnn_handle& rhs) = delete;
48  cudnn_handle& operator=(const cudnn_handle& rhs) = delete;
49 
50  cudnn_handle(cudnn_handle&& rhs) noexcept = default;
51  cudnn_handle& operator=(cudnn_handle&& rhs) noexcept = default;
52 
57  cudnnHandle_t get() {
58  return handle;
59  }
60 
65  cudnn_check(cudnnDestroy(handle));
66  }
67 };
68 
69 #ifndef ETL_CUDNN_LOCAL_HANDLE
70 
76  static cudnn_handle handle;
77  return handle;
78 }
79 
80 #else
81 
86 inline cudnn_handle start_cudnn() {
87  return {};
88 }
89 
90 #endif
91 
/*!
 * \brief RAII wrapper for a CUDNN descriptor handle of type T.
 *
 * The destructor is only declared here; it is specialized below for each
 * supported descriptor type to call the matching cudnnDestroy* function.
 */
template <typename T>
struct cudnn_wrapper {
    T tensor; ///< The wrapped CUDNN descriptor

    /*!
     * \brief Create a new cudnn_wrapper owning the given tensor
     * \param tensor The raw CUDNN descriptor to take ownership of
     */
    explicit cudnn_wrapper(T tensor) : tensor(tensor) {}

    /*!
     * \brief Move construct a cudnn_wrapper, stealing ownership:
     * the source is nulled so only one wrapper releases the descriptor.
     * \param rhs The wrapper to move from
     */
    cudnn_wrapper(cudnn_wrapper&& rhs) noexcept {
        tensor     = rhs.tensor;
        rhs.tensor = nullptr;
    }

    cudnn_wrapper(const cudnn_wrapper& rhs) = delete;
    cudnn_wrapper& operator=(const cudnn_wrapper& rhs) = delete;

    cudnn_wrapper& operator=(cudnn_wrapper&& rhs) = delete;

    /*!
     * \brief Convert back to the raw CUDNN descriptor
     * \return the wrapped descriptor
     */
    T operator*() {
        return tensor;
    }

    /*!
     * \brief Delete the cudnn_wrapper, releasing the descriptor
     * (defined per descriptor type below).
     */
    ~cudnn_wrapper();
};
136 
140 template <>
142  if (tensor) {
143  cudnn_check(cudnnDestroyTensorDescriptor(tensor));
144  }
145 }
146 
150 template <>
152  if (tensor) {
153  cudnn_check(cudnnDestroyFilterDescriptor(tensor));
154  }
155 }
156 
160 template <>
162  if (tensor) {
163  cudnn_check(cudnnDestroyPoolingDescriptor(tensor));
164  }
165 }
166 
172 template <etl_1d I>
173 cudnnTensorDescriptor_t create_tensor(I&& input) {
174  using T = value_t<I>;
175 
176  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
177 
178  cudnnTensorDescriptor_t tensor;
179  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
180  cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, 1, etl::dim<0>(input), 1, 1));
181 
182  return tensor;
183 }
184 
190 template <etl_2d I>
191 cudnnTensorDescriptor_t create_tensor(I&& input) {
192  using T = value_t<I>;
193 
194  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
195 
196  cudnnTensorDescriptor_t tensor;
197  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
198  cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, 1, 1, etl::dim<0>(input), etl::dim<1>(input)));
199 
200  return tensor;
201 }
202 
208 template <etl_3d I>
209 cudnnTensorDescriptor_t create_tensor(I&& input) {
210  using T = value_t<I>;
211 
212  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
213 
214  cudnnTensorDescriptor_t tensor;
215  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
216  cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, 1, etl::dim<0>(input), etl::dim<1>(input), etl::dim<2>(input)));
217 
218  return tensor;
219 }
220 
226 template <etl_4d I>
227 cudnnTensorDescriptor_t create_tensor(I&& input) {
228  using T = value_t<I>;
229 
230  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
231 
232  cudnnTensorDescriptor_t tensor;
233  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
234  cudnn_check(
235  cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, etl::dim<0>(input), etl::dim<1>(input), etl::dim<2>(input), etl::dim<3>(input)));
236 
237  return tensor;
238 }
239 
245 template <typename I>
248 }
249 
255 template <etl_3d I>
256 cudnnTensorDescriptor_t create_tensor_5d(I&& input) {
257  using T = value_t<I>;
258 
259  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
260 
261  const int D1 = etl::dim<0>(input);
262  const int D2 = etl::dim<1>(input);
263  const int D3 = etl::dim<2>(input);
264 
265  int dims[] = {1, 1, D1, D2, D3};
266  int strides[] = {D1 * D2 * D3, D1 * D2 * D3, D2 * D3, D3, 1};
267 
268  cudnnTensorDescriptor_t tensor;
269  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
270  cudnn_check(cudnnSetTensorNdDescriptor(tensor, data_type, 5, dims, strides));
271 
272  return tensor;
273 }
274 
280 template <etl_4d I>
281 cudnnTensorDescriptor_t create_tensor_5d(I&& input) {
282  using T = value_t<I>;
283 
284  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
285 
286  const int D1 = etl::dim<0>(input);
287  const int D2 = etl::dim<1>(input);
288  const int D3 = etl::dim<2>(input);
289  const int D4 = etl::dim<3>(input);
290 
291  int dims[] = {1, D1, D2, D3, D4};
292  int strides[] = {D1 * D2 * D3 * D4, D2 * D3 * D4, D3 * D4, D4, 1};
293 
294  cudnnTensorDescriptor_t tensor;
295  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
296  cudnn_check(cudnnSetTensorNdDescriptor(tensor, data_type, 5, dims, strides));
297 
298  return tensor;
299 }
300 
306 template <typename I>
309 }
310 
316 template <typename I>
317 cudnnTensorDescriptor_t create_tensor_flat(I&& input) {
318  using T = value_t<I>;
319 
320  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
321 
322  // Surprisingly, CUDNN does not do any optimization for flat vectors
323  // It means that the position of the dimension is very important
324  // Putting at the first position (N) is generally the slowest case
325  // But putting it at the last (W) seems better
326 
327  cudnnTensorDescriptor_t tensor;
328  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
329  cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, 1, 1, 1, etl::size(input)));
330 
331  return tensor;
332 }
333 
339 template <typename I>
342 }
343 
349 template <etl_1d I>
350 cudnnTensorDescriptor_t create_tensor_front(I&& input) {
351  using T = value_t<I>;
352 
353  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
354 
355  cudnnTensorDescriptor_t tensor;
356  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
357  cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, etl::dim<0>(input), 1, 1, 1));
358 
359  return tensor;
360 }
361 
367 template <etl_2d I>
368 cudnnTensorDescriptor_t create_tensor_front(I&& input) {
369  using T = value_t<I>;
370 
371  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
372 
373  cudnnTensorDescriptor_t tensor;
374  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
375  cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, etl::dim<0>(input), etl::dim<1>(input), 1, 1));
376 
377  return tensor;
378 }
379 
385 template <etl_4d I>
386 cudnnTensorDescriptor_t create_tensor_front(I&& input) {
387  using T = value_t<I>;
388 
389  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
390 
391  cudnnTensorDescriptor_t tensor;
392  cudnn_check(cudnnCreateTensorDescriptor(&tensor));
393  cudnn_check(
394  cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, etl::dim<0>(input), etl::dim<1>(input), etl::dim<2>(input), etl::dim<3>(input)));
395 
396  return tensor;
397 }
398 
404 template <typename I>
407 }
408 
414 template <etl_2d I>
415 cudnnFilterDescriptor_t create_filter(I&& kernel) {
416  using T = value_t<I>;
417 
418  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
419 
420  cudnnFilterDescriptor_t filter;
421  cudnn_check(cudnnCreateFilterDescriptor(&filter));
422  cudnn_check(cudnnSetFilter4dDescriptor(filter, data_type, CUDNN_TENSOR_NCHW, 1, 1, etl::dim<0>(kernel), etl::dim<1>(kernel)));
423 
424  return filter;
425 }
426 
432 template <etl_4d I>
433 cudnnFilterDescriptor_t create_filter(I&& kernel) {
434  using T = value_t<I>;
435 
436  auto data_type = std::is_same_v<std::remove_const_t<T>, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
437 
438  cudnnFilterDescriptor_t filter;
439  cudnn_check(cudnnCreateFilterDescriptor(&filter));
440  cudnn_check(
441  cudnnSetFilter4dDescriptor(filter, data_type, CUDNN_TENSOR_NCHW, etl::dim<0>(kernel), etl::dim<1>(kernel), etl::dim<2>(kernel), etl::dim<3>(kernel)));
442 
443  return filter;
444 }
445 
451 template <typename I>
454 }
455 
460 inline cudnnPoolingDescriptor_t create_pooling_desc(cudnnPoolingMode_t mode, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
461  cudnnPoolingDescriptor_t pooling_desc;
462  cudnn_check(cudnnCreatePoolingDescriptor(&pooling_desc));
463  cudnn_check(cudnnSetPooling2dDescriptor(pooling_desc, mode, CUDNN_PROPAGATE_NAN, c1, c2, p1, p2, s1, s2));
464 
465  return pooling_desc;
466 }
467 
473  cudnnPoolingMode_t mode, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
474  return cudnn_wrapper<cudnnPoolingDescriptor_t>{create_pooling_desc(mode, c1, c2, s1, s2, p1, p2)};
475 }
476 
481 inline cudnnPoolingDescriptor_t create_pooling_desc(
482  cudnnPoolingMode_t mode, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3) {
483  int c[] = {int(c1), int(c2), int(c3)};
484  int s[] = {int(s1), int(s2), int(s3)};
485  int p[] = {int(p1), int(p2), int(p3)};
486 
487  cudnnPoolingDescriptor_t pooling_desc;
488  cudnn_check(cudnnCreatePoolingDescriptor(&pooling_desc));
489  cudnn_check(cudnnSetPoolingNdDescriptor(pooling_desc, mode, CUDNN_PROPAGATE_NAN, 3, c, p, s));
490 
491  return pooling_desc;
492 }
493 
499  cudnnPoolingMode_t mode, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3) {
500  return cudnn_wrapper<cudnnPoolingDescriptor_t>{create_pooling_desc(mode, c1, c2, c3, s1, s2, s3, p1, p2, p3)};
501 }
502 
/*!
 * \brief Result of a cudnn_desc_cache lookup: a found flag plus the value slot.
 *
 * When ETL_CUDNN_DESC_CACHE is defined, the value is a reference into the
 * cache; otherwise it is a transient copy owned by this object.
 */
template <typename Value>
struct cudnn_desc_cache_value {
    bool found; ///< true when the descriptor was already present in the cache

#ifdef ETL_CUDNN_DESC_CACHE
    Value& value; ///< Reference to the cached value
#else
    Value value; ///< Transient value (caching disabled)
#endif

    /*!
     * \brief Returns true if the value was found in the cache.
     */
    operator bool() const {
        return found;
    }

    /*!
     * \brief Access the underlying value.
     */
    Value* operator->() {
        return &value;
    }
};
521 
522 template <typename Key, typename Value>
524 #ifdef ETL_CUDNN_DESC_CACHE
525  std::vector<std::pair<Key, std::unique_ptr<Value>>> cache;
526 
527  cudnn_desc_cache_value<Value> operator[]([[maybe_unused]] const Key& key) {
528  for (auto & [k, value] : cache) {
529  if (k == key) {
530  return {true, *value};
531  }
532  }
533 
534  cache.emplace_back(key, std::make_unique<Value>());
535 
536  return {false, *cache.back().second};
537  }
538 #else
539  cudnn_desc_cache_value<Value> operator[]([[maybe_unused]] const Key& key) {
540  return {false, Value{}};
541  }
542 #endif
543 };
544 
545 } //end of namespace etl::impl::cudnn
auto s(T &&value)
Force the evaluation of the given expression.
Definition: stop.hpp:18
Definition: bias_add.hpp:24
cudnn_wrapper(cudnn_wrapper &&rhs)
Move construct a cudnn_wrapper.
Definition: cudnn.hpp:113
T tensor
The CUDNN tensor.
Definition: cudnn.hpp:99
cudnnTensorDescriptor_t create_tensor_flat(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:317
cudnnTensorDescriptor_t create_tensor_front(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:350
cudnn_wrapper< cudnnTensorDescriptor_t > create_tensor_flat_wrapper(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:340
cudnn_handle(cudnnHandle_t handle)
Construct the helper from the raw handle.
Definition: cudnn.hpp:45
cudnn_wrapper(T tensor)
Create a new cudnn_wrapper for the given tensor.
Definition: cudnn.hpp:104
cudnn_wrapper< cudnnPoolingDescriptor_t > create_pooling_desc_wrapper(cudnnPoolingMode_t mode, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2)
Create a CUDNN pooling descriptor for the given input matrix.
Definition: cudnn.hpp:472
~cudnn_wrapper()
Delete the cudnn_wrapper, releasing the tensor.
cudnnPoolingDescriptor_t create_pooling_desc(cudnnPoolingMode_t mode, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2)
Create a CUDNN pooling descriptor for the given input matrix.
Definition: cudnn.hpp:460
~cudnn_handle()
Destruct the helper and release the raw cudnn handle.
Definition: cudnn.hpp:64
T operator*()
Convert back to the CUDNN tensor type.
Definition: cudnn.hpp:127
cudnn_handle & start_cudnn()
Start cudnn and return a RAII helper over a raw cudnn handle.
Definition: cudnn.hpp:75
cudnn_handle()
Construct the helper and create the handle directly.
Definition: cudnn.hpp:37
cudnnTensorDescriptor_t create_tensor_5d(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:256
cudnn_wrapper< cudnnTensorDescriptor_t > create_tensor_wrapper(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:246
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
cudnnFilterDescriptor_t create_filter(I &&kernel)
Create a CUDNN filter tensor for the given input matrix.
Definition: cudnn.hpp:415
Wrapper for CUDNN tensor.
Definition: cudnn.hpp:98
cudnnTensorDescriptor_t create_tensor(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:173
cudnn_wrapper< cudnnTensorDescriptor_t > create_tensor_wrapper_5d(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:307
cudnnHandle_t handle
The raw cudnn handle.
Definition: cudnn.hpp:32
const_return_type operator[](size_t j) const
Returns the element at the given index.
Definition: dyn_matrix_view.hpp:71
cudnn_wrapper< cudnnTensorDescriptor_t > create_tensor_front_wrapper(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:405
Definition: cudnn.hpp:523
RAII helper to manage CUDNN handle.
Definition: cudnn.hpp:31
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
cudnn_wrapper< cudnnFilterDescriptor_t > create_filter_wrapper(I &&kernel)
Create a CUDNN filter tensor for the given input matrix.
Definition: cudnn.hpp:452