17 #define cudnn_check(call) \ 19 cudnnStatus_t status = call; \ 20 if (status != CUDNN_STATUS_SUCCESS) { \ 21 std::cerr << "CUDNN error: " << cudnnGetErrorString(status) << " from " << #call << std::endl \ 22 << "from " << __FILE__ << ":" << __LINE__ << std::endl; \ 38 cudnn_check(cudnnCreate(&handle));
65 cudnn_check(cudnnDestroy(handle));
69 #ifndef ETL_CUDNN_LOCAL_HANDLE 115 rhs.tensor =
nullptr;
143 cudnn_check(cudnnDestroyTensorDescriptor(tensor));
153 cudnn_check(cudnnDestroyFilterDescriptor(tensor));
163 cudnn_check(cudnnDestroyPoolingDescriptor(tensor));
176 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
178 cudnnTensorDescriptor_t tensor;
179 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
180 cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, 1, etl::dim<0>(input), 1, 1));
194 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
196 cudnnTensorDescriptor_t tensor;
197 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
198 cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, 1, 1, etl::dim<0>(input), etl::dim<1>(input)));
212 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
214 cudnnTensorDescriptor_t tensor;
215 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
216 cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, 1, etl::dim<0>(input), etl::dim<1>(input), etl::dim<2>(input)));
230 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
232 cudnnTensorDescriptor_t tensor;
233 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
235 cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, etl::dim<0>(input), etl::dim<1>(input), etl::dim<2>(input), etl::dim<3>(input)));
245 template <
typename I>
259 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
261 const int D1 = etl::dim<0>(input);
262 const int D2 = etl::dim<1>(input);
263 const int D3 = etl::dim<2>(input);
265 int dims[] = {1, 1, D1, D2, D3};
266 int strides[] = {D1 * D2 * D3, D1 * D2 * D3, D2 * D3, D3, 1};
268 cudnnTensorDescriptor_t tensor;
269 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
270 cudnn_check(cudnnSetTensorNdDescriptor(tensor, data_type, 5, dims, strides));
284 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
286 const int D1 = etl::dim<0>(input);
287 const int D2 = etl::dim<1>(input);
288 const int D3 = etl::dim<2>(input);
289 const int D4 = etl::dim<3>(input);
291 int dims[] = {1, D1, D2, D3, D4};
292 int strides[] = {D1 * D2 * D3 * D4, D2 * D3 * D4, D3 * D4, D4, 1};
294 cudnnTensorDescriptor_t tensor;
295 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
296 cudnn_check(cudnnSetTensorNdDescriptor(tensor, data_type, 5, dims, strides));
306 template <
typename I>
316 template <
typename I>
320 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
327 cudnnTensorDescriptor_t tensor;
328 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
329 cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, 1, 1, 1,
etl::size(input)));
339 template <
typename I>
353 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
355 cudnnTensorDescriptor_t tensor;
356 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
357 cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, etl::dim<0>(input), 1, 1, 1));
371 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
373 cudnnTensorDescriptor_t tensor;
374 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
375 cudnn_check(cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, etl::dim<0>(input), etl::dim<1>(input), 1, 1));
389 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
391 cudnnTensorDescriptor_t tensor;
392 cudnn_check(cudnnCreateTensorDescriptor(&tensor));
394 cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, data_type, etl::dim<0>(input), etl::dim<1>(input), etl::dim<2>(input), etl::dim<3>(input)));
404 template <
typename I>
418 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
420 cudnnFilterDescriptor_t filter;
421 cudnn_check(cudnnCreateFilterDescriptor(&filter));
422 cudnn_check(cudnnSetFilter4dDescriptor(filter, data_type, CUDNN_TENSOR_NCHW, 1, 1, etl::dim<0>(kernel), etl::dim<1>(kernel)));
436 auto data_type = std::is_same_v<std::remove_const_t<T>,
float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
438 cudnnFilterDescriptor_t filter;
439 cudnn_check(cudnnCreateFilterDescriptor(&filter));
441 cudnnSetFilter4dDescriptor(filter, data_type, CUDNN_TENSOR_NCHW, etl::dim<0>(kernel), etl::dim<1>(kernel), etl::dim<2>(kernel), etl::dim<3>(kernel)));
451 template <
typename I>
460 inline cudnnPoolingDescriptor_t
create_pooling_desc(cudnnPoolingMode_t mode,
size_t c1,
size_t c2,
size_t s1,
size_t s2,
size_t p1,
size_t p2) {
461 cudnnPoolingDescriptor_t pooling_desc;
462 cudnn_check(cudnnCreatePoolingDescriptor(&pooling_desc));
463 cudnn_check(cudnnSetPooling2dDescriptor(pooling_desc, mode, CUDNN_PROPAGATE_NAN, c1, c2, p1, p2, s1, s2));
473 cudnnPoolingMode_t mode,
size_t c1,
size_t c2,
size_t s1,
size_t s2,
size_t p1,
size_t p2) {
482 cudnnPoolingMode_t mode,
size_t c1,
size_t c2,
size_t c3,
size_t s1,
size_t s2,
size_t s3,
size_t p1,
size_t p2,
size_t p3) {
483 int c[] = {int(c1), int(c2), int(c3)};
484 int s[] = {int(s1), int(s2), int(s3)};
485 int p[] = {int(p1), int(p2), int(p3)};
487 cudnnPoolingDescriptor_t pooling_desc;
488 cudnn_check(cudnnCreatePoolingDescriptor(&pooling_desc));
489 cudnn_check(cudnnSetPoolingNdDescriptor(pooling_desc, mode, CUDNN_PROPAGATE_NAN, 3, c, p, s));
499 cudnnPoolingMode_t mode,
size_t c1,
size_t c2,
size_t c3,
size_t s1,
size_t s2,
size_t s3,
size_t p1,
size_t p2,
size_t p3) {
503 template <
typename Value>
507 #ifdef ETL_CUDNN_DESC_CACHE 513 operator bool()
const {
517 Value * operator->() {
522 template <
typename Key,
typename Value>
524 #ifdef ETL_CUDNN_DESC_CACHE 525 std::vector<std::pair<Key, std::unique_ptr<Value>>> cache;
528 for (
auto & [k, value] : cache) {
530 return {
true, *value};
534 cache.emplace_back(key, std::make_unique<Value>());
536 return {
false, *cache.back().second};
540 return {
false, Value{}};
auto s(T &&value)
Force the evaluation of the given expression.
Definition: stop.hpp:18
Definition: bias_add.hpp:24
cudnn_wrapper(cudnn_wrapper &&rhs)
Move construct a cudnn_wrapper.
Definition: cudnn.hpp:113
T tensor
The CUDNN tensor.
Definition: cudnn.hpp:99
cudnnTensorDescriptor_t create_tensor_flat(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:317
cudnnTensorDescriptor_t create_tensor_front(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:350
cudnn_wrapper< cudnnTensorDescriptor_t > create_tensor_flat_wrapper(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:340
cudnn_handle(cudnnHandle_t handle)
Construct the helper from the raw handle.
Definition: cudnn.hpp:45
cudnn_wrapper(T tensor)
Create a new cudnn_wrapper for the given tensor.
Definition: cudnn.hpp:104
cudnn_wrapper< cudnnPoolingDescriptor_t > create_pooling_desc_wrapper(cudnnPoolingMode_t mode, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2)
Create a wrapped CUDNN 2D pooling descriptor for the given pooling mode, window size (c1, c2), stride (s1, s2) and padding (p1, p2).
Definition: cudnn.hpp:472
~cudnn_wrapper()
Delete the cudnn_wrapper, releasing the tensor.
cudnnPoolingDescriptor_t create_pooling_desc(cudnnPoolingMode_t mode, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2)
Create a CUDNN 2D pooling descriptor for the given pooling mode, window size (c1, c2), stride (s1, s2) and padding (p1, p2).
Definition: cudnn.hpp:460
~cudnn_handle()
Destruct the helper and release the raw cudnn handle.
Definition: cudnn.hpp:64
T operator*()
Convert back to the CUDNN tensor type.
Definition: cudnn.hpp:127
cudnn_handle & start_cudnn()
Start cudnn and return a RAII helper over a raw cudnn handle.
Definition: cudnn.hpp:75
cudnn_handle()
Construct the helper and create the handle directly.
Definition: cudnn.hpp:37
cudnnTensorDescriptor_t create_tensor_5d(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:256
cudnn_wrapper< cudnnTensorDescriptor_t > create_tensor_wrapper(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:246
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
cudnnFilterDescriptor_t create_filter(I &&kernel)
Create a CUDNN filter tensor for the given input matrix.
Definition: cudnn.hpp:415
Wrapper for CUDNN tensor.
Definition: cudnn.hpp:98
cudnnTensorDescriptor_t create_tensor(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:173
cudnn_wrapper< cudnnTensorDescriptor_t > create_tensor_wrapper_5d(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:307
cudnnHandle_t handle
The raw cudnn handle.
Definition: cudnn.hpp:32
const_return_type operator[](size_t j) const
Returns the element at the given index.
Definition: dyn_matrix_view.hpp:71
cudnn_wrapper< cudnnTensorDescriptor_t > create_tensor_front_wrapper(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:405
Definition: cudnn.hpp:523
RAII helper to manage the CUDNN handle.
Definition: cudnn.hpp:31
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
Definition: cudnn.hpp:504
cudnn_wrapper< cudnnFilterDescriptor_t > create_filter_wrapper(I &&kernel)
Create a CUDNN filter tensor for the given input matrix.
Definition: cudnn.hpp:452