Expression Templates Library (ETL)
bias_add.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
13 #pragma once
14 
15 #ifdef ETL_CUDNN_MODE
16 
17 #include "etl/impl/cublas/cuda.hpp"
19 #include "etl/impl/cublas/axpy.hpp"
20 #include "etl/impl/cudnn/cudnn.hpp"
21 
22 #endif
23 
24 namespace etl::impl::cudnn {
25 
26 #ifdef ETL_CUDNN_MODE
27 
34 template <typename I, typename K, typename C>
35 void bias_add_4d(I&& x, K&& b, C&& y) {
36  using type = std::remove_const_t<value_t<I>>;
37 
38  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
39 
40  type alpha[] = {1.0f};
41  type beta[] = {0.0f};
42 
43  decltype(auto) handle = start_cudnn();
44 
45  // Prepare the tensors
46  auto x_tensor = create_tensor_wrapper(x);
47  auto y_tensor = create_tensor_wrapper(y);
48 
49  cudnnTensorDescriptor_t b_tensor;
50  cudnn_check(cudnnCreateTensorDescriptor(&b_tensor));
51  cudnn_check(cudnnSetTensor4dDescriptor(b_tensor, CUDNN_TENSOR_NCHW, data_type, 1, etl::dim<0>(b), 1, 1));
52 
53  // Allocate GPU memory, if necessary
54 
56  b.ensure_gpu_up_to_date();
58 
59  // Copy x -> y
60 
61  cudnn_check(cudnnTransformTensor(handle.get(), alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));
62 
63  // Add b -> y
64 
65  cudnn_check(cudnnAddTensor(handle.get(), alpha, b_tensor, b.gpu_memory(), alpha, *y_tensor, y.gpu_memory()));
66 
67  y.validate_gpu();
68  y.invalidate_cpu();
69 
70  // Release the resources
71  cudnn_check(cudnnDestroyTensorDescriptor(b_tensor));
72 }
73 
80 template <typename I, typename K, typename C>
81 void bias_add_2d(I&& x, K&& b, C&& y) {
82  using type = std::remove_const_t<value_t<I>>;
83 
84  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
85 
86  type alpha[] = {1.0f};
87  type beta[] = {0.0f};
88 
89  decltype(auto) handle = start_cudnn();
90 
91  // Prepare the tensors
92  auto x_tensor = create_tensor_wrapper(x);
93  auto y_tensor = create_tensor_wrapper(y);
94 
95  cudnnTensorDescriptor_t b_tensor;
96  cudnn_check(cudnnCreateTensorDescriptor(&b_tensor));
97  cudnn_check(cudnnSetTensor4dDescriptor(b_tensor, CUDNN_TENSOR_NCHW, data_type, 1, 1, 1, etl::dim<0>(b)));
98 
99  // Allocate GPU memory, if necessary
100 
101  x.ensure_gpu_up_to_date();
102  b.ensure_gpu_up_to_date();
103  y.ensure_gpu_allocated();
104 
105  // Copy x -> y
106 
107  cudnn_check(cudnnTransformTensor(handle.get(), alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));
108 
109  // Add b -> y
110 
111  // This is highly retarded stuff :(
112  // Unfortunately cudnnAddTensor does not support 2D tensors :(
113  // This is solved when EGBLAS is available, since this will be
114  // computed with EGBLAS first
115 
116  {
117  decltype(auto) handle = etl::impl::cublas::start_cublas();
118 
119  for (size_t i = 0; i < etl::dim<0>(x); ++i) {
120  impl::cublas::cublas_axpy(handle.get(), etl::dim<1>(y), alpha, b.gpu_memory(), 1, y.gpu_memory() + i * etl::dim<1>(y), 1);
121  }
122  }
123 
124  y.validate_gpu();
125  y.invalidate_cpu();
126 
127  // Release the resources
128  cudnn_check(cudnnDestroyTensorDescriptor(b_tensor));
129 }
130 
131 #else
132 
133 //COVERAGE_EXCLUDE_BEGIN
134 
/*!
 * \brief Fallback for bias_add_4d when CUDNN is not available/enabled.
 * Must never be called: dispatch elsewhere guarantees this path is
 * unreachable, which cpp_unreachable asserts.
 */
template <typename I, typename K, typename C>
void bias_add_4d([[maybe_unused]] I&& x, [[maybe_unused]] K&& b, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
145 
/*!
 * \brief Fallback for bias_add_2d when CUDNN is not available/enabled.
 * Must never be called: dispatch elsewhere guarantees this path is
 * unreachable, which cpp_unreachable asserts.
 */
template <typename I, typename K, typename C>
void bias_add_2d([[maybe_unused]] I&& x, [[maybe_unused]] K&& b, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
156 
157  //COVERAGE_EXCLUDE_END
158 
159 #endif
160 
161 } //end of namespace etl::impl::cudnn
Definition: bias_add.hpp:24
void ensure_gpu_allocated() const
Ensures that the GPU memory is allocated and that the GPU memory is up to date (to undefined value)...
Definition: sub_view.hpp:717
CUBLAS wrappers for the axpy operation.
Root namespace for the ETL library.
Definition: adapter.hpp:15
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
Utility functions for cublas.
void invalidate_cpu() const noexcept
Invalidates the CPU memory.
Definition: sub_view.hpp:688
bias_add_2d_expr< detail::build_type< E >, detail::build_type< B > > bias_add_2d(const E &x, const B &biases)
Returns the result of adding the bias [K] to the 4D matrix [N1, K, N2, N3].
Definition: bias_add_2d_expr.hpp:378
void ensure_gpu_up_to_date() const
Copy back from the GPU to the expression memory if necessary.
Definition: dyn_matrix_view.hpp:280
bias_add_4d_expr< detail::build_type< E >, detail::build_type< B > > bias_add_4d(const E &x, const B &biases)
Returns the result of adding the bias [K] to the 4D matrix [N1, K, N2, N3].
Definition: bias_add_4d_expr.hpp:388
void validate_gpu() const noexcept
Validates the GPU memory.
Definition: sub_view.hpp:709
Utility functions for cudnn.
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674