17 #include "etl/impl/cublas/cuda.hpp" 34 template <
typename I,
typename K,
typename C>
36 using type = std::remove_const_t<value_t<I>>;
38 auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
40 type alpha[] = {1.0f};
43 decltype(
auto) handle = start_cudnn();
46 auto x_tensor = create_tensor_wrapper(x);
47 auto y_tensor = create_tensor_wrapper(y);
49 cudnnTensorDescriptor_t b_tensor;
50 cudnn_check(cudnnCreateTensorDescriptor(&b_tensor));
51 cudnn_check(cudnnSetTensor4dDescriptor(b_tensor, CUDNN_TENSOR_NCHW, data_type, 1,
etl::
dim<0>(b), 1, 1));
56 b.ensure_gpu_up_to_date();
61 cudnn_check(cudnnTransformTensor(handle.get(), alpha, *x_tensor, x.
gpu_memory(), beta, *y_tensor, y.gpu_memory()));
65 cudnn_check(cudnnAddTensor(handle.get(), alpha, b_tensor, b.gpu_memory(), alpha, *y_tensor, y.gpu_memory()));
71 cudnn_check(cudnnDestroyTensorDescriptor(b_tensor));
80 template <typename I, typename K, typename C>
82 using type = std::remove_const_t<value_t<I>>;
84 auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
86 type alpha[] = {1.0f};
89 decltype(
auto) handle = start_cudnn();
92 auto x_tensor = create_tensor_wrapper(x);
93 auto y_tensor = create_tensor_wrapper(y);
95 cudnnTensorDescriptor_t b_tensor;
96 cudnn_check(cudnnCreateTensorDescriptor(&b_tensor));
97 cudnn_check(cudnnSetTensor4dDescriptor(b_tensor, CUDNN_TENSOR_NCHW, data_type, 1, 1, 1,
etl::
dim<0>(b)));
101 x.ensure_gpu_up_to_date();
102 b.ensure_gpu_up_to_date();
103 y.ensure_gpu_allocated();
107 cudnn_check(cudnnTransformTensor(handle.get(), alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));
117 decltype(
auto) handle =
etl::impl::cublas::start_cublas();
119 for (
size_t i = 0; i <
etl::
dim<0>(x); ++i) {
120 impl::cublas::cublas_axpy(handle.get(), etl::dim<1>(y), alpha, b.gpu_memory(), 1, y.gpu_memory() + i * etl::dim<1>(y), 1);
128 cudnn_check(cudnnDestroyTensorDescriptor(b_tensor));
141 template <
typename I,
typename K,
typename C>
142 void bias_add_4d([[maybe_unused]] I&& x, [[maybe_unused]] K&& b, [[maybe_unused]] C&& y) {
143 cpp_unreachable(
"CUDNN not available/enabled");
152 template <
typename I,
typename K,
typename C>
153 void bias_add_2d([[maybe_unused]] I&& x, [[maybe_unused]] K&& b, [[maybe_unused]] C&& y) {
154 cpp_unreachable(
"CUDNN not available/enabled");
Definition: bias_add.hpp:24
void ensure_gpu_allocated() const
Ensures that the GPU memory is allocated and that the GPU memory is up to date (to undefined value)...
Definition: sub_view.hpp:717
CUBLAS wrappers for the axpy operation.
Root namespace for the ETL library.
Definition: adapter.hpp:15
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
Utility functions for cublas.
void invalidate_cpu() const noexcept
Invalidates the CPU memory.
Definition: sub_view.hpp:688
bias_add_2d_expr< detail::build_type< E >, detail::build_type< B > > bias_add_2d(const E &x, const B &biases)
Returns the result of adding the bias [K] to the 2D matrix [N1, K].
Definition: bias_add_2d_expr.hpp:378
void ensure_gpu_up_to_date() const
Copy back from the GPU to the expression memory if necessary.
Definition: dyn_matrix_view.hpp:280
bias_add_4d_expr< detail::build_type< E >, detail::build_type< B > > bias_add_4d(const E &x, const B &biases)
Returns the result of adding the bias [K] to the 4D matrix [N1, K, N2, N3].
Definition: bias_add_4d_expr.hpp:388
void validate_gpu() const noexcept
Validates the GPU memory.
Definition: sub_view.hpp:709
Utility functions for cudnn.
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674