Expression Templates Library (ETL)
bias_add.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
13 #pragma once
14 
15 #ifdef ETL_CUDNN_MODE
16 
17 #include "etl/impl/cublas/cuda.hpp"
19 #include "etl/impl/cublas/axpy.hpp"
20 #include "etl/impl/cudnn/cudnn.hpp"
21 
22 #endif
23 
24 namespace etl::impl::cudnn {
25 
26 #ifdef ETL_CUDNN_MODE
27 
34 template <typename I, typename K, typename C>
35 void bias_add_4d(I&& x, K&& b, C&& y) {
36  using type = std::remove_const_t<value_t<I>>;
37 
38  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
39 
40  type alpha[] = {1.0f};
41  type beta[] = {0.0f};
42 
43  decltype(auto) handle = start_cudnn();
44 
45  // Prepare the tensors
46  auto x_tensor = create_tensor_wrapper(x);
47  auto y_tensor = create_tensor_wrapper(y);
48 
49  cudnnTensorDescriptor_t b_tensor;
50  cudnn_check(cudnnCreateTensorDescriptor(&b_tensor));
51  cudnn_check(cudnnSetTensor4dDescriptor(b_tensor, CUDNN_TENSOR_NCHW, data_type, 1, etl::dim<0>(b), 1, 1));
52 
53  // Allocate GPU memory, if necessary
54 
56  b.ensure_gpu_up_to_date();
58 
59  // Copy x -> y
60 
61  cudnn_check(cudnnTransformTensor(handle.get(), alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));
62 
63  // Add b -> y
64 
65  cudnn_check(cudnnAddTensor(handle.get(), alpha, b_tensor, b.gpu_memory(), alpha, *y_tensor, y.gpu_memory()));
66 
67  y.validate_gpu();
68  y.invalidate_cpu();
69 
70  // Release the resources
71  cudnn_check(cudnnDestroyTensorDescriptor(b_tensor));
72 }
73 
80 template <typename I, typename K, typename C>
81 void bias_add_2d(I&& x, K&& b, C&& y) {
82  using type = std::remove_const_t<value_t<I>>;
83 
84  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
85 
86  type alpha[] = {1.0f};
87  type beta[] = {0.0f};
88 
89  decltype(auto) handle = start_cudnn();
90 
91  // Prepare the tensors
92  auto x_tensor = create_tensor_wrapper(x);
93  auto y_tensor = create_tensor_wrapper(y);
94 
95  cudnnTensorDescriptor_t b_tensor;
96  cudnn_check(cudnnCreateTensorDescriptor(&b_tensor));
97  cudnn_check(cudnnSetTensor4dDescriptor(b_tensor, CUDNN_TENSOR_NCHW, data_type, 1, 1, 1, etl::dim<0>(b)));
98 
99  // Allocate GPU memory, if necessary
100 
101  x.ensure_gpu_up_to_date();
102  b.ensure_gpu_up_to_date();
103  y.ensure_gpu_allocated();
104 
105  // Copy x -> y
106 
107  cudnn_check(cudnnTransformTensor(handle.get(), alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));
108 
109  // Add b -> y
110 
111  // This is highly retarded stuff :(
112  // Unfortunately cudnnAddTensor does not support 2D tensors :(
113  // This is solved when EGBLAS is available, since this will be
114  // computed with EGBLAS first
115 
116  {
117  decltype(auto) handle = etl::impl::cublas::start_cublas();
118 
119  for (size_t i = 0; i < etl::dim<0>(x); ++i) {
120  impl::cublas::cublas_axpy(handle.get(), etl::dim<1>(y), alpha, b.gpu_memory(), 1, y.gpu_memory() + i * etl::dim<1>(y), 1);
121  }
122  }
123 
124  y.validate_gpu();
125  y.invalidate_cpu();
126 
127  // Release the resources
128  cudnn_check(cudnnDestroyTensorDescriptor(b_tensor));
129 }
130 
131 #else
132 
133 //COVERAGE_EXCLUDE_BEGIN
134 
/*!
 * \brief Fallback for bias_add_4d when CUDNN is not available/enabled.
 * Must never be called: dispatch elsewhere guarantees this path is
 * unreachable, which cpp_unreachable asserts.
 */
template <typename I, typename K, typename C>
void bias_add_4d([[maybe_unused]] I&& x, [[maybe_unused]] K&& b, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
145 
/*!
 * \brief Fallback for bias_add_2d when CUDNN is not available/enabled.
 * Must never be called: dispatch elsewhere guarantees this path is
 * unreachable, which cpp_unreachable asserts.
 */
template <typename I, typename K, typename C>
void bias_add_2d([[maybe_unused]] I&& x, [[maybe_unused]] K&& b, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
156 
157  //COVERAGE_EXCLUDE_END
158 
159 #endif
160 
161 } //end of namespace etl::impl::cudnn
Definition: bias_add.hpp:24
void ensure_gpu_allocated() const
Ensures that the GPU memory is allocated and that the GPU memory is up to date (to undefined value)...
Definition: sub_view.hpp:717
CUBLAS wrappers for the axpy operation.
Root namespace for the ETL library.
Definition: adapter.hpp:15
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
Utility functions for cublas.
void invalidate_cpu() const noexcept
Invalidates the CPU memory.
Definition: sub_view.hpp:688
bias_add_2d_expr< detail::build_type< E >, detail::build_type< B > > bias_add_2d(const E &x, const B &biases)
Returns the result of adding the bias [K] to the 4D matrix [N1, K, N2, N3].
Definition: bias_add_2d_expr.hpp:378
void ensure_gpu_up_to_date() const
Copy back from the GPU to the expression memory if necessary.
Definition: dyn_matrix_view.hpp:280
bias_add_4d_expr< detail::build_type< E >, detail::build_type< B > > bias_add_4d(const E &x, const B &biases)
Returns the result of adding the bias [K] to the 4D matrix [N1, K, N2, N3].
Definition: bias_add_4d_expr.hpp:388
void validate_gpu() const noexcept
Validates the GPU memory.
Definition: sub_view.hpp:709
Utility functions for cudnn.
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674