wichtounet/etl/bias__batch__mean_8hpp_source.html

 //=======================================================================
 // Copyright (c) 2014-2023 Baptiste Wicht
 // Distributed under the terms of the MIT License.
 // (See accompanying file LICENSE or copy at
 //  http://opensource.org/licenses/MIT)
 //=======================================================================

 #pragma once

 #ifdef ETL_CUDNN_MODE

 #include "etl/impl/cublas/cuda.hpp"
 #include "etl/impl/cudnn/cudnn.hpp"

 #endif

 namespace etl::impl::cudnn {

 #ifdef ETL_CUDNN_MODE

 template <typename X, typename Y>
 void bias_batch_mean_4d(X&& x, Y&& y) {
     using type = value_t<X>;

     type alpha[] = {1.0f};
     type beta[]  = {0.0f};

     decltype(auto) handle = start_cudnn();

     // Prepare the tensors
     auto x_tensor = create_tensor_wrapper(x);
     auto y_tensor = create_tensor_wrapper(y);

     // Allocate GPU memory, if necessary

     x.ensure_gpu_up_to_date();
     y.ensure_gpu_allocated();

     // Perform the convolution

     cudnn_check(cudnnConvolutionBackwardBias(handle.get(), alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));

     y.validate_gpu();
     y.invalidate_cpu();
 }

 template <typename X, typename Y>
 void bias_batch_mean_2d(X&& x, Y&& y) {
     using type = value_t<X>;

     type alpha[] = {1.0f};
     type beta[]  = {0.0f};

     decltype(auto) handle = start_cudnn();

     // Prepare the tensors
     auto x_tensor = create_tensor_front_wrapper(x);
     auto y_tensor = create_tensor_wrapper(y);

     // Allocate GPU memory, if necessary

     x.ensure_gpu_up_to_date();
     y.ensure_gpu_allocated();

     // Perform the convolution

     cudnn_check(cudnnConvolutionBackwardBias(handle.get(), alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));

     y.validate_gpu();
     y.invalidate_cpu();
 }

 #else

 //COVERAGE_EXCLUDE_BEGIN

 /*!
  * \brief Placeholder used when ETL_CUDNN_MODE is not defined.
  *
  * Does nothing: both arguments are accepted and ignored.
  *
  * \param x The input expression (ignored)
  * \param y The output expression (ignored)
  */
 template <typename X, typename Y>
 void bias_batch_mean_4d(X&& x, Y&& y) {
     // Explicitly discard the arguments to silence unused-parameter warnings
     static_cast<void>(x);
     static_cast<void>(y);
 }

 /*!
  * \brief Placeholder used when ETL_CUDNN_MODE is not defined.
  *
  * Does nothing: both arguments are accepted and ignored.
  *
  * \param x The input expression (ignored)
  * \param y The output expression (ignored)
  */
 template <typename X, typename Y>
 void bias_batch_mean_2d(X&& x, Y&& y) {
     // Explicitly discard the arguments to silence unused-parameter warnings
     static_cast<void>(x);
     static_cast<void>(y);
 }

 //COVERAGE_EXCLUDE_END

 #endif

 } //end of namespace etl::impl::cudnn
etl::impl::cudnn
Definition: bias_add.hpp:24

etl::ensure_gpu_allocated
void ensure_gpu_allocated() const
Ensures that the GPU memory is allocated and that the GPU memory is up to date (to undefined value)...
Definition: sub_view.hpp:717

etl::invalidate_cpu
void invalidate_cpu() const noexcept
Invalidates the CPU memory.
Definition: sub_view.hpp:688

etl::ensure_gpu_up_to_date
void ensure_gpu_up_to_date() const
Copy from the expression (CPU) memory to the GPU if necessary, so that the GPU memory is up to date.
Definition: dyn_matrix_view.hpp:280

etl::validate_gpu
void validate_gpu() const noexcept
Validates the GPU memory.
Definition: sub_view.hpp:709

cudnn.hpp
Utility functions for cudnn.

etl::bias_batch_mean_2d
bias_batch_mean_2d_expr< detail::build_type< E >, true > bias_batch_mean_2d(const E &value)
Returns the 2D bias batch mean expression of the given expression.
Definition: bias_batch_mean_2d_expr.hpp:441

etl::bias_batch_mean_4d
bias_batch_mean_4d_expr< detail::build_type< E >, true > bias_batch_mean_4d(const E &value)
Returns the 4D bias batch mean expression of the given expression.
Definition: bias_batch_mean_4d_expr.hpp:503

etl::gpu_memory
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674