17 #include "etl/impl/cublas/cuda.hpp" 31 template <
typename X,
typename Y>
33 using type = value_t<X>;
35 type alpha[] = {1.0f};
38 decltype(
auto) handle = start_cudnn();
41 auto x_tensor = create_tensor_wrapper(x);
42 auto y_tensor = create_tensor_wrapper(y);
51 cudnn_check(cudnnConvolutionBackwardBias(handle.get(), alpha, *x_tensor, x.
gpu_memory(), beta, *y_tensor, y.gpu_memory()));
62 template <typename X, typename Y>
64 using type = value_t<X>;
66 type alpha[] = {1.0f};
69 decltype(
auto) handle = start_cudnn();
72 auto x_tensor = create_tensor_front_wrapper(x);
73 auto y_tensor = create_tensor_wrapper(y);
77 x.ensure_gpu_up_to_date();
78 y.ensure_gpu_allocated();
82 cudnn_check(cudnnConvolutionBackwardBias(handle.get(), alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));
97 template <
typename X,
typename Y>
105 template <
typename X,
typename Y>
Definition: bias_add.hpp:24
void ensure_gpu_allocated() const
Ensures that the GPU memory is allocated and marked as up to date (its contents are left undefined)...
Definition: sub_view.hpp:717
void invalidate_cpu() const noexcept
Invalidates the CPU memory.
Definition: sub_view.hpp:688
void ensure_gpu_up_to_date() const
Copies the expression memory to the GPU if necessary, so that the GPU memory is up to date.
Definition: dyn_matrix_view.hpp:280
void validate_gpu() const noexcept
Validates the GPU memory.
Definition: sub_view.hpp:709
Utility functions for cudnn.
bias_batch_mean_2d_expr< detail::build_type< E >, true > bias_batch_mean_2d(const E &value)
Returns the bias batch mean of the given 2D expression.
Definition: bias_batch_mean_2d_expr.hpp:441
bias_batch_mean_4d_expr< detail::build_type< E >, true > bias_batch_mean_4d(const E &value)
Returns the bias batch mean of the given 4D expression.
Definition: bias_batch_mean_4d_expr.hpp:503
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674