#include "etl/impl/cublas/cuda.hpp"

//! Apply the given cuDNN activation function to x, storing the result in y.
template <typename I, typename C>
void activation(I&& x, C&& y, cudnnActivationMode_t mode) {
    using type = std::remove_const_t<value_t<I>>;

    type alpha[] = {1.0f};
    type beta[]  = {0.0f};

    decltype(auto) handle = start_cudnn();

    auto x_tensor = create_tensor_flat_wrapper(x);
    auto y_tensor = create_tensor_flat_wrapper(y);

    cudnnActivationDescriptor_t func_tensor;
    cudnn_check(cudnnCreateActivationDescriptor(&func_tensor));
    cudnn_check(cudnnSetActivationDescriptor(func_tensor, mode, CUDNN_PROPAGATE_NAN, 0.0));

    // Prepare the GPU memory
    x.ensure_gpu_up_to_date();
    y.ensure_gpu_allocated();

    // y = activation(x)
    cudnn_check(cudnnActivationForward(handle.get(), func_tensor, alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));

    y.validate_gpu();
    y.invalidate_cpu();

    cudnn_check(cudnnDestroyActivationDescriptor(func_tensor));
}
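// Note: cuDNN routines of this family compute y = alpha * op(x) + beta * y,
// so alpha = 1 and beta = 0 simply overwrite y with op(x).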
//! Compute the backward pass of the given cuDNN activation function, from the
//! forward output y and the output errors dy; the result is stored in dx.
template <typename Y, typename DY, typename DX>
void backward_activation(Y&& y, DY&& dy, DX&& dx, cudnnActivationMode_t mode) {
    using type = std::remove_const_t<value_t<Y>>;

    type alpha[] = {1.0f};
    type beta[]  = {0.0f};

    decltype(auto) handle = start_cudnn();

    auto y_tensor  = create_tensor_flat_wrapper(y);
    auto dy_tensor = create_tensor_flat_wrapper(dy);
    auto dx_tensor = create_tensor_flat_wrapper(dx);

    cudnnActivationDescriptor_t func_tensor;
    cudnn_check(cudnnCreateActivationDescriptor(&func_tensor));
    cudnn_check(cudnnSetActivationDescriptor(func_tensor, mode, CUDNN_PROPAGATE_NAN, 0.0));

    // Prepare the GPU memory
    y.ensure_gpu_up_to_date();
    dy.ensure_gpu_up_to_date();
    dx.ensure_gpu_allocated();

    // dx = backward_activation(y, dy)
    cudnn_check(cudnnActivationBackward(handle.get(), func_tensor, alpha, *y_tensor, y.gpu_memory(), *dy_tensor, dy.gpu_memory(), *y_tensor, y.gpu_memory(),
                                        beta, *dx_tensor, dx.gpu_memory()));

    dx.validate_gpu();
    dx.invalidate_cpu();

    cudnn_check(cudnnDestroyActivationDescriptor(func_tensor));
}
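// Note: cudnnActivationBackward also expects the forward input x, but y is
// passed in that slot as well. That is valid here because the sigmoid and
// relu derivatives are computable from the forward output alone:
//     sigmoid: dx = dy * y * (1 - y)
//     relu:    dx = dy where y > 0, else 0 (and y > 0 exactly where x > 0)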
//! Compute y = sigmoid(x) on the GPU.
template <typename I, typename C>
void sigmoid(I&& x, C&& y) {
    activation(x, y, CUDNN_ACTIVATION_SIGMOID);
}
//! Compute y = relu(x) on the GPU.
template <typename I, typename C>
void relu(I&& x, C&& y) {
    activation(x, y, CUDNN_ACTIVATION_RELU);
}
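// Usage sketch (illustrative only; etl::dyn_matrix as the container type and
// the etl::impl::cudnn namespace qualification are assumptions):
//
//     etl::dyn_matrix<float> x(128, 64);
//     etl::dyn_matrix<float> y(128, 64);
//     etl::impl::cudnn::sigmoid(x, y); // y = 1 / (1 + exp(-x)), on the GPU
//     etl::impl::cudnn::relu(x, y);    // y = max(0, x), on the GPU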
//! Backward pass of the sigmoid activation: given the forward output o and
//! the output errors e, store the input errors in y.
template <typename O, typename E, typename C>
void sigmoid_backward(O&& o, E&& e, C&& y) {
    backward_activation(o, e, y, CUDNN_ACTIVATION_SIGMOID);
}
//! Backward pass of the relu activation: given the forward output o and the
//! output errors e, store the input errors in y.
template <typename O, typename E, typename C>
void relu_backward(O&& o, E&& e, C&& y) {
    backward_activation(o, e, y, CUDNN_ACTIVATION_RELU);
}
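// Usage sketch for the backward passes (illustrative; container type and
// shapes are assumptions):
//
//     // o: forward output, e: errors w.r.t. the output, y: errors w.r.t. the input
//     etl::impl::cudnn::sigmoid_backward(o, e, y);
//     etl::impl::cudnn::relu_backward(o, e, y);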
//! Apply the given cuDNN softmax algorithm to x, storing the result in y.
template <typename I, typename C>
void softmax_activation(I&& x, C&& y, cudnnSoftmaxAlgorithm_t mode) {
    using type = std::remove_const_t<value_t<I>>;

    type alpha[] = {1.0f};
    type beta[]  = {0.0f};

    decltype(auto) handle = start_cudnn();

    auto x_tensor = create_tensor_front_wrapper(x);
    auto y_tensor = create_tensor_front_wrapper(y);

    // Prepare the GPU memory
    x.ensure_gpu_up_to_date();
    y.ensure_gpu_allocated();

    // y = softmax(x)
    cudnn_check(cudnnSoftmaxForward(handle.get(), mode, CUDNN_SOFTMAX_MODE_INSTANCE, alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));

    y.validate_gpu();
    y.invalidate_cpu();
}
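// Note: CUDNN_SOFTMAX_MODE_INSTANCE normalizes each instance separately, i.e.
// the softmax runs over all remaining dimensions for every entry of the
// leading (batch) dimension, which create_tensor_front_wrapper presumably
// maps to the N dimension of the cuDNN tensor descriptor.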
//! Compute y = softmax(x) on the GPU, using the fast cuDNN algorithm.
template <typename I, typename C>
void softmax(I&& x, C&& y) {
    softmax_activation(x, y, CUDNN_SOFTMAX_FAST);
}
//! Compute y = softmax(x) on the GPU, using the numerically stable cuDNN algorithm.
template <typename I, typename C>
void stable_softmax(I&& x, C&& y) {
    softmax_activation(x, y, CUDNN_SOFTMAX_ACCURATE);
}
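// Note: CUDNN_SOFTMAX_FAST exponentiates the inputs directly, while
// CUDNN_SOFTMAX_ACCURATE first subtracts the per-instance maximum to avoid
// overflow in exp, hence the "stable" name of the wrapper above.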
// Fallback stubs used when cuDNN is not available/enabled.

template <typename I, typename C>
void sigmoid([[maybe_unused]] I&& x, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename I, typename C>
void relu([[maybe_unused]] I&& x, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename O, typename E, typename C>
void sigmoid_backward([[maybe_unused]] O&& o, [[maybe_unused]] E&& e, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename O, typename E, typename C>
void relu_backward([[maybe_unused]] O&& o, [[maybe_unused]] E&& e, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename I, typename C>
void softmax([[maybe_unused]] I&& x, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename I, typename C>
void stable_softmax([[maybe_unused]] I&& x, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}