#include "etl/impl/cublas/cuda.hpp"

//! Apply the given cuDNN activation function to x, storing the result in y.
template <typename I, typename C>
void activation(I&& x, C&& y, cudnnActivationMode_t mode) {
    using type = std::remove_const_t<value_t<I>>;

    type alpha[] = {1.0f};
    type beta[]  = {0.0f};

    decltype(auto) handle = start_cudnn();

    auto x_tensor = create_tensor_flat_wrapper(x);
    auto y_tensor = create_tensor_flat_wrapper(y);

    cudnnActivationDescriptor_t func_tensor;
    cudnn_check(cudnnCreateActivationDescriptor(&func_tensor));
    cudnn_check(cudnnSetActivationDescriptor(func_tensor, mode, CUDNN_PROPAGATE_NAN, 0.0));

    // Prepare the GPU memory
    x.ensure_gpu_up_to_date();
    y.ensure_gpu_allocated();

    // y = activation(x)
    cudnn_check(cudnnActivationForward(handle.get(), func_tensor, alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));

    y.validate_gpu();
    y.invalidate_cpu();

    cudnn_check(cudnnDestroyActivationDescriptor(func_tensor));
}
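// Note: cuDNN routines of this family compute y = alpha * op(x) + beta * y,
// so alpha = 1 and beta = 0 simply overwrite y with op(x).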
//! Compute the backward pass of the given cuDNN activation function, from the
//! forward output y and the output errors dy; the result is stored in dx.
template <typename Y, typename DY, typename DX>
void backward_activation(Y&& y, DY&& dy, DX&& dx, cudnnActivationMode_t mode) {
    using type = std::remove_const_t<value_t<Y>>;

    type alpha[] = {1.0f};
    type beta[]  = {0.0f};

    decltype(auto) handle = start_cudnn();

    auto y_tensor  = create_tensor_flat_wrapper(y);
    auto dy_tensor = create_tensor_flat_wrapper(dy);
    auto dx_tensor = create_tensor_flat_wrapper(dx);

    cudnnActivationDescriptor_t func_tensor;
    cudnn_check(cudnnCreateActivationDescriptor(&func_tensor));
    cudnn_check(cudnnSetActivationDescriptor(func_tensor, mode, CUDNN_PROPAGATE_NAN, 0.0));

    // Prepare the GPU memory
    y.ensure_gpu_up_to_date();
    dy.ensure_gpu_up_to_date();
    dx.ensure_gpu_allocated();

    // dx = backward_activation(y, dy)
    cudnn_check(cudnnActivationBackward(handle.get(), func_tensor, alpha, *y_tensor, y.gpu_memory(), *dy_tensor, dy.gpu_memory(), *y_tensor, y.gpu_memory(),
                                        beta, *dx_tensor, dx.gpu_memory()));

    dx.validate_gpu();
    dx.invalidate_cpu();

    cudnn_check(cudnnDestroyActivationDescriptor(func_tensor));
}
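// Note: cudnnActivationBackward also expects the forward input x, but y is
// passed in that slot as well. That is valid here because the sigmoid and
// relu derivatives are computable from the forward output alone:
//     sigmoid: dx = dy * y * (1 - y)
//     relu:    dx = dy where y > 0, else 0 (and y > 0 exactly where x > 0)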
//! Compute y = sigmoid(x) on the GPU.
template <typename I, typename C>
void sigmoid(I&& x, C&& y) {
    activation(x, y, CUDNN_ACTIVATION_SIGMOID);
}
//! Compute y = relu(x) on the GPU.
template <typename I, typename C>
void relu(I&& x, C&& y) {
    activation(x, y, CUDNN_ACTIVATION_RELU);
}
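// Usage sketch (illustrative only; etl::dyn_matrix as the container type and
// the etl::impl::cudnn namespace qualification are assumptions):
//
//     etl::dyn_matrix<float> x(128, 64);
//     etl::dyn_matrix<float> y(128, 64);
//     etl::impl::cudnn::sigmoid(x, y); // y = 1 / (1 + exp(-x)), on the GPU
//     etl::impl::cudnn::relu(x, y);    // y = max(0, x), on the GPU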
//! Backward pass of the sigmoid activation: given the forward output o and
//! the output errors e, store the input errors in y.
template <typename O, typename E, typename C>
void sigmoid_backward(O&& o, E&& e, C&& y) {
    backward_activation(o, e, y, CUDNN_ACTIVATION_SIGMOID);
}
//! Backward pass of the relu activation: given the forward output o and the
//! output errors e, store the input errors in y.
template <typename O, typename E, typename C>
void relu_backward(O&& o, E&& e, C&& y) {
    backward_activation(o, e, y, CUDNN_ACTIVATION_RELU);
}
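// Usage sketch for the backward passes (illustrative; container type and
// shapes are assumptions):
//
//     // o: forward output, e: errors w.r.t. the output, y: errors w.r.t. the input
//     etl::impl::cudnn::sigmoid_backward(o, e, y);
//     etl::impl::cudnn::relu_backward(o, e, y);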
//! Apply the given cuDNN softmax algorithm to x, storing the result in y.
template <typename I, typename C>
void softmax_activation(I&& x, C&& y, cudnnSoftmaxAlgorithm_t mode) {
    using type = std::remove_const_t<value_t<I>>;

    type alpha[] = {1.0f};
    type beta[]  = {0.0f};

    decltype(auto) handle = start_cudnn();

    auto x_tensor = create_tensor_front_wrapper(x);
    auto y_tensor = create_tensor_front_wrapper(y);

    // Prepare the GPU memory
    x.ensure_gpu_up_to_date();
    y.ensure_gpu_allocated();

    // y = softmax(x)
    cudnn_check(cudnnSoftmaxForward(handle.get(), mode, CUDNN_SOFTMAX_MODE_INSTANCE, alpha, *x_tensor, x.gpu_memory(), beta, *y_tensor, y.gpu_memory()));

    y.validate_gpu();
    y.invalidate_cpu();
}
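// Note: CUDNN_SOFTMAX_MODE_INSTANCE normalizes each instance separately, i.e.
// the softmax runs over all remaining dimensions for every entry of the
// leading (batch) dimension, which create_tensor_front_wrapper presumably
// maps to the N dimension of the cuDNN tensor descriptor.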
//! Compute y = softmax(x) on the GPU, using the fast cuDNN algorithm.
template <typename I, typename C>
void softmax(I&& x, C&& y) {
    softmax_activation(x, y, CUDNN_SOFTMAX_FAST);
}
//! Compute y = softmax(x) on the GPU, using the numerically stable cuDNN algorithm.
template <typename I, typename C>
void stable_softmax(I&& x, C&& y) {
    softmax_activation(x, y, CUDNN_SOFTMAX_ACCURATE);
}
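// Note: CUDNN_SOFTMAX_FAST exponentiates the inputs directly, while
// CUDNN_SOFTMAX_ACCURATE first subtracts the per-instance maximum to avoid
// overflow in exp, hence the "stable" name of the wrapper above.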
// Fallback stubs used when cuDNN is not available/enabled.

template <typename I, typename C>
void sigmoid([[maybe_unused]] I&& x, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename I, typename C>
void relu([[maybe_unused]] I&& x, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename O, typename E, typename C>
void sigmoid_backward([[maybe_unused]] O&& o, [[maybe_unused]] E&& e, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename O, typename E, typename C>
void relu_backward([[maybe_unused]] O&& o, [[maybe_unused]] E&& e, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename I, typename C>
void softmax([[maybe_unused]] I&& x, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}
template <typename I, typename C>
void stable_softmax([[maybe_unused]] I&& x, [[maybe_unused]] C&& y) {
    cpp_unreachable("CUDNN not available/enabled");
}