Expression Templates Library (ETL)
relu.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 #pragma once
8 
9 namespace etl {
10 
15 template <typename T>
16 struct relu_unary_op {
17  static constexpr bool linear = true;
18  static constexpr bool thread_safe = true;
19 
25  template <vector_mode_t V>
26  static constexpr bool vectorizable = !is_complex_t<T>;
27 
31  template <typename E>
32  static constexpr bool gpu_computable = is_floating<E>&& cudnn_enabled;
33 
38  static constexpr int complexity() {
39  return 1;
40  }
41 
45  template <typename V = default_vec>
46  using vec_type = typename V::template vec_type<T>;
47 
53  static T apply(const T& x) {
54  return std::max(x, T(0));
55  }
56 
63  template <typename V = default_vec>
64  static vec_type<V> load(const vec_type<V>& x) noexcept {
65  return V::max(x, V::set(T(0)));
66  }
67 
75  template <typename X, typename Y>
76  static auto gpu_compute_hint(const X& x, Y& y) noexcept {
77  decltype(auto) t1 = smart_gpu_compute_hint(x, y);
78 
79  auto t2 = force_temporary_gpu_dim_only(t1);
80  impl::cudnn::relu(t1, t2);
81  return t2;
82  }
89  template <typename X, typename Y>
90  static Y& gpu_compute(const X& x, Y& y) noexcept {
91  decltype(auto) t1 = select_smart_gpu_compute(x, y);
92 
93  impl::cudnn::relu(t1, y);
94 
95  return y;
96  }
97 
102  static std::string desc() noexcept {
103  return "relu";
104  }
105 };
106 
107 } //end of namespace etl
static Y & gpu_compute(const X &x, Y &y) noexcept
Compute the result of the operation using the GPU.
Definition: relu.hpp:90
static constexpr bool thread_safe
Indicates if the operator is thread safe or not.
Definition: relu.hpp:18
auto max(L &&lhs, R &&rhs)
Create an expression with the max value of lhs or rhs.
Definition: expression_builder.hpp:65
Unary operation computing the RELU operation.
Definition: relu.hpp:16
typename V::template vec_type< T > vec_type
Definition: relu.hpp:46
auto relu(const E &value) -> detail::unary_helper< E, relu_unary_op >
Return the relu activation of the given ETL expression.
Definition: function_expression_builder.hpp:207
decltype(auto) select_smart_gpu_compute(X &x, Y &y)
Compute the expression into a representation that is GPU up to date and possibly store this representation in y.
Definition: helpers.hpp:434
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: relu.hpp:102
static auto gpu_compute_hint(const X &x, Y &y) noexcept
Compute the result of the operation using the GPU.
Definition: relu.hpp:76
Root namespace for the ETL library.
Definition: adapter.hpp:15
static constexpr bool vectorizable
Indicates if the expression is vectorizable using the given vector mode.
Definition: relu.hpp:26
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114
static constexpr bool gpu_computable
Indicates if the operator can be computed on GPU.
Definition: relu.hpp:32
static constexpr bool linear
Indicates if the operator is linear.
Definition: relu.hpp:17
decltype(auto) force_temporary_gpu_dim_only(E &&expr)
Force a temporary out of the expression, without copying its content.
Definition: temporary.hpp:223
static constexpr int complexity()
Estimate the complexity of the operator.
Definition: relu.hpp:38
static T apply(const T &x)
Apply the unary operator on x.
Definition: relu.hpp:53
static vec_type< V > load(const vec_type< V > &x) noexcept
Compute several applications of the operator at a time.
Definition: relu.hpp:64
decltype(auto) smart_gpu_compute_hint(E &expr, Y &y)
Compute the expression into a representation that is GPU up to date.
Definition: helpers.hpp:368