15 #ifdef ETL_CUBLAS_MODE 17 #include "etl/impl/cublas/cuda.hpp" 24 #ifdef ETL_CUBLAS_MODE 32 template <etl_single_precision A, etl_single_precision B, etl_single_precision C>
34 decltype(
auto) handle = start_cublas();
45 b.ensure_gpu_up_to_date();
58 template <etl_double_precision A, etl_double_precision B, etl_double_precision C>
60 decltype(
auto) handle = start_cublas();
65 a.ensure_gpu_up_to_date();
66 b.ensure_gpu_up_to_date();
67 c.ensure_gpu_allocated();
81 template <
typename A,
typename B,
typename C>
83 cpp_unreachable(
"CUBLAS not enabled/available");
void ensure_gpu_allocated() const
Ensures that the GPU memory is allocated; the allocated memory is not necessarily initialized, so its contents may be undefined...
Definition: sub_view.hpp:717
batch_outer_product_expr< detail::build_type< A >, detail::build_type< B > > batch_outer(A &&a, B &&b)
Batch outer product of two matrices.
Definition: batch_outer_product_expr.hpp:333
Root namespace for the ETL library.
Definition: adapter.hpp:15
Utility functions for cublas.
size_t columns(const E &expr)
Returns the number of columns of the given ETL expression.
Definition: helpers.hpp:78
void invalidate_cpu() const noexcept
Invalidates the CPU memory.
Definition: sub_view.hpp:688
void ensure_gpu_up_to_date() const
Copy from the expression memory to the GPU if necessary, so that the GPU memory is up to date.
Definition: dyn_matrix_view.hpp:280
void validate_gpu() const noexcept
Validates the GPU memory.
Definition: sub_view.hpp:709
size_t rows(const E &expr)
Returns the number of rows of the given ETL expression.
Definition: helpers.hpp:58
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674