wichtounet/etl/cublas_2dot_8hpp_source.html

 //=======================================================================
 // Copyright (c) 2014-2023 Baptiste Wicht
 // Distributed under the terms of the MIT License.
 // (See accompanying file LICENSE or copy at
 //  http://opensource.org/licenses/MIT)
 //=======================================================================

 #pragma once

 #ifdef ETL_CUBLAS_MODE

 #include "etl/impl/cublas/cuda.hpp"
 #include "etl/impl/cublas/cublas.hpp"

 #endif

 namespace etl::impl::cublas {

 #ifdef ETL_CUBLAS_MODE

 /*!
  * \brief Compute the dot product of a and b with CUBLAS (single precision).
  *
  * Only participates in overload resolution when both A and B satisfy
  * etl_dma_single_precision.
  *
  * \param a The left-hand side expression
  * \param b The right-hand side expression
  * \return The value written by cublasSdot
  */
 template <etl_dma_single_precision A, etl_dma_single_precision B>
 float dot(const A& a, const B& b) {
     // The handle wrapper is kept in a local for the duration of the call
     decltype(auto) handle = start_cublas();

     // Both operands are accessed through gpu_memory() below, so they are
     // synchronized first
     a.ensure_gpu_up_to_date();
     b.ensure_gpu_up_to_date();

     // Result slot passed to CUBLAS by address
     // NOTE(review): assumes the handle uses the host pointer mode -- confirm in start_cublas()
     float prod = 0.0;

     // Element count is taken from a only and both operands use an increment of 1
     // NOTE(review): b is presumably expected to have the same size as a -- confirm against callers
     // NOTE(review): cublas_check is presumably a macro around the call; keep the expression inline
     cublas_check(cublasSdot(handle.get(), etl::size(a), a.gpu_memory(), 1, b.gpu_memory(), 1, &prod));
     return prod;
 }

 /*!
  * \brief Compute the dot product of a and b with CUBLAS (double precision).
  *
  * Only participates in overload resolution when both A and B satisfy
  * etl_dma_double_precision.
  *
  * \param a The left-hand side expression
  * \param b The right-hand side expression
  * \return The value written by cublasDdot
  */
 template <etl_dma_double_precision A, etl_dma_double_precision B>
 double dot(const A& a, const B& b) {
     // The handle wrapper is kept in a local for the duration of the call
     decltype(auto) handle = start_cublas();

     // Both operands are accessed through gpu_memory() below, so they are
     // synchronized first
     a.ensure_gpu_up_to_date();
     b.ensure_gpu_up_to_date();

     // Result slot passed to CUBLAS by address
     // NOTE(review): assumes the handle uses the host pointer mode -- confirm in start_cublas()
     double prod = 0.0;

     // Element count is taken from a only and both operands use an increment of 1
     // NOTE(review): b is presumably expected to have the same size as a -- confirm against callers
     // NOTE(review): cublas_check is presumably a macro around the call; keep the expression inline
     cublas_check(cublasDdot(handle.get(), etl::size(a), a.gpu_memory(), 1, b.gpu_memory(), 1, &prod));
     return prod;
 }

 #else

 /*!
  * \brief Stand-in for the CUBLAS dot product when ETL_CUBLAS_MODE is not defined.
  *
  * Keeps the etl::impl::cublas::dot name available so that code referring to
  * it still compiles; the body flags the call as unreachable.
  *
  * \param a The left-hand side expression (unused)
  * \param b The right-hand side expression (unused)
  * \return 0.0 converted to value_t<A>, only so that every path returns a value
  */
 template <typename A, typename B>
 value_t<A> dot([[maybe_unused]] const A& a, [[maybe_unused]] const B& b) {
     cpp_unreachable("CUBLAS not enabled/available");
     return 0.0;
 }

 #endif

 } //end of namespace etl::impl::cublas
etl::dot
value_t< A > dot(const A &a, const B &b)
Returns the dot product of the two given expressions.
Definition: expression_builder.hpp:594

etl::impl::cublas
Definition: axpy.hpp:22

etl
Root namespace for the ETL library.
Definition: adapter.hpp:15

cublas.hpp
Utility functions for cublas.

etl::ensure_gpu_up_to_date
void ensure_gpu_up_to_date() const
Copy back from the GPU to the expression memory if necessary.
Definition: dyn_matrix_view.hpp:280

etl::value_t
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81

etl::gpu_memory
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674