#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

namespace Eigen {

// Generic evaluator: assumes the expression provides direct, writable access
// to its coefficients through data().
template<typename Derived, typename Device>
struct TensorEvaluator
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;

  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;

  enum {
    IsAligned = Derived::IsAligned,
    Layout = Derived::Layout,
    CoordAccess = NumCoords > 0
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(const_cast<Scalar*>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m)
  { }

  /// required by sycl in order to extract the accessor
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Derived& derived() const { return m_impl; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
    if (dest) {
      m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize());
      return false;
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data);
    return m_data[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
    eigen_assert(m_data);
    return m_data[index];
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<DenseIndex, NumCoords>& coords) {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        internal::unpacket_traits<PacketReturnType>::size);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }

  /// required by sycl in order to construct sycl buffer from raw pointer
  const Device& device() const { return m_device; }

 protected:
  Scalar* m_data;
  Dimensions m_dims;
  const Device& m_device;
  const Derived& m_impl;
};
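
// Illustrative sketch (not part of the original header): how an evaluator is
// typically driven. In practice Eigen's internal TensorExecutor performs
// these calls; the names `eval`, `t`, `device`, and `sum` below are
// hypothetical, chosen only for illustration.
//
//   Eigen::Tensor<float, 2> t(2, 3);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<Eigen::Tensor<float, 2>, Eigen::DefaultDevice>
//       eval(t, device);
//   if (eval.evalSubExprsIfNeeded(NULL)) {  // true: coefficients readable in place
//     float sum = 0.0f;
//     for (Eigen::DenseIndex i = 0; i < t.size(); ++i) {
//       sum += eval.coeff(i);
//     }
//   }
//   eval.cleanup();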
namespace {
// Default path: a plain load.
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
  return *address;
}
// Use the texture cache on CUDA devices whenever possible.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
#endif
}
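
// Illustrative sketch (an aside, not from this header): what the __ldg
// specializations above buy. On compute capability 3.5+, __ldg routes a load
// through the read-only (texture) data cache, which helps when many threads
// repeatedly read immutable coefficients. A minimal hypothetical kernel
// using the same intrinsic:
//
//   __global__ void scaleKernel(const float* in, float* out, int n, float s) {
//     int i = blockIdx.x * blockDim.x + threadIdx.x;
//     if (i < n) {
//       out[i] = __ldg(in + i) * s;  // read-only cache load, like loadConstant
//     }
//   }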
// Default evaluator for rvalues: same layout as the generic evaluator above,
// but read-only, so coefficient loads can go through loadConstant().
template<typename Derived, typename Device>
struct TensorEvaluator<const Derived, Device>
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;

  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;

  enum {
    IsAligned = Derived::IsAligned,
    Layout = Derived::Layout,
    CoordAccess = NumCoords > 0
  };

  /// required by sycl in order to extract the accessor
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Derived& derived() const { return m_impl; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    if (data) {
      m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data);
    return loadConstant(m_data + index);
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data);
    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
                                                                                 : m_dims.IndexOfRowMajor(coords);
    return loadConstant(m_data + index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        internal::unpacket_traits<PacketReturnType>::size);
  }

  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }

  /// added for sycl in order to construct the buffer from the sycl device
  const Device& device() const { return m_device; }

 protected:
  const Scalar* m_data;
  Dimensions m_dims;
  const Device& m_device;
  const Derived& m_impl;
};
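
// Illustrative sketch (an assumption about usage, not from this header):
// this const specialization is the one instantiated for read-only operands,
// e.g. the rhs of an assignment, so its reads benefit from loadConstant()
// and ploadt_ro. The names `src`, `device`, `eval`, and `first` are
// hypothetical.
//
//   Eigen::Tensor<float, 1> src(8);
//   src.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<const Eigen::Tensor<float, 1>, Eigen::DefaultDevice>
//       eval(src, device);
//   eval.evalSubExprsIfNeeded(NULL);
//   float first = eval.coeff(0);  // routed through loadConstant()
//   eval.cleanup();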
// -------------------- CwiseNullaryOp --------------------

template<typename NullaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_wrapper(m_functor, index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }

  /// required by sycl in order to extract the accessor
  const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
  /// required by sycl in order to extract the accessor
  NullaryOp functor() const { return m_functor; }

 private:
  const NullaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
  const internal::nullary_wrapper<CoeffReturnType, NullaryOp> m_wrapper;
};
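
// Illustrative sketch (hypothetical usage): expressions that reach this
// evaluator are generator-style ops that take no input coefficients, e.g.:
//
//   Eigen::Tensor<float, 2> t(2, 2);
//   t = t.constant(3.14f);  // TensorCwiseNullaryOp; coeff() just invokes the functor
//   t = t.random();         // likewise, one functor call per output coefficient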
// -------------------- CwiseUnaryOp --------------------

template<typename UnaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()),
        m_argImpl(op.nestedExpression(), device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    m_argImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_argImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_argImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }

  /// required by sycl in order to extract the accessor
  const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
  /// added for sycl in order to construct the buffer from sycl device
  UnaryOp functor() const { return m_functor; }

 private:
  const UnaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};
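
// Illustrative sketch (hypothetical usage): a unary cwise expression such as
//
//   Eigen::Tensor<float, 1> a(4), b(4);
//   a.setRandom();
//   b = a.abs();  // coeff(i) computes m_functor(m_argImpl.coeff(i))
//
// evaluates lazily; the functor runs once per coefficient, or once per
// packet when the vectorized path is taken.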
// -------------------- CwiseBinaryOp --------------------

template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
{
  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()),
        m_leftImpl(op.lhsExpression(), device),
        m_rightImpl(op.rhsExpression(), device)
  {
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    return m_leftImpl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return m_leftImpl.costPerCoeff(vectorized) +
           m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }

  /// required by sycl in order to extract the accessor
  const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
  /// required by sycl in order to extract the accessor
  const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
  /// required by sycl in order to extract the accessor
  BinaryOp functor() const { return m_functor; }

 private:
  const BinaryOp m_functor;
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};
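
// Illustrative sketch (hypothetical usage): a binary cwise expression such as
//
//   Eigen::Tensor<float, 1> a(4), b(4), c(4);
//   a.setRandom();
//   b.setRandom();
//   c = a + b;  // coeff(i) computes m_functor(m_leftImpl.coeff(i), m_rightImpl.coeff(i))
//
// The constructor's dimensions_match assert fires, in debug builds, when the
// two operands disagree in shape.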
// -------------------- CwiseTernaryOp --------------------

template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
{
  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()),
        m_arg1Impl(op.arg1Expression(), device),
        m_arg2Impl(op.arg2Expression(), device),
        m_arg3Impl(op.arg3Expression(), device)
  {
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                         typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                         typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                         typename internal::traits<Arg2Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                         typename internal::traits<Arg3Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)

    eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) &&
                 dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    return m_arg1Impl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_arg1Impl.cleanup();
    m_arg2Impl.cleanup();
    m_arg3Impl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
                              m_arg2Impl.template packet<LoadMode>(index),
                              m_arg3Impl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    return m_arg1Impl.costPerCoeff(vectorized) +
           m_arg2Impl.costPerCoeff(vectorized) +
           m_arg3Impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }

  /// required by sycl in order to extract the accessor
  const TensorEvaluator<Arg1Type, Device>& arg1Impl() const { return m_arg1Impl; }
  /// required by sycl in order to extract the accessor
  const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; }
  /// required by sycl in order to extract the accessor
  const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; }

 private:
  const TernaryOp m_functor;
  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
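
// Illustrative sketch (an assumption; verify against your Eigen version):
// ternary cwise expressions are rare. The tensor module's betainc() is one
// producer of TensorCwiseTernaryOp:
//
//   Eigen::Tensor<float, 1> a(4), b(4), x(4), r(4);
//   a.setConstant(2.0f); b.setConstant(3.0f); x.setConstant(0.25f);
//   r = Eigen::betainc(a, b, x);  // one functor call on three coefficients each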
// -------------------- SelectOp --------------------

template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
  typedef typename XprType::Scalar Scalar;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
      : m_condImpl(op.ifExpression(), device),
        m_thenImpl(op.thenExpression(), device),
        m_elseImpl(op.elseExpression(), device)
  {
    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    return m_condImpl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_condImpl.cleanup();
    m_thenImpl.cleanup();
    m_elseImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
  {
    internal::Selector<PacketSize> select;
    for (Index i = 0; i < PacketSize; ++i) {
      select.select[i] = m_condImpl.coeff(index+i);
    }
    return internal::pblend(select,
                            m_thenImpl.template packet<LoadMode>(index),
                            m_elseImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_condImpl.costPerCoeff(vectorized) +
           m_thenImpl.costPerCoeff(vectorized)
          .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; }

  /// required by sycl in order to extract the accessor
  const TensorEvaluator<IfArgType, Device>& cond_impl() const { return m_condImpl; }
  /// required by sycl in order to extract the accessor
  const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; }
  /// required by sycl in order to extract the accessor
  const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; }

 private:
  TensorEvaluator<IfArgType, Device> m_condImpl;
  TensorEvaluator<ThenArgType, Device> m_thenImpl;
  TensorEvaluator<ElseArgType, Device> m_elseImpl;
};
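
// Illustrative sketch (hypothetical usage): TensorSelectOp is produced by
// select(), the tensor analogue of the ternary operator:
//
//   Eigen::Tensor<float, 1> a(4), b(4), r(4);
//   a.setRandom(); b.setRandom();
//   r = (a > a.constant(0.0f)).select(a, b);  // per-coefficient cond ? then : else
//
// Note that the packet path above still reads the condition coefficient by
// coefficient before blending with internal::pblend.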
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H