10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H 23 template<
typename Shuffle,
typename XprType>
26 typedef typename XprType::Scalar Scalar;
28 typedef typename XprTraits::StorageKind StorageKind;
29 typedef typename XprTraits::Index
Index;
30 typedef typename XprType::Nested Nested;
32 static const int NumDimensions = XprTraits::NumDimensions;
33 static const int Layout = XprTraits::Layout;
36 template<
typename Shuffle,
typename XprType>
42 template<
typename Shuffle,
typename XprType>
52 template<
typename Shuffle,
typename XprType>
58 typedef typename XprType::CoeffReturnType CoeffReturnType;
// Constructs a shuffling expression node from the wrapped expression `expr`
// and the permutation `shuffle`. Both are stored in members: `m_xpr` (declared
// as `typename XprType::Nested`) and `m_shuffle` (declared as a by-value
// `const Shuffle`, so the permutation is copied into the node).
63 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorShufflingOp(
const XprType& expr,
const Shuffle& shuffle)
64 : m_xpr(expr), m_shuffle(shuffle) {}
// Returns the stored permutation by const reference. The evaluator in this
// file reads it as `shuffle[i]` to select the input dimension (and input
// stride) that becomes output dimension `i`.
67 const Shuffle& shufflePermutation()
const {
return m_shuffle; }
71 expression()
const {
return m_xpr; }
74 EIGEN_STRONG_INLINE TensorShufflingOp& operator = (
const TensorShufflingOp& other)
77 Assign assign(*
this, other);
82 template<
typename OtherDerived>
84 EIGEN_STRONG_INLINE TensorShufflingOp& operator = (
const OtherDerived& other)
87 Assign assign(*
this, other);
93 typename XprType::Nested m_xpr;
94 const Shuffle m_shuffle;
99 template<
typename Shuffle,
typename ArgType,
typename Device>
103 typedef typename XprType::Index
Index;
106 typedef typename XprType::Scalar Scalar;
107 typedef typename XprType::CoeffReturnType CoeffReturnType;
119 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorEvaluator(
const XprType& op,
const Device& device)
120 : m_impl(op.expression(), device)
123 const Shuffle& shuffle = op.shufflePermutation();
124 for (
int i = 0; i < NumDims; ++i) {
125 m_dimensions[i] = input_dims[shuffle[i]];
130 if (static_cast<int>(Layout) == static_cast<int>(
ColMajor)) {
132 m_outputStrides[0] = 1;
133 for (
int i = 1; i < NumDims; ++i) {
134 inputStrides[i] = inputStrides[i - 1] * input_dims[i - 1];
135 m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
138 inputStrides[NumDims - 1] = 1;
139 m_outputStrides[NumDims - 1] = 1;
140 for (
int i = NumDims - 2; i >= 0; --i) {
141 inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1];
142 m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
146 for (
int i = 0; i < NumDims; ++i) {
147 m_inputStrides[i] = inputStrides[shuffle[i]];
// Returns the dimensions of the shuffled result. They are filled in by the
// evaluator constructor as `m_dimensions[i] = input_dims[shuffle[i]]`.
151 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dimensions; }
153 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(Scalar* ) {
154 m_impl.evalSubExprsIfNeeded(NULL);
157 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup() {
161 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const 163 return m_impl.coeff(srcCoeff(index));
166 template<
int LoadMode>
167 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const 169 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
170 eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
173 for (
int i = 0; i < PacketSize; ++i) {
174 values[i] = coeff(index+i);
176 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
180 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorOpCost costPerCoeff(
bool vectorized)
const {
181 const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
182 2 * TensorOpCost::MulCost<Index>() +
183 TensorOpCost::DivCost<Index>());
184 return m_impl.costPerCoeff(vectorized) +
// Always returns NULL.
// NOTE(review): presumably this signals that the shuffled view has no directly
// addressable buffer, since coefficients are reached through srcCoeff() index
// remapping — confirm against the TensorEvaluator data() convention.
188 EIGEN_DEVICE_FUNC Scalar* data()
const {
return NULL; }
191 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index)
const {
192 Index inputIndex = 0;
193 if (static_cast<int>(Layout) == static_cast<int>(
ColMajor)) {
194 for (
int i = NumDims - 1; i > 0; --i) {
195 const Index idx = index / m_outputStrides[i];
196 inputIndex += idx * m_inputStrides[i];
197 index -= idx * m_outputStrides[i];
199 return inputIndex + index * m_inputStrides[0];
201 for (
int i = 0; i < NumDims - 1; ++i) {
202 const Index idx = index / m_outputStrides[i];
203 inputIndex += idx * m_inputStrides[i];
204 index -= idx * m_outputStrides[i];
206 return inputIndex + index * m_inputStrides[NumDims - 1];
210 Dimensions m_dimensions;
218 template<
typename Shuffle,
typename ArgType,
typename Device>
220 :
public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
225 typedef typename XprType::Index
Index;
228 typedef typename XprType::Scalar Scalar;
229 typedef typename XprType::CoeffReturnType CoeffReturnType;
239 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorEvaluator(
const XprType& op,
const Device& device)
243 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
245 return this->m_impl.coeffRef(this->srcCoeff(index));
248 template <
int StoreMode> EIGEN_STRONG_INLINE
249 void writePacket(Index index,
const PacketReturnType& x)
251 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
254 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
255 for (
int i = 0; i < PacketSize; ++i) {
256 this->coeffRef(index+i) = values[i];
264 #endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H Definition: TensorExecutor.h:27
Definition: TensorCostModel.h:25
Storage order is column major (see TopicStorageOrders).
Definition: Constants.h:320
Definition: XprHelper.h:158
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:85
A cost model used to limit the number of threads used for evaluating tensor expressions.
Definition: TensorEvaluator.h:28
Definition: TensorAssign.h:60
Definition: GenericPacketMath.h:96
Definition: TensorShuffling.h:100
Definition: TensorForwardDeclarations.h:54
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:33
Definition: TensorDeviceDefault.h:17
The tensor base class.
Definition: TensorBase.h:827
Definition: BandTriangularSolver.h:13
Definition: TensorTraits.h:170
The type used to identify a dense storage.
Definition: Constants.h:491
Generic expression where a coefficient-wise unary operator is applied to an expression.
Definition: CwiseUnaryOp.h:55
Definition: ForwardDeclarations.h:17
Definition: XprHelper.h:312
Definition: EmulateArray.h:203