10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H 24 template<DenseIndex DimId,
typename XprType>
// Scalar, storage kind, index type and nested type are forwarded from the
// wrapped expression type (XprType) or from its traits (XprTraits).
27 typedef typename XprType::Scalar Scalar;
29 typedef typename XprTraits::StorageKind StorageKind;
30 typedef typename XprTraits::Index
Index;
31 typedef typename XprType::Nested Nested;
// The chipped expression has exactly one dimension fewer than the wrapped one.
33 static const int NumDimensions = XprTraits::NumDimensions - 1;
// The storage layout is taken unchanged from the wrapped expression's traits.
34 static const int Layout = XprTraits::Layout;
37 template<DenseIndex DimId,
typename XprType>
43 template<DenseIndex DimId,
typename XprType>
// Variant parameterized on DimId: the chipped dimension is known at compile time.
49 template <DenseIndex DimId>
// The runtime argument is not stored here; it is only checked against DimId.
52 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
DimensionId(DenseIndex dim) {
53 eigen_assert(dim == DimId);
// NOTE(review): the body of actualDim() is not visible in this listing;
// presumably it returns DimId — confirm against the full header.
55 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim()
const {
// Runtime variant: the dimension is stored in the const member actual_dim.
// NOTE(review): the struct header is not visible here; presumably this is the
// specialization used when the dimension is not a compile-time constant — confirm.
62 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
DimensionId(DenseIndex dim) : actual_dim(dim) {
// Only the lower bound is asserted here; no upper bound is checked at this point.
63 eigen_assert(dim >= 0);
// NOTE(review): body not visible; presumably returns actual_dim — confirm.
65 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim()
const {
69 const DenseIndex actual_dim;
77 template<DenseIndex DimId,
typename XprType>
83 typedef typename XprType::CoeffReturnType CoeffReturnType;
// Builds a chipping expression: stores the wrapped expression in m_xpr, the
// chip position in m_offset, and the chipped dimension in m_dim.
88 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorChippingOp(
const XprType& expr,
const Index offset,
const Index dim)
89 : m_xpr(expr), m_offset(offset), m_dim(dim) {
// Returns the stored chip offset (m_offset).
93 const Index offset()
const {
return m_offset; }
// Returns the chipped dimension as reported by m_dim.actualDim().
95 const Index dim()
const {
return m_dim.actualDim(); }
// Returns the wrapped expression (m_xpr).
// NOTE(review): the return type line is not visible in this listing.
99 expression()
const {
return m_xpr; }
// Copy assignment: constructs an Assign object from *this and `other`.
// NOTE(review): the Assign typedef and the statements that follow the
// construction (evaluation, return) are not visible in this listing.
102 EIGEN_STRONG_INLINE TensorChippingOp& operator = (
const TensorChippingOp& other)
105 Assign assign(*
this, other);
// Assignment from an arbitrary expression type OtherDerived; the visible part
// follows the same pattern as the copy assignment.
110 template<
typename OtherDerived>
112 EIGEN_STRONG_INLINE TensorChippingOp& operator = (
const OtherDerived& other)
115 Assign assign(*
this, other);
121 typename XprType::Nested m_xpr;
122 const Index m_offset;
128 template<DenseIndex DimId,
typename ArgType,
typename Device>
133 static const int NumDims = NumInputDims-1;
134 typedef typename XprType::Index
Index;
136 typedef typename XprType::Scalar Scalar;
137 typedef typename XprType::CoeffReturnType CoeffReturnType;
// Constructs the evaluator: builds the nested evaluator m_impl from
// op.expression(), records the chipped dimension from op.dim(), keeps a
// reference to the device, then derives the output dimensions and the
// strides/offset used to map output indices to input indices.
152 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorEvaluator(
const XprType& op,
const Device& device)
153 : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
// The input must have at least one dimension to chip away.
155 EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
// The chipped dimension must be a valid input dimension index.
156 eigen_assert(NumInputDims > m_dim.actualDim());
// The offset must be below the extent of the chipped dimension.
// NOTE(review): no lower-bound check on op.offset() is visible here.
// NOTE(review): input_dims is declared on a line not shown in this listing;
// presumably it holds the dimensions of m_impl — confirm.
159 eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);
// Copy every input extent except the chipped one into m_dimensions.
// NOTE(review): `j` is declared/advanced on lines not shown; presumably a
// running output index — confirm.
162 for (
int i = 0; i < NumInputDims; ++i) {
163 if (i != m_dim.actualDim()) {
164 m_dimensions[j] = input_dims[i];
// ColMajor: both strides accumulate the extents of the dimensions that come
// before the chipped one.
// NOTE(review): the initial values of m_stride and m_inputStride are set on
// lines not shown in this listing; presumably 1 — confirm.
171 if (static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
172 for (
int i = 0; i < m_dim.actualDim(); ++i) {
173 m_stride *= input_dims[i];
174 m_inputStride *= input_dims[i];
// Second loop: both strides accumulate the extents of the dimensions that come
// after the chipped one.
// NOTE(review): the `else` line is not shown; presumably this is the RowMajor
// branch — confirm.
177 for (
int i = NumInputDims-1; i > m_dim.actualDim(); --i) {
178 m_stride *= input_dims[i];
179 m_inputStride *= input_dims[i];
// The input stride additionally includes the extent of the chipped dimension.
182 m_inputStride *= input_dims[m_dim.actualDim()];
// Input offset of the selected chip: the chip offset scaled by m_stride.
183 m_inputOffset = m_stride * op.offset();
// Returns the dimensions of the chipped result (m_dimensions).
186 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dimensions; }
// The Scalar* argument is unnamed and unused; the nested evaluator is asked to
// evaluate its sub-expressions with a NULL destination.
// NOTE(review): the return statement is not visible in this listing.
188 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(Scalar* ) {
189 m_impl.evalSubExprsIfNeeded(NULL);
// NOTE(review): the body of cleanup() is not visible in this listing;
// presumably it forwards to m_impl — confirm.
193 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup() {
// Reads one coefficient of the chipped result: the output index is mapped to
// an input index by srcCoeff() and the value is read from the nested evaluator.
197 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const 199 return m_impl.coeff(srcCoeff(index));
// Loads PacketSize consecutive output coefficients starting at `index`.
// Three cases are distinguished by layout and chipped dimension.
202 template<
int LoadMode>
203 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const 205 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
// The whole packet must lie inside the chipped result.
206 eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// Case 1: chipped dimension is the first one (ColMajor) or the last one
// (RowMajor). Consecutive output coefficients are m_inputStride apart in the
// input, so they are gathered one by one into `values` and then loaded.
// NOTE(review): `values` is declared on a line not shown in this listing.
208 if ((static_cast<int>(Layout) == static_cast<int>(
ColMajor) && m_dim.actualDim() == 0) ||
209 (static_cast<int>(Layout) ==
static_cast<int>(
RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
211 eigen_assert(m_stride == 1);
212 Index inputIndex = index * m_inputStride + m_inputOffset;
214 for (
int i = 0; i < PacketSize; ++i) {
215 values[i] = m_impl.coeff(inputIndex);
216 inputIndex += m_inputStride;
218 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
// Case 2: chipped dimension is the last one (ColMajor) or the first one
// (RowMajor). The output index only needs to be shifted by m_inputOffset, so
// the packet is loaded directly from the nested evaluator.
220 }
else if ((static_cast<int>(Layout) == static_cast<int>(
ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
221 (static_cast<int>(Layout) ==
static_cast<int>(
RowMajor) && m_dim.actualDim() == 0)) {
223 eigen_assert(m_stride > index);
224 return m_impl.template packet<LoadMode>(index + m_inputOffset);
// Case 3 (remaining configurations): split the index into a run number (idx)
// and a position inside the run (rem), where a run is m_stride elements long.
226 const Index idx = index / m_stride;
227 const Index rem = index - idx * m_stride;
// If the packet fits inside one run it can be loaded directly from the input.
228 if (rem + PacketSize <= m_stride) {
229 Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
230 return m_impl.template packet<LoadMode>(inputIndex);
// Otherwise the packet is assembled coefficient by coefficient through coeff().
// NOTE(review): the statement advancing `index` inside this loop is not
// visible in this listing — confirm against the full header.
234 for (
int i = 0; i < PacketSize; ++i) {
235 values[i] = coeff(index);
238 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
// Estimates the per-coefficient cost: the index-mapping cost accumulated in
// `cost` below, combined with the cost reported by the nested evaluator.
// NOTE(review): the return type and the declaration of `cost` are on lines
// not shown in this listing.
245 costPerCoeff(
bool vectorized)
const {
// Chipped dimension first (ColMajor) or last (RowMajor): one multiply and one
// add on Index.
247 if ((static_cast<int>(Layout) == static_cast<int>(
ColMajor) &&
248 m_dim.actualDim() == 0) ||
249 (static_cast<int>(Layout) ==
static_cast<int>(
RowMajor) &&
250 m_dim.actualDim() == NumInputDims - 1)) {
251 cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
// Chipped dimension last (ColMajor) or first (RowMajor): a single add on Index.
252 }
else if ((static_cast<int>(Layout) == static_cast<int>(
ColMajor) &&
253 m_dim.actualDim() == NumInputDims - 1) ||
254 (static_cast<int>(Layout) ==
static_cast<int>(
RowMajor) &&
255 m_dim.actualDim() == 0)) {
256 cost += TensorOpCost::AddCost<Index>();
// Remaining configurations: three multiplies, one divide and three adds on Index.
258 cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
259 3 * TensorOpCost::AddCost<Index>();
// NOTE(review): the right-hand operand of this sum is on a line not shown.
262 return m_impl.costPerCoeff(vectorized) +
// Exposes a raw pointer into the nested evaluator's buffer when the chip can
// be addressed as the nested data pointer shifted by m_inputOffset.
266 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data()
const {
// The pointer returned by m_impl.data() has its constness cast away.
267 CoeffReturnType* result =
const_cast<CoeffReturnType*
>(m_impl.data());
// Visible part of the condition: chipped dimension equal to NumDims (ColMajor)
// or to 0 (RowMajor).
// NOTE(review): the condition continues after the trailing `&&` on a line not
// shown, and the alternative return is not visible either — confirm both.
268 if (((static_cast<int>(Layout) == static_cast<int>(
ColMajor) && m_dim.actualDim() == NumDims) ||
269 (static_cast<int>(Layout) ==
static_cast<int>(
RowMajor) && m_dim.actualDim() == 0)) &&
271 return result + m_inputOffset;
// Maps an index in the chipped result to the corresponding index in the input.
// NOTE(review): the declaration of inputIndex and the final return are on
// lines not shown in this listing.
278 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index)
const 281 if ((static_cast<int>(Layout) == static_cast<int>(
ColMajor) && m_dim.actualDim() == 0) ||
282 (static_cast<int>(Layout) ==
static_cast<int>(
RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
// Chipped dimension first (ColMajor) or last (RowMajor): m_stride is expected
// to be 1, and each output index is scaled by m_inputStride.
284 eigen_assert(m_stride == 1);
285 inputIndex = index * m_inputStride + m_inputOffset;
286 }
else if ((static_cast<int>(Layout) == static_cast<int>(
ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
287 (static_cast<int>(Layout) ==
static_cast<int>(
RowMajor) && m_dim.actualDim() == 0)) {
// Chipped dimension last (ColMajor) or first (RowMajor): the output index is
// only shifted by m_inputOffset.
289 eigen_assert(m_stride > index);
290 inputIndex = index + m_inputOffset;
// Remaining configurations: idx selects the run of m_stride elements, and
// `index` is reduced to the position inside that run.
// NOTE(review): the statement that adds the reduced `index` to inputIndex is
// not visible in this listing — confirm against the full header.
292 const Index idx = index / m_stride;
293 inputIndex = idx * m_inputStride + m_inputOffset;
294 index -= idx * m_stride;
300 Dimensions m_dimensions;
306 const Device& m_device;
311 template<DenseIndex DimId,
typename ArgType,
typename Device>
313 :
public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
318 static const int NumDims = NumInputDims-1;
319 typedef typename XprType::Index
Index;
321 typedef typename XprType::Scalar Scalar;
322 typedef typename XprType::CoeffReturnType CoeffReturnType;
332 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorEvaluator(
const XprType& op,
const Device& device)
// Returns a writable reference to one coefficient of the chip: the output
// index is mapped by srcCoeff() and forwarded to the nested evaluator's coeffRef().
336 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
338 return this->m_impl.coeffRef(this->srcCoeff(index));
// Stores the packet `x` into PacketSize consecutive output coefficients
// starting at `index`. Three cases are distinguished by layout and chipped
// dimension.
341 template <
int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
342 void writePacket(Index index,
const PacketReturnType& x)
344 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
// Case 1: chipped dimension first (ColMajor) or last (RowMajor). The packet is
// spilled into `values` and scattered with a step of m_inputStride.
// NOTE(review): `values` is declared on a line not shown in this listing.
346 if ((static_cast<int>(this->Layout) == static_cast<int>(
ColMajor) && this->m_dim.actualDim() == 0) ||
347 (static_cast<int>(this->Layout) ==
static_cast<int>(
RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) {
349 eigen_assert(this->m_stride == 1);
351 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
352 Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
353 for (
int i = 0; i < PacketSize; ++i) {
354 this->m_impl.coeffRef(inputIndex) = values[i];
355 inputIndex += this->m_inputStride;
// Case 2: chipped dimension last (ColMajor) or first (RowMajor). The packet is
// written directly at the output index shifted by m_inputOffset.
357 }
else if ((static_cast<int>(this->Layout) == static_cast<int>(
ColMajor) && this->m_dim.actualDim() == NumInputDims-1) ||
358 (static_cast<int>(this->Layout) ==
static_cast<int>(
RowMajor) && this->m_dim.actualDim() == 0)) {
360 eigen_assert(this->m_stride > index);
361 this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
// Case 3 (remaining configurations): split the index into a run number (idx)
// and a position inside the run (rem), where a run is m_stride elements long.
363 const Index idx = index / this->m_stride;
364 const Index rem = index - idx * this->m_stride;
// If the packet fits inside one run it is written directly to the input.
365 if (rem + PacketSize <= this->m_stride) {
366 const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
367 this->m_impl.template writePacket<StoreMode>(inputIndex, x);
// Otherwise the packet is spilled and written coefficient by coefficient.
// NOTE(review): the statement advancing `index` inside this loop is not
// visible in this listing — confirm against the full header.
371 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
372 for (
int i = 0; i < PacketSize; ++i) {
373 this->coeffRef(index) = values[i];
384 #endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H Definition: TensorExecutor.h:27
Definition: TensorCostModel.h:25
Storage order is column major (see TopicStorageOrders).
Definition: Constants.h:320
Definition: XprHelper.h:158
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:85
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition: TensorEvaluator.h:28
Definition: TensorAssign.h:60
Definition: TensorChipping.h:78
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:33
Definition: TensorChipping.h:129
Definition: TensorDeviceDefault.h:17
The tensor base class.
Definition: TensorBase.h:827
Definition: BandTriangularSolver.h:13
Storage order is row major (see TopicStorageOrders).
Definition: Constants.h:322
Definition: TensorChipping.h:50
Definition: TensorTraits.h:170
The type used to identify a dense storage.
Definition: Constants.h:491
const int Dynamic
This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is stored in some runtime variable.
Definition: Constants.h:21
Generic expression where a coefficient-wise unary operator is applied to an expression.
Definition: CwiseUnaryOp.h:55
Definition: ForwardDeclarations.h:17
Definition: XprHelper.h:312
Definition: EmulateArray.h:203