Eigen  3.2.92
Redux.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_REDUX_H
12 #define EIGEN_REDUX_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 // TODO
19 // * implement other kind of vectorization
20 // * factorize code
21 
22 /***************************************************************************
23 * Part 1 : the logic deciding a strategy for vectorization and unrolling
24 ***************************************************************************/
25 
26 template<typename Func, typename Derived>
27 struct redux_traits
28 {
29 public:
30  enum {
31  PacketSize = packet_traits<typename Derived::Scalar>::size,
32  InnerMaxSize = int(Derived::IsRowMajor)
33  ? Derived::MaxColsAtCompileTime
34  : Derived::MaxRowsAtCompileTime
35  };
36 
37  enum {
38  MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
39  && (functor_traits<Func>::PacketAccess),
40  MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
41  MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
42  };
43 
44 public:
45  enum {
46  Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
47  : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
48  : int(DefaultTraversal)
49  };
50 
51 public:
52  enum {
53  Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
54  : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
55  UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
56  };
57 
58 public:
59  enum {
60  Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
61  };
62 
63 #ifdef EIGEN_DEBUG_ASSIGN
64  static void debug()
65  {
66  std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
67  std::cerr.setf(std::ios::hex, std::ios::basefield);
68  EIGEN_DEBUG_VAR(Derived::Flags)
69  std::cerr.unsetf(std::ios::hex);
70  EIGEN_DEBUG_VAR(InnerMaxSize)
71  EIGEN_DEBUG_VAR(PacketSize)
72  EIGEN_DEBUG_VAR(MightVectorize)
73  EIGEN_DEBUG_VAR(MayLinearVectorize)
74  EIGEN_DEBUG_VAR(MaySliceVectorize)
75  EIGEN_DEBUG_VAR(Traversal)
76  EIGEN_DEBUG_VAR(UnrollingLimit)
77  EIGEN_DEBUG_VAR(Unrolling)
78  std::cerr << std::endl;
79  }
80 #endif
81 };
82 
83 /***************************************************************************
84 * Part 2 : unrollers
85 ***************************************************************************/
86 
87 /*** no vectorization ***/
88 
89 template<typename Func, typename Derived, int Start, int Length>
90 struct redux_novec_unroller
91 {
92  enum {
93  HalfLength = Length/2
94  };
95 
96  typedef typename Derived::Scalar Scalar;
97 
98  EIGEN_DEVICE_FUNC
99  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
100  {
101  return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
102  redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
103  }
104 };
105 
106 template<typename Func, typename Derived, int Start>
107 struct redux_novec_unroller<Func, Derived, Start, 1>
108 {
109  enum {
110  outer = Start / Derived::InnerSizeAtCompileTime,
111  inner = Start % Derived::InnerSizeAtCompileTime
112  };
113 
114  typedef typename Derived::Scalar Scalar;
115 
116  EIGEN_DEVICE_FUNC
117  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
118  {
119  return mat.coeffByOuterInner(outer, inner);
120  }
121 };
122 
123 // This is actually dead code and will never be called. It is required
124 // to prevent false warnings regarding failed inlining though
125 // for 0 length run() will never be called at all.
126 template<typename Func, typename Derived, int Start>
127 struct redux_novec_unroller<Func, Derived, Start, 0>
128 {
129  typedef typename Derived::Scalar Scalar;
130  EIGEN_DEVICE_FUNC
131  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
132 };
133 
134 /*** vectorization ***/
135 
136 template<typename Func, typename Derived, int Start, int Length>
137 struct redux_vec_unroller
138 {
139  enum {
140  PacketSize = packet_traits<typename Derived::Scalar>::size,
141  HalfLength = Length/2
142  };
143 
144  typedef typename Derived::Scalar Scalar;
145  typedef typename packet_traits<Scalar>::type PacketScalar;
146 
147  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
148  {
149  return func.packetOp(
150  redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
151  redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
152  }
153 };
154 
155 template<typename Func, typename Derived, int Start>
156 struct redux_vec_unroller<Func, Derived, Start, 1>
157 {
158  enum {
159  index = Start * packet_traits<typename Derived::Scalar>::size,
160  outer = index / int(Derived::InnerSizeAtCompileTime),
161  inner = index % int(Derived::InnerSizeAtCompileTime),
162  alignment = Derived::Alignment
163  };
164 
165  typedef typename Derived::Scalar Scalar;
166  typedef typename packet_traits<Scalar>::type PacketScalar;
167 
168  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
169  {
170  return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
171  }
172 };
173 
174 /***************************************************************************
175 * Part 3 : implementation of all cases
176 ***************************************************************************/
177 
178 template<typename Func, typename Derived,
179  int Traversal = redux_traits<Func, Derived>::Traversal,
180  int Unrolling = redux_traits<Func, Derived>::Unrolling
181 >
182 struct redux_impl;
183 
184 template<typename Func, typename Derived>
185 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
186 {
187  typedef typename Derived::Scalar Scalar;
188  EIGEN_DEVICE_FUNC
189  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
190  {
191  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
192  Scalar res;
193  res = mat.coeffByOuterInner(0, 0);
194  for(Index i = 1; i < mat.innerSize(); ++i)
195  res = func(res, mat.coeffByOuterInner(0, i));
196  for(Index i = 1; i < mat.outerSize(); ++i)
197  for(Index j = 0; j < mat.innerSize(); ++j)
198  res = func(res, mat.coeffByOuterInner(i, j));
199  return res;
200  }
201 };
202 
203 template<typename Func, typename Derived>
204 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
205  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
206 {};
207 
208 template<typename Func, typename Derived>
209 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
210 {
211  typedef typename Derived::Scalar Scalar;
212  typedef typename packet_traits<Scalar>::type PacketScalar;
213 
214  static Scalar run(const Derived &mat, const Func& func)
215  {
216  const Index size = mat.size();
217 
218  const Index packetSize = packet_traits<Scalar>::size;
219  const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
220  enum {
221  alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
222  alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
223  };
224  const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
225  const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
226  const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
227  const Index alignedEnd2 = alignedStart + alignedSize2;
228  const Index alignedEnd = alignedStart + alignedSize;
229  Scalar res;
230  if(alignedSize)
231  {
232  PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
233  if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
234  {
235  PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
236  for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
237  {
238  packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
239  packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
240  }
241 
242  packet_res0 = func.packetOp(packet_res0,packet_res1);
243  if(alignedEnd>alignedEnd2)
244  packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
245  }
246  res = func.predux(packet_res0);
247 
248  for(Index index = 0; index < alignedStart; ++index)
249  res = func(res,mat.coeff(index));
250 
251  for(Index index = alignedEnd; index < size; ++index)
252  res = func(res,mat.coeff(index));
253  }
254  else // too small to vectorize anything.
255  // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
256  {
257  res = mat.coeff(0);
258  for(Index index = 1; index < size; ++index)
259  res = func(res,mat.coeff(index));
260  }
261 
262  return res;
263  }
264 };
265 
266 // NOTE: for SliceVectorizedTraversal we simply bypass unrolling
267 template<typename Func, typename Derived, int Unrolling>
268 struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
269 {
270  typedef typename Derived::Scalar Scalar;
271  typedef typename packet_traits<Scalar>::type PacketType;
272 
273  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
274  {
275  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
276  const Index innerSize = mat.innerSize();
277  const Index outerSize = mat.outerSize();
278  enum {
279  packetSize = packet_traits<Scalar>::size
280  };
281  const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
282  Scalar res;
283  if(packetedInnerSize)
284  {
285  PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
286  for(Index j=0; j<outerSize; ++j)
287  for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
288  packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
289 
290  res = func.predux(packet_res);
291  for(Index j=0; j<outerSize; ++j)
292  for(Index i=packetedInnerSize; i<innerSize; ++i)
293  res = func(res, mat.coeffByOuterInner(j,i));
294  }
295  else // too small to vectorize anything.
296  // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
297  {
298  res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
299  }
300 
301  return res;
302  }
303 };
304 
305 template<typename Func, typename Derived>
306 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
307 {
308  typedef typename Derived::Scalar Scalar;
309  typedef typename packet_traits<Scalar>::type PacketScalar;
310  enum {
311  PacketSize = packet_traits<Scalar>::size,
312  Size = Derived::SizeAtCompileTime,
313  VectorizedSize = (Size / PacketSize) * PacketSize
314  };
315  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
316  {
317  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
318  if (VectorizedSize > 0) {
319  Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
320  if (VectorizedSize != Size)
321  res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
322  return res;
323  }
324  else {
325  return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
326  }
327  }
328 };
329 
330 // evaluator adaptor
331 template<typename _XprType>
332 class redux_evaluator
333 {
334 public:
335  typedef _XprType XprType;
336  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
337 
338  typedef typename XprType::Scalar Scalar;
339  typedef typename XprType::CoeffReturnType CoeffReturnType;
340  typedef typename XprType::PacketScalar PacketScalar;
341  typedef typename XprType::PacketReturnType PacketReturnType;
342 
343  enum {
344  MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
345  MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
346  // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
347  Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
348  IsRowMajor = XprType::IsRowMajor,
349  SizeAtCompileTime = XprType::SizeAtCompileTime,
350  InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
351  CoeffReadCost = evaluator<XprType>::CoeffReadCost,
352  Alignment = evaluator<XprType>::Alignment
353  };
354 
355  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
356  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
357  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
358  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
359  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
360 
361  EIGEN_DEVICE_FUNC
362  CoeffReturnType coeff(Index row, Index col) const
363  { return m_evaluator.coeff(row, col); }
364 
365  EIGEN_DEVICE_FUNC
366  CoeffReturnType coeff(Index index) const
367  { return m_evaluator.coeff(index); }
368 
369  template<int LoadMode, typename PacketType>
370  PacketReturnType packet(Index row, Index col) const
371  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
372 
373  template<int LoadMode, typename PacketType>
374  PacketReturnType packet(Index index) const
375  { return m_evaluator.template packet<LoadMode,PacketType>(index); }
376 
377  EIGEN_DEVICE_FUNC
378  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
379  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
380 
381  template<int LoadMode, typename PacketType>
382  PacketReturnType packetByOuterInner(Index outer, Index inner) const
383  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
384 
385  const XprType & nestedExpression() const { return m_xpr; }
386 
387 protected:
388  internal::evaluator<XprType> m_evaluator;
389  const XprType &m_xpr;
390 };
391 
392 } // end namespace internal
393 
394 /***************************************************************************
395 * Part 4 : public API
396 ***************************************************************************/
397 
398 
406 template<typename Derived>
407 template<typename Func>
408 typename internal::traits<Derived>::Scalar
409 DenseBase<Derived>::redux(const Func& func) const
410 {
411  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
412 
413  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
414  ThisEvaluator thisEval(derived());
415 
416  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
417 }
418 
422 template<typename Derived>
423 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
425 {
426  return derived().redux(Eigen::internal::scalar_min_op<Scalar>());
427 }
428 
432 template<typename Derived>
433 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
435 {
436  return derived().redux(Eigen::internal::scalar_max_op<Scalar>());
437 }
438 
443 template<typename Derived>
444 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
446 {
447  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
448  return Scalar(0);
449  return derived().redux(Eigen::internal::scalar_sum_op<Scalar>());
450 }
451 
456 template<typename Derived>
457 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
459 {
460  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
461 }
462 
470 template<typename Derived>
471 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
473 {
474  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
475  return Scalar(1);
476  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
477 }
478 
485 template<typename Derived>
486 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
488 {
489  return derived().diagonal().sum();
490 }
491 
492 } // end namespace Eigen
493 
494 #endif // EIGEN_REDUX_H
Scalar prod() const
Definition: Redux.h:472
const unsigned int DirectAccessBit
Definition: Constants.h:149
Definition: LDLT.h:16
internal::traits< Derived >::Scalar maxCoeff() const
Definition: Redux.h:434
Definition: StdDeque.h:58
Base class for all dense matrices, vectors, and arrays.
Definition: DenseBase.h:41
Scalar mean() const
Definition: Redux.h:458
Definition: Constants.h:228
Scalar sum() const
Definition: Redux.h:445
internal::traits< Derived >::Scalar minCoeff() const
Definition: Redux.h:424
Definition: Eigen_Colamd.h:54
Scalar trace() const
Definition: Redux.h:487
const unsigned int ActualPacketAccessBit
Definition: Constants.h:99
const unsigned int LinearAccessBit
Definition: Constants.h:124