CppADCodeGen  HEAD
A C++ Algorithmic Differentiation Package with Source Code Generation
dependent_pattern_matcher.hpp
1 #ifndef CPPAD_CG_DEPENDENT_PATTERN_MATCHER_INCLUDED
2 #define CPPAD_CG_DEPENDENT_PATTERN_MATCHER_INCLUDED
3 /* --------------------------------------------------------------------------
4  * CppADCodeGen: C++ Algorithmic Differentiation with Source Code Generation:
5  * Copyright (C) 2013 Ciengis
6  * Copyright (C) 2019 Joao Leal
7  *
8  * CppADCodeGen is distributed under multiple licenses:
9  *
10  * - Eclipse Public License Version 1.0 (EPL1), and
11  * - GNU General Public License Version 3 (GPL3).
12  *
13  * EPL1 terms and conditions can be found in the file "epl-v10.txt", while
14  * terms and conditions for the GPL3 can be found in the file "gpl3.txt".
15  * ----------------------------------------------------------------------------
16  * Author: Joao Leal
17  */
18 
19 //#define CPPADCG_PRINT_DEBUG
20 
21 namespace CppAD {
22 namespace cg {
23 
24 template<class Base>
26 public:
29 public:
30 
32  if (e1->depRefIndex < e2->depRefIndex) {
33  eq1 = e1;
34  eq2 = e2;
35  } else {
36  eq1 = e2;
37  eq2 = e1;
38  }
39  }
40 };
41 
42 template<class Base>
43 inline bool operator<(const UniqueEquationPair<Base>& p1, const UniqueEquationPair<Base>& p2) {
44  return p1.eq1->depRefIndex < p2.eq1->depRefIndex || (!(p2.eq1->depRefIndex < p1.eq1->depRefIndex) && p1.eq2->depRefIndex < p2.eq2->depRefIndex);
45 }
46 
50 template<class Base>
52 public:
53  using CGBase = CG<Base>;
54 private:
55 
56  enum class INDEXED_OPERATION_TYPE {
57  INDEXED,
58  NONINDEXED,
59  BOTH
60  };
61  using Indexed2OpCountType = std::pair<INDEXED_OPERATION_TYPE, size_t>;
62  using Dep1Dep2SharedType = std::map<size_t, std::map<size_t, std::map<OperationNode<Base>*, Indexed2OpCountType> > >;
63  using DepPairType = std::pair<size_t, size_t>;
64  using TotalOps2validDepsType = std::map<size_t, std::map<DepPairType, const std::map<OperationNode<Base>*, Indexed2OpCountType>* > >;
65  using Eq2totalOps2validDepsType = std::map<UniqueEquationPair<Base>, TotalOps2validDepsType*>;
66  using MaxOps2eq2totalOps2validDepsType = std::map<size_t, Eq2totalOps2validDepsType>;
67 
68 private:
69  CodeHandler<Base>* handler_;
71  CodeHandlerVector<Base, bool> varIndexed_; // which nodes depend on indexed independent variables
72  const std::vector<std::set<size_t> >& relatedDepCandidates_;
73  std::vector<CGBase> dependents_; // a copy
74  const std::vector<CGBase>& independents_;
75  std::vector<EquationPattern<Base>*> equations_;
76  EquationPattern<Base>* eqCurr_;
77  std::map<size_t, EquationPattern<Base>*> dep2Equation_;
78  std::map<EquationPattern<Base>*, Loop<Base>*> equation2Loop_;
79  std::vector<Loop<Base>*> loops_;
83  std::map<EquationPattern<Base>*, std::set<EquationPattern<Base>*> > incompatible_;
87  std::map<UniqueEquationPair<Base>, Dep1Dep2SharedType> equationShared_;
92  std::map<OperationNode<Base>*, size_t> origTemp2Index_;
93  std::vector<std::set<size_t> > id2Deps;
94  size_t idCounter_;
99  CodeHandlerVector<Base, size_t> origShareNodeId_;
101  size_t color_;
102 public:
103 
113  DependentPatternMatcher(const std::vector<std::set<size_t> >& relatedDepCandidates,
114  const std::vector<CGBase>& dependents,
115  const std::vector<CGBase>& independents) :
116  handler_(independents[0].getCodeHandler()),
117  varId_(*handler_),
118  varIndexed_(*handler_),
119  relatedDepCandidates_(relatedDepCandidates),
120  dependents_(dependents),
121  independents_(independents),
122  idCounter_(0),
123  origShareNodeId_(*handler_),
124  color_(0) {
125  CPPADCG_ASSERT_UNKNOWN(independents_.size() > 0)
126  CPPADCG_ASSERT_UNKNOWN(independents_[0].getCodeHandler() != nullptr)
127  equations_.reserve(relatedDepCandidates_.size());
128  origShareNodeId_.adjustSize();
129  }
130 
131  const std::vector<EquationPattern<Base>*>& getEquationPatterns() const {
132  return equations_;
133  }
134 
135  const std::vector<Loop<Base>*>& getLoops() const {
136  return loops_;
137  }
138 
148  virtual void generateTapes(LoopFreeModel<Base>*& nonLoopTape,
149  std::set<LoopModel<Base>*>& loopTapes) {
150 
151  for (size_t j = 0; j < independents_.size(); j++) {
152  std::vector<size_t>& info = independents_[j].getOperationNode()->getInfo();
153  info.resize(1);
154  info[0] = j;
155  }
156 
157  findLoops();
158 
159  nonLoopTape = createNewTape();
160 
161  loopTapes.clear();
162  for (size_t l = 0; l < loops_.size(); l++) {
163  Loop<Base>* loop = loops_[l];
164  loopTapes.insert(loop->releaseLoopModel());
165  }
166  }
167 
168  virtual ~DependentPatternMatcher() {
169  for (size_t l = 0; l < loops_.size(); l++) {
170  delete loops_[l];
171  }
172  }
173 
174 private:
175 
182  virtual std::vector<Loop<Base>*> findLoops() {
183  using namespace std;
184 
185  size_t rSize = relatedDepCandidates_.size();
186  for (size_t r = 0; r < rSize; r++) {
187  const std::set<size_t>& candidates = relatedDepCandidates_[r];
188  for (size_t iDep : candidates) {
189  OperationNode<Base>* node = dependents_[iDep].getOperationNode();
190  if (node != nullptr && node->getOperationType() == CGOpCode::Inv) {
199  CPPADCG_ASSERT_UNKNOWN(handler_ == dependents_[iDep].getCodeHandler())
200  dependents_[iDep] = CG<Base>(*handler_->makeNode(CGOpCode::Alias, *node));
201  }
202  }
203  }
204 
205  varId_.adjustSize();
206  varId_.fill(0);
207 
208  // assign a unique Id to each node
209  assignIds();
210  id2Deps.resize(idCounter_ + 1);
211 
215  findRelatedVariables();
216 
217  for (EquationPattern<Base>* eq : equations_) {
218  for (size_t depIt : eq->dependents) {
219  dep2Equation_[depIt] = eq;
220  }
221  }
222 
223  const size_t eq_size = equations_.size();
224  loops_.reserve(eq_size);
225 
226  SmartSetPointer<set<size_t> > dependentRelations;
227  std::vector<set<size_t>*> dep2Relations(dependents_.size(), nullptr);
228  map<size_t, set<size_t> > dependentBlackListRelations;
229 
230  /*******************************************************************
231  * Combine related equations in the same loops
232  * (equations that share temporary variables)
233  ******************************************************************/
237  varIndexed_.adjustSize();
238  varIndexed_.fill(false);
239 
240  for (size_t e = 0; e < eq_size; e++) {
241  EquationPattern<Base>* eq = equations_[e];
242  eqCurr_ = eq;
243 
244  for (size_t depIt : eq->dependents) {
245  OperationNode<Base>* node = dependents_[depIt].getOperationNode();
246  // will define the dependents associated with each operation
247  markOperationsWithDependent(node, depIt);
248  }
249 
253  if (e > 0) {
254  handler_->startNewOperationTreeVisit();
255 
256  for (size_t depIt : eq->dependents) {
257  findSharedTemporaries(dependents_[depIt], depIt); // a color is used to mark indexed paths
258  }
259 
263  for (size_t depIt : eq->dependents) {
264  OperationNode<Base>* node = dependents_[depIt].getOperationNode();
265  EquationPattern<Base>::uncolor(node, varIndexed_); // must uncolor
266  }
267  }
268 
269  // create a loop for this equation
270  auto* loop = new Loop<Base>(*eq);
271  loops_.push_back(loop);
272  equation2Loop_[eq] = loop;
273  }
274 
275  /*******************************************************************
276  * Attempt to combine loops with shared variables
277  ******************************************************************/
278  MaxOps2eq2totalOps2validDepsType maxOps2Eq2totalOps2validDeps;
279  Eq2totalOps2validDepsType eq2totalOps2validDeps;
280  SmartListPointer<TotalOps2validDepsType> totalOps2validDepsMem;
281 
287  for (size_t l1 = 0; l1 < loops_.size(); l1++) {
288  Loop<Base>* loop1 = loops_[l1];
289  CPPADCG_ASSERT_UNKNOWN(loop1->equations.size() == 1)
290  EquationPattern<Base>* eq1 = *loop1->equations.begin();
291 
292  for (size_t l2 = l1 + 1; l2 < loops_.size(); l2++) {
293  Loop<Base>* loop2 = loops_[l2];
294  CPPADCG_ASSERT_UNKNOWN(loop2->equations.size() == 1)
295  EquationPattern<Base>* eq2 = *loop2->equations.begin();
296 
297  UniqueEquationPair<Base> eqRel(eq1, eq2);
298  const auto eqSharedit = equationShared_.find(eqRel);
299  if (eqSharedit == equationShared_.end())
300  continue; // nothing is shared between eq1 and eq2
301 
302  const Dep1Dep2SharedType& dep1Dep2Shared = eqSharedit->second;
303 
307  auto* totalOps2validDeps = new TotalOps2validDepsType();
308  totalOps2validDepsMem.push_back(totalOps2validDeps);
309  size_t maxOps = 0; // the maximum number of shared operations between two dependents
310 
311  bool canCombine = true;
312 
313  /***************************************************
314  * organize relations between dependents
315  **************************************************/
316  for (const auto& itDep1Dep2 : dep1Dep2Shared) {
317  size_t dep1 = itDep1Dep2.first;
318  const map<size_t, map<OperationNode<Base>*, Indexed2OpCountType> >& dep2Shared = itDep1Dep2.second;
319 
320  // multiple deps2 means multiple choices for a relation (only one dep1<->dep2 can be chosen)
321  for (const auto& itDep2 : dep2Shared) {
322  size_t dep2 = itDep2.first;
323  const map<OperationNode<Base>*, Indexed2OpCountType>& sharedTmps = itDep2.second;
324 
325  size_t totalOps = 0; // the total number of operations performed by shared variables with dep2
326  for (const auto& itShared : sharedTmps) {
327  if (itShared.second.first == INDEXED_OPERATION_TYPE::BOTH) {
333  canCombine = false;
334  break;
335  } else {
336  totalOps += itShared.second.second;
337  }
338  }
339 
340  if (!canCombine) break;
341 
342  DepPairType depRel(dep1, dep2);
343  (*totalOps2validDeps)[totalOps][depRel] = &sharedTmps;
344  maxOps = std::max<size_t>(maxOps, totalOps);
345  }
346 
347  if (!canCombine) break;
348  }
349 
350  if (canCombine) {
351  maxOps2Eq2totalOps2validDeps[maxOps][eqRel] = totalOps2validDeps;
352  eq2totalOps2validDeps[eqRel] = totalOps2validDeps;
353  } else {
354  incompatible_[eq1].insert(eq2);
355  incompatible_[eq2].insert(eq1);
356  totalOps2validDepsMem.pop_back();
357  delete totalOps2validDeps;
358  }
359  }
360  }
361 
365  typename MaxOps2eq2totalOps2validDepsType::const_reverse_iterator itMaxOps;
366  for (itMaxOps = maxOps2Eq2totalOps2validDeps.rbegin(); itMaxOps != maxOps2Eq2totalOps2validDeps.rend(); ++itMaxOps) {
367 #ifdef CPPADCG_PRINT_DEBUG
368  std::cout << "\n\nmaxOps: " << itMaxOps->first << " count:" << itMaxOps->second.size() << std::endl;
369 #endif
370 
371  for (const auto& itEqPair : itMaxOps->second) {
372  const UniqueEquationPair<Base>& eqRel = itEqPair.first;
373 #ifdef CPPADCG_PRINT_DEBUG
374  std::cout << " eq1: " << *eqRel.eq1->dependents.begin() << " eq2: " << *eqRel.eq2->dependents.begin() << std::endl;
375 #endif
376 
377  Loop<Base>* loop1 = equation2Loop_.at(eqRel.eq1);
378  Loop<Base>* loop2 = equation2Loop_.at(eqRel.eq2);
379 
380  if (loop1 == loop2)
381  continue; // already done
382  if (contains(incompatible_, eqRel.eq1, eqRel.eq2))
383  continue; // incompatible
384 
389  SmartSetPointer<set<size_t> > dependentRelationsBak;
390  for (const set<size_t>* its : dependentRelations) {
391  dependentRelationsBak.insert(new set<size_t>(*its));
392  }
393 
394  // relationships between dependents for the resulting merged loop
395  set<set<size_t>*> loopRelations;
396 
397  set<EquationPattern<Base>*> indexedLoopRelations;
398  std::vector<std::pair<EquationPattern<Base>*, EquationPattern<Base>*> > nonIndexedLoopRelations;
399 
403  bool compatible = isCompatible(loop1, loop2,
404  eq2totalOps2validDeps,
405  dep2Relations, dependentBlackListRelations, dependentRelations,
406  loopRelations, indexedLoopRelations, nonIndexedLoopRelations);
407 
408  if (compatible) {
409  // merge the two loops
410 
411  // update the loop of the equations
412  for (EquationPattern<Base>* itle : loop2->equations) {
413  equation2Loop_[itle] = loop1;
414  }
415  loop1->merge(*loop2, indexedLoopRelations, nonIndexedLoopRelations);
416 
417  typename std::vector<Loop<Base>*>::const_iterator it = std::find(loops_.cbegin(), loops_.cend(), loop2);
418  CPPADCG_ASSERT_UNKNOWN(it != loops_.end())
419  loops_.erase(it);
420  delete loop2;
421 
422  loop1->setLinkedDependents(loopRelations);
423 
424  // relation between loop1 and loop2 done!
425  } else {
426  // restore dependent relations
427  dependentRelations.s.swap(dependentRelationsBak.s);
428  // map each dependent to the relation set where it is present
429  std::fill(dep2Relations.begin(), dep2Relations.end(), nullptr);
430  for (set<size_t>* relation : dependentRelations) {
431  for (size_t itd : *relation) {
432  dep2Relations[itd] = relation;
433  }
434  }
435 
436  }
437 
438  }
439  }
440 
444  for (size_t l = 0; l < loops_.size(); l++) {
445  loops_[l]->generateDependentLoopIndexes(dep2Equation_);
446  }
447 
448  /*******************************************************************
449  * Attempt to combine unrelated loops
450  ******************************************************************/
451  if (!loops_.empty()) {
452  for (size_t l1 = 0; l1 < loops_.size() - 1; l1++) {
453  Loop<Base>* loop1 = loops_[l1];
454  for (size_t l2 = l1 + 1; l2 < loops_.size();) {
455  Loop<Base>* loop2 = loops_[l2];
456 
457  bool canMerge = loop1->getIterationCount() == loop2->getIterationCount();
458  if (canMerge) {
459  // check if there are equations in the blacklist
460  canMerge = !find(loop1, loop2, incompatible_);
461  }
462 
463  if (canMerge) {
464  loop1->mergeEqGroups(*loop2);
465  loops_.erase(loops_.begin() + l2);
466  delete loop2;
467  } else {
468  l2++;
469  }
470  }
471  }
472  }
473 
474  size_t l_size = loops_.size();
475 
479  for (size_t l = 0; l < l_size; l++) {
480  Loop<Base>* loop = loops_[l];
481 
482  //Generate a local model for the loop
483  loop->createLoopModel(dependents_, independents_, dep2Equation_, origTemp2Index_);
484  }
485 
489  resetHandlerCounters();
490 
491  return loops_;
492  }
493 
502  inline bool isCompatible(Loop<Base>* loop1,
503  Loop<Base>* loop2,
504  const Eq2totalOps2validDepsType& eq2totalOps2validDeps,
505  std::vector<std::set<size_t>* >& dep2Relations,
506  std::map<size_t, std::set<size_t> >& dependentBlackListRelations,
507  SmartSetPointer<std::set<size_t> >& dependentRelations,
508  std::set<std::set<size_t>*>& loopRelations,
509  std::set<EquationPattern<Base>*>& indexedLoopRelations,
510  std::vector<std::pair<EquationPattern<Base>*, EquationPattern<Base>*> >& nonIndexedLoopRelations) {
511  using namespace std;
512 
513  bool compatible = true;
514 
519  map<size_t, map<UniqueEquationPair<Base>, TotalOps2validDepsType*> > totalOp2eq;
520 
521  for (EquationPattern<Base>* eq1 : loop1->equations) {
522 
523  for (EquationPattern<Base>* eq2 : loop2->equations) {
524 
525  UniqueEquationPair<Base> eqRel(eq1, eq2);
526 
527  typename Eq2totalOps2validDepsType::const_iterator eqSharedit = eq2totalOps2validDeps.find(eqRel);
528  if (eqSharedit == eq2totalOps2validDeps.end())
529  continue; // nothing is shared between eq1 and eq2
530 
531 
532  size_t maxOps = eqSharedit->second->rbegin()->first;
533  totalOp2eq[maxOps][eqRel] = eqSharedit->second;
534  }
535  }
536 
537  typename map<size_t, map<UniqueEquationPair<Base>, TotalOps2validDepsType*> >::const_reverse_iterator itr;
538  for (itr = totalOp2eq.rbegin(); itr != totalOp2eq.rend(); ++itr) {
539  // loop shared operation count
540 
541  for (const auto& itEq : itr->second) {
542  EquationPattern<Base>* eq1 = itEq.first.eq1;
543  EquationPattern<Base>* eq2 = itEq.first.eq2;
544  TotalOps2validDepsType& totalOps2validDeps = *itEq.second;
545 
546  /***************************************************
547  * attempt to combine dependents which share the
548  * highest number of operations first
549  **************************************************/
550  typename map<size_t, map<DepPairType, const map<OperationNode<Base>*, Indexed2OpCountType>* > >::const_reverse_iterator itOp2Dep2Shared;
551  for (itOp2Dep2Shared = totalOps2validDeps.rbegin(); itOp2Dep2Shared != totalOps2validDeps.rend(); ++itOp2Dep2Shared) {
552 #ifdef CPPADCG_PRINT_DEBUG
553  std::cout << " operation count: " << itOp2Dep2Shared->first << " relations: " << itOp2Dep2Shared->second.size() << std::endl;
554 #endif
555  for (const auto& itDep2Shared : itOp2Dep2Shared->second) {
556  DepPairType depRel = itDep2Shared.first;
557  size_t dep1 = depRel.first;
558  size_t dep2 = depRel.second;
559 
560  const map<OperationNode<Base>*, Indexed2OpCountType>& shared = *itDep2Shared.second;
565  compatible = findDepRelations(eq1, dep1, eq2, dep2, shared,
566  dep2Relations, dependentBlackListRelations, dependentRelations);
567  if (!compatible) break;
568  }
569 
570  loopRelations.clear();
571 
572  if (compatible) {
576  std::vector<Loop<Base>*> loops(2);
577  loops[0] = loop1;
578  loops[1] = loop2;
579  bool nonIndexedOnly = true;
580  for (size_t l = 0; l < 2; l++) {
581  Loop<Base>* loop = loops[l];
582  for (EquationPattern<Base>* eq : loop->equations) { // equation
583  for (size_t dep : eq->dependents) { // dependent
584  if (dep2Relations[dep] != nullptr) {
585  loopRelations.insert(dep2Relations[dep]);
586  nonIndexedOnly = false;
587  }
588  }
589  }
590  }
591 
592 
593 
594  if (nonIndexedOnly) {
595  nonIndexedLoopRelations.push_back(std::make_pair(eq1, eq2));
596  } else {
597  // there are shared indexed temporary variables
598  compatible = false;
599  size_t nNonIndexedRel1 = loop1->getLinkedEquationsByNonIndexedCount();
600  size_t nNonIndexedRel2 = loop2->getLinkedEquationsByNonIndexedCount();
601  size_t requiredSize = loop1->equations.size() + loop2->equations.size() - nNonIndexedRel1 - nNonIndexedRel2;
602 
603  for (set<size_t>* relations : loopRelations) {
604  if (relations->size() == requiredSize) {
605  compatible = true;
606  break;
607  }
608  }
609 #ifdef CPPADCG_PRINT_DEBUG
610  if (compatible) {
611  std::cout << " loopRelations:";
612  print(loopRelations);
613  std::cout << std::endl;
614  }
615 #endif
616  }
617  }
618 
619  if (!compatible) break;
620  }
621 
622  if (!compatible) {
623  incompatible_[eq1].insert(eq2);
624  incompatible_[eq2].insert(eq1);
625  break;
626  } else {
627  indexedLoopRelations.insert(eq1);
628  indexedLoopRelations.insert(eq2);
629  }
630  }
631 
632  if (!compatible) break;
633  }
634 
635  return compatible;
636  }
637 
646  bool findDepRelations(EquationPattern<Base>* eq1,
647  size_t dep1,
649  size_t dep2,
650  const std::map<OperationNode<Base>*, Indexed2OpCountType>& sharedNodes,
651  std::vector<std::set<size_t>* >& dep2Relations,
652  std::map<size_t, std::set<size_t> >& dependentBlackListRelations,
653  SmartSetPointer<std::set<size_t> >& dependentRelations) {
654  using namespace std;
655 
656  for (const auto& itShared : sharedNodes) {
657  OperationNode<Base>* sharedNode = itShared.first;
658 
659  // checks independents
660  bool compatible = canCombineEquations(*eq1, dep1, *eq2, dep2, *sharedNode,
661  dep2Relations, dependentBlackListRelations, dependentRelations);
662 
663  if (!compatible) return false;
664  }
665 
666  return true;
667  }
668 
669  void groupByLoopEqOp(EquationPattern<Base>* eq,
670  std::map<Loop<Base>*, std::map<EquationPattern<Base>*, std::map<size_t, std::pair<OperationNode<Base>*, bool> > > >& loopSharedTemps,
671  const std::map<OperationNode<Base>*, std::set<size_t> >& opShared,
672  bool indexed) {
673  using namespace std;
674 
675  for (OperationNode<Base>* shared : opShared) {
676  const set<size_t>& deps = id2Deps[varId_[*shared]];
677 
678  for (size_t dep : deps) {
679  EquationPattern<Base>* otherEq = dep2Equation_.at(dep);
680  if (eq != otherEq) {
681  Loop<Base>* loop = equation2Loop_.at(otherEq);
682  // the original ID (saved in evaluation order) is used to sort shared variables
683  // to ensure reproducibility between different runs
684  loopSharedTemps[loop][otherEq][origShareNodeId_[shared]] = std::make_pair(shared, indexed);
685  }
686  }
687  }
688  }
689 
697  virtual LoopFreeModel<Base>* createNewTape() {
698  CPPADCG_ASSERT_UNKNOWN(handler_ == independents_[0].getCodeHandler());
699 
700  size_t m = dependents_.size();
701  std::vector<bool> inLoop(m, false);
702  size_t eqInLoopCount = 0;
703 
707  size_t l_size = loops_.size();
708 
709  for (size_t l = 0; l < l_size; l++) {
710  Loop<Base>* loop = loops_[l];
711  LoopModel<Base>* loopModel = loop->getModel();
712 
716  const std::vector<std::vector<LoopPosition> >& ldeps = loopModel->getDependentIndexes();
717  for (const auto& ldep : ldeps) {
718  for (const auto& pos : ldep) {
719  if (pos.original != (std::numeric_limits<size_t>::max)()) {// some equations are not present in all iteration
720  inLoop[pos.original] = true;
721  eqInLoopCount++;
722  }
723  }
724  }
725  }
726 
730  assert(m >= eqInLoopCount);
731  size_t nonLoopEq = m - eqInLoopCount;
732  std::vector<CGBase> nonLoopDeps(nonLoopEq + origTemp2Index_.size());
733 
734  if (nonLoopDeps.size() == 0)
735  return nullptr; // there are no equations outside the loops
736 
740  size_t inl = 0;
741  std::vector<size_t> depTape2Orig(nonLoopEq);
742  if (eqInLoopCount < m) {
743  for (size_t i = 0; i < inLoop.size(); i++) {
744  if (!inLoop[i]) {
745  depTape2Orig[inl] = i;
746  nonLoopDeps[inl++] = dependents_[i];
747  }
748  }
749  }
750  CPPADCG_ASSERT_UNKNOWN(inl == nonLoopEq)
751 
752 
755  for (const auto& itTmp : origTemp2Index_) {
756  size_t k = itTmp.second;
757  nonLoopDeps[nonLoopEq + k] = handler_->createCG(Argument<Base>(*itTmp.first));
758  }
759 
763  Evaluator<Base, CGBase> evaluator(*handler_);
764 
765  // set atomic functions
766  const std::map<size_t, CGAbstractAtomicFun<Base>* >& atomicsOrig = handler_->getAtomicFunctions();
767  std::map<size_t, atomic_base<CGBase>* > atomics;
768  atomics.insert(atomicsOrig.begin(), atomicsOrig.end());
769  evaluator.addAtomicFunctions(atomics);
770 
771  std::vector<AD<CGBase> > x(independents_.size());
772  for (size_t j = 0; j < x.size(); j++) {
773  if (independents_[j].isValueDefined())
774  x[j] = independents_[j].getValue();
775  }
776 
777  CppAD::Independent(x);
778  std::vector<AD<CGBase> > y = evaluator.evaluate(x, nonLoopDeps);
779 
780  std::unique_ptr<ADFun<CGBase> > tapeNoLoops(new ADFun<CGBase>());
781  tapeNoLoops->Dependent(y);
782 
783  return new LoopFreeModel<Base>(tapeNoLoops.release(), depTape2Orig);
784  }
785 
786  std::vector<EquationPattern<Base>*> findRelatedVariables() {
787  eqCurr_ = nullptr;
788  CodeHandlerVector<Base, size_t> varColor(*handler_);
789  color_ = 1; // used to mark visited nodes
790 
791  varColor.adjustSize();
792  varColor.fill(0);
793 
794  size_t rSize = relatedDepCandidates_.size();
795  for (size_t r = 0; r < rSize; r++) {
796  const std::set<size_t>& candidates = relatedDepCandidates_[r];
797  std::set<size_t> used;
798 
799  eqCurr_ = nullptr;
800 
801  std::set<size_t>::const_iterator itRef;
802  for (itRef = candidates.begin(); itRef != candidates.end(); ++itRef) {
803  size_t iDepRef = *itRef;
804 
805  // check if it has already been used
806  if (used.find(iDepRef) != used.end()) {
807  continue;
808  }
809 
810  if (eqCurr_ == nullptr || !used.empty()) {
811  eqCurr_ = new EquationPattern<Base>(dependents_[iDepRef], iDepRef);
812  equations_.push_back(eqCurr_);
813  }
814 
815  auto it = itRef;
816  for (++it; it != candidates.end(); ++it) {
817  size_t iDep = *it;
818  // check if it has already been used
819  if (used.find(iDep) != used.end()) {
820  continue;
821  }
822 
823  if (eqCurr_->testAdd(iDep, dependents_[iDep], color_, varColor)) {
824  used.insert(iDep);
825  }
826  }
827 
828  if (eqCurr_->dependents.size() == 1) {
829  // nothing found :(
830  delete eqCurr_;
831  eqCurr_ = nullptr;
832  equations_.pop_back();
833  }
834  }
835  }
836 
841  for (size_t eq = 0; eq < equations_.size(); eq++) {
842  equations_[eq]->detectNonIndexedIndependents();
843  }
844 
845  return equations_;
846  }
847 
855  inline bool findSharedTemporaries(const CG<Base>& value,
856  size_t depIndex) {
857  OperationNode<Base>* depNode = value.getOperationNode();
858  size_t opCount = 0;
859  if (findSharedTemporaries(depNode, depIndex, opCount)) {
860  varIndexed_[*depNode] = true;
861  return true;
862  }
863  return false;
864  }
865 
875  inline bool findSharedTemporaries(OperationNode<Base>* node,
876  size_t depIndex,
877  size_t& opCount) {
878  if (node == nullptr)
879  return false; // nothing to do
880 
881  if (handler_->isVisited(*node)) {
882  opCount++; // this operation
883  return varIndexed_[*node];
884  }
885 
886  handler_->markVisited(*node);
887 
888  bool indexedOperation = false;
889 
890  size_t localOpCount = 1;
891  const std::vector<Argument<Base> >& args = node->getArguments();
892  size_t arg_size = args.size();
893  for (size_t a = 0; a < arg_size; a++) {
894  OperationNode<Base>*argOp = args[a].getOperation();
895  if (argOp != nullptr) {
896  if (argOp->getOperationType() != CGOpCode::Inv) {
897  indexedOperation |= findSharedTemporaries(argOp, depIndex, localOpCount);
898  } else {
899  indexedOperation |= !eqCurr_->containsConstantIndependent(node, a);
900  }
901  }
902  }
903 
904  opCount += localOpCount;
905 
906  varIndexed_[*node] = indexedOperation; // mark this operation as being indexed or not-indexed
907 
908  size_t id = varId_[*node];
909  std::set<size_t>& deps = id2Deps[id];
910 
911  if (deps.size() > 1 && node->getOperationType() != CGOpCode::Inv) {
915  for (size_t otherDep : deps) {
916 
917  EquationPattern<Base>* otherEquation = dep2Equation_.at(otherDep);
918  if (otherEquation != eqCurr_) {
922  UniqueEquationPair<Base> eqPair(eqCurr_, otherEquation);
923  Dep1Dep2SharedType& relation = equationShared_[eqPair];
924 
925  std::map<OperationNode<Base>*, Indexed2OpCountType>* reldepdep;
926  if (eqPair.eq1 == eqCurr_)
927  reldepdep = &relation[depIndex][otherDep];
928  else
929  reldepdep = &relation[otherDep][depIndex];
930 
931  INDEXED_OPERATION_TYPE expected = indexedOperation ? INDEXED_OPERATION_TYPE::INDEXED : INDEXED_OPERATION_TYPE::NONINDEXED;
932  typename std::map<OperationNode<Base>*, Indexed2OpCountType>::iterator itIndexedType = reldepdep->find(node);
933  if (itIndexedType == reldepdep->end()) {
934  (*reldepdep)[node] = Indexed2OpCountType(expected, localOpCount);
935  } else if (itIndexedType->second.first != expected) {
936  itIndexedType->second.first = INDEXED_OPERATION_TYPE::BOTH;
937  }
938 
939  break;
940  }
941  }
942  }
943 
944  return indexedOperation;
945  }
946 
954  inline void markOperationsWithDependent(const OperationNode<Base>* node,
955  size_t dep) {
956  if (node == nullptr || node->getOperationType() == CGOpCode::Inv)
957  return; // nothing to do
958 
959  size_t id = varId_[*node];
960 
961  std::set<size_t>& deps = id2Deps[id];
962 
963  if (deps.empty()) {
964  deps.insert(dep); // here for the first time
965  } else {
966  auto added = deps.insert(dep);
967  if (!added.second) {
968  return; // already been here
969  }
970  }
971 
972  const std::vector<Argument<Base> >& args = node->getArguments();
973  size_t arg_size = args.size();
974  for (size_t i = 0; i < arg_size; i++) {
975  markOperationsWithDependent(args[i].getOperation(), dep);
976  }
977  }
978 
979  void assignIds() {
980  idCounter_ = 1;
981 
982  size_t rSize = relatedDepCandidates_.size();
983  for (size_t r = 0; r < rSize; r++) {
984  const std::set<size_t>& candidates = relatedDepCandidates_[r];
985 
986  for (size_t it : candidates) {
987  assignIds(dependents_[it].getOperationNode());
988  }
989  }
990  }
991 
992  void assignIds(OperationNode<Base>* node) {
993  if (node == nullptr || varId_[*node] > 0)
994  return;
995 
996  varId_[*node] = idCounter_;
997  origShareNodeId_.adjustSize(*node);
998  origShareNodeId_[*node] = idCounter_;
999  idCounter_++;
1000 
1001  const std::vector<Argument<Base> >& args = node->getArguments();
1002  size_t arg_size = args.size();
1003  for (size_t i = 0; i < arg_size; i++) {
1004  assignIds(args[i].getOperation());
1005  }
1006  }
1007 
1008  void resetHandlerCounters() {
1009  size_t rSize = relatedDepCandidates_.size();
1010  for (size_t r = 0; r < rSize; r++) {
1011  const std::set<size_t>& candidates = relatedDepCandidates_[r];
1012 
1013  for (size_t it : candidates) {
1014  resetHandlerCounters(dependents_[it].getOperationNode());
1015  }
1016  }
1017  }
1018 
1019  void resetHandlerCounters(OperationNode<Base>* node) {
1020  if (node == nullptr || varId_[*node] == 0 || origShareNodeId_[*node] == 0)
1021  return;
1022 
1023  varId_[*node] = 0;
1024  origShareNodeId_[*node] = 0;
1025 
1026  const std::vector<Argument<Base> >& args = node->getArguments();
1027  size_t arg_size = args.size();
1028  for (size_t i = 0; i < arg_size; i++) {
1029  resetHandlerCounters(args[i].getOperation());
1030  }
1031  }
1032 
1033  static bool find(Loop<Base>* loop1, Loop<Base>* loop2,
1034  const std::map<EquationPattern<Base>*, std::set<EquationPattern<Base>*> >& blackList) {
1035  for (EquationPattern<Base>* iteq1 : loop1->equations) {
1036 
1037  const auto itBlack = blackList.find(iteq1);
1038  if (itBlack != blackList.end()) {
1039 
1040  for (EquationPattern<Base>* iteq2 : loop2->equations) {
1041  if (itBlack->second.find(iteq2) != itBlack->second.end()) {
1042  return true; // found
1043  }
1044  }
1045  }
1046  }
1047 
1048  return false;
1049  }
1050 
1051  template<class T>
1052  static inline bool contains(const std::map<T, std::set<T> >& map, T eq1, T eq2) {
1053  typename std::map<T, std::set<T> >::const_iterator itb1;
1054  itb1 = map.find(eq1);
1055  if (itb1 != map.end()) {
1056  if (itb1->second.find(eq2) != itb1->second.end()) {
1057  return true;
1058  }
1059  }
1060  return false;
1061  }
1062 
1063  bool canCombineEquations(const EquationPattern<Base>& eq1,
1064  size_t dep1,
1065  const EquationPattern<Base>& eq2,
1066  size_t dep2,
1067  OperationNode<Base>& sharedTemp,
1068  std::vector<std::set<size_t>* >& dep2Relations,
1069  std::map<size_t, std::set<size_t> >& dependentBlackListRelations,
1070  SmartSetPointer<std::set<size_t> >& dependentRelations) {
1071  using namespace std;
1072 
1073  // must have indexed independents at the same locations in all equations
1074  const set<const OperationNode<Base>*> opWithIndepArgs = eq1.findOperationsUsingIndependents(sharedTemp);
1075 
1076  // must have indexed independents at the same locations in both equations
1077  for (const OperationNode<Base>* op : opWithIndepArgs) {
1078  // get indexed independent variable information
1079  // - equation 1
1080  typename map<const OperationNode<Base>*, OperationIndexedIndependents<Base> >::const_iterator indexed1It;
1081  OperationNode<Base>* op1 = eq1.operationEO2Reference.at(dep1).at(op); // convert to the reference of equation 1
1082  indexed1It = eq1.indexedOpIndep.op2Arguments.find(op1);
1083 
1084  // - equation 2
1085  typename map<const OperationNode<Base>*, OperationIndexedIndependents<Base> >::const_iterator indexed2It;
1086  OperationNode<Base>* op2 = eq2.operationEO2Reference.at(dep2).at(op); // convert to the reference of equation 2
1087  indexed2It = eq2.indexedOpIndep.op2Arguments.find(op2);
1088 
1092  if (indexed1It == eq1.indexedOpIndep.op2Arguments.end()) {
1093  if (indexed2It != eq2.indexedOpIndep.op2Arguments.end()) {
1094  return false; // indexed in one equation but non-indexed in the other
1095  }
1096  } else {
1097  if (indexed2It == eq2.indexedOpIndep.op2Arguments.end()) {
1098  return false; // indexed in one equation but non-indexed in the other
1099  }
1100 
1104  const OperationIndexedIndependents<Base>& indexed1Ops = indexed1It->second;
1105  const OperationIndexedIndependents<Base>& indexed2Ops = indexed2It->second;
1106 
1107  size_t a1Size = indexed1Ops.arg2Independents.size();
1108  if (a1Size != indexed2Ops.arg2Independents.size()) { // there must be the same number of arguments
1109  return false;
1110  }
1111 
1112  for (size_t a = 0; a < a1Size; a++) {
1113  const map<size_t, const OperationNode<Base>*>& eq1Dep2Indep = indexed1Ops.arg2Independents[a];
1114  const map<size_t, const OperationNode<Base>*>& eq2Dep2Indep = indexed2Ops.arg2Independents[a];
1115 
1116  if (eq1Dep2Indep.empty() != eq2Dep2Indep.empty())
1117  return false; // one is indexed and the other is non-indexed
1118 
1119  // it has to be possible to match dependents from the two equation patterns
1120 
1121  if (eq1Dep2Indep.empty()) {
1122  continue; // not indexed
1123  }
1124 
1125  // indexed independent variable
1126 
1127  // invert eq1Dep2Indep into eq1Indep2Dep
1128  using MapIndep2Dep = map<const OperationNode<Base>*, size_t, IndependentNodeSorter<Base> >;
1129  MapIndep2Dep eq1Indep2Dep;
1130  typename MapIndep2Dep::iterator hint = eq1Indep2Dep.begin();
1131  for (const auto& d2i : eq1Dep2Indep) {
1132  hint = eq1Indep2Dep.insert(hint, std::make_pair(d2i.second, d2i.first));
1133  hint++; // assume that the relation dep<->indep is always ascending
1134  }
1135 
1136  typename map<const OperationNode<Base>*, size_t>::const_iterator itHint = eq1Indep2Dep.begin();
1137 
1138  // check all iterations/dependents
1139  for (const auto& d2i : eq2Dep2Indep) {
1140  size_t dep2 = d2i.first;
1141  const OperationNode<Base>* indep = d2i.second;
1142  typename map<const OperationNode<Base>*, size_t>::const_iterator it;
1143  if (itHint->first == indep) {
1149  it = itHint;
1150  itHint++;
1151  } else {
1152  it = eq1Indep2Dep.find(indep);
1153  }
1154 
1155  if (it != eq1Indep2Dep.end()) {
1156  size_t dep1 = it->second;
1157 
1158  // check if this relation was previous excluded
1159  std::map<size_t, set<size_t> >::const_iterator itBlackL = dependentBlackListRelations.find(dep1);
1160  if (itBlackL != dependentBlackListRelations.end() && itBlackL->second.find(dep2) != itBlackL->second.end()) {
1161  return false; // these dependents cannot be in the same iteration
1162  }
1163 
1164  bool related = makeDependentRelation(eq1, dep1, eq2, dep2,
1165  dep2Relations, dependentRelations);
1166  if (!related)
1167  return false;
1168 
1169  } else {
1170  // equation pattern 1 does not have any iteration with indep from dep2
1171 
1172  // there is no need to have the same number of iterations in both equations!
1173  // but remember that these dependents cannot be in the same iteration from now on
1174  dependentBlackListRelations[dep2].insert(eq1.dependents.begin(), eq1.dependents.end());
1175  }
1176  }
1177 
1178  }
1179 
1180  }
1181 
1182  }
1183 
1184  return true;
1185  }
1186 
1187  bool isNonIndexed(const EquationPattern<Base>& eq2,
1188  size_t dep2,
1189  OperationNode<Base>& sharedTemp) {
1190  using namespace std;
1191 
1192 
1193  // must have indexed independents at the same locations in all equations
1194  const set<const OperationNode<Base>*> opWithIndepArgs = EquationPattern<Base>::findOperationsUsingIndependents(sharedTemp);
1195 
1196  for (const OperationNode<Base>* op : opWithIndepArgs) {
1197  // get indexed independent variable information
1198  // - equation 2
1199  OperationNode<Base>* op2 = eq2.operationEO2Reference.at(dep2).at(op); // convert to the reference of equation 2
1200 
1201  const auto indexed2It = eq2.indexedOpIndep.op2Arguments.find(op2);
1202  if (indexed2It != eq2.indexedOpIndep.op2Arguments.end()) {
1203  return false; // indexed in one equation but non-indexed in the other
1204  }
1205  }
1206 
1207  return true;
1208  }
1209 
1210  bool makeDependentRelation(const EquationPattern<Base>& eq1,
1211  size_t dep1,
1212  const EquationPattern<Base>& eq2,
1213  size_t dep2,
1214  std::vector<std::set<size_t>* >& dep2Relations,
1215  SmartSetPointer<std::set<size_t> >& dependentRelations) {
1216  using namespace std;
1217 
1218  set<size_t>* related1 = dep2Relations[dep1];
1219  set<size_t>* related2 = dep2Relations[dep2];
1220 
1221  // check if relations were established with a different dependent from the same equation pattern
1222  if (related1 != nullptr) {
1223  // dependent 1 already in a relation set
1224 
1225  if (related2 != nullptr) {
1226  // both dependents belong to previously existing relations sets
1227 
1228  if (related1 == related2)
1229  return true; // already done
1230 
1231  // relations must be merged (if possible)!
1232  // merge related2 into related1
1233  bool canMerge = true;
1234 
1235  for (size_t dep3 : *related2) {
1236 
1237  const EquationPattern<Base>& eq3 = *dep2Equation_.at(dep3);
1238  // make sure no other dependent from the same equation pattern was already in this relation set
1239  for (size_t it : eq3.dependents) {
1240  if (it != dep3 && related1->find(it) != related1->end()) {
1241  canMerge = false; // relation with a dependent from a different iteration!
1242  break;
1243  //return false;
1244  }
1245  }
1246 
1247  if (!canMerge)
1248  break;
1249  }
1250 
1251  if (canMerge) {
1252  for (size_t dep3 : *related2) {
1253  related1->insert(dep3);
1254  dep2Relations[dep3] = related1;
1255  }
1256 
1257  dependentRelations.erase(related2);
1258  delete related2;
1259  }
1268  } else {
1269  if (related1->find(dep2) == related1->end()) {
1270  // make sure no other dependent from the same equation pattern was already in this relation set
1271  bool canMerge = true;
1272  for (size_t it : eq2.dependents) {
1273  if (it != dep2 && related1->find(it) != related1->end()) {
1274  canMerge = false; // relation with a dependent from a different iteration!
1275  break;
1276  }
1277  }
1278 
1279  if (canMerge) {
1280  related1->insert(dep2);
1281  dep2Relations[dep2] = related1;
1282  }
1290  }
1291  }
1292 
1293  } else if (related2 != nullptr) {
1294  // dependent 2 already in a relation set
1295 
1296  // make sure no other dependent from the same equation pattern was already in this relation set
1297  bool canMerge = true;
1298  for (size_t it : eq1.dependents) {
1299  if (it != dep1 && related2->find(it) != related2->end()) {
1300  canMerge = false; // relation with a dependent from a different iteration!
1301  break;
1302  //return false;
1303  }
1304  }
1305 
1306  if (canMerge) {
1307  related2->insert(dep1);
1308  dep2Relations[dep1] = related2;
1309  }
1319  } else {
1320  // dependent 1 and dependent 2 not in any relation set
1321  auto* related = new std::set<size_t>();
1322  dependentRelations.insert(related);
1323  related->insert(dep1);
1324  related->insert(dep2);
1325  dep2Relations[dep1] = related;
1326  dep2Relations[dep2] = related;
1327  }
1328 
1329  return true;
1330  }
1331 
1332 };
1333 
1334 } // END cg namespace
1335 } // END CppAD namespace
1336 
1337 #endif
virtual void generateTapes(LoopFreeModel< Base > *&nonLoopTape, std::set< LoopModel< Base > *> &loopTapes)
std::vector< ActiveOut > evaluate(ArrayView< const ActiveOut > indepNew, ArrayView< const CG< ScalarIn > > depOld)
Definition: evaluator.hpp:93
std::set< EquationPattern< Base > * > equations
Definition: loop.hpp:59
std::map< size_t, std::map< const OperationNode< Base > *, OperationNode< Base > * > > operationEO2Reference
void merge(Loop< Base > &other, const std::set< EquationPattern< Base > *> &indexedLoopRelations, const std::vector< std::pair< EquationPattern< Base > *, EquationPattern< Base > *> > &nonIndexedLoopRelations)
Definition: loop.hpp:232
DependentPatternMatcher(const std::vector< std::set< size_t > > &relatedDepCandidates, const std::vector< CGBase > &dependents, const std::vector< CGBase > &independents)
void mergeEqGroups(Loop< Base > &other)
Definition: loop.hpp:262
const std::vector< Argument< Base > > & getArguments() const
size_t getIterationCount() const
Definition: loop.hpp:209
STL namespace.
std::vector< MapDep2Indep_type > arg2Independents
IndexedIndependent< Base > indexedOpIndep
CGOpCode getOperationType() const
const std::map< size_t, CGAbstractAtomicFun< Base > *> & getAtomicFunctions() const
void createLoopModel(const std::vector< CG< Base > > &dependents, const std::vector< CG< Base > > &independents, const std::map< size_t, EquationPattern< Base > *> &dep2Equation, std::map< OperationNode< Base > *, size_t > &origTemp2Index)
Definition: loop.hpp:283
const std::vector< std::vector< LoopPosition > > & getDependentIndexes() const
Definition: loop_model.hpp:291