mlpack
x_tree_split_impl.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_X_TREE_SPLIT_IMPL_HPP
13 #define MLPACK_CORE_TREE_RECTANGLE_TREE_X_TREE_SPLIT_IMPL_HPP
14 
15 #include "x_tree_split.hpp"
16 #include "rectangle_tree.hpp"
18 
19 namespace mlpack {
20 namespace tree {
21 
28 template<typename TreeType>
29 void XTreeSplit::SplitLeafNode(TreeType *tree, std::vector<bool>& relevels)
30 {
31  // Convenience typedef.
32  typedef typename TreeType::ElemType ElemType;
33 
34  if (tree->Count() <= tree->MaxLeafSize())
35  return;
36 
37  // If we haven't yet reinserted on this level, we try doing so now.
38  if (RStarTreeSplit::ReinsertPoints(tree, relevels) > 0)
39  return;
40 
41  // The procedure of splitting a leaf node is virtually identical to the R*
42  // tree procedure, so we can reuse code.
43  size_t bestAxis;
44  size_t bestIndex;
45  RStarTreeSplit::PickLeafSplit(tree, bestAxis, bestIndex);
46 
51  std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
52  for (size_t i = 0; i < sorted.size(); ++i)
53  {
54  sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
55  sorted[i].second = tree->Point(i);
56  }
57 
58  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
59 
71  TreeType* par = tree->Parent();
72  TreeType* treeOne = (par) ? tree : new TreeType(tree);
73  TreeType* treeTwo = (par) ? new TreeType(par) : new TreeType(tree);
74 
75  // Now clean the node, and we will re-use this.
76  const size_t numPoints = tree->Count();
77 
78  // Reset the original node's values, regardless of whether it will become
79  // the new parent or not.
80  tree->numChildren = 0;
81  tree->numDescendants = 0;
82  tree->count = 0;
83  tree->bound.Clear();
84 
85  // Insert the points into the appropriate tree.
86  for (size_t i = 0; i < numPoints; ++i)
87  {
88  if (i < bestIndex + tree->MinLeafSize())
89  treeOne->InsertPoint(sorted[i].second);
90  else
91  treeTwo->InsertPoint(sorted[i].second);
92  }
93 
94  // Insert the new tree node(s).
95  if (par)
96  {
97  par->children[par->NumChildren()++] = treeTwo;
98  }
99  else
100  {
101  InsertNodeIntoTree(tree, treeOne);
102  InsertNodeIntoTree(tree, treeTwo);
103  }
104 
105  // We now update the split history of each new node.
106  treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
107  treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
108  treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
109  treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
110 
111  // If we overflowed the parent, split it.
112  if (par && par->NumChildren() == par->MaxNumChildren() + 1)
113  XTreeSplit::SplitNonLeafNode(par, relevels);
114 }
115 
123 template<typename TreeType>
124 bool XTreeSplit::SplitNonLeafNode(TreeType *tree, std::vector<bool>& relevels)
125 {
126  // Convenience typedef.
127  typedef typename TreeType::ElemType ElemType;
129 
130  // The X tree paper doesn't explain how to handle the split history when
131  // reinserting nodes and reinserting nodes seems to hurt the performance, so
132  // we don't do it.
133 
134  // We find the split axis that will be used if the topological split fails now
135  // to save CPU time.
136 
137  // Find the next split axis.
138  std::vector<bool> axes(tree->Bound().Dim(), true);
139  std::vector<size_t> dimensionsLastUsed(tree->NumChildren());
140  for (size_t i = 0; i < tree->NumChildren(); ++i)
141  dimensionsLastUsed[i] =
142  tree->Child(i).AuxiliaryInfo().SplitHistory().lastDimension;
143  std::sort(dimensionsLastUsed.begin(), dimensionsLastUsed.end());
144 
145  size_t lastDim = dimensionsLastUsed[dimensionsLastUsed.size() / 2];
146  size_t minOverlapSplitDimension = tree->Bound().Dim();
147 
148  // See if we can use a new dimension.
149  for (size_t i = lastDim + 1; i < axes.size(); ++i)
150  {
151  for (size_t j = 0; j < tree->NumChildren(); ++j)
152  axes[i] = axes[i] &
153  tree->Child(j).AuxiliaryInfo().SplitHistory().history[i];
154  if (axes[i] == true)
155  {
156  minOverlapSplitDimension = i;
157  break;
158  }
159  }
160 
161  if (minOverlapSplitDimension == tree->Bound().Dim())
162  {
163  for (size_t i = 0; i < lastDim + 1; ++i)
164  {
165  axes[i] = true;
166  for (size_t j = 0; j < tree->NumChildren(); ++j)
167  axes[i] = axes[i] &
168  tree->Child(j).AuxiliaryInfo().SplitHistory().history[i];
169  if (axes[i] == true)
170  {
171  minOverlapSplitDimension = i;
172  break;
173  }
174  }
175  }
176 
177  bool minOverlapSplitUsesHi = false;
178  ElemType bestScoreMinOverlapSplit = std::numeric_limits<ElemType>::max();
179  ElemType areaOfBestMinOverlapSplit = 0;
180  int bestIndexMinOverlapSplit = 0;
181 
182  int bestOverlapIndexOnBestAxis = 0;
183  int bestAreaIndexOnBestAxis = 0;
184  bool tiedOnOverlap = false;
185  bool lowIsBest = true;
186  int bestAxis = 0;
187  ElemType bestAxisScore = std::numeric_limits<ElemType>::max();
188  ElemType overlapBestOverlapAxis = 0;
189  ElemType areaBestOverlapAxis = 0;
190  ElemType overlapBestAreaAxis = 0;
191  ElemType areaBestAreaAxis = 0;
192 
193  for (size_t j = 0; j < tree->Bound().Dim(); ++j)
194  {
195  ElemType axisScore = 0.0;
196 
197  // We'll do Bound().Lo() now and use Bound().Hi() later.
198  std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
199  for (size_t i = 0; i < sorted.size(); ++i)
200  {
201  sorted[i].first = tree->Child(i).Bound()[j].Lo();
202  sorted[i].second = &tree->Child(i);
203  }
204 
205  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
206 
207  // We'll store each of the three scores for each distribution.
208  std::vector<ElemType> areas(tree->MaxNumChildren() -
209  2 * tree->MinNumChildren() + 2);
210  std::vector<ElemType> margins(tree->MaxNumChildren() -
211  2 * tree->MinNumChildren() + 2);
212  std::vector<ElemType> overlapedAreas(tree->MaxNumChildren() -
213  2 * tree->MinNumChildren() + 2);
214  for (size_t i = 0; i < areas.size(); ++i)
215  {
216  areas[i] = 0.0;
217  margins[i] = 0.0;
218  overlapedAreas[i] = 0.0;
219  }
220 
221  for (size_t i = 0; i < areas.size(); ++i)
222  {
223  // The ith arrangement is obtained by placing the first
224  // tree->MinNumChildren() + i points in one rectangle and the rest in
225  // another. Then we calculate the three scores for that distribution.
226 
227  size_t cutOff = tree->MinNumChildren() + i;
228 
229  BoundType bound1(tree->Bound().Dim());
230  BoundType bound2(tree->Bound().Dim());
231 
232  for (size_t l = 0; l < cutOff; l++)
233  bound1 |= sorted[l].second->Bound();
234 
235  for (size_t l = cutOff; l < tree->NumChildren(); l++)
236  bound2 |= sorted[l].second->Bound();
237 
238  ElemType area1 = bound1.Volume();
239  ElemType area2 = bound2.Volume();
240  ElemType oArea = bound1.Overlap(bound2);
241 
242  for (size_t k = 0; k < bound1.Dim(); ++k)
243  margins[i] += bound1[k].Width() + bound2[k].Width();
244 
245  areas[i] += area1 + area2;
246  overlapedAreas[i] += oArea;
247  axisScore += margins[i];
248  }
249 
250  if (axisScore < bestAxisScore)
251  {
252  bestAxisScore = axisScore;
253  bestAxis = j;
254  bestOverlapIndexOnBestAxis = 0;
255  bestAreaIndexOnBestAxis = 0;
256  overlapBestOverlapAxis = overlapedAreas[bestOverlapIndexOnBestAxis];
257  areaBestOverlapAxis = areas[bestAreaIndexOnBestAxis];
258  for (size_t i = 1; i < areas.size(); ++i)
259  {
260  if (overlapedAreas[i] < overlapedAreas[bestOverlapIndexOnBestAxis])
261  {
262  tiedOnOverlap = false;
263  bestAreaIndexOnBestAxis = i;
264  bestOverlapIndexOnBestAxis = i;
265  overlapBestOverlapAxis = overlapedAreas[i];
266  areaBestOverlapAxis = areas[i];
267  }
268  else if (overlapedAreas[i] ==
269  overlapedAreas[bestOverlapIndexOnBestAxis])
270  {
271  tiedOnOverlap = true;
272  if (areas[i] < areas[bestAreaIndexOnBestAxis])
273  {
274  bestAreaIndexOnBestAxis = i;
275  overlapBestAreaAxis = overlapedAreas[i];
276  areaBestAreaAxis = areas[i];
277  }
278  }
279  }
280  }
281 
282  // Track the minOverlapSplit data
283  if (minOverlapSplitDimension != tree->Bound().Dim() &&
284  j == minOverlapSplitDimension)
285  {
286  for (size_t i = 0; i < overlapedAreas.size(); ++i)
287  {
288  if (overlapedAreas[i] < bestScoreMinOverlapSplit)
289  {
290  bestScoreMinOverlapSplit = overlapedAreas[i];
291  bestIndexMinOverlapSplit = i;
292  areaOfBestMinOverlapSplit = areas[i];
293  }
294  }
295  }
296  }
297 
298  // Now we do the same thing using Bound().Hi() and choose the best of the two.
299  for (size_t j = 0; j < tree->Bound().Dim(); ++j)
300  {
301  ElemType axisScore = 0.0;
302 
303  std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
304  for (size_t i = 0; i < sorted.size(); ++i)
305  {
306  sorted[i].first = tree->Child(i).Bound()[j].Hi();
307  sorted[i].second = &tree->Child(i);
308  }
309 
310  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
311 
312  // We'll store each of the three scores for each distribution.
313  std::vector<ElemType> areas(tree->MaxNumChildren() -
314  2 * tree->MinNumChildren() + 2);
315  std::vector<ElemType> margins(tree->MaxNumChildren() -
316  2 * tree->MinNumChildren() + 2);
317  std::vector<ElemType> overlapedAreas(tree->MaxNumChildren() -
318  2 * tree->MinNumChildren() + 2);
319  for (size_t i = 0; i < areas.size(); ++i)
320  {
321  areas[i] = 0.0;
322  margins[i] = 0.0;
323  overlapedAreas[i] = 0.0;
324  }
325 
326  for (size_t i = 0; i < areas.size(); ++i)
327  {
328  // The ith arrangement is obtained by placing the first
329  // tree->MinNumChildren() + i points in one rectangle and the rest in
330  // another. Then we calculate the three scores for that distribution.
331 
332  size_t cutOff = tree->MinNumChildren() + i;
333 
334  BoundType bound1(tree->Bound().Dim());
335  BoundType bound2(tree->Bound().Dim());
336 
337  for (size_t l = 0; l < cutOff; l++)
338  bound1 |= sorted[l].second->Bound();
339 
340  for (size_t l = cutOff; l < tree->NumChildren(); l++)
341  bound2 |= sorted[l].second->Bound();
342 
343  ElemType area1 = bound1.Volume();
344  ElemType area2 = bound2.Volume();
345  ElemType oArea = bound1.Overlap(bound2);
346 
347  for (size_t k = 0; k < bound1.Dim(); ++k)
348  margins[i] += bound1[k].Width() + bound2[k].Width();
349 
350 
351  areas[i] += area1 + area2;
352  overlapedAreas[i] += oArea;
353  axisScore += margins[i];
354  }
355 
356  if (axisScore < bestAxisScore)
357  {
358  bestAxisScore = axisScore;
359  bestAxis = j;
360  lowIsBest = false;
361  bestOverlapIndexOnBestAxis = 0;
362  bestAreaIndexOnBestAxis = 0;
363  overlapBestOverlapAxis = overlapedAreas[bestOverlapIndexOnBestAxis];
364  areaBestOverlapAxis = areas[bestAreaIndexOnBestAxis];
365  for (size_t i = 1; i < areas.size(); ++i)
366  {
367  if (overlapedAreas[i] < overlapedAreas[bestOverlapIndexOnBestAxis])
368  {
369  tiedOnOverlap = false;
370  bestAreaIndexOnBestAxis = i;
371  bestOverlapIndexOnBestAxis = i;
372  overlapBestOverlapAxis = overlapedAreas[i];
373  areaBestOverlapAxis = areas[i];
374  }
375  else if (overlapedAreas[i] ==
376  overlapedAreas[bestOverlapIndexOnBestAxis])
377  {
378  tiedOnOverlap = true;
379  if (areas[i] < areas[bestAreaIndexOnBestAxis])
380  {
381  bestAreaIndexOnBestAxis = i;
382  overlapBestAreaAxis = overlapedAreas[i];
383  areaBestAreaAxis = areas[i];
384  }
385  }
386  }
387  }
388 
389  // Track the minOverlapSplit data
390  if (minOverlapSplitDimension != tree->Bound().Dim() &&
391  j == minOverlapSplitDimension)
392  {
393  for (size_t i = 0; i < overlapedAreas.size(); ++i)
394  {
395  if (overlapedAreas[i] < bestScoreMinOverlapSplit)
396  {
397  minOverlapSplitUsesHi = true;
398  bestScoreMinOverlapSplit = overlapedAreas[i];
399  bestIndexMinOverlapSplit = i;
400  areaOfBestMinOverlapSplit = areas[i];
401  }
402  }
403  }
404  }
405 
406  std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
407  if (lowIsBest)
408  {
409  for (size_t i = 0; i < sorted.size(); ++i)
410  {
411  sorted[i].first = tree->Child(i).Bound()[bestAxis].Lo();
412  sorted[i].second = &tree->Child(i);
413  }
414  }
415  else
416  {
417  for (size_t i = 0; i < sorted.size(); ++i)
418  {
419  sorted[i].first = tree->Child(i).Bound()[bestAxis].Hi();
420  sorted[i].second = &tree->Child(i);
421  }
422  }
423 
424  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
425 
426  if (tree->Parent() != NULL)
427  {
428  // Reuse tree as the new child.
429  TreeType* treeTwo = new TreeType(tree->Parent(), tree->MaxNumChildren());
430  const size_t numChildren = tree->NumChildren();
431  tree->numChildren = 0;
432  tree->count = 0;
433 
434  // Now as per the X-tree paper, we ensure that this split was good enough.
435  bool useMinOverlapSplit = false;
436  if (tiedOnOverlap)
437  {
438  if (areaBestAreaAxis > 0 &&
439  overlapBestAreaAxis / areaBestAreaAxis < MAX_OVERLAP)
440  {
441  tree->numDescendants = 0;
442  tree->bound.Clear();
443  for (size_t i = 0; i < numChildren; ++i)
444  {
445  if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
446  InsertNodeIntoTree(tree, sorted[i].second);
447  else
448  InsertNodeIntoTree(treeTwo, sorted[i].second);
449  }
450  }
451  else
452  useMinOverlapSplit = true;
453  }
454  else
455  {
456  if (overlapBestOverlapAxis / areaBestOverlapAxis < MAX_OVERLAP)
457  {
458  tree->numDescendants = 0;
459  tree->bound.Clear();
460  for (size_t i = 0; i < numChildren; ++i)
461  {
462  if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
463  InsertNodeIntoTree(tree, sorted[i].second);
464  else
465  InsertNodeIntoTree(treeTwo, sorted[i].second);
466  }
467  }
468  else
469  useMinOverlapSplit = true;
470  }
471 
472  // If the split was not good enough, then we try the minimal overlap split.
473  // If that fails, we create a "super node" (more accurately we resize this
474  // one to make it a super node).
475  if (useMinOverlapSplit)
476  {
477  // If there is a dimension that might work, try that.
478  if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
479  (bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
480  {
481  std::vector<std::pair<ElemType, TreeType*>> sorted2(numChildren);
482  if (minOverlapSplitUsesHi)
483  {
484  for (size_t i = 0; i < sorted2.size(); ++i)
485  {
486  sorted2[i].first = sorted[i].second->Bound()[bestAxis].Hi();
487  sorted2[i].second = sorted[i].second;
488  }
489  }
490  else
491  {
492  for (size_t i = 0; i < sorted2.size(); ++i)
493  {
494  sorted2[i].first = sorted[i].second->Bound()[bestAxis].Lo();
495  sorted2[i].second = sorted[i].second;
496  }
497  }
498  std::sort(sorted2.begin(), sorted2.end(),
499  PairComp<ElemType, TreeType*>);
500 
501  tree->numDescendants = 0;
502  tree->bound.Clear();
503  for (size_t i = 0; i < numChildren; ++i)
504  {
505  if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
506  InsertNodeIntoTree(tree, sorted2[i].second);
507  else
508  InsertNodeIntoTree(treeTwo, sorted2[i].second);
509  }
510  }
511  else
512  {
513  // We don't create a supernode that would be the only child of the root.
514  // (Note that if you did try to do so you would need to update the
515  // parent field on each child of this new node as creating a supernode
516  // causes the function to return before that is done.
517 
518  // I thought commenting out the bellow would make the tree less
519  // efficient but would still work. It doesn't. I should look into that
520  // to see if there is another bug.
521 
522  if ((tree->Parent()->Parent() == NULL) &&
523  (tree->Parent()->NumChildren() == 1))
524  {
525  // We make the root a supernode instead.
526  tree->Parent()->MaxNumChildren() = tree->MaxNumChildren() +
527  tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
528  tree->Parent()->children.resize(tree->Parent()->MaxNumChildren() + 1);
529  tree->Parent()->NumChildren() = tree->NumChildren();
530  for (size_t i = 0; i < numChildren; ++i)
531  {
532  tree->Parent()->children[i] = sorted[i].second;
533  tree->Parent()->children[i]->Parent() = tree->Parent();
534  tree->children[i] = NULL;
535  }
536 
537  delete tree;
538  delete treeTwo;
539 
540  return false;
541  }
542 
543  // If we don't have to worry about the root, we just enlarge this node.
544  tree->MaxNumChildren() +=
545  tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
546  tree->children.resize(tree->MaxNumChildren() + 1);
547  tree->numChildren = numChildren;
548  for (size_t i = 0; i < numChildren; ++i)
549  tree->Child(i).Parent() = tree;
550 
551  delete treeTwo;
552  return false;
553  }
554  }
555 
556  // Update the split history of each child.
557  tree->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
558  tree->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
559  treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
560  treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
561 
562  // Remove this node and insert treeOne and treeTwo
563  TreeType* par = tree->Parent();
564  par->children[par->NumChildren()++] = treeTwo;
565 
566  // We only add one at a time, so we should only need to test for equality
567  // just in case, we use an assert.
568  if (!(par->NumChildren() <= par->MaxNumChildren() + 1))
569  Log::Debug << "error " << par->NumChildren() << ", "
570  << par->MaxNumChildren() + 1 << std::endl;
571  assert(par->NumChildren() <= par->MaxNumChildren() + 1);
572 
573  if (par->NumChildren() == par->MaxNumChildren() + 1)
574  XTreeSplit::SplitNonLeafNode(par, relevels);
575 
576  // We have to update the children of each of these new nodes so that they
577  // record the correct parent.
578  for (size_t i = 0; i < treeTwo->NumChildren(); ++i)
579  treeTwo->Child(i).Parent() = treeTwo;
580 
581  assert(tree->Parent()->NumChildren() <=
582  tree->Parent()->MaxNumChildren());
583  assert(tree->Parent()->NumChildren() >=
584  tree->Parent()->MinNumChildren());
585  assert(treeTwo->Parent()->NumChildren() <=
586  treeTwo->Parent()->MaxNumChildren());
587  assert(treeTwo->Parent()->NumChildren() >=
588  treeTwo->Parent()->MinNumChildren());
589 
590  return false;
591  }
592  else
593  {
594  // We are the root of the tree, so we need to create two children to add.
595  TreeType* treeOne = new TreeType(tree, tree->MaxNumChildren());
596  TreeType* treeTwo = new TreeType(tree, tree->MaxNumChildren());
597  const size_t numChildren = tree->NumChildren();
598  tree->numChildren = 0;
599 
600  // Now as per the X-tree paper, we ensure that this split was good enough.
601  bool useMinOverlapSplit = false;
602  if (tiedOnOverlap)
603  {
604  if (overlapBestAreaAxis/areaBestAreaAxis < MAX_OVERLAP)
605  {
606  for (size_t i = 0; i < numChildren; ++i)
607  {
608  if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
609  InsertNodeIntoTree(treeOne, sorted[i].second);
610  else
611  InsertNodeIntoTree(treeTwo, sorted[i].second);
612  }
613  }
614  else
615  useMinOverlapSplit = true;
616  }
617  else
618  {
619  if (overlapBestOverlapAxis/areaBestOverlapAxis < MAX_OVERLAP)
620  {
621  for (size_t i = 0; i < numChildren; ++i)
622  {
623  if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
624  InsertNodeIntoTree(treeOne, sorted[i].second);
625  else
626  InsertNodeIntoTree(treeTwo, sorted[i].second);
627  }
628  }
629  else
630  useMinOverlapSplit = true;
631  }
632 
633  // If the split was not good enough, then we try the minimal overlap split.
634  // If that fails, we create a "super node" (more accurately we resize this
635  // one to make it a super node).
636  if (useMinOverlapSplit)
637  {
638  // If there is a dimension that might work, try that.
639  if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
640  (bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
641  {
642  std::vector<std::pair<ElemType, TreeType*>> sorted2(numChildren);
643  if (minOverlapSplitUsesHi)
644  {
645  for (size_t i = 0; i < sorted2.size(); ++i)
646  {
647  sorted2[i].first = sorted[i].second->Bound()[bestAxis].Hi();
648  sorted2[i].second = sorted[i].second;
649  }
650  }
651  else
652  {
653  for (size_t i = 0; i < sorted2.size(); ++i)
654  {
655  sorted2[i].first = sorted[i].second->Bound()[bestAxis].Lo();
656  sorted2[i].second = sorted[i].second;
657  }
658  }
659  std::sort(sorted2.begin(), sorted2.end(),
660  PairComp<ElemType, TreeType*>);
661 
662  for (size_t i = 0; i < numChildren; ++i)
663  {
664  if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
665  InsertNodeIntoTree(treeOne, sorted2[i].second);
666  else
667  InsertNodeIntoTree(treeTwo, sorted2[i].second);
668  }
669  }
670  else
671  {
672  // Make this node a supernode.
673  tree->MaxNumChildren() +=
674  tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
675  tree->children.resize(tree->MaxNumChildren() + 1);
676  tree->numChildren = numChildren;
677  for (size_t i = 0; i < numChildren; ++i)
678  tree->Child(i).Parent() = tree;
679 
680  delete treeOne;
681  delete treeTwo;
682  return false;
683  }
684  }
685 
686  // Update the split history of each child.
687  treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
688  treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
689  treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
690  treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
691 
692  // Remove this node and insert treeOne and treeTwo
693  tree->children[0] = treeOne;
694  tree->children[1] = treeTwo;
695  tree->numChildren = 2;
696  tree->numDescendants = treeOne->numDescendants + treeTwo->numDescendants;
697 
698  // We have to update the children of each of these new nodes so that they
699  // record the correct parent.
700  for (size_t i = 0; i < treeOne->NumChildren(); ++i)
701  treeOne->Child(i).Parent() = treeOne;
702  for (size_t i = 0; i < treeTwo->NumChildren(); ++i)
703  treeTwo->Child(i).Parent() = treeTwo;
704 
705  return false;
706  }
707 }
708 
713 template<typename TreeType>
714 void XTreeSplit::InsertNodeIntoTree(TreeType* destTree, TreeType* srcNode)
715 {
716  destTree->Bound() |= srcNode->Bound();
717  destTree->numDescendants += srcNode->numDescendants;
718  destTree->children[destTree->NumChildren()++] = srcNode;
719 }
720 
721 } // namespace tree
722 } // namespace mlpack
723 
724 #endif
static void SplitLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a leaf node using the algorithm described in "The R*-tree: An Efficient and Robust Access metho...
Definition: x_tree_split_impl.hpp:29
static MLPACK_EXPORT util::NullOutStream Debug
MLPACK_EXPORT is required for global variables, so that they are properly exported by the Windows com...
Definition: log.hpp:79
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
static void PickLeafSplit(TreeType *tree, size_t &bestAxis, size_t &bestIndex)
Given a node, return the best dimension and the best index to split on.
Definition: r_star_tree_split_impl.hpp:82
static bool SplitNonLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a non-leaf node using the "default" algorithm.
Definition: x_tree_split_impl.hpp:124
static size_t ReinsertPoints(TreeType *tree, std::vector< bool > &relevels)
Reinsert any points into the tree, if needed.
Definition: r_star_tree_split_impl.hpp:28
size_t Dim() const
Gets the dimensionality.
Definition: hrectbound.hpp:96
Definition of the Range class, which represents a simple range with a lower and upper bound...
const double MAX_OVERLAP
The X-tree paper says that a maximum allowable overlap of 20% works well.
Definition: x_tree_split.hpp:29