mlpack
r_tree_split_impl.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_TREE_SPLIT_IMPL_HPP
13 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_TREE_SPLIT_IMPL_HPP
14 
15 #include "r_tree_split.hpp"
16 #include "rectangle_tree.hpp"
18 
19 namespace mlpack {
20 namespace tree {
21 
28 template<typename TreeType>
29 void RTreeSplit::SplitLeafNode(TreeType *tree, std::vector<bool>& relevels)
30 {
31  if (tree->Count() <= tree->MaxLeafSize())
32  return;
33  // If we are splitting the root node, we need will do things differently so
34  // that the constructor and other methods don't confuse the end user by giving
35  // an address of another node.
36  if (tree->Parent() == NULL)
37  {
38  // We actually want to copy this way. Pointers and everything.
39  TreeType* copy = new TreeType(*tree, false);
40  copy->Parent() = tree;
41  tree->Count() = 0;
42  tree->NullifyData();
43  // Because this was a leaf node, numChildren must be 0.
44  tree->children[(tree->NumChildren())++] = copy;
45  RTreeSplit::SplitLeafNode(copy, relevels);
46  return;
47  }
48 
49  assert(tree->Parent()->NumChildren() <= tree->Parent()->MaxNumChildren());
50 
51  // Use the quadratic split method from: Guttman "R-Trees: A Dynamic Index
52  // Structure for Spatial Searching" It is simplified since we don't handle
53  // rectangles, only points. We assume that the tree uses Euclidean Distance.
54  int i = 0;
55  int j = 0;
56  RTreeSplit::GetPointSeeds(tree, i, j);
57 
58  TreeType* treeOne = new TreeType(tree->Parent());
59  TreeType* treeTwo = new TreeType(tree->Parent());
60 
61  // This will assign the ith and jth point appropriately.
62  AssignPointDestNode(tree, treeOne, treeTwo, i, j);
63 
64  // Remove this node and insert treeOne and treeTwo.
65  TreeType* par = tree->Parent();
66  size_t index = 0;
67  while (par->children[index] != tree) { ++index; }
68 
69  par->children[index] = treeOne;
70  par->children[par->NumChildren()++] = treeTwo;
71 
72  // We only add one at a time, so we should only need to test for equality
73  // just in case, we use an assert.
74  assert(par->NumChildren() <= par->MaxNumChildren() + 1);
75  if (par->NumChildren() == par->MaxNumChildren() + 1)
76  RTreeSplit::SplitNonLeafNode(par, relevels);
77 
78  assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
79  assert(treeOne->Parent()->NumChildren() >= treeOne->MinNumChildren());
80  assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
81  assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
82 
83  // We need to delete this carefully since references to points are used.
84  tree->SoftDelete();
85 }
86 
94 template<typename TreeType>
95 bool RTreeSplit::SplitNonLeafNode(TreeType *tree, std::vector<bool>& relevels)
96 {
97  // If we are splitting the root node, we need will do things differently so
98  // that the constructor and other methods don't confuse the end user by giving
99  // an address of another node.
100  if (tree->Parent() == NULL)
101  {
102  // We actually want to copy this way. Pointers and everything.
103  TreeType* copy = new TreeType(*tree, false);
104  copy->Parent() = tree;
105  tree->NumChildren() = 0;
106  tree->NullifyData();
107  tree->children[(tree->NumChildren())++] = copy;
108  RTreeSplit::SplitNonLeafNode(copy, relevels);
109  return true;
110  }
111 
112  int i = 0;
113  int j = 0;
114  RTreeSplit::GetBoundSeeds(tree, i, j);
115 
116  assert(i != j);
117 
118  TreeType* treeOne = new TreeType(tree->Parent());
119  TreeType* treeTwo = new TreeType(tree->Parent());
120 
121  // This will assign the ith and jth rectangles appropriately.
122  AssignNodeDestNode(tree, treeOne, treeTwo, i, j);
123 
124  // Remove this node and insert treeOne and treeTwo.
125  TreeType* par = tree->Parent();
126  size_t index = 0;
127  while (par->children[index] != tree) { ++index; }
128 
129  assert(index != par->NumChildren());
130  par->children[index] = treeOne;
131  par->children[par->NumChildren()++] = treeTwo;
132 
133  for (size_t i = 0; i < par->NumChildren(); ++i)
134  assert(par->children[i] != tree);
135 
136  // We only add one at a time, so should only need to test for equality just in
137  // case, we use an assert.
138  assert(par->NumChildren() <= par->MaxNumChildren() + 1);
139 
140  if (par->NumChildren() == par->MaxNumChildren() + 1)
141  RTreeSplit::SplitNonLeafNode(par, relevels);
142 
143  // We have to update the children of each of these new nodes so that they
144  // record the correct parent.
145  for (size_t i = 0; i < treeOne->NumChildren(); ++i)
146  treeOne->children[i]->Parent() = treeOne;
147 
148  for (size_t i = 0; i < treeTwo->NumChildren(); ++i)
149  treeTwo->children[i]->Parent() = treeTwo;
150 
151  assert(treeOne->NumChildren() <= treeOne->MaxNumChildren());
152  assert(treeTwo->NumChildren() <= treeTwo->MaxNumChildren());
153  assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
154 
155  // Because we now have pointers to the information stored under this tree,
156  // we need to delete this node carefully.
157  tree->SoftDelete(); // currently does nothing but leak memory.
158 
159  return false;
160 }
161 
166 template<typename TreeType>
167 void RTreeSplit::GetPointSeeds(const TreeType *tree, int& iRet, int& jRet)
168 {
169  // Here we want to find the pair of points that it is worst to place in the
170  // same node. Because we are just using points, we will simply choose the two
171  // that would create the most voluminous hyperrectangle.
172  typename TreeType::ElemType worstPairScore = -1.0;
173  for (size_t i = 0; i < tree->Count(); ++i)
174  {
175  for (size_t j = i + 1; j < tree->Count(); ++j)
176  {
177  const typename TreeType::ElemType score = arma::prod(arma::abs(
178  tree->Dataset().col(tree->Point(i)) -
179  tree->Dataset().col(tree->Point(j))));
180 
181  if (score > worstPairScore)
182  {
183  worstPairScore = score;
184  iRet = i;
185  jRet = j;
186  }
187  }
188  }
189 }
190 
195 template<typename TreeType>
196 void RTreeSplit::GetBoundSeeds(const TreeType *tree, int& iRet, int& jRet)
197 {
198  // Convenience typedef.
199  typedef typename TreeType::ElemType ElemType;
200 
201  ElemType worstPairScore = -1.0;
202  for (size_t i = 0; i < tree->NumChildren(); ++i)
203  {
204  for (size_t j = i + 1; j < tree->NumChildren(); ++j)
205  {
206  ElemType score = 1.0;
207  for (size_t k = 0; k < tree->Bound().Dim(); ++k)
208  {
209  const ElemType hiMax = std::max(tree->Child(i).Bound()[k].Hi(),
210  tree->Child(j).Bound()[k].Hi());
211  const ElemType loMin = std::min(tree->Child(i).Bound()[k].Lo(),
212  tree->Child(j).Bound()[k].Lo());
213  score *= (hiMax - loMin);
214  }
215 
216  if (score > worstPairScore)
217  {
218  worstPairScore = score;
219  iRet = i;
220  jRet = j;
221  }
222  }
223  }
224 }
225 
226 template<typename TreeType>
227 void RTreeSplit::AssignPointDestNode(TreeType* oldTree,
228  TreeType* treeOne,
229  TreeType* treeTwo,
230  const int intI,
231  const int intJ)
232 {
233  // Convenience typedef.
234  typedef typename TreeType::ElemType ElemType;
235 
236  size_t end = oldTree->Count();
237 
238  assert(end > 1); // If this isn't true, the tree is really weird.
239 
240  // Restart the point counts since we are going to move them.
241  oldTree->Count() = 0;
242  treeOne->Count() = 0;
243  treeTwo->Count() = 0;
244 
245  treeOne->InsertPoint(oldTree->Point(intI));
246  treeTwo->InsertPoint(oldTree->Point(intJ));
247 
248  // If intJ is the last point in the tree, we need to switch the order so that
249  // we remove the correct points.
250  if (intI > intJ)
251  {
252  oldTree->Point(intI) = oldTree->Point(--end); // Decrement end.
253  oldTree->Point(intJ) = oldTree->Point(--end); // Decrement end.
254  }
255  else
256  {
257  oldTree->Point(intJ) = oldTree->Point(--end); // Decrement end.
258  oldTree->Point(intI) = oldTree->Point(--end); // Decrement end.
259  }
260 
261  size_t numAssignedOne = 1;
262  size_t numAssignedTwo = 1;
263 
264  // In each iteration, we go through all points and find the one that causes
265  // the least increase of volume when added to one of the rectangles. We then
266  // add it to that rectangle. We stop when we run out of points or when all of
267  // the remaining points need to be assigned to the same rectangle to satisfy
268  // the minimum fill requirement.
269 
270  // The below is safe because if end decreases and the right hand side of the
271  // second part of the conjunction changes on the same iteration, we added the
272  // point to the node with fewer points anyways.
273  while ((end > 0) && (end > oldTree->MinLeafSize() -
274  std::min(numAssignedOne, numAssignedTwo)))
275  {
276  int bestIndex = 0;
277  ElemType bestScore = std::numeric_limits<ElemType>::max();
278  int bestRect = 1;
279 
280  // Calculate the increase in volume for assigning this point to each
281  // rectangle.
282 
283  // First, calculate the starting volume.
284  ElemType volOne = 1.0;
285  ElemType volTwo = 1.0;
286  for (size_t i = 0; i < oldTree->Bound().Dim(); ++i)
287  {
288  volOne *= treeOne->Bound()[i].Width();
289  volTwo *= treeTwo->Bound()[i].Width();
290  }
291 
292  // Find the point that, when assigned to one of the two new rectangles,
293  // minimizes the increase in volume.
294  for (size_t index = 0; index < end; index++)
295  {
296  ElemType newVolOne = 1.0;
297  ElemType newVolTwo = 1.0;
298  for (size_t i = 0; i < oldTree->Bound().Dim(); ++i)
299  {
300  ElemType c = oldTree->Dataset().col(oldTree->Point(index))[i];
301  newVolOne *= treeOne->Bound()[i].Contains(c) ?
302  treeOne->Bound()[i].Width() : (c < treeOne->Bound()[i].Lo() ?
303  (treeOne->Bound()[i].Hi() - c) : (c - treeOne->Bound()[i].Lo()));
304  newVolTwo *= treeTwo->Bound()[i].Contains(c) ?
305  treeTwo->Bound()[i].Width() : (c < treeTwo->Bound()[i].Lo() ?
306  (treeTwo->Bound()[i].Hi() - c) : (c - treeTwo->Bound()[i].Lo()));
307  }
308 
309  // Choose the rectangle that requires the lesser increase in volume.
310  if ((newVolOne - volOne) < (newVolTwo - volTwo))
311  {
312  if (newVolOne - volOne < bestScore)
313  {
314  bestScore = newVolOne - volOne;
315  bestIndex = index;
316  bestRect = 1;
317  }
318  }
319  else
320  {
321  if (newVolTwo - volTwo < bestScore)
322  {
323  bestScore = newVolTwo - volTwo;
324  bestIndex = index;
325  bestRect = 2;
326  }
327  }
328  }
329 
330  // Assign the point that causes the least increase in volume
331  // to the appropriate rectangle.
332  if (bestRect == 1)
333  {
334  treeOne->InsertPoint(oldTree->Point(bestIndex));
335  numAssignedOne++;
336  }
337  else
338  {
339  treeTwo->InsertPoint(oldTree->Point(bestIndex));
340  numAssignedTwo++;
341  }
342 
343  oldTree->Point(bestIndex) = oldTree->Point(--end); // Decrement end.
344  }
345 
346  // See if we need to satisfy the minimum fill.
347  if (end > 0)
348  {
349  if (numAssignedOne < numAssignedTwo)
350  {
351  for (size_t i = 0; i < end; ++i)
352  treeOne->InsertPoint(oldTree->Point(i));
353  }
354  else
355  {
356  for (size_t i = 0; i < end; ++i)
357  treeTwo->InsertPoint(oldTree->Point(i));
358  }
359  }
360 }
361 
362 template<typename TreeType>
363 void RTreeSplit::AssignNodeDestNode(TreeType* oldTree,
364  TreeType* treeOne,
365  TreeType* treeTwo,
366  const int intI,
367  const int intJ)
368 {
369  // Convenience typedef.
370  typedef typename TreeType::ElemType ElemType;
371 
372  size_t end = oldTree->NumChildren();
373  assert(end > 1); // If this isn't true, the tree is really weird.
374 
375  assert(intI != intJ);
376 
377  for (size_t i = 0; i < oldTree->NumChildren(); ++i)
378  for (size_t j = i + 1; j < oldTree->NumChildren(); ++j)
379  assert(oldTree->children[i] != oldTree->children[j]);
380 
381  InsertNodeIntoTree(treeOne, oldTree->children[intI]);
382  InsertNodeIntoTree(treeTwo, oldTree->children[intJ]);
383 
384  // If intJ is the last node in the tree, we need to switch the order so that
385  // we remove the correct nodes.
386  if (intI > intJ)
387  {
388  oldTree->children[intI] = oldTree->children[--end];
389  oldTree->children[intJ] = oldTree->children[--end];
390  }
391  else
392  {
393  oldTree->children[intJ] = oldTree->children[--end];
394  oldTree->children[intI] = oldTree->children[--end];
395  }
396 
397  assert(treeOne->NumChildren() == 1);
398  assert(treeTwo->NumChildren() == 1);
399 
400  for (size_t i = 0; i < end; ++i)
401  for (size_t j = i + 1; j < end; ++j)
402  assert(oldTree->children[i] != oldTree->children[j]);
403 
404  for (size_t i = 0; i < end; ++i)
405  assert(oldTree->children[i] != treeOne->children[0]);
406 
407  for (size_t i = 0; i < end; ++i)
408  assert(oldTree->children[i] != treeTwo->children[0]);
409 
410  size_t numAssignTreeOne = 1;
411  size_t numAssignTreeTwo = 1;
412 
413  // In each iteration, we go through all of the nodes and find the one that
414  // causes the least increase of volume when added to one of the two new
415  // rectangles. We then add it to that rectangle.
416  while ((end > 0) && (end > oldTree->MinNumChildren() -
417  std::min(numAssignTreeOne, numAssignTreeTwo)))
418  {
419  int bestIndex = 0;
420  ElemType bestScore = std::numeric_limits<ElemType>::max();
421  int bestRect = 0;
422 
423  // Calculate the increase in volume for assigning this node to each of the
424  // new rectangles.
425  ElemType volOne = 1.0;
426  ElemType volTwo = 1.0;
427  for (size_t i = 0; i < oldTree->Bound().Dim(); ++i)
428  {
429  volOne *= treeOne->Bound()[i].Width();
430  volTwo *= treeTwo->Bound()[i].Width();
431  }
432 
433  for (size_t index = 0; index < end; index++)
434  {
435  ElemType newVolOne = 1.0;
436  ElemType newVolTwo = 1.0;
437  for (size_t i = 0; i < oldTree->Bound().Dim(); ++i)
438  {
439  // For each of the new rectangles, find the width in this dimension if
440  // we add the rectangle at index to the new rectangle.
441  const math::RangeType<ElemType>& range =
442  oldTree->Child(index).Bound()[i];
443  newVolOne *= treeOne->Bound()[i].Contains(range) ?
444  treeOne->Bound()[i].Width() : (range.Contains(treeOne->Bound()[i]) ?
445  range.Width() : (range.Lo() < treeOne->Bound()[i].Lo() ?
446  (treeOne->Bound()[i].Hi() - range.Lo()) : (range.Hi() -
447  treeOne->Bound()[i].Lo())));
448 
449  newVolTwo *= treeTwo->Bound()[i].Contains(range) ?
450  treeTwo->Bound()[i].Width() : (range.Contains(treeTwo->Bound()[i]) ?
451  range.Width() : (range.Lo() < treeTwo->Bound()[i].Lo() ?
452  (treeTwo->Bound()[i].Hi() - range.Lo()) : (range.Hi() -
453  treeTwo->Bound()[i].Lo())));
454  }
455 
456  // Choose the rectangle that requires the lesser increase in volume.
457  if ((newVolOne - volOne) < (newVolTwo - volTwo))
458  {
459  if (newVolOne - volOne < bestScore)
460  {
461  bestScore = newVolOne - volOne;
462  bestIndex = index;
463  bestRect = 1;
464  }
465  }
466  else
467  {
468  if (newVolTwo - volTwo < bestScore)
469  {
470  bestScore = newVolTwo - volTwo;
471  bestIndex = index;
472  bestRect = 2;
473  }
474  }
475  }
476 
477  // Assign the rectangle that causes the least increase in volume
478  // to the appropriate rectangle.
479  if (bestRect == 1)
480  {
481  InsertNodeIntoTree(treeOne, oldTree->children[bestIndex]);
482  numAssignTreeOne++;
483  }
484  else
485  {
486  InsertNodeIntoTree(treeTwo, oldTree->children[bestIndex]);
487  numAssignTreeTwo++;
488  }
489 
490  oldTree->children[bestIndex] = oldTree->children[--end];
491  }
492 
493  // See if we need to satisfy the minimum fill.
494  if (end > 0)
495  {
496  if (numAssignTreeOne < numAssignTreeTwo)
497  {
498  for (size_t i = 0; i < end; ++i)
499  {
500  InsertNodeIntoTree(treeOne, oldTree->children[i]);
501  numAssignTreeOne++;
502  }
503  }
504  else
505  {
506  for (size_t i = 0; i < end; ++i)
507  {
508  InsertNodeIntoTree(treeTwo, oldTree->children[i]);
509  numAssignTreeTwo++;
510  }
511  }
512  }
513 
514  for (size_t i = 0; i < treeOne->NumChildren(); ++i)
515  for (size_t j = i + 1; j < treeOne->NumChildren(); ++j)
516  assert(treeOne->children[i] != treeOne->children[j]);
517 
518  for (size_t i = 0; i < treeTwo->NumChildren(); ++i)
519  for (size_t j = i + 1; j < treeTwo->NumChildren(); ++j)
520  assert(treeTwo->children[i] != treeTwo->children[j]);
521 }
522 
527 template<typename TreeType>
528 void RTreeSplit::InsertNodeIntoTree(TreeType* destTree, TreeType* srcNode)
529 {
530  destTree->Bound() |= srcNode->Bound();
531  destTree->numDescendants += srcNode->numDescendants;
532  destTree->children[destTree->NumChildren()++] = srcNode;
533 }
534 
535 } // namespace tree
536 } // namespace mlpack
537 
538 #endif
T Lo() const
Get the lower bound.
Definition: range.hpp:61
static bool SplitNonLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a non-leaf node using the "default" algorithm.
Definition: r_tree_split_impl.hpp:95
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
static void SplitLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a leaf node using the "default" algorithm.
Definition: r_tree_split_impl.hpp:29
T Hi() const
Get the upper bound.
Definition: range.hpp:66
bool Contains(const T d) const
Determines if a point is contained within the range.
Definition: range_impl.hpp:187
Definition of the Range class, which represents a simple range with a lower and upper bound...
T Width() const
Gets the span of the range (hi - lo).
Definition: range_impl.hpp:47