mlpack
hilbert_r_tree_split_impl.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_HILBERT_R_TREE_SPLIT_IMPL_HPP
13 #define MLPACK_CORE_TREE_RECTANGLE_TREE_HILBERT_R_TREE_SPLIT_IMPL_HPP
14 
15 #include "hilbert_r_tree_split.hpp"
16 #include "rectangle_tree.hpp"
18 
19 namespace mlpack {
20 namespace tree {
21 
22 template<size_t splitOrder>
23 template<typename TreeType>
25  std::vector<bool>& relevels)
26 {
27  if (tree->Count() <= tree->MaxLeafSize())
28  return;
29  // If we are splitting the root node, we need will do things differently so
30  // that the constructor and other methods don't confuse the end user by giving
31  // an address of another node.
32  if (tree->Parent() == NULL)
33  {
34  // We actually want to copy this way. Pointers and everything.
35  TreeType* copy = new TreeType(*tree, false);
36  // Only the root node owns this variable.
37  copy->AuxiliaryInfo().HilbertValue().OwnsValueToInsert() = false;
38  // Only leaf nodes own this variable.
39  tree->AuxiliaryInfo().HilbertValue().OwnsLocalHilbertValues() = false;
40  copy->Parent() = tree;
41  tree->Count() = 0;
42  tree->NullifyData();
43  // Because this was a leaf node, numChildren must be 0.
44  tree->children[(tree->NumChildren())++] = copy;
45  SplitLeafNode(copy, relevels);
46  return;
47  }
48 
49  TreeType* parent = tree->Parent();
50  size_t iTree = 0;
51  for (iTree = 0; parent->children[iTree] != tree; iTree++) { }
52 
53  // Try to find splitOrder cooperating siblings in order to redistribute points
54  // among them and avoid split.
55  size_t firstSibling, lastSibling;
56  if (FindCooperatingSiblings(parent, iTree, firstSibling, lastSibling))
57  {
58  RedistributePointsEvenly(parent, firstSibling, lastSibling);
59  return;
60  }
61 
62  // We can not find splitOrder cooperating siblings since they are all full.
63  // We introduce new one instead.
64  size_t iNewSibling = (iTree + splitOrder < parent->NumChildren() ?
65  iTree + splitOrder : parent->NumChildren());
66 
67  for (size_t i = parent->NumChildren(); i > iNewSibling ; i--)
68  parent->children[i] = parent->children[i - 1];
69 
70  parent->NumChildren()++;
71 
72  parent->children[iNewSibling] = new TreeType(parent);
73 
74  lastSibling = (iTree + splitOrder < parent->NumChildren() ?
75  iTree + splitOrder : parent->NumChildren() - 1);
76  firstSibling = (lastSibling > splitOrder ? lastSibling - splitOrder : 0);
77 
78  assert(lastSibling - firstSibling <= splitOrder);
79  assert(lastSibling < parent->NumChildren());
80 
81  // Redistribute the points among (splitOrder + 1) cooperating siblings evenly.
82  RedistributePointsEvenly(parent, firstSibling, lastSibling);
83 
84  if (parent->NumChildren() == parent->MaxNumChildren() + 1)
85  SplitNonLeafNode(parent, relevels);
86 }
87 
88 template<size_t splitOrder>
89 template<typename TreeType>
91 SplitNonLeafNode(TreeType* tree, std::vector<bool>& relevels)
92 {
93  // If we are splitting the root node, we need will do things differently so
94  // that the constructor and other methods don't confuse the end user by giving
95  // an address of another node.
96  if (tree->Parent() == NULL)
97  {
98  // We actually want to copy this way. Pointers and everything.
99  TreeType* copy = new TreeType(*tree, false);
100  // Only the root node owns this variable.
101  copy->AuxiliaryInfo().HilbertValue().OwnsValueToInsert() = false;
102  copy->Parent() = tree;
103  tree->NumChildren() = 0;
104  tree->NullifyData();
105  tree->children[(tree->NumChildren())++] = copy;
106 
107  SplitNonLeafNode(copy, relevels);
108  return true;
109  }
110 
111  TreeType* parent = tree->Parent();
112 
113  size_t iTree = 0;
114  for (iTree = 0; parent->children[iTree] != tree; iTree++) { }
115 
116  // Try to find splitOrder cooperating siblings in order to redistribute
117  // children among them and avoid split.
118  size_t firstSibling, lastSibling;
119  if (FindCooperatingSiblings(parent, iTree, firstSibling, lastSibling))
120  {
121  RedistributeNodesEvenly(parent, firstSibling, lastSibling);
122  return false;
123  }
124 
125  // We can not find splitOrder cooperating siblings since they are all full.
126  // We introduce new one instead.
127  size_t iNewSibling = (iTree + splitOrder < parent->NumChildren() ?
128  iTree + splitOrder : parent->NumChildren());
129 
130  for (size_t i = parent->NumChildren(); i > iNewSibling ; i--)
131  parent->children[i] = parent->children[i - 1];
132 
133  parent->NumChildren()++;
134 
135  parent->children[iNewSibling] = new TreeType(parent);
136 
137  lastSibling = (iTree + splitOrder < parent->NumChildren() ?
138  iTree + splitOrder : parent->NumChildren() - 1);
139  firstSibling = (lastSibling > splitOrder ?
140  lastSibling - splitOrder : 0);
141 
142  assert(lastSibling - firstSibling <= splitOrder);
143  assert(lastSibling < parent->NumChildren());
144 
145  // Redistribute children among (splitOrder + 1) cooperating siblings evenly.
146  RedistributeNodesEvenly(parent, firstSibling, lastSibling);
147 
148  if (parent->NumChildren() == parent->MaxNumChildren() + 1)
149  SplitNonLeafNode(parent, relevels);
150  return false;
151 }
152 
153 template<size_t splitOrder>
154 template<typename TreeType>
156  TreeType* parent,
157  const size_t iTree,
158  size_t& firstSibling,
159  size_t& lastSibling)
160 {
161  size_t start = (iTree > splitOrder - 1 ? iTree - splitOrder + 1 : 0);
162  size_t end = (iTree + splitOrder <= parent->NumChildren() ?
163  iTree + splitOrder : parent->NumChildren());
164 
165  size_t iUnderfullSibling;
166 
167  // Try to find empty space among cooperating siblings.
168  if (parent->Child(iTree).NumChildren() != 0)
169  {
170  for (iUnderfullSibling = start; iUnderfullSibling < end;
171  iUnderfullSibling++)
172  if (parent->Child(iUnderfullSibling).NumChildren() <
173  parent->Child(iUnderfullSibling).MaxNumChildren() - 1)
174  break;
175  }
176  else
177  {
178  for (iUnderfullSibling = start; iUnderfullSibling < end;
179  iUnderfullSibling++)
180  if (parent->Child(iUnderfullSibling).NumPoints() <
181  parent->Child(iUnderfullSibling).MaxLeafSize() - 1)
182  break;
183  }
184 
185  if (iUnderfullSibling == end) // All nodes are full.
186  return false;
187 
188  if (iUnderfullSibling > iTree)
189  {
190  lastSibling = (iTree + splitOrder - 1 < parent->NumChildren() ?
191  iTree + splitOrder - 1 : parent->NumChildren() - 1);
192  firstSibling = (lastSibling > splitOrder - 1 ?
193  lastSibling - splitOrder + 1 : 0);
194  }
195  else
196  {
197  lastSibling = (iUnderfullSibling + splitOrder - 1 < parent->NumChildren() ?
198  iUnderfullSibling + splitOrder - 1 : parent->NumChildren() - 1);
199  firstSibling = (lastSibling > splitOrder - 1 ?
200  lastSibling - splitOrder + 1 : 0);
201  }
202 
203  assert(lastSibling - firstSibling <= splitOrder - 1);
204  assert(lastSibling < parent->NumChildren());
205 
206  return true;
207 }
208 
209 template<size_t splitOrder>
210 template<typename TreeType>
212 RedistributeNodesEvenly(const TreeType *parent,
213  size_t firstSibling, size_t lastSibling)
214 {
215  size_t numChildren = 0;
216  size_t numChildrenPerNode, numRestChildren;
217 
218  for (size_t i = firstSibling; i <= lastSibling; ++i)
219  numChildren += parent->Child(i).NumChildren();
220 
221  numChildrenPerNode = numChildren / (lastSibling - firstSibling + 1);
222  numRestChildren = numChildren % (lastSibling - firstSibling + 1);
223 
224  std::vector<TreeType*> children(numChildren);
225 
226  // Copy children's children in order to redistribute them.
227  size_t iChild = 0;
228  for (size_t i = firstSibling; i <= lastSibling; ++i)
229  {
230  for (size_t j = 0; j < parent->Child(i).NumChildren(); ++j)
231  {
232  children[iChild] = parent->Child(i).children[j];
233  iChild++;
234  }
235  }
236 
237  iChild = 0;
238  for (size_t i = firstSibling; i <= lastSibling; ++i)
239  {
240  // Since we redistribute children of a sibling we should recalculate the
241  // bound.
242  parent->Child(i).Bound().Clear();
243  parent->Child(i).numDescendants = 0;
244 
245  for (size_t j = 0; j < numChildrenPerNode; ++j)
246  {
247  parent->Child(i).Bound() |= children[iChild]->Bound();
248  parent->Child(i).numDescendants += children[iChild]->numDescendants;
249  parent->Child(i).children[j] = children[iChild];
250  children[iChild]->Parent() = parent->children[i];
251  iChild++;
252  }
253  if (numRestChildren > 0)
254  {
255  parent->Child(i).Bound() |= children[iChild]->Bound();
256  parent->Child(i).numDescendants += children[iChild]->numDescendants;
257  parent->Child(i).children[numChildrenPerNode] = children[iChild];
258  children[iChild]->Parent() = parent->children[i];
259  parent->Child(i).NumChildren() = numChildrenPerNode + 1;
260  numRestChildren--;
261  iChild++;
262  }
263  else
264  {
265  parent->Child(i).NumChildren() = numChildrenPerNode;
266  }
267  assert(parent->Child(i).NumChildren() <=
268  parent->Child(i).MaxNumChildren());
269 
270  // Fix the largest Hilbert value of the sibling.
271  parent->Child(i).AuxiliaryInfo().HilbertValue().UpdateLargestValue(
272  parent->children[i]);
273  }
274 }
275 
276 template<size_t splitOrder>
277 template<typename TreeType>
279 RedistributePointsEvenly(TreeType* parent,
280  const size_t firstSibling,
281  const size_t lastSibling)
282 {
283  size_t numPoints = 0;
284  size_t numPointsPerNode, numRestPoints;
285 
286  for (size_t i = firstSibling; i <= lastSibling; ++i)
287  numPoints += parent->Child(i).NumPoints();
288 
289  numPointsPerNode = numPoints / (lastSibling - firstSibling + 1);
290  numRestPoints = numPoints % (lastSibling - firstSibling + 1);
291 
292  std::vector<size_t> points(numPoints);
293 
294  // Copy children's points in order to redistribute them.
295  size_t iPoint = 0;
296  for (size_t i = firstSibling; i <= lastSibling; ++i)
297  {
298  for (size_t j = 0; j < parent->Child(i).NumPoints(); ++j)
299  points[iPoint++] = parent->Child(i).Point(j);
300  }
301 
302  iPoint = 0;
303  for (size_t i = firstSibling; i <= lastSibling; ++i)
304  {
305  // Since we redistribute points of a sibling we should recalculate the
306  // bound.
307  parent->Child(i).Bound().Clear();
308 
309  size_t j;
310  for (j = 0; j < numPointsPerNode; ++j)
311  {
312  parent->Child(i).Bound() |= parent->Dataset().col(points[iPoint]);
313  parent->Child(i).Point(j) = points[iPoint];
314  iPoint++;
315  }
316  if (numRestPoints > 0)
317  {
318  parent->Child(i).Bound() |= parent->Dataset().col(points[iPoint]);
319  parent->Child(i).Point(j) = points[iPoint];
320  parent->Child(i).Count() = numPointsPerNode + 1;
321  numRestPoints--;
322  iPoint++;
323  }
324  else
325  {
326  parent->Child(i).Count() = numPointsPerNode;
327  }
328  parent->Child(i).numDescendants = parent->Child(i).Count();
329 
330  assert(parent->Child(i).NumPoints() <=
331  parent->Child(i).MaxLeafSize());
332  }
333 
334  // Fix the largest Hilbert values of the siblings.
335  parent->AuxiliaryInfo().HilbertValue().RedistributeHilbertValues(parent,
336  firstSibling, lastSibling);
337 
338  TreeType* root = parent;
339 
340  while (root != NULL)
341  {
342  root->AuxiliaryInfo().HilbertValue().UpdateLargestValue(root);
343  root = root->Parent();
344  }
345 }
346 
347 } // namespace tree
348 } // namespace mlpack
349 
350 #endif // MLPACK_CORE_TREE_RECTANGLE_TREE_HILBERT_R_TREE_SPLIT_IMPL_HPP
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The splitting procedure for the Hilbert R tree.
Definition: hilbert_r_tree_split.hpp:29
static bool SplitNonLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a non-leaf node using the "default" algorithm.
Definition: hilbert_r_tree_split_impl.hpp:91
static void SplitLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a leaf node using the "default" algorithm.
Definition: hilbert_r_tree_split_impl.hpp:24
Definition of the Range class, which represents a simple range with a lower and upper bound...