mlpack
r_plus_tree_split_impl.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_SPLIT_IMPL_HPP
13 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_SPLIT_IMPL_HPP
14 
15 #include "r_plus_tree_split.hpp"
19 
20 namespace mlpack {
21 namespace tree {
22 
23 template<typename SplitPolicyType,
24  template<typename> class SweepType>
25 template<typename TreeType>
27 SplitLeafNode(TreeType* tree, std::vector<bool>& relevels)
28 {
29  typedef typename TreeType::ElemType ElemType;
30 
31  if (tree->Count() == 1)
32  {
33  // Check if an intermediate node was added during the insertion process.
34  // i.e. we couldn't enlarge a node of the R+ tree. So, one of intermediate
35  // nodes may be overflowed.
36  TreeType* node = tree->Parent();
37 
38  while (node != NULL)
39  {
40  if (node->NumChildren() == node->MaxNumChildren() + 1)
41  {
42  // Split the overflowed node.
43  RPlusTreeSplit::SplitNonLeafNode(node, relevels);
44  return;
45  }
46  node = node->Parent();
47  }
48  return;
49  }
50  else if (tree->Count() <= tree->MaxLeafSize())
51  {
52  return;
53  }
54 
55  // If we are splitting the root node, we need will do things differently so
56  // that the constructor and other methods don't confuse the end user by giving
57  // an address of another node.
58  if (tree->Parent() == NULL)
59  {
60  // We actually want to copy this way. Pointers and everything.
61  TreeType* copy = new TreeType(*tree, false);
62  copy->Parent() = tree;
63  tree->Count() = 0;
64  tree->NullifyData();
65  // Because this was a leaf node, numChildren must be 0.
66  tree->children[(tree->NumChildren())++] = copy;
67  assert(tree->NumChildren() == 1);
68 
69  RPlusTreeSplit::SplitLeafNode(copy, relevels);
70  return;
71  }
72 
73  size_t cutAxis = tree->Bound().Dim();
74  ElemType cut = std::numeric_limits<ElemType>::lowest();
75 
76  // Try to find a partiotion of the node.
77  if (!PartitionNode(tree, cutAxis, cut))
78  return;
79 
80  // If we could not find a suitable partition.
81  if (cutAxis == tree->Bound().Dim())
82  {
83  tree->MaxLeafSize()++;
84  tree->points.resize(tree->MaxLeafSize() + 1);
85  Log::Warn << "Could not find an acceptable partition."
86  "The size of the node will be increased.";
87  return;
88  }
89 
90  TreeType* treeOne = new TreeType(tree->Parent(), tree->MaxNumChildren());
91  TreeType* treeTwo = new TreeType(tree->Parent(), tree->MaxNumChildren());
92  treeOne->MinLeafSize() = 0;
93  treeOne->MinNumChildren() = 0;
94  treeTwo->MinLeafSize() = 0;
95  treeTwo->MinNumChildren() = 0;
96 
97  // Split the node into two new nodes.
98  SplitLeafNodeAlongPartition(tree, treeOne, treeTwo, cutAxis, cut);
99 
100  TreeType* parent = tree->Parent();
101  size_t i = 0;
102  while (parent->children[i] != tree)
103  ++i;
104 
105  assert(i < parent->NumChildren());
106 
107  // Insert two new nodes to the tree.
108  parent->children[i] = treeOne;
109  parent->children[parent->NumChildren()++] = treeTwo;
110 
111  assert(parent->NumChildren() <= parent->MaxNumChildren() + 1);
112 
113  // Propagate the split upward if necessary.
114  if (parent->NumChildren() == parent->MaxNumChildren() + 1)
115  RPlusTreeSplit::SplitNonLeafNode(parent, relevels);
116 
117  tree->SoftDelete();
118 }
119 
120 template<typename SplitPolicyType,
121  template<typename> class SweepType>
122 template<typename TreeType>
124 SplitNonLeafNode(TreeType* tree, std::vector<bool>& relevels)
125 {
126  typedef typename TreeType::ElemType ElemType;
127  // If we are splitting the root node, we need will do things differently so
128  // that the constructor and other methods don't confuse the end user by giving
129  // an address of another node.
130  if (tree->Parent() == NULL)
131  {
132  // We actually want to copy this way. Pointers and everything.
133  TreeType* copy = new TreeType(*tree, false);
134 
135  copy->Parent() = tree;
136  tree->NumChildren() = 0;
137  tree->NullifyData();
138  tree->children[(tree->NumChildren())++] = copy;
139 
140  RPlusTreeSplit::SplitNonLeafNode(copy, relevels);
141  return true;
142  }
143  size_t cutAxis = tree->Bound().Dim();
144  ElemType cut = std::numeric_limits<ElemType>::lowest();
145 
146  // Try to find a partiotion of the node.
147  if ( !PartitionNode(tree, cutAxis, cut))
148  return false;
149 
150  // If we could not find a suitable partition.
151  if (cutAxis == tree->Bound().Dim())
152  {
153  tree->MaxNumChildren()++;
154  tree->children.resize(tree->MaxNumChildren() + 1);
155  Log::Warn << "Could not find an acceptable partition."
156  "The size of the node will be increased.";
157  return false;
158  }
159 
160  TreeType* treeOne = new TreeType(tree->Parent(), tree->MaxNumChildren());
161  TreeType* treeTwo = new TreeType(tree->Parent(), tree->MaxNumChildren());
162  treeOne->MinLeafSize() = 0;
163  treeOne->MinNumChildren() = 0;
164  treeTwo->MinLeafSize() = 0;
165  treeTwo->MinNumChildren() = 0;
166 
167  // Split the node into two new nodes.
168  SplitNonLeafNodeAlongPartition(tree, treeOne, treeTwo, cutAxis, cut);
169 
170  TreeType* parent = tree->Parent();
171  size_t i = 0;
172  while (parent->children[i] != tree)
173  ++i;
174 
175  assert(i < parent->NumChildren());
176 
177  // Insert two new nodes to the tree.
178  parent->children[i] = treeOne;
179  parent->children[parent->NumChildren()++] = treeTwo;
180 
181  tree->SoftDelete();
182 
183  assert(parent->NumChildren() <= parent->MaxNumChildren() + 1);
184 
185  // Propagate the split upward if necessary.
186  if (parent->NumChildren() == parent->MaxNumChildren() + 1)
187  RPlusTreeSplit::SplitNonLeafNode(parent, relevels);
188 
189  return false;
190 }
191 
192 template<typename SplitPolicyType,
193  template<typename> class SweepType>
194 template<typename TreeType>
196  TreeType* tree,
197  TreeType* treeOne,
198  TreeType* treeTwo,
199  const size_t cutAxis,
200  const typename TreeType::ElemType cut)
201 {
202  // Split the auxiliary information.
203  tree->AuxiliaryInfo().SplitAuxiliaryInfo(treeOne, treeTwo, cutAxis, cut);
204 
205  // Ensure that the capacity of the nodes is sufficient.
206  if (treeOne->MaxLeafSize() < tree->NumPoints())
207  {
208  treeOne->MaxLeafSize() = tree->NumPoints();
209  treeOne->points.resize(treeOne->MaxLeafSize() + 1);
210  }
211 
212  if (treeTwo->MaxLeafSize() < tree->NumPoints())
213  {
214  treeTwo->MaxLeafSize() = tree->NumPoints();
215  treeTwo->points.resize(treeTwo->MaxLeafSize() + 1);
216  }
217 
218  // Insert points into the corresponding subtree.
219  for (size_t i = 0; i < tree->NumPoints(); ++i)
220  {
221  if (tree->Dataset().col(tree->Point(i))[cutAxis] <= cut)
222  {
223  treeOne->Point(treeOne->Count()++) = tree->Point(i);
224  treeOne->Bound() |= tree->Dataset().col(tree->Point(i));
225  }
226  else
227  {
228  treeTwo->Point(treeTwo->Count()++) = tree->Point(i);
229  treeTwo->Bound() |= tree->Dataset().col(tree->Point(i));
230  }
231  }
232  // Update the number of descandants.
233  treeOne->numDescendants = treeOne->Count();
234  treeTwo->numDescendants = treeTwo->Count();
235 
236  assert(treeOne->Count() <= treeOne->MaxLeafSize());
237  assert(treeTwo->Count() <= treeTwo->MaxLeafSize());
238 
239  assert(tree->Count() == treeOne->Count() + treeTwo->Count());
240  assert(treeOne->Bound()[cutAxis].Hi() < treeTwo->Bound()[cutAxis].Lo());
241 }
242 
243 template<typename SplitPolicyType,
244  template<typename> class SweepType>
245 template<typename TreeType>
247  TreeType* tree,
248  TreeType* treeOne,
249  TreeType* treeTwo,
250  const size_t cutAxis,
251  const typename TreeType::ElemType cut)
252 {
253  // Split the auxiliary information.
254  tree->AuxiliaryInfo().SplitAuxiliaryInfo(treeOne, treeTwo, cutAxis, cut);
255 
256  // Insert children into the corresponding subtree.
257  for (size_t i = 0; i < tree->NumChildren(); ++i)
258  {
259  TreeType* child = tree->children[i];
260  int policy = SplitPolicyType::GetSplitPolicy(*child, cutAxis, cut);
261 
262  if (policy == SplitPolicyType::AssignToFirstTree)
263  {
264  InsertNodeIntoTree(treeOne, child);
265  child->Parent() = treeOne;
266  }
267  else if (policy == SplitPolicyType::AssignToSecondTree)
268  {
269  InsertNodeIntoTree(treeTwo, child);
270  child->Parent() = treeTwo;
271  }
272  else
273  {
274  // The child should be split (i.e. the partition divides its bound).
275  TreeType* childOne = new TreeType(treeOne);
276  TreeType* childTwo = new TreeType(treeTwo);
277  treeOne->MinLeafSize() = 0;
278  treeOne->MinNumChildren() = 0;
279  treeTwo->MinLeafSize() = 0;
280  treeTwo->MinNumChildren() = 0;
281 
282  // Propagate the split downward.
283  if (child->IsLeaf())
284  SplitLeafNodeAlongPartition(child, childOne, childTwo, cutAxis, cut);
285  else
286  SplitNonLeafNodeAlongPartition(child, childOne, childTwo, cutAxis, cut);
287 
288  InsertNodeIntoTree(treeOne, childOne);
289  InsertNodeIntoTree(treeTwo, childTwo);
290 
291  child->SoftDelete();
292  }
293  }
294 
295  assert(treeOne->NumChildren() + treeTwo->NumChildren() != 0);
296 
297  // Add a fake subtree if one of the subtrees is empty.
298  if (treeOne->NumChildren() == 0)
299  AddFakeNodes(treeTwo, treeOne);
300  else if (treeTwo->NumChildren() == 0)
301  AddFakeNodes(treeOne, treeTwo);
302 
303  assert(treeOne->NumChildren() <= treeOne->MaxNumChildren());
304  assert(treeTwo->NumChildren() <= treeTwo->MaxNumChildren());
305 }
306 
307 template<typename SplitPolicyType,
308  template<typename> class SweepType>
309 template<typename TreeType>
311 AddFakeNodes(const TreeType* tree, TreeType* emptyTree)
312 {
313  size_t numDescendantNodes = tree->TreeDepth() - 1;
314 
315  TreeType* node = emptyTree;
316  for (size_t i = 0; i < numDescendantNodes; ++i)
317  {
318  TreeType* child = new TreeType(node);
319  node->children[node->NumChildren()++] = child;
320 
321  node = child;
322  }
323 }
324 
325 template<typename SplitPolicyType,
326  template<typename> class SweepType>
327 template<typename TreeType>
329 PartitionNode(const TreeType* node, size_t& minCutAxis,
330  typename TreeType::ElemType& minCut)
331 {
332  if ((node->NumChildren() <= node->MaxNumChildren() && !node->IsLeaf()) ||
333  (node->Count() <= node->MaxLeafSize() && node->IsLeaf()))
334  return false; // No partition required.
335 
336  // Define the type of the sweep cost.
337  typedef typename
338  SweepType<SplitPolicyType>::template SweepCost<TreeType>::type
339  SweepCostType;
340 
341  SweepCostType minCost = std::numeric_limits<SweepCostType>::max();
342  minCutAxis = node->Bound().Dim();
343 
344  // Find the sweep with a minimal cost.
345  for (size_t k = 0; k < node->Bound().Dim(); ++k)
346  {
347  typename TreeType::ElemType cut;
348  SweepCostType cost;
349 
350  if (node->IsLeaf())
351  cost = SweepType<SplitPolicyType>::SweepLeafNode(k, node, cut);
352  else
353  cost = SweepType<SplitPolicyType>::SweepNonLeafNode(k, node, cut);
354 
355  if (cost < minCost)
356  {
357  minCost = cost;
358  minCutAxis = k;
359  minCut = cut;
360  }
361  }
362 
363  return true;
364 }
365 
366 template<typename SplitPolicyType,
367  template<typename> class SweepType>
368 template<typename TreeType>
370 InsertNodeIntoTree(TreeType* destTree, TreeType* srcNode)
371 {
372  destTree->Bound() |= srcNode->Bound();
373  destTree->numDescendants += srcNode->numDescendants;
374  destTree->children[destTree->NumChildren()++] = srcNode;
375 }
376 
377 } // namespace tree
378 } // namespace mlpack
379 
380 #endif // MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_SPLIT_IMPL_HPP
The RPlusTreeSplit class performs the split process of a node on overflow.
Definition: r_plus_tree_split.hpp:32
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
static void SplitLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a leaf node using the "default" algorithm.
Definition: r_plus_tree_split_impl.hpp:27
static MLPACK_EXPORT util::PrefixedOutStream Warn
Prints warning messages prefixed with [WARN ].
Definition: log.hpp:87
static bool SplitNonLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a non-leaf node using the "default" algorithm.
Definition: r_plus_tree_split_impl.hpp:124