12 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_STAR_TREE_SPLIT_IMPL_HPP 13 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_STAR_TREE_SPLIT_IMPL_HPP 27 template<
typename TreeType>
29 std::vector<bool>& relevels)
32 typedef typename TreeType::ElemType ElemType;
35 if (relevels[tree->TreeDepth() - 1])
37 relevels[tree->TreeDepth() - 1] =
false;
41 TreeType* root = tree;
42 while (root->Parent() != NULL)
43 root = root->Parent();
44 size_t p = tree->MaxLeafSize() * 0.3;
50 std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
51 arma::Col<ElemType> center;
52 tree->Bound().Center(center);
53 for (
size_t i = 0; i < sorted.size(); ++i)
55 sorted[i].first = tree->Metric().Evaluate(center,
56 tree->Dataset().col(tree->Point(i)));
57 sorted[i].second = tree->Point(i);
60 std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
63 for (
size_t i = 0; i < p; ++i)
64 root->DeletePoint(sorted[sorted.size() - 1 - i].second, relevels);
68 for (
size_t i = p; i > 0; --i)
69 root->InsertPoint(sorted[sorted.size() - i].second, relevels);
81 template<
typename TreeType>
87 typedef typename TreeType::ElemType ElemType;
92 ElemType bestScore = std::numeric_limits<ElemType>::max();
97 for (
size_t j = 0; j < tree->Bound().Dim(); ++j)
99 ElemType axisScore = 0.0;
102 arma::Col<ElemType> dimValues(tree->Count());
103 for (
size_t i = 0; i < tree->Count(); ++i)
104 dimValues[i] = tree->Dataset().col(tree->Point(i))[j];
105 arma::uvec sortedIndices = arma::sort_index(dimValues);
108 const size_t numPossibleSplits = tree->MaxLeafSize() -
109 2 * tree->MinLeafSize() + 2;
110 arma::Col<ElemType> areas(numPossibleSplits, arma::fill::zeros);
111 arma::Col<ElemType> margins(numPossibleSplits, arma::fill::zeros);
112 arma::Col<ElemType> overlaps(numPossibleSplits, arma::fill::zeros);
114 for (
size_t i = 0; i < numPossibleSplits; ++i)
119 size_t splitIndex = tree->MinLeafSize() + i;
121 BoundType bound1(tree->Bound().Dim());
122 BoundType bound2(tree->Bound().Dim());
124 for (
size_t l = 0; l < splitIndex; l++)
125 bound1 |= tree->Dataset().col(tree->Point(sortedIndices[l]));
127 for (
size_t l = splitIndex; l < tree->Count(); l++)
128 bound2 |= tree->Dataset().col(tree->Point(sortedIndices[l]));
130 areas[i] = bound1.Volume() + bound2.Volume();
131 overlaps[i] = bound1.Overlap(bound2);
133 for (
size_t k = 0; k < bound1.Dim(); ++k)
134 margins[i] += bound1[k].Width() + bound2[k].Width();
136 axisScore += margins[i];
140 if (axisScore < bestScore)
142 bestScore = axisScore;
144 size_t overlapIndex = 0;
145 size_t areaIndex = 0;
146 bool tiedOnOverlap =
false;
148 for (
size_t i = 1; i < areas.n_elem; ++i)
150 if (overlaps[i] < overlaps[overlapIndex])
152 tiedOnOverlap =
false;
156 else if (overlaps[i] == overlaps[overlapIndex])
158 tiedOnOverlap =
true;
159 if (areas[i] < areas[areaIndex])
165 bestIndex = (tiedOnOverlap ? areaIndex : overlapIndex);
176 template<
typename TreeType>
180 typedef typename TreeType::ElemType ElemType;
183 if (tree->Count() <= tree->MaxLeafSize())
200 std::vector<std::pair<ElemType, size_t>> sorted(tree->Count());
201 for (
size_t i = 0; i < sorted.size(); ++i)
203 sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
204 sorted[i].second = tree->Point(i);
207 std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
220 TreeType* par = tree->Parent();
221 TreeType* treeOne = (par) ? tree :
new TreeType(tree);
222 TreeType* treeTwo = (par) ?
new TreeType(par) :
new TreeType(tree);
225 const size_t numPoints = tree->Count();
229 tree->numChildren = 0;
230 tree->numDescendants = 0;
235 for (
size_t i = 0; i < numPoints; ++i)
237 if (i < bestIndex + tree->MinLeafSize())
238 treeOne->InsertPoint(sorted[i].second);
240 treeTwo->InsertPoint(sorted[i].second);
247 par->children[par->NumChildren()++] = treeTwo;
251 if (par->NumChildren() == par->MaxNumChildren() + 1)
258 InsertNodeIntoTree(tree, treeOne);
259 InsertNodeIntoTree(tree, treeTwo);
270 template<
typename TreeType>
273 std::vector<bool>& relevels)
276 typedef typename TreeType::ElemType ElemType;
285 size_t bestIndex = 0;
286 ElemType bestScore = std::numeric_limits<ElemType>::max();
287 bool lowIsBetter =
true;
292 for (
size_t j = 0; j < tree->Bound().Dim(); ++j)
294 ElemType axisLoScore = 0.0;
295 ElemType axisHiScore = 0.0;
299 arma::Col<ElemType> loDimValues(tree->NumChildren());
300 arma::Col<ElemType> hiDimValues(tree->NumChildren());
301 for (
size_t i = 0; i < tree->NumChildren(); ++i)
303 loDimValues[i] = tree->Child(i).Bound()[j].Lo();
304 hiDimValues[i] = tree->Child(i).Bound()[j].Hi();
306 arma::uvec sortedLoDimIndices = arma::sort_index(loDimValues);
307 arma::uvec sortedHiDimIndices = arma::sort_index(hiDimValues);
312 const size_t numPossibleSplits = tree->MaxNumChildren() -
313 2 * tree->MinNumChildren() + 2;
314 arma::Col<ElemType> areas(2 * numPossibleSplits, arma::fill::zeros);
315 arma::Col<ElemType> margins(2 * numPossibleSplits, arma::fill::zeros);
316 arma::Col<ElemType> overlaps(2 * numPossibleSplits, arma::fill::zeros);
318 for (
size_t i = 0; i < numPossibleSplits; ++i)
323 const size_t splitIndex = tree->MinNumChildren() + i;
325 BoundType lb1(tree->Bound().Dim());
326 BoundType lb2(tree->Bound().Dim());
327 BoundType hb1(tree->Bound().Dim());
328 BoundType hb2(tree->Bound().Dim());
330 for (
size_t l = 0; l < splitIndex; ++l)
332 lb1 |= tree->Child(sortedLoDimIndices[l]).Bound();
333 hb1 |= tree->Child(sortedHiDimIndices[l]).Bound();
335 for (
size_t l = splitIndex; l < tree->NumChildren(); ++l)
337 lb2 |= tree->Child(sortedLoDimIndices[l]).Bound();
338 hb2 |= tree->Child(sortedHiDimIndices[l]).Bound();
342 areas[2 * i] = lb1.Volume() + lb2.Volume();
343 overlaps[2 * i] = lb1.Overlap(lb2);
346 areas[2 * i + 1] = hb1.Volume() + hb2.Volume();
347 overlaps[2 * i + 1] = hb1.Overlap(hb2);
350 for (
size_t k = 0; k < lb1.Dim(); ++k)
352 margins[2 * i] += lb1[k].Width() + lb2[k].Width();
353 margins[2 * i + 1] += hb1[k].Width() + hb2[k].Width();
357 axisLoScore += margins[2 * i];
358 axisHiScore += margins[2 * i + 1];
363 if (std::min(axisLoScore, axisHiScore) < bestScore)
365 bestScore = std::min(axisLoScore, axisHiScore);
366 if (axisLoScore < axisHiScore)
376 const size_t indexOffset = lowIsBetter ? 0 : 1;
378 size_t overlapIndex = indexOffset;
379 size_t areaIndex = indexOffset;
380 bool tiedOnOverlap =
false;
384 for (
size_t i = 1; i < numPossibleSplits; ++i)
387 if (overlaps[2 * i + indexOffset] < overlaps[overlapIndex])
389 tiedOnOverlap =
false;
390 areaIndex = 2 * i + indexOffset;
391 overlapIndex = 2 * i + indexOffset;
393 else if (overlaps[i] == overlaps[overlapIndex])
395 tiedOnOverlap =
true;
396 if (areas[2 * i + indexOffset] < areas[areaIndex])
397 areaIndex = 2 * i + indexOffset;
401 bestIndex = ((tiedOnOverlap ? areaIndex : overlapIndex) - indexOffset) / 2
402 + tree->MinNumChildren();
407 std::vector<TreeType*> oldChildren(tree->NumChildren());
408 for (
size_t i = 0; i < oldChildren.size(); ++i)
409 oldChildren[i] = &tree->Child(i);
422 TreeType* par = tree->Parent();
423 TreeType* treeOne = par ? tree :
new TreeType(tree);
424 TreeType* treeTwo = par ?
new TreeType(par) :
new TreeType(tree);
427 tree->numChildren = 0;
428 tree->numDescendants = 0;
433 arma::Col<ElemType> values(oldChildren.size());
434 for (
size_t i = 0; i < oldChildren.size(); ++i)
436 values[i] = (lowIsBetter ? oldChildren[i]->Bound()[bestAxis].Lo()
437 : oldChildren[i]->Bound()[bestAxis].Hi());
439 arma::uvec indices = arma::sort_index(values);
441 for (
size_t i = 0; i < bestIndex; ++i)
442 InsertNodeIntoTree(treeOne, oldChildren[indices[i]]);
443 for (
size_t i = bestIndex; i < oldChildren.size(); ++i)
444 InsertNodeIntoTree(treeTwo, oldChildren[indices[i]]);
451 par->children[par->NumChildren()++] = treeTwo;
456 InsertNodeIntoTree(tree, treeOne);
457 InsertNodeIntoTree(tree, treeTwo);
461 for (
size_t i = 0; i < treeOne->NumChildren(); ++i)
462 treeOne->children[i]->Parent() = treeOne;
466 for (
size_t i = 0; i < treeTwo->NumChildren(); ++i)
467 treeTwo->children[i]->Parent() = treeTwo;
471 if (par && par->NumChildren() >= par->MaxNumChildren() + 1)
481 template<
typename TreeType>
482 void RStarTreeSplit::InsertNodeIntoTree(TreeType* destTree, TreeType* srcNode)
484 destTree->Bound() |= srcNode->Bound();
485 destTree->numDescendants += srcNode->numDescendants;
486 destTree->children[destTree->NumChildren()++] = srcNode;
static bool SplitNonLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a non-leaf node using the "default" algorithm.
Definition: r_star_tree_split_impl.hpp:271
static void SplitLeafNode(TreeType *tree, std::vector< bool > &relevels)
Split a leaf node using the algorithm described in "The R*-tree: An Efficient and Robust Access metho...
Definition: r_star_tree_split_impl.hpp:177
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
static void PickLeafSplit(TreeType *tree, size_t &bestAxis, size_t &bestIndex)
Given a node, return the best dimension and the best index to split on.
Definition: r_star_tree_split_impl.hpp:82
static size_t ReinsertPoints(TreeType *tree, std::vector< bool > &relevels)
Reinsert any points into the tree, if needed.
Definition: r_star_tree_split_impl.hpp:28
Definition of the Range class, which represents a simple range with a lower and upper bound...