mlpack
spill_tree.hpp
Go to the documentation of this file.
1 
11 #ifndef MLPACK_CORE_TREE_SPILL_TREE_SPILL_TREE_HPP
12 #define MLPACK_CORE_TREE_SPILL_TREE_SPILL_TREE_HPP
13 
14 #include <mlpack/prereqs.hpp>
15 #include "../space_split/midpoint_space_split.hpp"
16 #include "../statistic.hpp"
17 
18 namespace mlpack {
19 namespace tree {
20 
66 template<typename MetricType,
67  typename StatisticType = EmptyStatistic,
68  typename MatType = arma::mat,
69  template<typename HyperplaneMetricType>
70  class HyperplaneType = AxisOrthogonalHyperplane,
71  template<typename SplitMetricType, typename SplitMatType>
72  class SplitType = MidpointSpaceSplit>
73 class SpillTree
74 {
75  public:
77  typedef MatType Mat;
79  typedef typename MatType::elem_type ElemType;
81  typedef typename HyperplaneType<MetricType>::BoundType BoundType;
82 
83  private:
85  SpillTree* left;
87  SpillTree* right;
89  SpillTree* parent;
92  size_t count;
95  arma::Col<size_t>* pointsIndex;
97  bool overlappingNode;
99  HyperplaneType<MetricType> hyperplane;
101  BoundType bound;
103  StatisticType stat;
105  ElemType parentDistance;
108  ElemType furthestDescendantDistance;
110  ElemType minimumBoundDistance;
113  const MatType* dataset;
115  bool localDataset;
116 
121  template<typename RuleType, bool Defeatist = false>
123 
128  template<typename RuleType, bool Defeatist = false>
130 
131  public:
133  template<typename RuleType>
135 
137  template<typename RuleType>
139 
141  template<typename RuleType>
143 
145  template<typename RuleType>
147 
158  SpillTree(const MatType& data,
159  const double tau = 0,
160  const size_t maxLeafSize = 20,
161  const double rho = 0.7);
162 
174  SpillTree(MatType&& data,
175  const double tau = 0,
176  const size_t maxLeafSize = 20,
177  const double rho = 0.7);
178 
190  SpillTree(SpillTree* parent,
191  arma::Col<size_t>& points,
192  const double tau = 0,
193  const size_t maxLeafSize = 20,
194  const double rho = 0.7);
195 
202  SpillTree(const SpillTree& other);
203 
210  SpillTree(SpillTree&& other);
211 
217  SpillTree& operator=(const SpillTree& other);
218 
224  SpillTree& operator=(SpillTree&& other);
225 
231  template<typename Archive>
232  SpillTree(
233  Archive& ar,
234  const typename std::enable_if_t<cereal::is_loading<Archive>()>* = 0);
235 
241  ~SpillTree();
242 
244  const BoundType& Bound() const { return bound; }
246  BoundType& Bound() { return bound; }
247 
249  const StatisticType& Stat() const { return stat; }
251  StatisticType& Stat() { return stat; }
252 
254  bool IsLeaf() const;
255 
257  SpillTree* Left() const { return left; }
259  SpillTree*& Left() { return left; }
260 
262  SpillTree* Right() const { return right; }
264  SpillTree*& Right() { return right; }
265 
267  SpillTree* Parent() const { return parent; }
269  SpillTree*& Parent() { return parent; }
270 
272  const MatType& Dataset() const { return *dataset; }
273 
275  bool Overlap() const { return overlappingNode; }
276 
278  const HyperplaneType<MetricType>& Hyperplane() const { return hyperplane; }
279 
281  MetricType Metric() const { return MetricType(); }
282 
284  size_t NumChildren() const;
285 
292  template<typename VecType>
293  size_t GetNearestChild(
294  const VecType& point,
295  typename std::enable_if_t<IsVector<VecType>::value>* = 0);
296 
303  template<typename VecType>
304  size_t GetFurthestChild(
305  const VecType& point,
306  typename std::enable_if_t<IsVector<VecType>::value>* = 0);
307 
314  size_t GetNearestChild(const SpillTree& queryNode);
315 
322  size_t GetFurthestChild(const SpillTree& queryNode);
323 
328  ElemType FurthestPointDistance() const;
329 
337  ElemType FurthestDescendantDistance() const;
338 
340  ElemType MinimumBoundDistance() const;
341 
344  ElemType ParentDistance() const { return parentDistance; }
347  ElemType& ParentDistance() { return parentDistance; }
348 
355  SpillTree& Child(const size_t child) const;
356 
357  SpillTree*& ChildPtr(const size_t child)
358  { return (child == 0) ? left : right; }
359 
361  size_t NumPoints() const;
362 
368  size_t NumDescendants() const;
369 
377  size_t Descendant(const size_t index) const;
378 
387  size_t Point(const size_t index) const;
388 
390  ElemType MinDistance(const SpillTree& other) const
391  {
392  return bound.MinDistance(other.Bound());
393  }
394 
396  ElemType MaxDistance(const SpillTree& other) const
397  {
398  return bound.MaxDistance(other.Bound());
399  }
400 
403  {
404  return bound.RangeDistance(other.Bound());
405  }
406 
408  template<typename VecType>
409  ElemType MinDistance(const VecType& point,
410  typename std::enable_if_t<IsVector<VecType>::value>* = 0)
411  const
412  {
413  return bound.MinDistance(point);
414  }
415 
417  template<typename VecType>
418  ElemType MaxDistance(const VecType& point,
419  typename std::enable_if_t<IsVector<VecType>::value>* = 0)
420  const
421  {
422  return bound.MaxDistance(point);
423  }
424 
426  template<typename VecType>
428  RangeDistance(const VecType& point,
429  typename std::enable_if_t<IsVector<VecType>::value>* = 0) const
430  {
431  return bound.RangeDistance(point);
432  }
433 
435  static bool HasSelfChildren() { return false; }
436 
438  void Center(arma::vec& center) { bound.Center(center); }
439 
440  private:
449  void SplitNode(arma::Col<size_t>& points,
450  const size_t maxLeafSize,
451  const double tau,
452  const double rho);
453 
464  bool SplitPoints(const double tau,
465  const double rho,
466  const arma::Col<size_t>& points,
467  arma::Col<size_t>& leftPoints,
468  arma::Col<size_t>& rightPoints);
469  protected:
476  SpillTree();
477 
479  friend class cereal::access;
480 
481  public:
485  template<typename Archive>
486  void serialize(Archive& ar, const uint32_t version);
487 };
488 
489 } // namespace tree
490 } // namespace mlpack
491 
492 // Include implementation.
493 #include "spill_tree_impl.hpp"
494 
495 // Include everything else, if necessary.
496 #include "../spill_tree.hpp"
497 
498 #endif
SpillTree *& Right()
Modify the right child of this node.
Definition: spill_tree.hpp:264
SpillTree & operator=(const SpillTree &other)
Copy the given Spill Tree.
Definition: spill_tree_impl.hpp:206
MatType::elem_type ElemType
The type of element held in MatType.
Definition: spill_tree.hpp:79
ElemType MinDistance(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0) const
Return the minimum distance to another point.
Definition: spill_tree.hpp:409
ElemType FurthestDescendantDistance() const
Return the furthest possible descendant distance.
Definition: spill_tree_impl.hpp:600
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
HyperplaneType< MetricType >::BoundType BoundType
The bound type.
Definition: spill_tree.hpp:81
SpillTree *& Parent()
Modify the parent of this node.
Definition: spill_tree.hpp:269
SpillTree *& Left()
Modify the left child of this node.
Definition: spill_tree.hpp:259
The core includes that mlpack expects; standard C++ includes and Armadillo.
A generic single-tree traverser for hybrid spill trees; see spill_single_tree_traverser.hpp for implementation.
Definition: spill_single_tree_traverser.hpp:34
size_t NumPoints() const
Return the number of points in this node (0 if not a leaf).
Definition: spill_tree_impl.hpp:649
SpillTree & Child(const size_t child) const
Return the specified child (0 will be left, 1 will be right).
Definition: spill_tree_impl.hpp:631
const StatisticType & Stat() const
Return the statistic object for this node.
Definition: spill_tree.hpp:249
SpillTree * Right() const
Gets the right child of this node.
Definition: spill_tree.hpp:262
ElemType MaxDistance(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0) const
Return the maximum distance to another point.
Definition: spill_tree.hpp:418
A hybrid spill tree is a variant of binary space trees in which the children of a node can "spill ove...
Definition: spill_tree.hpp:73
bool Overlap() const
Distinguish overlapping nodes from non-overlapping nodes.
Definition: spill_tree.hpp:275
size_t GetNearestChild(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0)
Return the index of the nearest child node to the given query point (this is an efficient estimation ...
Definition: spill_tree_impl.hpp:472
size_t NumDescendants() const
Return the number of descendants of this node.
Definition: spill_tree_impl.hpp:666
A generic dual-tree traverser for hybrid spill trees; see spill_dual_tree_traverser.hpp for implementation.
Definition: spill_dual_tree_traverser.hpp:35
SpillTree * Parent() const
Gets the parent of this node.
Definition: spill_tree.hpp:267
StatisticType & Stat()
Return the statistic object for this node.
Definition: spill_tree.hpp:251
SpillTree * Left() const
Gets the left child of this node.
Definition: spill_tree.hpp:257
bool IsLeaf() const
Return whether or not this node is a leaf (true if it has no children).
Definition: spill_tree_impl.hpp:433
Implementation of generalized hybrid spill tree (SpillTree).
friend class cereal::access
Friend access is given for the default constructor.
Definition: spill_tree.hpp:479
MetricType Metric() const
Get the metric that the tree uses.
Definition: spill_tree.hpp:281
HyperplaneBase< bound::HRectBound< MetricType >, AxisParallelProjVector > AxisOrthogonalHyperplane
AxisOrthogonalHyperplane represents a hyperplane orthogonal to an axis.
Definition: hyperplane.hpp:145
const BoundType & Bound() const
Return the bound object for this node.
Definition: spill_tree.hpp:244
math::RangeType< ElemType > RangeDistance(const SpillTree &other) const
Return the minimum and maximum distance to another node.
Definition: spill_tree.hpp:402
math::RangeType< ElemType > RangeDistance(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0) const
Return the minimum and maximum distance to another point.
Definition: spill_tree.hpp:428
SpillTree()
A default constructor.
Definition: spill_tree_impl.hpp:880
void serialize(Archive &ar, const uint32_t version)
Serialize the tree.
Definition: spill_tree_impl.hpp:907
ElemType MinimumBoundDistance() const
Return the minimum distance from the center of the node to any bound edge.
Definition: spill_tree_impl.hpp:615
size_t Descendant(const size_t index) const
Return the index (with reference to the dataset) of a particular descendant of this node...
Definition: spill_tree_impl.hpp:681
size_t GetFurthestChild(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0)
Return the index of the furthest child node to the given query point (this is an efficient estimation...
Definition: spill_tree_impl.hpp:498
ElemType & ParentDistance()
Modify the distance from the center of this node to the center of the parent node.
Definition: spill_tree.hpp:347
static bool HasSelfChildren()
Returns false: this tree type does not have self children.
Definition: spill_tree.hpp:435
const MatType & Dataset() const
Get the dataset which the tree is built on.
Definition: spill_tree.hpp:272
~SpillTree()
Deletes this node, deallocating the memory for the children and calling their destructors in turn...
Definition: spill_tree_impl.hpp:415
ElemType FurthestPointDistance() const
Return the furthest distance to a point held in this node.
Definition: spill_tree_impl.hpp:575
ElemType ParentDistance() const
Return the distance from the center of this node to the center of the parent node.
Definition: spill_tree.hpp:344
MatType Mat
So other classes can use TreeType::Mat.
Definition: spill_tree.hpp:77
ElemType MinDistance(const SpillTree &other) const
Return the minimum distance to another node.
Definition: spill_tree.hpp:390
size_t NumChildren() const
Return the number of children in this node.
Definition: spill_tree_impl.hpp:448
BoundType & Bound()
Return the bound object for this node.
Definition: spill_tree.hpp:246
If value == true, then VecType is some sort of Armadillo vector or subview.
Definition: arma_traits.hpp:35
void Center(arma::vec &center)
Store the center of the bounding region in the given vector.
Definition: spill_tree.hpp:438
size_t Point(const size_t index) const
Return the index (with reference to the dataset) of a particular point in this node.
Definition: spill_tree_impl.hpp:705
const HyperplaneType< MetricType > & Hyperplane() const
Get the Hyperplane instance.
Definition: spill_tree.hpp:278
ElemType MaxDistance(const SpillTree &other) const
Return the maximum distance to another node.
Definition: spill_tree.hpp:396