mlpack
vantage_point_split_impl.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_VANTAGE_POINT_SPLIT_IMPL_HPP
14 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_VANTAGE_POINT_SPLIT_IMPL_HPP
15 
16 #include "vantage_point_split.hpp"
18 
19 namespace mlpack {
20 namespace tree {
21 
22 template<typename BoundType, typename MatType, size_t MaxNumSamples>
24 SplitNode(const BoundType& bound, MatType& data, const size_t begin,
25  const size_t count, SplitInfo& splitInfo)
26 {
27  ElemType mu = 0;
28  size_t vantagePointIndex = 0;
29 
30  // Find the best vantage point.
31  SelectVantagePoint(bound.Metric(), data, begin, count, vantagePointIndex, mu);
32 
33  // If all points are equal, we can't split.
34  if (mu == 0)
35  return false;
36 
37  splitInfo = SplitInfo(bound.Metric(), data.col(vantagePointIndex), mu);
38 
39  return true;
40 }
41 
42 template<typename BoundType, typename MatType, size_t MaxNumSamples>
44 SelectVantagePoint(const MetricType& metric, const MatType& data,
45  const size_t begin, const size_t count, size_t& vantagePoint, ElemType& mu)
46 {
47  arma::uvec vantagePointCandidates;
48  arma::Col<ElemType> distances(MaxNumSamples);
49 
50  // Get no more than max(MaxNumSamples, count) vantage point candidates
51  math::ObtainDistinctSamples(begin, begin + count, MaxNumSamples,
52  vantagePointCandidates);
53 
54  ElemType bestSpread = 0;
55 
56  arma::uvec samples;
57  // Evaluate each candidate
58  for (size_t i = 0; i < vantagePointCandidates.n_elem; ++i)
59  {
60  // Get no more than min(MaxNumSamples, count) random samples
61  math::ObtainDistinctSamples(begin, begin + count, MaxNumSamples, samples);
62 
63  // Calculate the second moment of the distance to the vantage point
64  // candidate using these random samples.
65  distances.set_size(samples.n_elem);
66 
67  for (size_t j = 0; j < samples.n_elem; ++j)
68  distances[j] = metric.Evaluate(data.col(vantagePointCandidates[i]),
69  data.col(samples[j]));
70 
71  const ElemType spread = arma::sum(distances % distances) / samples.n_elem;
72 
73  if (spread > bestSpread)
74  {
75  bestSpread = spread;
76  vantagePoint = vantagePointCandidates[i];
77  // Calculate the median value of the distance from the vantage point
78  // candidate to these samples.
79  mu = arma::median(distances);
80  }
81  }
82  assert(bestSpread > 0);
83 }
84 
85 } // namespace tree
86 } // namespace mlpack
87 
88 #endif // MLPACK_CORE_TREE_BINARY_SPACE_TREE_VANTAGE_POINT_SPLIT_IMPL_HPP
void ObtainDistinctSamples(const size_t loInclusive, const size_t hiExclusive, const size_t maxNumSamples, arma::uvec &distinctSamples)
Obtains no more than maxNumSamples distinct samples.
Definition: random.hpp:153
MatType::elem_type ElemType
The matrix element type.
Definition: vantage_point_split.hpp:36
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
static bool SplitNode(const BoundType &bound, MatType &data, const size_t begin, const size_t count, SplitInfo &splitInfo)
Split the node according to the distance to a vantage point.
Definition: vantage_point_split_impl.hpp:24
A struct that contains an information about the split.
Definition: vantage_point_split.hpp:40
The class splits a binary space partitioning tree node according to the median distance to the vantag...
Definition: vantage_point_split.hpp:32
BoundType::MetricType MetricType
The bounding shape type.
Definition: vantage_point_split.hpp:38
Bounds that are useful for binary space partitioning trees.