mlpack
rp_tree_max_split_impl.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_RP_TREE_MAX_SPLIT_IMPL_HPP
14 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_RP_TREE_MAX_SPLIT_IMPL_HPP
15 
16 #include "rp_tree_max_split.hpp"
17 #include "rp_tree_mean_split.hpp"
18 
19 namespace mlpack {
20 namespace tree {
21 
22 template<typename BoundType, typename MatType>
23 bool RPTreeMaxSplit<BoundType, MatType>::SplitNode(const BoundType& /* bound */,
24  MatType& data,
25  const size_t begin,
26  const size_t count,
27  SplitInfo& splitInfo)
28 {
29  splitInfo.direction.zeros(data.n_rows);
30 
31  // Get the normal to the hyperplane.
32  math::RandVector(splitInfo.direction);
33 
34  // Get the value according to which we will perform the split.
35  return GetSplitVal(data, begin, count, splitInfo.direction,
36  splitInfo.splitVal);
37 }
38 
39 template<typename BoundType, typename MatType>
41  const MatType& data,
42  const size_t begin,
43  const size_t count,
44  const arma::Col<ElemType>& direction,
45  ElemType& splitVal)
46 {
47  const size_t maxNumSamples = 100;
48  const size_t numSamples = std::min(maxNumSamples, count);
49  arma::uvec samples;
50 
51  // Get no more than numSamples distinct samples.
52  math::ObtainDistinctSamples(begin, begin + count, numSamples, samples);
53 
54  arma::Col<ElemType> values(samples.n_elem);
55 
56  // Find the median of scalar products of the samples and the normal vector.
57  for (size_t k = 0; k < samples.n_elem; ++k)
58  values[k] = arma::dot(data.col(samples[k]), direction);
59 
60  const ElemType maximum = arma::max(values);
61  const ElemType minimum = arma::min(values);
62  if (minimum == maximum)
63  return false;
64 
65  splitVal = arma::median(values);
66 
67  // Add a random deviation to the median.
68  // This algorithm differs from the method suggested in the random projection
69  // tree paper, for two reasons:
70  // 1. Evaluating the method proposed in the paper is time-consuming, since
71  // we must solve the furthest-pair problem.
72  // 2. The proposed method does not appear to guarantee that a valid split
73  // value will be generated (i.e. it can produce a split value where there
74  // may be no points on the left or the right).
75  splitVal += math::Random((minimum - splitVal) * 0.75,
76  (maximum - splitVal) * 0.75);
77 
78  if (splitVal == maximum)
79  splitVal = minimum;
80 
81  return true;
82 }
83 
84 } // namespace tree
85 } // namespace mlpack
86 
87 #endif // MLPACK_CORE_TREE_BINARY_SPACE_TREE_RP_TREE_MAX_SPLIT_IMPL_HPP
void ObtainDistinctSamples(const size_t loInclusive, const size_t hiExclusive, const size_t maxNumSamples, arma::uvec &distinctSamples)
Obtains no more than maxNumSamples distinct samples.
Definition: random.hpp:153
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
arma::Col< ElemType > direction
The normal vector to the hyperplane that splits the node.
Definition: rp_tree_max_split.hpp:41
This class splits a node by a random hyperplane.
Definition: rp_tree_max_split.hpp:32
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:83
void RandVector(arma::vec &v)
Overwrites a dimension-N vector to a random vector on the unit sphere in R^N.
Definition: lin_alg.cpp:79
MatType::elem_type ElemType
The element type held by the matrix type.
Definition: rp_tree_max_split.hpp:36
An information about the partition.
Definition: rp_tree_max_split.hpp:38
static bool SplitNode(const BoundType &, MatType &data, const size_t begin, const size_t count, SplitInfo &splitInfo)
Split the node by a random hyperplane.
Definition: rp_tree_max_split_impl.hpp:23
ElemType splitVal
The value according to which the node is being split.
Definition: rp_tree_max_split.hpp:43