mlpack
pelleg_moore_kmeans_impl.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_KMEANS_PELLEG_MOORE_KMEANS_IMPL_HPP
14 #define MLPACK_METHODS_KMEANS_PELLEG_MOORE_KMEANS_IMPL_HPP
15 
16 #include "pelleg_moore_kmeans.hpp"
18 
19 namespace mlpack {
20 namespace kmeans {
21 
22 template<typename MetricType, typename MatType>
24  const MatType& dataset,
25  MetricType& metric) :
26  datasetOrig(dataset),
27  tree(new TreeType(const_cast<MatType&>(datasetOrig))),
28  dataset(tree->Dataset()),
29  metric(metric),
30  distanceCalculations(0)
31 {
32  // Nothing to do.
33 }
34 
35 template<typename MetricType, typename MatType>
37 {
38  if (tree)
39  delete tree;
40 }
41 
42 // Run a single iteration.
43 template<typename MetricType, typename MatType>
45  const arma::mat& centroids,
46  arma::mat& newCentroids,
47  arma::Col<size_t>& counts)
48 {
49  newCentroids.zeros(centroids.n_rows, centroids.n_cols);
50  counts.zeros(centroids.n_cols);
51 
52  // Create rules object.
54  RulesType rules(dataset, centroids, newCentroids, counts, metric);
55 
56  // Use single-tree traverser.
57  typename TreeType::template SingleTreeTraverser<RulesType> traverser(rules);
58 
59  // Now, do a traversal with a fake query index (since the query index is
60  // irrelevant; we are checking each node with all clusters.
61  traverser.Traverse(0, *tree);
62 
63  distanceCalculations += rules.DistanceCalculations();
64 
65  // Now, calculate how far the clusters moved, after normalizing them.
66  double residual = 0.0;
67  for (size_t c = 0; c < centroids.n_cols; ++c)
68  {
69  if (counts[c] > 0)
70  {
71  newCentroids.col(c) /= counts(c);
72  residual += std::pow(metric.Evaluate(centroids.col(c),
73  newCentroids.col(c)), 2.0);
74  }
75  }
76  distanceCalculations += centroids.n_cols;
77 
78  return std::sqrt(residual);
79 }
80 
81 } // namespace kmeans
82 } // namespace mlpack
83 
84 #endif
double Iterate(const arma::mat &centroids, arma::mat &newCentroids, arma::Col< size_t > &counts)
Run a single iteration of the Pelleg-Moore blacklist algorithm, updating the given centroids into the...
Definition: pelleg_moore_kmeans_impl.hpp:44
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
A binary space partitioning tree, such as a KD-tree or a ball tree.
Definition: binary_space_tree.hpp:54
~PellegMooreKMeans()
Delete the tree constructed by the PellegMooreKMeans object.
Definition: pelleg_moore_kmeans_impl.hpp:36
PellegMooreKMeans(const MatType &dataset, MetricType &metric)
Construct the PellegMooreKMeans object, which must construct a tree.
Definition: pelleg_moore_kmeans_impl.hpp:23
The rules class for the single-tree Pelleg-Moore kd-tree traversal for k-means clustering.
Definition: pelleg_moore_kmeans_rules.hpp:33