12 #ifndef MLPACK_METHODS_KMEANS_HAMERLY_KMEANS_IMPL_HPP 13 #define MLPACK_METHODS_KMEANS_HAMERLY_KMEANS_IMPL_HPP 21 template<
typename MetricType,
typename MatType>
26 distanceCalculations(0)
31 template<
typename MetricType,
typename MatType>
33 arma::mat& newCentroids,
34 arma::Col<size_t>& counts)
36 size_t hamerlyPruned = 0;
39 if (minClusterDistances.n_elem != centroids.n_cols)
41 upperBounds.set_size(dataset.n_cols);
42 upperBounds.fill(DBL_MAX);
43 lowerBounds.zeros(dataset.n_cols);
44 assignments.zeros(dataset.n_cols);
45 minClusterDistances.set_size(centroids.n_cols);
49 newCentroids.zeros(centroids.n_rows, centroids.n_cols);
50 counts.zeros(centroids.n_cols);
53 minClusterDistances.fill(DBL_MAX);
54 for (
size_t i = 0; i < centroids.n_cols; ++i)
56 for (
size_t j = i + 1; j < centroids.n_cols; ++j)
58 const double dist = metric.Evaluate(centroids.col(i), centroids.col(j)) /
60 ++distanceCalculations;
63 if (dist < minClusterDistances(i))
64 minClusterDistances(i) = dist;
65 if (dist < minClusterDistances(j))
66 minClusterDistances(j) = dist;
70 for (
size_t i = 0; i < dataset.n_cols; ++i)
72 const double m = std::max(minClusterDistances(assignments[i]),
76 if (upperBounds(i) <= m)
79 newCentroids.col(assignments[i]) += dataset.col(i);
80 ++counts(assignments[i]);
85 upperBounds(i) = metric.Evaluate(dataset.col(i),
86 centroids.col(assignments[i]));
87 ++distanceCalculations;
90 if (upperBounds(i) <= m)
92 newCentroids.col(assignments[i]) += dataset.col(i);
93 ++counts(assignments[i]);
100 lowerBounds(i) = DBL_MAX;
101 for (
size_t c = 0; c < centroids.n_cols; ++c)
103 if (c == assignments[i])
106 const double dist = metric.Evaluate(dataset.col(i), centroids.col(c));
109 if (dist < upperBounds(i))
112 lowerBounds(i) = upperBounds(i);
113 upperBounds(i) = dist;
116 else if (dist < lowerBounds(i))
119 lowerBounds(i) = dist;
122 distanceCalculations += centroids.n_cols - 1;
125 newCentroids.col(assignments[i]) += dataset.col(i);
126 ++counts(assignments[i]);
131 double furthestMovement = 0.0;
132 double secondFurthestMovement = 0.0;
133 size_t furthestMovingCluster = 0;
134 arma::vec centroidMovements(centroids.n_cols);
135 double centroidMovement = 0.0;
136 for (
size_t c = 0; c < centroids.n_cols; ++c)
139 newCentroids.col(c) /= counts(c);
142 const double movement = metric.Evaluate(centroids.col(c),
143 newCentroids.col(c));
144 centroidMovements(c) = movement;
145 centroidMovement += std::pow(movement, 2.0);
146 ++distanceCalculations;
148 if (movement > furthestMovement)
150 secondFurthestMovement = furthestMovement;
151 furthestMovement = movement;
152 furthestMovingCluster = c;
154 else if (movement > secondFurthestMovement)
156 secondFurthestMovement = movement;
161 for (
size_t i = 0; i < dataset.n_cols; ++i)
163 upperBounds(i) += centroidMovements(assignments[i]);
164 if (assignments[i] == furthestMovingCluster)
165 lowerBounds(i) -= secondFurthestMovement;
167 lowerBounds(i) -= furthestMovement;
170 Log::Info <<
"Hamerly prunes: " << hamerlyPruned <<
".\n";
172 return std::sqrt(centroidMovement);
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
HamerlyKMeans(const MatType &dataset, MetricType &metric)
Construct the HamerlyKMeans object, which must store several sets of bounds.
Definition: hamerly_kmeans_impl.hpp:22
double Iterate(const arma::mat ¢roids, arma::mat &newCentroids, arma::Col< size_t > &counts)
Run a single iteration of Hamerly's algorithm, updating the given centroids into the newCentroids mat...
Definition: hamerly_kmeans_impl.hpp:32
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84