12 #ifndef MLPACK_METHODS_KMEANS_ELKAN_KMEANS_IMPL_HPP 13 #define MLPACK_METHODS_KMEANS_ELKAN_KMEANS_IMPL_HPP 21 template<
typename MetricType,
typename MatType>
26 distanceCalculations(0)
32 template<
typename MetricType,
typename MatType>
34 arma::mat& newCentroids,
35 arma::Col<size_t>& counts)
38 newCentroids.zeros(centroids.n_rows, centroids.n_cols);
39 counts.zeros(centroids.n_cols);
43 clusterDistances.set_size(centroids.n_cols, centroids.n_cols);
47 clusterDistances.diag().fill(DBL_MAX);
50 std::vector<bool> mustRecalculate(dataset.n_cols,
true);
53 if (lowerBounds.n_rows != centroids.n_cols)
55 lowerBounds.set_size(centroids.n_cols, dataset.n_cols);
56 assignments.set_size(dataset.n_cols);
57 upperBounds.set_size(dataset.n_cols);
60 upperBounds.fill(DBL_MAX);
66 for (
size_t i = 0; i < centroids.n_cols; ++i)
68 for (
size_t j = i + 1; j < centroids.n_cols; ++j)
70 const double distance = metric.Evaluate(centroids.col(i),
72 distanceCalculations++;
73 clusterDistances(i, j) = distance;
74 clusterDistances(j, i) = distance;
80 minClusterDistances = 0.5 * arma::min(clusterDistances).t();
83 for (
size_t i = 0; i < dataset.n_cols; ++i)
86 if (upperBounds(i) <= minClusterDistances(assignments[i]))
89 counts(assignments[i])++;
90 newCentroids.col(assignments[i]) += arma::vec(dataset.col(i));
95 for (
size_t c = 0; c < centroids.n_cols; ++c)
99 if (assignments[i] == c)
102 if (upperBounds(i) <= lowerBounds(c, i))
105 if (upperBounds(i) <= 0.5 * clusterDistances(assignments[i], c))
111 if (mustRecalculate[i])
113 mustRecalculate[i] =
false;
114 dist = metric.Evaluate(dataset.col(i), centroids.col(assignments[i]));
115 lowerBounds(assignments[i], i) = dist;
116 upperBounds(i) = dist;
117 distanceCalculations++;
120 if (upperBounds(i) <= lowerBounds(c, i))
123 if (upperBounds(i) <= 0.5 * clusterDistances(assignments[i], c))
128 dist = upperBounds(i);
132 if (dist > lowerBounds(c, i) ||
133 dist > 0.5 * clusterDistances(assignments[i], c))
136 const double pointDist = metric.Evaluate(dataset.col(i),
138 lowerBounds(c, i) = pointDist;
139 distanceCalculations++;
140 if (pointDist < dist)
142 upperBounds(i) = pointDist;
152 newCentroids.col(assignments[i]) += arma::vec(dataset.col(i));
153 counts[assignments[i]]++;
157 arma::vec moveDistances(centroids.n_cols);
159 for (
size_t c = 0; c < centroids.n_cols; ++c)
162 newCentroids.col(c) /= counts[c];
164 moveDistances(c) = metric.Evaluate(newCentroids.col(c), centroids.col(c));
165 cNorm += std::pow(moveDistances(c), 2.0);
166 distanceCalculations++;
169 for (
size_t i = 0; i < dataset.n_cols; ++i)
174 for (
size_t c = 0; c < centroids.n_cols; ++c)
175 lowerBounds(c, i) -= moveDistances(c);
180 upperBounds(i) += moveDistances(assignments[i]);
183 return std::sqrt(cNorm);
ElkanKMeans(const MatType &dataset, MetricType &metric)
Construct the ElkanKMeans object, which must store several sets of bounds.
Definition: elkan_kmeans_impl.hpp:22
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
double Iterate(const arma::mat ¢roids, arma::mat &newCentroids, arma::Col< size_t > &counts)
Run a single iteration of Elkan's algorithm, updating the given centroids into the newCentroids matri...
Definition: elkan_kmeans_impl.hpp:33