12 #ifndef MLPACK_METHODS_KMEANS_MAX_VARIANCE_NEW_CLUSTER_IMPL_HPP 13 #define MLPACK_METHODS_KMEANS_MAX_VARIANCE_NEW_CLUSTER_IMPL_HPP 24 template<
typename MetricType,
typename MatType>
26 const size_t emptyCluster,
27 const arma::mat& oldCentroids,
28 arma::mat& newCentroids,
29 arma::Col<size_t>& clusterCounts,
31 const size_t iteration)
34 if (iteration != this->iteration || assignments.n_elem != data.n_cols)
35 Precalculate(data, oldCentroids, clusterCounts, metric);
36 this->iteration = iteration;
39 arma::uword maxVarCluster = 0;
40 variances.max(maxVarCluster);
44 if (variances[maxVarCluster] == 0.0)
48 size_t furthestPoint = data.n_cols;
49 double maxDistance = -DBL_MAX;
50 for (
size_t i = 0; i < data.n_cols; ++i)
52 if (assignments[i] == maxVarCluster)
54 const double distance = std::pow(metric.Evaluate(data.col(i),
55 newCentroids.col(maxVarCluster)), 2.0);
57 if (distance > maxDistance)
59 maxDistance = distance;
66 newCentroids.col(maxVarCluster) *= (double(clusterCounts[maxVarCluster]) /
67 double(clusterCounts[maxVarCluster] - 1));
68 newCentroids.col(maxVarCluster) -= (1.0 / (clusterCounts[maxVarCluster] -
69 1.0)) * arma::vec(data.col(furthestPoint));
70 clusterCounts[maxVarCluster]--;
71 clusterCounts[emptyCluster]++;
72 newCentroids.col(emptyCluster) = arma::vec(data.col(furthestPoint));
73 assignments[furthestPoint] = emptyCluster;
76 variances[emptyCluster] = 0;
82 if (clusterCounts[maxVarCluster] <= 1)
84 variances[maxVarCluster] = 0;
89 variances[maxVarCluster] = (1.0 / clusterCounts[maxVarCluster]) *
90 ((clusterCounts[maxVarCluster] + 1) * variances[maxVarCluster] -
95 Log::Debug <<
"Point " << furthestPoint <<
" assigned to empty cluster " <<
96 emptyCluster <<
".\n";
100 template<
typename Archive>
110 if (cereal::is_loading<Archive>())
111 assignments.set_size(0);
114 template<
typename MetricType,
typename MatType>
115 void MaxVarianceNewCluster::Precalculate(
const MatType&
data,
116 const arma::mat& oldCentroids,
117 arma::Col<size_t>& clusterCounts,
123 variances.zeros(oldCentroids.n_cols);
124 assignments.set_size(data.n_cols);
128 for (
size_t i = 0; i < data.n_cols; ++i)
131 double minDistance = std::numeric_limits<double>::infinity();
132 size_t closestCluster = oldCentroids.n_cols;
134 for (
size_t j = 0; j < oldCentroids.n_cols; ++j)
136 const double distance = metric.Evaluate(data.col(i), oldCentroids.col(j));
138 if (distance < minDistance)
140 minDistance = distance;
145 assignments[i] = closestCluster;
146 variances[closestCluster] += std::pow(metric.Evaluate(data.col(i),
147 oldCentroids.col(closestCluster)), 2.0);
153 for (
size_t i = 0; i < clusterCounts.n_elem; ++i)
154 if (clusterCounts[i] <= 1)
157 variances[i] /= clusterCounts[i];
// Cross-reference notes from the generated documentation:
//
// static MLPACK_EXPORT util::NullOutStream Debug
//   MLPACK_EXPORT is required for global variables, so that they are properly
//   exported by the Windows compiler.
//   Definition: log.hpp:79
//
// Linear algebra utility functions, generally performed on matrices or
// vectors.
//   Definition: cv.hpp:1
//
// void EmptyCluster(const MatType& data, const size_t emptyCluster,
//                   const arma::mat& oldCentroids, arma::mat& newCentroids,
//                   arma::Col<size_t>& clusterCounts, MetricType& metric,
//                   const size_t iteration)
//   Take the point furthest from the centroid of the cluster with maximum
//   variance to be a new cluster.
//   Definition: max_variance_new_cluster_impl.hpp:25
//
// void serialize(Archive& ar, const uint32_t version)
//   Serialize the object.
//   Definition: max_variance_new_cluster_impl.hpp:101