31 template<
typename InitialPartitionPolicy>
34 static const bool value =
36 GivesCentroidsCheck<InitialPartitionPolicy,
37 void(InitialPartitionPolicy::*)(
const arma::mat&,
39 arma::mat&)>::value ||
41 GivesCentroidsCheck<InitialPartitionPolicy,
42 void(*)(
const arma::mat&,
const size_t, arma::mat&)>::value;
47 template<
typename MatType,
48 typename InitialPartitionPolicy>
50 InitialPartitionPolicy& ipp,
52 const size_t clusters,
53 arma::Row<size_t>& assignments,
55 const typename std::enable_if_t<
58 ipp.Cluster(data, clusters, assignments);
65 template<
typename MatType,
66 typename InitialPartitionPolicy>
68 InitialPartitionPolicy& ipp,
70 const size_t clusters,
73 const typename std::enable_if_t<
76 ipp.Cluster(data, clusters, centroids);
84 template<
typename MetricType,
85 typename InitialPartitionPolicy,
86 typename EmptyClusterPolicy,
87 template<
class,
class>
class LloydStepType,
91 InitialPartitionPolicy,
96 const MetricType metric,
97 const InitialPartitionPolicy partitioner,
98 const EmptyClusterPolicy emptyClusterAction) :
99 maxIterations(maxIterations),
101 partitioner(partitioner),
102 emptyClusterAction(emptyClusterAction)
113 template<
typename MetricType,
114 typename InitialPartitionPolicy,
115 typename EmptyClusterPolicy,
116 template<
class,
class>
class LloydStepType,
120 InitialPartitionPolicy,
125 const size_t clusters,
126 arma::Row<size_t>& assignments,
127 const bool initialGuess)
129 arma::mat centroids(data.n_rows, clusters);
130 Cluster(data, clusters, assignments, centroids, initialGuess);
137 template<
typename MetricType,
138 typename InitialPartitionPolicy,
139 typename EmptyClusterPolicy,
140 template<
class,
class>
class LloydStepType,
144 InitialPartitionPolicy,
149 const size_t clusters,
150 arma::mat& centroids,
151 const bool initialGuess)
154 if (clusters > data.n_cols)
155 Log::Warn <<
"KMeans::Cluster(): more clusters requested than points given." 157 else if (clusters == 0)
158 Log::Warn <<
"KMeans::Cluster(): zero clusters requested. This probably " 159 <<
"isn't going to work. Brace for crash." << std::endl;
164 if (centroids.n_cols != clusters)
165 Log::Fatal <<
"KMeans::Cluster(): wrong number of initial cluster " 166 <<
"centroids (" << centroids.n_cols <<
", should be " << clusters
167 <<
")!" << std::endl;
169 if (centroids.n_rows != data.n_rows)
170 Log::Fatal <<
"KMeans::Cluster(): initial cluster centroids have wrong " 171 <<
" dimensionality (" << centroids.n_rows <<
", should be " 172 << data.n_rows <<
")!" << std::endl;
183 arma::Row<size_t> assignments;
185 clusters, assignments, centroids);
190 arma::Row<size_t> counts;
191 counts.zeros(clusters);
192 centroids.zeros(data.n_rows, clusters);
193 for (
size_t i = 0; i < data.n_cols; ++i)
195 centroids.col(assignments[i]) += arma::vec(data.col(i));
196 counts[assignments[i]]++;
199 for (
size_t i = 0; i < clusters; ++i)
201 centroids.col(i) /= counts[i];
206 arma::Col<size_t> counts(clusters);
208 size_t iteration = 0;
210 LloydStepType<MetricType, MatType> lloydStep(data, metric);
211 arma::mat centroidsOther;
218 if (iteration % 2 == 0)
219 cNorm = lloydStep.Iterate(centroids, centroidsOther, counts);
221 cNorm = lloydStep.Iterate(centroidsOther, centroids, counts);
225 for (
size_t i = 0; i < counts.n_elem; ++i)
229 Log::Info <<
"Cluster " << i <<
" is empty.\n";
230 if (iteration % 2 == 0)
231 emptyClusterAction.EmptyCluster(data, i, centroids, centroidsOther,
232 counts, metric, iteration);
234 emptyClusterAction.EmptyCluster(data, i, centroidsOther, centroids,
235 counts, metric, iteration);
240 Log::Info <<
"KMeans::Cluster(): iteration " << iteration <<
", residual " 242 if (std::isnan(cNorm) || std::isinf(cNorm))
244 }
while (cNorm > 1e-5 && iteration != maxIterations);
249 if ((iteration - 1) % 2 == 0)
250 centroids.steal_mem(centroidsOther);
252 if (iteration != maxIterations)
254 Log::Info <<
"KMeans::Cluster(): converged after " << iteration
255 <<
" iterations." << std::endl;
259 Log::Info <<
"KMeans::Cluster(): terminated after limit of " << iteration
260 <<
" iterations." << std::endl;
262 Log::Info << lloydStep.DistanceCalculations() <<
" distance calculations." 270 template<
typename MetricType,
271 typename InitialPartitionPolicy,
272 typename EmptyClusterPolicy,
273 template<
class,
class>
class LloydStepType,
277 InitialPartitionPolicy,
282 const size_t clusters,
283 arma::Row<size_t>& assignments,
284 arma::mat& centroids,
285 const bool initialAssignmentGuess,
286 const bool initialCentroidGuess)
289 if (initialAssignmentGuess)
291 if (assignments.n_elem != data.n_cols)
292 Log::Fatal <<
"KMeans::Cluster(): initial cluster assignments (length " 293 << assignments.n_elem <<
") not the same size as the dataset (size " 294 << data.n_cols <<
")!" << std::endl;
297 arma::Row<size_t> counts;
298 counts.zeros(clusters);
299 centroids.zeros(data.n_rows, clusters);
300 for (
size_t i = 0; i < data.n_cols; ++i)
302 centroids.col(assignments[i]) += arma::vec(data.col(i));
303 counts[assignments[i]]++;
306 for (
size_t i = 0; i < clusters; ++i)
308 centroids.col(i) /= counts[i];
311 Cluster(data, clusters, centroids,
312 initialAssignmentGuess || initialCentroidGuess);
315 assignments.set_size(data.n_cols);
317 #pragma omp parallel for 318 for (omp_size_t i = 0; i < (omp_size_t) data.n_cols; ++i)
321 double minDistance = std::numeric_limits<double>::infinity();
322 size_t closestCluster = centroids.n_cols;
324 for (
size_t j = 0; j < centroids.n_cols; ++j)
326 const double distance = metric.Evaluate(data.col(i), centroids.col(j));
328 if (distance < minDistance)
330 minDistance = distance;
336 assignments[i] = closestCluster;
340 template<
typename MetricType,
341 typename InitialPartitionPolicy,
342 typename EmptyClusterPolicy,
343 template<
class,
class>
class LloydStepType,
345 template<
typename Archive>
347 InitialPartitionPolicy,
352 ar(CEREAL_NVP(maxIterations));
353 ar(CEREAL_NVP(metric));
354 ar(CEREAL_NVP(partitioner));
355 ar(CEREAL_NVP(emptyClusterAction));
void Cluster(const MatType &data, const size_t clusters, arma::Row< size_t > &assignments, const bool initialGuess=false)
Perform k-means clustering on the data, returning a list of cluster assignments.
Definition: kmeans_impl.hpp:124
static MLPACK_EXPORT util::PrefixedOutStream Fatal
Prints fatal messages prefixed with [FATAL], then terminates the program.
Definition: log.hpp:90
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
void serialize(Archive &ar, const uint32_t version)
Serialize the k-means object.
Definition: kmeans_impl.hpp:350
HAS_MEM_FUNC(Cluster, GivesCentroidsCheck)
This gives us a GivesCentroids object that we can use to tell whether or not an InitialPartitionPolic...
bool GetInitialAssignmentsOrCentroids(InitialPartitionPolicy &ipp, const MatType &data, const size_t clusters, arma::Row< size_t > &assignments, arma::mat &, const typename std::enable_if_t< !GivesCentroids< InitialPartitionPolicy >::value > *=0)
Call the initial partition policy, if it returns assignments.
Definition: kmeans_impl.hpp:49
static MLPACK_EXPORT util::PrefixedOutStream Warn
Prints warning messages prefixed with [WARN ].
Definition: log.hpp:87
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
'value' is true if the InitialPartitionPolicy class has a member Cluster(const arma::mat& data...
Definition: kmeans_impl.hpp:32
This class implements K-Means clustering, using a variety of possible implementations of Lloyd's algo...
Definition: kmeans.hpp:73
static void Assert(bool condition, const std::string &message="Assert Failed.")
Checks if the specified condition is true.
Definition: log.cpp:38