// Include-guard check/definition for this implementation header, followed by
// the template header of the Cluster() overload that writes `centroids`.
// NOTE(review): this listing carries embedded line numbers and skips lines
// (e.g. 24 -> 26, 36 -> 40), so the signature line holding the function name
// and the `data` parameter, the braces, and the statements that declare or
// advance `curSample`, `sample` and `kmeans` are not visible here.
14 #ifndef MLPACK_METHODS_KMEANS_REFINED_START_IMPL_HPP 15 #define MLPACK_METHODS_KMEANS_REFINED_START_IMPL_HPP 24 template<
typename MatType>
26 const size_t clusters,
27 arma::mat& centroids)
// Size of each sample: `percentage` (not declared in the visible lines) times
// the number of columns of `data`, converted to size_t.
const 30 const size_t numPoints = size_t(percentage * data.n_cols);
// Buffer for one sample: same row count as `data`, `numPoints` columns.
31 MatType sampledData(data.n_rows, numPoints);
// One flag per column of `data`, all initially false; set when that column has
// been copied into the current sample.
33 std::vector<bool> pointsUsed(data.n_cols,
false);
// Collects the centroids from every round: `clusters` columns for each of the
// `samplings` rounds.
34 arma::mat sampledCentroids(data.n_rows, samplings * clusters);
// One round per sampling.
36 for (
size_t i = 0; i < samplings; ++i)
// Keep going until the sample buffer holds `numPoints` columns.
40 while (curSample < numPoints)
// Only accept a column that has not already been taken in this round.
// NOTE(review): how `sample` is chosen is not visible; presumably a random
// column index (the listing's reference section mentions RandInt) — confirm.
45 if (!pointsUsed[sample])
// Mark the column as taken and copy it into the next free sample slot.
48 pointsUsed[sample] =
true;
49 sampledData.col(curSample) = data.col(sample);
// Cluster the sample; the result is written into `centroids`.
59 kmeans.
Cluster(sampledData, clusters, centroids);
// Store this round's centroids in the i-th group of `clusters` columns.
62 sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids;
// Clear all flags so the next round can draw from every column again.
64 pointsUsed.assign(data.n_cols,
false);
// Cluster the collected per-round centroids; the result, written into
// `centroids`, is this function's output.
69 kmeans.
Cluster(sampledCentroids, clusters, centroids);
// Cluster() overload that fills `assignments` with one cluster index per
// column of `data`.
// NOTE(review): this listing carries embedded line numbers and skips lines
// (e.g. 72 -> 74, 90 -> 99), so the signature line holding the function name
// and the `data` parameter, the braces, the declaration of `centroids`, the
// computation of `distance` and the update of `closestCluster` are not visible
// here.
72 template<
typename MatType>
74 const size_t clusters,
75 arma::Row<size_t>& assignments)
// Compute the centroids first by calling the three-argument Cluster().
const 80 Cluster(data, clusters, centroids);
// One assignment slot per column of `data`.
83 assignments.set_size(data.n_cols);
// Visit every column of `data`.
84 for (
size_t i = 0; i < data.n_cols; ++i)
// Start with an infinite best distance and an index (`clusters`) that lies
// outside the range [0, clusters) scanned by the inner loop.
87 double minDistance = std::numeric_limits<double>::infinity();
88 size_t closestCluster = clusters;
// Scan every cluster index.
90 for (
size_t j = 0; j < clusters; ++j)
// Keep the smallest distance seen so far.
// NOTE(review): `closestCluster` is presumably set to j in the same branch
// (that line is not visible) — confirm.
99 if (distance < minDistance)
101 minDistance = distance;
// Record the chosen cluster index for column i.
107 assignments[i] = closestCluster;
void Cluster(const MatType &data, const size_t clusters, arma::Row< size_t > &assignments, const bool initialGuess=false)
Perform k-means clustering on the data, returning a list of cluster assignments.
Definition: kmeans_impl.hpp:124
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
void Cluster(const MatType &data, const size_t clusters, arma::mat &centroids) const
Partition the given dataset into the given number of clusters according to the random sampling scheme...
Definition: refined_start_impl.hpp:25
static VecTypeA::elem_type Evaluate(const VecTypeA &a, const VecTypeB &b)
Computes the distance between two points.
Definition: lmetric_impl.hpp:24
int RandInt(const int hiExclusive)
Generates a uniform random integer.
Definition: random.hpp:110
This class implements K-Means clustering, using a variety of possible implementations of Lloyd's algorithm.
Definition: kmeans.hpp:73