12 #ifndef MLPACK_METHODS_DBSCAN_DBSCAN_IMPL_HPP 13 #define MLPACK_METHODS_DBSCAN_DBSCAN_IMPL_HPP 23 template<
typename RangeSearchType,
typename Po
intSelectionPolicy>
26 const size_t minPoints,
28 RangeSearchType rangeSearch,
29 PointSelectionPolicy pointSelector) :
33 rangeSearch(rangeSearch),
34 pointSelector(pointSelector)
43 template<
typename RangeSearchType,
typename Po
intSelectionPolicy>
44 template<
typename MatType>
51 arma::Row<size_t> assignments(data.n_cols);
52 assignments.fill(SIZE_MAX);
54 return Cluster(data, assignments, centroids);
61 template<
typename RangeSearchType,
typename Po
intSelectionPolicy>
62 template<
typename MatType>
65 arma::Row<size_t>& assignments,
68 const size_t numClusters =
Cluster(data, assignments);
71 centroids.zeros(data.n_rows, numClusters);
74 arma::Row<size_t> counts;
75 counts.zeros(numClusters);
76 for (
size_t i = 0; i < data.n_cols; ++i)
78 if (assignments[i] != SIZE_MAX)
80 centroids.col(assignments[i]) += data.col(i);
81 ++counts[assignments[i]];
87 for (
size_t i = 0; i < numClusters; ++i)
88 centroids.col(i) /= counts[i];
97 template<
typename RangeSearchType,
typename Po
intSelectionPolicy>
98 template<
typename MatType>
101 arma::Row<size_t>& assignments)
105 rangeSearch.Train(data);
108 BatchCluster(data, uf);
110 PointwiseCluster(data, uf);
113 assignments.set_size(data.n_cols);
114 for (
size_t i = 0; i < data.n_cols; ++i)
115 assignments[i] = uf.
Find(i);
118 const size_t numClusters = arma::max(assignments) + 1;
119 arma::Col<size_t> counts(numClusters, arma::fill::zeros);
120 for (
size_t i = 0; i < assignments.n_elem; ++i)
121 counts[assignments[i]]++;
124 size_t currentCluster = 0;
125 arma::Col<size_t> newAssignments(numClusters);
126 for (
size_t i = 0; i < counts.n_elem; ++i)
128 if (counts[i] >= minPoints)
129 newAssignments[i] = currentCluster++;
131 newAssignments[i] = SIZE_MAX;
135 for (
size_t i = 0; i < assignments.n_elem; ++i)
136 assignments[i] = newAssignments[assignments[i]];
138 Log::Info << currentCluster <<
" clusters found." << std::endl;
140 return currentCluster;
149 template<
typename RangeSearchType,
typename Po
intSelectionPolicy>
150 template<
typename MatType>
155 std::vector<std::vector<size_t>> neighbors;
156 std::vector<std::vector<double>> distances;
158 for (
size_t i = 0; i < data.n_cols; ++i)
160 if (i % 10000 == 0 && i > 0)
161 Log::Info <<
"DBSCAN clustering on point " << i <<
"..." << std::endl;
164 rangeSearch.Search(data.col(i),
math::Range(0.0, epsilon), neighbors,
168 for (
size_t j = 0; j < neighbors[0].size(); ++j)
169 uf.
Union(i, neighbors[0][j]);
178 template<
typename RangeSearchType,
typename Po
intSelectionPolicy>
179 template<
typename MatType>
185 std::vector<std::vector<size_t>> neighbors;
186 std::vector<std::vector<double>> distances;
187 Log::Info <<
"Performing range search." << std::endl;
188 rangeSearch.Train(data);
189 rangeSearch.Search(data,
math::Range(0.0, epsilon), neighbors, distances);
190 Log::Info <<
"Range search complete." << std::endl;
193 for (
size_t i = 0; i < data.n_cols; ++i)
196 const size_t index = pointSelector.Select(i, data);
197 for (
size_t j = 0; j < neighbors[index].size(); ++j)
198 uf.
Union(index, neighbors[index][j]);
A Union-Find data structure.
Definition: union_find.hpp:30
DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is a clustering technique descri...
Definition: dbscan.hpp:52
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
void Union(const size_t x, const size_t y)
Union the components containing x and y.
Definition: union_find.hpp:76
DBSCAN(const double epsilon, const size_t minPoints, const bool batchMode=true, RangeSearchType rangeSearch=RangeSearchType(), PointSelectionPolicy pointSelector=PointSelectionPolicy())
Construct the DBSCAN object with the given parameters.
Definition: dbscan_impl.hpp:24
RangeType< double > Range
3.0.0 TODO: break reverse-compatibility by changing RangeType to Range.
Definition: range.hpp:19
size_t Cluster(const MatType &data, arma::mat ¢roids)
Performs DBSCAN clustering on the data, returning number of clusters and also the centroid of each cl...
Definition: dbscan_impl.hpp:45
size_t Find(const size_t x)
Returns the component containing an element.
Definition: union_find.hpp:56
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84