mlpack
pca_impl.hpp
Go to the documentation of this file.
1 
16 #ifndef MLPACK_METHODS_PCA_PCA_IMPL_HPP
17 #define MLPACK_METHODS_PCA_PCA_IMPL_HPP
18 
19 #include <mlpack/prereqs.hpp>
21 #include "pca.hpp"
22 
23 namespace mlpack {
24 namespace pca {
25 
26 template<typename DecompositionPolicy>
28  const bool scaleData, const DecompositionPolicy& decomposition) :
29  scaleData(scaleData),
30  decomposition(decomposition)
31 { }
32 
41 template<typename DecompositionPolicy>
42 void PCA<DecompositionPolicy>::Apply(const arma::mat& data,
43  arma::mat& transformedData,
44  arma::vec& eigVal,
45  arma::mat& eigvec)
46 {
47  Timer::Start("pca");
48 
49  // Center the data into a temporary matrix.
50  arma::mat centeredData;
51  math::Center(data, centeredData);
52 
53  // Scale the data if the user ask for.
54  ScaleData(centeredData);
55 
56  decomposition.Apply(data, centeredData, transformedData, eigVal, eigvec,
57  data.n_rows);
58 
59  Timer::Stop("pca");
60 }
61 
69 template<typename DecompositionPolicy>
70 void PCA<DecompositionPolicy>::Apply(const arma::mat& data,
71  arma::mat& transformedData,
72  arma::vec& eigVal)
73 {
74  arma::mat eigvec;
75  Apply(data, transformedData, eigVal, eigvec);
76 }
77 
84 template<typename DecompositionPolicy>
85 void PCA<DecompositionPolicy>::Apply(const arma::mat& data,
86  arma::mat& transformedData)
87 {
88  arma::mat eigvec;
89  arma::vec eigVal;
90  Apply(data, transformedData, eigVal, eigvec);
91 }
92 
104 template<typename DecompositionPolicy>
106  const size_t newDimension)
107 {
108  // Parameter validation.
109  if (newDimension == 0)
110  Log::Fatal << "PCA::Apply(): newDimension (" << newDimension << ") cannot "
111  << "be zero!" << std::endl;
112  if (newDimension > data.n_rows)
113  Log::Fatal << "PCA::Apply(): newDimension (" << newDimension << ") cannot "
114  << "be greater than the existing dimensionality of the data ("
115  << data.n_rows << ")!" << std::endl;
116 
117  arma::mat eigvec;
118  arma::vec eigVal;
119 
120  Timer::Start("pca");
121 
122  // Center the data into a temporary matrix.
123  arma::mat centeredData;
124  math::Center(data, centeredData);
125 
126  // Scale the data if the user ask for.
127  ScaleData(centeredData);
128 
129  decomposition.Apply(data, centeredData, data, eigVal, eigvec, newDimension);
130 
131  if (newDimension < eigvec.n_rows)
132  // Drop unnecessary rows.
133  data.shed_rows(newDimension, data.n_rows - 1);
134 
135  // The svd method returns only non-zero eigenvalues so we have to calculate
136  // the right dimension before calculating the amount of variance retained.
137  double eigDim = std::min(newDimension - 1, (size_t) eigVal.n_elem - 1);
138 
139  Timer::Stop("pca");
140 
141  // Calculate the total amount of variance retained.
142  return (sum(eigVal.subvec(0, eigDim)) / sum(eigVal));
143 }
144 
155 template<typename DecompositionPolicy>
157  const double varRetained)
158 {
159  // Parameter validation.
160  if (varRetained < 0)
161  Log::Fatal << "PCA::Apply(): varRetained (" << varRetained << ") must be "
162  << "greater than or equal to 0." << std::endl;
163  if (varRetained > 1)
164  Log::Fatal << "PCA::Apply(): varRetained (" << varRetained << ") should be "
165  << "less than or equal to 1." << std::endl;
166 
167  arma::mat eigvec;
168  arma::vec eigVal;
169 
170  Apply(data, data, eigVal, eigvec);
171 
172  // Calculate the dimension we should keep.
173  size_t newDimension = 0;
174  double varSum = 0.0;
175  eigVal /= arma::sum(eigVal); // Normalize eigenvalues.
176  while ((varSum < varRetained) && (newDimension < eigVal.n_elem))
177  {
178  varSum += eigVal[newDimension];
179  ++newDimension;
180  }
181 
182  // varSum is the actual variance we will retain.
183  if (newDimension < eigVal.n_elem)
184  data.shed_rows(newDimension, data.n_rows - 1);
185 
186  return varSum;
187 }
188 
189 } // namespace pca
190 } // namespace mlpack
191 
192 #endif
static void Start(const std::string &name)
Start the given timer.
Definition: timers.cpp:28
static MLPACK_EXPORT util::PrefixedOutStream Fatal
Prints fatal messages prefixed with [FATAL], then terminates the program.
Definition: log.hpp:90
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.
PCA(const bool scaleData=false, const DecompositionPolicy &decomposition=DecompositionPolicy())
Create the PCA object, specifying if the data should be scaled in each dimension by standard deviation when PCA is performed.
Definition: pca_impl.hpp:27
bool ScaleData() const
Get whether or not this PCA object will scale (by standard deviation) the data when PCA is performed...
Definition: pca.hpp:118
void Apply(const arma::mat &data, arma::mat &transformedData, arma::vec &eigVal, arma::mat &eigvec)
Apply Principal Component Analysis to the provided data set.
Definition: pca_impl.hpp:42
static void Stop(const std::string &name)
Stop the given timer.
Definition: timers.cpp:36
void Center(const arma::mat &x, arma::mat &xCentered)
Creates a centered matrix, where centering is done by subtracting the sum over the columns (a column vector) from each column of the matrix.
Definition: lin_alg.cpp:43