/**
 * @file methods/adaboost/adaboost_impl.hpp
 * @author Udit Saxena
 *
 * Implementation of the AdaBoost class.
 *
 * @code
 * @article{schapire1999improved,
 *   author = {Schapire, Robert E. and Singer, Yoram},
 *   title = {Improved Boosting Algorithms Using Confidence-rated Predictions},
 *   journal = {Machine Learning},
 *   volume = {37},
 *   number = {3},
 *   month = dec,
 *   year = {1999},
 *   issn = {0885-6125},
 *   pages = {297--336},
 * }
 * @endcode
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license. You should have received a copy of the
 * 3-clause BSD license along with mlpack. If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP
#define MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP

#include "adaboost.hpp"

namespace mlpack {
namespace adaboost {

// Construct the AdaBoost object and train it immediately on the given data.
template<typename WeakLearnerType, typename MatType>
AdaBoost<WeakLearnerType, MatType>::AdaBoost(
    const MatType& data,
    const arma::Row<size_t>& labels,
    const size_t numClasses,
    const WeakLearnerType& other,
    const size_t iterations,
    const double tol)
{
  Train(data, labels, numClasses, other, iterations, tol);
}
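
// Example usage (a sketch, not from the original file; it assumes the mlpack
// Perceptron<> weak learner and arma::mat training data):
//
//   using mlpack::perceptron::Perceptron;
//   Perceptron<> p(trainData, trainLabels, numClasses);
//   AdaBoost<Perceptron<>> boost(trainData, trainLabels, numClasses, p);
//   arma::Row<size_t> predictions;
//   boost.Classify(testData, predictions);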

// Empty constructor.
template<typename WeakLearnerType, typename MatType>
AdaBoost<WeakLearnerType, MatType>::AdaBoost(const double tolerance) :
    numClasses(0),
    tolerance(tolerance)
{
  // Nothing to do.
}

// Train AdaBoost.
template<typename WeakLearnerType, typename MatType>
double AdaBoost<WeakLearnerType, MatType>::Train(
    const MatType& data,
    const arma::Row<size_t>& labels,
    const size_t numClasses,
    const WeakLearnerType& other,
    const size_t iterations,
    const double tolerance)
{
  // Clear information from previous runs.
  wl.clear();
  alpha.clear();

  this->tolerance = tolerance;
  this->numClasses = numClasses;

  // crt holds the rt value from the previous round, so that the optimization
  // can be terminated when rt changes by less than the tolerance.
  double rt, crt = 0.0, alphat = 0.0, zt;
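
  // ztProduct accumulates the product of the normalization constants zt over
  // all rounds; Schapire & Singer (1999) show that it is an upper bound on
  // the training (Hamming) error of the final hypothesis.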
  double ztProduct = 1.0;

  // To be used for prediction by the weak learner.
  arma::Row<size_t> predictedLabels(labels.n_cols);

  // Use tempData to modify input data for incorporating weights.
  MatType tempData(data);

  // This matrix is a helper matrix used to calculate the final hypothesis.
  arma::mat sumFinalH = arma::zeros<arma::mat>(numClasses,
      predictedLabels.n_cols);

  // Load the initial weights into a 2-D matrix.
  const double initWeight = 1.0 / double(data.n_cols * numClasses);
  arma::mat D(numClasses, data.n_cols);
  D.fill(initWeight);
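  // (D is the AdaBoost.MH weight distribution: one weight per (class, point)
  // pair rather than one per point, initially uniform.)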

  // Weights are stored in this row vector.
  arma::rowvec weights(predictedLabels.n_cols);

  // This is the final hypothesis.
  arma::Row<size_t> finalH(predictedLabels.n_cols);

  // Now, start the boosting rounds.
  for (size_t i = 0; i < iterations; ++i)
  {
    // Initialized to zero in every round. rt is used for the calculation of
    // alphat; it is the weighted correlation of the weak learner's
    // predictions with the true labels:
    //   rt = sum_i D(i) * y(i) * ht(x(i)).
    rt = 0.0;

    // zt is used for weight normalization.
    zt = 0.0;

    // Build the weight vectors.
    weights = arma::sum(D);

    // Use the existing weak learner to train a new one with the new weights.
    WeakLearnerType w(other, tempData, labels, numClasses, weights);
    w.Classify(tempData, predictedLabels);

    // Now from predictedLabels, build ht, the weak hypothesis.
    // buildClassificationMatrix(ht, predictedLabels);

    // Now, compute rt, which is needed to calculate alpha(t).
    for (size_t j = 0; j < D.n_cols; ++j)
    {
      if (predictedLabels(j) == labels(j))
        rt += arma::accu(D.col(j));
      else
        rt -= arma::accu(D.col(j));
    }

    // Terminate if rt has changed by less than the tolerance.
    if ((i > 0) && (std::abs(rt - crt) < tolerance))
      break;

    // Check if the model has converged: rt == 1 means the weak learner
    // classified everything perfectly, and alphat would diverge.
    if (rt >= 1.0)
    {
      // Save the weak learner and terminate.
      alpha.push_back(1.0);
      wl.push_back(w);
      break;
    }

    crt = rt;

    // Our goal is to find the alphat which minimizes, or approximately
    // minimizes, the value of Z as a function of alpha.
    alphat = 0.5 * log((1 + rt) / (1 - rt));
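    // (Derivation sketch, following Schapire & Singer (1999): the mass of D
    // on correctly classified points is (1 + rt) / 2 and on misclassified
    // points is (1 - rt) / 2, so
    //   Z(alpha) = ((1 + rt) / 2) * exp(-alpha) + ((1 - rt) / 2) * exp(alpha).
    // Setting dZ/dalpha = 0 gives exp(2 * alpha) = (1 + rt) / (1 - rt), i.e.,
    // alphat = 0.5 * log((1 + rt) / (1 - rt)), where Z = sqrt(1 - rt^2).)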

    alpha.push_back(alphat);
    wl.push_back(w);
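
    // The update below implements
    //   D(k, j) <- D(k, j) * exp(-alphat * y * h) / zt,
    // scaling correctly classified points down by exp(alphat) and
    // misclassified points up by it, before renormalizing by zt.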
    // Now start modifying the weights.
    const double expo = exp(alphat);
    for (size_t j = 0; j < D.n_cols; ++j)
    {
      if (predictedLabels(j) == labels(j))
      {
        for (size_t k = 0; k < D.n_rows; ++k)
        {
          // We calculate zt, the normalization constant.
          D(k, j) /= expo;
          zt += D(k, j); // * exp(-1 * alphat * yt(j, k) * ht(j, k));

          // Add to the final hypothesis matrix.
          // sumFinalH(k, j) += (alphat * ht(k, j));
          if (k == labels(j))
            sumFinalH(k, j) += alphat; // * ht(k, j);
          else
            sumFinalH(k, j) -= alphat;
        }
      }
      else
      {
        for (size_t k = 0; k < D.n_rows; ++k)
        {
          // We calculate zt, the normalization constant.
          D(k, j) *= expo;
          zt += D(k, j);

          // Add to the final hypothesis matrix.
          if (k == labels(j))
            sumFinalH(k, j) += alphat; // * ht(k, j);
          else
            sumFinalH(k, j) -= alphat;
        }
      }
    }

    // Normalize D.
    D /= zt;

    // Accumulate the value of zt for the Hamming loss bound.
    ztProduct *= zt;
  }

  return ztProduct;
}

// Classify the given test points.
template<typename WeakLearnerType, typename MatType>
void AdaBoost<WeakLearnerType, MatType>::Classify(
    const MatType& test,
    arma::Row<size_t>& predictedLabels)
{
  arma::mat probabilities;

  Classify(test, predictedLabels, probabilities);
}

// Classify the given test points and also return class probabilities.
template<typename WeakLearnerType, typename MatType>
void AdaBoost<WeakLearnerType, MatType>::Classify(
    const MatType& test,
    arma::Row<size_t>& predictedLabels,
    arma::mat& probabilities)
{
  arma::Row<size_t> tempPredictedLabels(test.n_cols);

  probabilities.zeros(numClasses, test.n_cols);
  predictedLabels.set_size(test.n_cols);
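
  // Each weak learner votes for its predicted class with weight alpha[i]; the
  // accumulated votes are normalized column-wise into probabilities, and the
  // class with the highest probability becomes the predicted label.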
  for (size_t i = 0; i < wl.size(); ++i)
  {
    wl[i].Classify(test, tempPredictedLabels);

    for (size_t j = 0; j < tempPredictedLabels.n_cols; ++j)
      probabilities(tempPredictedLabels(j), j) += alpha[i];
  }

  arma::colvec pRow;
  arma::uword maxIndex = 0;

  for (size_t i = 0; i < predictedLabels.n_cols; ++i)
  {
    probabilities.col(i) /= arma::accu(probabilities.col(i));
    pRow = probabilities.unsafe_col(i);
    pRow.max(maxIndex);
    predictedLabels(i) = maxIndex;
  }
}

// Serialize the AdaBoost model.
template<typename WeakLearnerType, typename MatType>
template<typename Archive>
void AdaBoost<WeakLearnerType, MatType>::serialize(Archive& ar,
                                                   const uint32_t /* version */)
{
  ar(CEREAL_NVP(numClasses));
  ar(CEREAL_NVP(tolerance));
  ar(CEREAL_NVP(alpha));

  // Now serialize each weak learner.
  if (cereal::is_loading<Archive>())
  {
    wl.clear();
    wl.resize(alpha.size());
  }
  ar(CEREAL_NVP(wl));
}

} // namespace adaboost
} // namespace mlpack

#endif