mlpack
linear_svm_function_impl.hpp
Go to the documentation of this file.
1 
14 #ifndef MLPACK_METHODS_LINEAR_SVM_LINEAR_SVM_FUNCTION_IMPL_HPP
15 #define MLPACK_METHODS_LINEAR_SVM_LINEAR_SVM_FUNCTION_IMPL_HPP
16 
19 
20 // In case it hasn't been included yet.
21 #include "linear_svm_function.hpp"
22 
23 namespace mlpack {
24 namespace svm {
25 
26 template <typename MatType>
28  const MatType& dataset,
29  const arma::Row<size_t>& labels,
30  const size_t numClasses,
31  const double lambda,
32  const double delta,
33  const bool fitIntercept) :
34  dataset(math::MakeAlias(const_cast<MatType&>(dataset), false)),
35  numClasses(numClasses),
36  lambda(lambda),
37  delta(delta),
38  fitIntercept(fitIntercept)
39 {
40  InitializeWeights(initialPoint, dataset.n_rows, numClasses, fitIntercept);
41  initialPoint *= 0.005;
42 
43  // Calculate the label matrix.
44  GetGroundTruthMatrix(labels, groundTruth);
45 }
46 
52 template <typename MatType>
54  arma::mat &weights,
55  const size_t featureSize,
56  const size_t numClasses,
57  const bool fitIntercept)
58 {
59  // Initialize values to 0.005 * r. 'r' is a matrix of random values taken from
60  // a Gaussian distribution with mean zero and variance one.
61  if (fitIntercept)
62  weights.randn(featureSize + 1, numClasses);
63  else
64  weights.randn(featureSize, numClasses);
65  weights *= 0.005;
66 }
67 
73 template <typename MatType>
75  const arma::Row<size_t>& labels,
76  arma::sp_mat& groundTruth)
77 {
78  // Calculate the ground truth matrix according to the labels passed. The
79  // ground truth matrix is a matrix of dimensions 'numClasses * numExamples',
80  // where each column contains a single entry of '1', marking the label
81  // corresponding to that example.
82 
83  // Row pointers and column pointers corresponding to the entries.
84  arma::uvec rowPointers(labels.n_elem);
85  arma::uvec colPointers(labels.n_elem + 1);
86 
87  // colPointers[0] needs to be set to 0.
88  colPointers[0] = 0;
89 
90  // Row pointers are the labels of the examples, and column pointers are the
91  // number of cumulative entries made uptil that column.
92  for (size_t i = 0; i < labels.n_elem; ++i)
93  {
94  rowPointers(i) = labels(i);
95  colPointers(i + 1) = i + 1;
96  }
97 
98  // All entries are '1'.
99  arma::vec values;
100  values.ones(labels.n_elem);
101 
102  // Calculate the matrix.
103  groundTruth = arma::sp_mat(rowPointers, colPointers, values, numClasses,
104  labels.n_elem);
105 }
106 
110 template <typename MatType>
112 {
113  // Determine new ordering.
114  arma::uvec ordering = arma::shuffle(arma::linspace<arma::uvec>(0,
115  dataset.n_cols - 1, dataset.n_cols));
116 
117  // Re-sort data.
118  arma::mat newData = dataset.cols(ordering);
119  math::ClearAlias(dataset);
120  dataset = std::move(newData);
121 
122  // Assemble data for batch constructor. We need reverse orderings though...
123  arma::uvec reverseOrdering(ordering.n_elem);
124  for (size_t i = 0; i < ordering.n_elem; ++i)
125  reverseOrdering[ordering[i]] = i;
126 
127  arma::umat newLocations(2, groundTruth.n_nonzero);
128  arma::vec values(groundTruth.n_nonzero);
129  arma::sp_mat::const_iterator it = groundTruth.begin();
130  size_t loc = 0;
131  while (it != groundTruth.end())
132  {
133  newLocations(0, loc) = reverseOrdering(it.col());
134  newLocations(1, loc) = it.row();
135  values(loc) = (*it);
136 
137  ++it;
138  ++loc;
139  }
140 
141  groundTruth = arma::sp_mat(newLocations, values, groundTruth.n_rows,
142  groundTruth.n_cols);
143 }
144 
145 template <typename MatType>
147  const arma::mat& parameters)
148 {
149  // The objective function is the hinge loss function and it is
150  // calculated over all the training examples.
151 
152  // Calculate the loss and regularization terms.
153  // L_i = Σ_i Σ_m max(0, Δ + (w_m x_i + b_m) - (w_{y_i} x_i + b_{y_i}))
154  // where (m != y_i)
155  double loss, regularization;
156 
157  // Scores for each class are evaluated.
158  arma::mat scores;
159 
160  // Check intercept condition.
161  if (!fitIntercept)
162  {
163  scores = parameters.t() * dataset;
164  }
165  else
166  {
167  // When using `fitIntercept` we need to add the `b_i` term explicitly.
168  // The first `parameters.n_rows - 1` rows of parameters holds the value
169  // of Weights `w_i`, and the last row holds `b_i`.
170  // On calculating the score, we add `b_i` term to each element of
171  // `i_th` row of `scores`.
172  scores = parameters.rows(0, dataset.n_rows - 1).t() * dataset
173  + arma::repmat(parameters.row(dataset.n_rows).t(), 1,
174  dataset.n_cols);
175  }
176 
177  // Evaluate the margin by the following steps:
178  // - Subtracting the score of correct class from all the class scores.
179  // - Adding the margin parameter `delta`.
180  // - Removing the `delta` parameter from correct class label in each
181  // column.
182  arma::mat margin = scores - (arma::repmat(arma::ones(numClasses).t()
183  * (scores % groundTruth), numClasses, 1)) + delta
184  - (delta * groundTruth);
185 
186  // The Hinge Loss Function
187  loss = arma::accu(arma::clamp(margin, 0.0, DBL_MAX)) / dataset.n_cols;
188 
189  // Adding the regularization term.
190  regularization = 0.5 * lambda * arma::dot(parameters, parameters);
191 
192  return loss + regularization;
193 }
194 
195 template <typename MatType>
197  const arma::mat& parameters,
198  const size_t firstId,
199  const size_t batchSize)
200 {
201  const size_t lastId = firstId + batchSize - 1;
202 
203  // Calculate the loss and regularization terms.
204  double loss, regularization, cost;
205 
206  // Scores for each class are evaluated.
207  arma::mat scores;
208 
209  // Check intercept condition.
210  if (!fitIntercept)
211  {
212  scores = parameters.t() * dataset.cols(firstId, lastId);
213  }
214  else
215  {
216  scores = parameters.rows(0, dataset.n_rows - 1).t()
217  * dataset.cols(firstId, lastId)
218  + arma::repmat(parameters.row(dataset.n_rows).t(), 1,
219  dataset.n_cols);
220  }
221 
222  arma::mat margin = scores - (arma::repmat(arma::ones(numClasses).t()
223  * (scores % groundTruth.cols(firstId, lastId)), numClasses, 1))
224  + delta - (delta * groundTruth.cols(firstId, lastId));
225 
226  // The Hinge Loss Function
227  loss = arma::accu(arma::clamp(margin, 0.0, DBL_MAX));
228  loss /= batchSize;
229 
230  // Adding the regularization term.
231  regularization = 0.5 * lambda * arma::dot(parameters, parameters);
232 
233  cost = loss + regularization;
234  return cost;
235 }
236 
237 template <typename MatType>
238 template <typename GradType>
240  const arma::mat& parameters,
241  GradType& gradient)
242 {
243  // The objective is to minimize the loss, which is evaluated as the sum
244  // of all the positive elements of `margin` matrix.
245  // So, we focus of these positive elements and reduce them.
246  // Also, we need to increase the score of the correct class.
247 
248  // Scores for each class are evaluated.
249  arma::mat scores;
250 
251  if (!fitIntercept)
252  {
253  scores = parameters.t() * dataset;
254  }
255  else
256  {
257  scores = parameters.rows(0, dataset.n_rows - 1).t() * dataset
258  + arma::repmat(parameters.row(dataset.n_rows).t(), 1,
259  dataset.n_cols);
260  }
261 
262  arma::mat margin = scores - (arma::repmat(arma::ones(numClasses).t()
263  * (scores % groundTruth), numClasses, 1)) + delta
264  - (delta * groundTruth);
265 
266  // An element of `mask` matrix holds `1` corresponding to
267  // each positive element of `margin` matrix.
268  arma::mat mask = margin.for_each([](arma::mat::elem_type& val)
269  { val = (val > 0) ? 1: 0; });
270 
271  arma::mat difference = groundTruth
272  % (-arma::repmat(arma::sum(mask), numClasses, 1)) + mask;
273 
274  // The gradient is evaluated as follows:
275  // - Add `x_i` to `w_j` if `margin_i_m`is positive.
276  // - Subtract `x_i` from `w_y_i` for each positive
277  // `margin_i_j`.
278  // - Take the average over the size of dataset.
279  // - Add the regularization parameter.
280 
281  // Check intercept condition
282  if (!fitIntercept)
283  {
284  gradient = dataset * difference.t();
285  }
286  else
287  {
288  gradient.set_size(arma::size(parameters));
289  gradient.submat(0, 0, parameters.n_rows - 2, parameters.n_cols - 1) =
290  dataset * difference.t();
291  gradient.row(parameters.n_rows - 1) =
292  arma::ones<arma::rowvec>(dataset.n_cols) * difference.t();
293  }
294 
295  gradient /= dataset.n_cols;
296 
297  // Adding the regularization contribution to the gradient.
298  gradient += lambda * parameters;
299 }
300 
301 template <typename MatType>
302 template <typename GradType>
304  const arma::mat& parameters,
305  const size_t firstId,
306  GradType& gradient,
307  const size_t batchSize)
308 {
309  const size_t lastId = firstId + batchSize - 1;
310 
311  // Scores for each class are evaluated.
312  arma::mat scores;
313 
314  // Check intercept condition.
315  if (!fitIntercept)
316  {
317  scores = parameters.t() * dataset.cols(firstId, lastId);
318  }
319  else
320  {
321  scores = parameters.rows(0, dataset.n_rows - 1).t()
322  * dataset.cols(firstId, lastId)
323  + arma::repmat(parameters.row(dataset.n_rows).t(), 1, batchSize);
324  }
325 
326  arma::mat margin = scores - (arma::repmat(arma::ones(numClasses).t()
327  * (scores % groundTruth.cols(firstId, lastId)), numClasses, 1))
328  + delta - (delta * groundTruth.cols(firstId, lastId));
329 
330  // For each sample, find the total number of classes where
331  // ( margin > 0 ).
332  arma::mat mask = margin.for_each([](arma::mat::elem_type& val)
333  { val = (val > 0) ? 1: 0; });
334 
335  arma::mat difference = groundTruth.cols(firstId, lastId)
336  % (-arma::repmat(arma::sum(mask), numClasses, 1)) + mask;
337 
338  // Check intercept condition
339  if (!fitIntercept)
340  {
341  gradient = dataset.cols(firstId, lastId) * difference.t();
342  }
343  else
344  {
345  gradient.set_size(arma::size(parameters));
346  gradient.submat(0, 0, parameters.n_rows - 2, parameters.n_cols - 1) =
347  dataset.cols(firstId, lastId) * difference.t();
348  gradient.row(parameters.n_rows - 1) =
349  arma::ones<arma::rowvec>(batchSize) * difference.t();
350  }
351 
352  gradient /= batchSize;
353 
354  // Adding the regularization contribution to the gradient.
355  gradient += lambda * parameters;
356 }
357 
358 template <typename MatType>
359 template <typename GradType>
361  const arma::mat& parameters,
362  GradType& gradient) const
363 {
364  double loss, regularization, cost;
365 
366  // Scores for each class are evaluated.
367  arma::mat scores;
368 
369  if (!fitIntercept)
370  {
371  scores = parameters.t() * dataset;
372  }
373  else
374  {
375  scores = parameters.rows(0, dataset.n_rows - 1).t() * dataset
376  + arma::repmat(parameters.row(dataset.n_rows).t(), 1,
377  dataset.n_cols);
378  }
379 
380  arma::mat margin = scores - (arma::repmat(arma::ones(numClasses).t()
381  * (scores % groundTruth), numClasses, 1)) + delta
382  - (delta * groundTruth);
383 
384  // For each sample, find the total number of classes where
385  // ( margin > 0 ).
386  arma::mat mask = margin.for_each([](arma::mat::elem_type& val)
387  { val = (val > 0) ? 1: 0; });
388 
389  arma::mat difference = groundTruth
390  % (-arma::repmat(arma::sum(mask), numClasses, 1)) + mask;
391 
392  // Check intercept condition
393  if (!fitIntercept)
394  {
395  gradient = dataset * difference.t();
396  }
397  else
398  {
399  gradient.set_size(arma::size(parameters));
400  gradient.submat(0, 0, parameters.n_rows - 2, parameters.n_cols - 1) =
401  dataset * difference.t();
402  gradient.row(parameters.n_rows - 1) =
403  arma::ones<arma::rowvec>(dataset.n_cols) * difference.t();
404  }
405 
406  gradient /= dataset.n_cols;
407 
408  // Adding the regularization contribution to the gradient.
409  gradient += lambda * parameters;
410 
411  // The Hinge Loss Function
412  loss = arma::accu(arma::clamp(margin, 0.0, DBL_MAX));
413  loss /= dataset.n_cols;
414 
415  // Adding the regularization term.
416  regularization = 0.5 * lambda * arma::dot(parameters, parameters);
417 
418  cost = loss + regularization;
419  return cost;
420 }
421 
422 template <typename MatType>
423 template <typename GradType>
425  const arma::mat& parameters,
426  const size_t firstId,
427  GradType& gradient,
428  const size_t batchSize) const
429 {
430  const size_t lastId = firstId + batchSize - 1;
431 
432  // Calculate the loss and regularization terms.
433  double loss, regularization, cost;
434 
435  // Scores for each class are evaluated.
436  arma::mat scores;
437 
438  // Check intercept condition.
439  if (!fitIntercept)
440  {
441  scores = parameters.t() * dataset.cols(firstId, lastId);
442  }
443  else
444  {
445  scores = parameters.rows(0, dataset.n_rows - 1).t()
446  * dataset.cols(firstId, lastId)
447  + arma::repmat(parameters.row(dataset.n_rows).t(), 1, dataset.n_cols);
448  }
449 
450  arma::mat margin = scores - (arma::repmat(arma::ones(numClasses).t()
451  * (scores % groundTruth.cols(firstId, lastId)), numClasses, 1))
452  + delta - (delta * groundTruth.cols(firstId, lastId));
453 
454  // For each sample, find the total number of classes where
455  // ( margin > 0 ).
456  arma::mat mask = margin.for_each([](arma::mat::elem_type& val)
457  { val = (val > 0) ? 1: 0; });
458 
459  arma::mat difference = groundTruth.cols(firstId, lastId)
460  % (-arma::repmat(arma::sum(mask), numClasses, 1)) + mask;
461 
462  // Check intercept condition
463  if (!fitIntercept)
464  {
465  gradient = dataset.cols(firstId, lastId) * difference.t();
466  }
467  else
468  {
469  gradient.set_size(arma::size(parameters));
470  gradient.submat(0, 0, parameters.n_rows - 2, parameters.n_cols - 1) =
471  dataset.cols(firstId, lastId) * difference.t();
472  gradient.row(parameters.n_rows - 1) =
473  arma::ones<arma::rowvec>(batchSize) * difference.t();
474  }
475 
476  gradient /= batchSize;
477 
478 
479  // Adding the regularization contribution to the gradient.
480  gradient += lambda * parameters;
481 
482  // The Hinge Loss Function
483  loss = arma::accu(arma::clamp(margin.cols(firstId, lastId), 0.0, DBL_MAX));
484  loss /= batchSize;
485 
486  // Adding the regularization term.
487  regularization = 0.5 * lambda * arma::dot(parameters, parameters);
488 
489  cost = loss + regularization;
490  return cost;
491 }
492 
493 template <typename MatType>
495 {
496  // The number of points in the dataset is the number of functions, as this
497  // is a data dependent function.
498  return dataset.n_cols;
499 }
500 
501 } // namespace svm
502 } // namespace mlpack
503 
504 
505 #endif // MLPACK_METHODS_LINEAR_SVM_LINEAR_SVM_FUNCTION_IMPL_HPP
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
void GetGroundTruthMatrix(const arma::Row< size_t > &labels, arma::sp_mat &groundTruth)
Constructs the ground truth label matrix with the passed labels.
Definition: linear_svm_function_impl.hpp:74
double EvaluateWithGradient(const arma::mat &parameters, GradType &gradient) const
Evaluate the hinge loss function and its gradient, following the LinearFunctionType requirements on the EvaluateWithGradient function.
Definition: linear_svm_function_impl.hpp:360
void Gradient(const arma::mat &parameters, GradType &gradient)
Evaluate the gradient of the hinge loss function, following the LinearFunctionType requirements on the Gradient function.
Definition: linear_svm_function_impl.hpp:239
LinearSVMFunction(const MatType &dataset, const arma::Row< size_t > &labels, const size_t numClasses, const double lambda=0.0001, const double delta=1.0, const bool fitIntercept=false)
Construct the Linear SVM objective function with given parameters.
Definition: linear_svm_function_impl.hpp:27
double Evaluate(const arma::mat &parameters)
Evaluate the hinge loss function for all the datapoints.
Definition: linear_svm_function_impl.hpp:146
size_t NumFunctions() const
Return the number of functions.
Definition: linear_svm_function_impl.hpp:494
void ClearAlias(arma::Mat< ElemType > &mat)
Clear an alias so that no data is overwritten.
Definition: make_alias.hpp:110
void Shuffle()
Shuffle the dataset.
Definition: linear_svm_function_impl.hpp:111
arma::Cube< ElemType > MakeAlias(arma::Cube< ElemType > &input, const bool strict=true)
Make an alias of a dense cube.
Definition: make_alias.hpp:24
static void InitializeWeights(arma::mat &weights, const size_t featureSize, const size_t numClasses, const bool fitIntercept=false)
Initialize Linear SVM weights (trainable parameters) with the given parameters.
Definition: linear_svm_function_impl.hpp:53