mlpack
svdplusplus_function_impl.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_SVDPLUSPLUS_SVDPLUSPLUS_FUNCTION_IMPL_HPP
14 #define MLPACK_METHODS_SVDPLUSPLUS_SVDPLUSPLUS_FUNCTION_IMPL_HPP
15 
16 #include "svdplusplus_function.hpp"
18 
19 namespace mlpack {
20 namespace svd {
21 
22 template <typename MatType>
24  const MatType& data,
25  const arma::sp_mat& implicitData,
26  const size_t rank,
27  const double lambda) :
28  data(math::MakeAlias(const_cast<MatType&>(data), false)),
29  implicitData(implicitData),
30  rank(rank),
31  lambda(lambda)
32 {
33  // Number of users and items in the data.
34  numUsers = max(data.row(0)) + 1;
35  numItems = max(data.row(1)) + 1;
36 
37  // Initialize the parameters.
38  // Item matrix: submat(0, numUsers, rank - 1, numUsers + numItems - 1).t()
39  // User matrix: submat(0, 0, rank - 1, numUsers - 1)
40  // Item bias: row(rank).subvec(numUsers, numUsers + numItems - 1)
41  // User bias: row(rank).subvec(0, numUsers - 1)
42  // Item implicit matrix: submat(0, numUsers + numItems,
43  // rank - 1, numUsers + 2 * numItems - 1)
44  // Unused:
45  // row(rank).subvec(numUsers + numItems, numUsers + 2 * numItems - 1)
46  initialPoint.randu(rank + 1, numUsers + 2 * numItems);
47 }
48 
49 template<typename MatType>
51 {
52  data = data.cols(arma::shuffle(arma::linspace<arma::uvec>(0, data.n_cols - 1,
53  data.n_cols)));
54 }
55 
56 template <typename MatType>
57 double SVDPlusPlusFunction<MatType>::Evaluate(const arma::mat& parameters) const
58 {
59  return Evaluate(parameters, 0, data.n_cols);
60 }
61 
62 template <typename MatType>
63 double SVDPlusPlusFunction<MatType>::Evaluate(const arma::mat& parameters,
64  const size_t start,
65  const size_t batchSize) const
66 {
67  // The cost for the optimization is as follows:
68  // f(u, v, p, q, y) =
69  // sum((rating(i, j) - p(i) - q(j) - u(i).t() * (v(j) + sum(y(k))))^2)
70  // The sum is over all the ratings in the rating matrix.
71  // 'i' points to the user and 'j' points to the item being considered.
72  // 'k' points to the items which user 'i' interacted with.
73  // The regularization term is added to the above cost, where the vectors u(i)
74  // and v(j), bias p(i) and q(j), implicit vectors y(k) are regularized for
75  // each rating they contribute to.
76 
77  // It's possible this loop could be changed so that it's SIMD-vectorized.
78  double objective = 0.0;
79 
80  // The norm square of implicit item vectors is cached to avoid repeated
81  // calculation.
82  arma::vec implicitVecsNormSquare(numItems);
83  implicitVecsNormSquare.fill(-1);
84 
85  for (size_t i = start; i < start + batchSize; ++i)
86  {
87  // Indices for accessing the the correct parameter columns.
88  const size_t user = data(0, i);
89  const size_t item = data(1, i) + numUsers;
90  const size_t implicitStart = numUsers + numItems;
91 
92  // Calculate the squared error in the prediction.
93  const double rating = data(2, i);
94  const double userBias = parameters(rank, user);
95  const double itemBias = parameters(rank, item);
96 
97  // Iterate through each item which the user interacted with to calculate
98  // user vector.
99  arma::vec userVec(rank, arma::fill::zeros);
100  arma::sp_mat::const_iterator it = implicitData.begin_col(user);
101  arma::sp_mat::const_iterator it_end = implicitData.end_col(user);
102  size_t implicitCount = 0;
103  double regularizationError = 0;
104  for (; it != it_end; ++it)
105  {
106  userVec += parameters.col(implicitStart + it.row()).subvec(0, rank - 1);
107  if (implicitVecsNormSquare(it.row()) < 0)
108  {
109  implicitVecsNormSquare(it.row()) = arma::dot(
110  parameters.col(implicitStart + it.row()).subvec(0, rank - 1),
111  parameters.col(implicitStart + it.row()).subvec(0, rank - 1));
112  }
113  regularizationError += lambda * implicitVecsNormSquare(it.row());
114  implicitCount += 1;
115  }
116  if (implicitCount != 0)
117  {
118  userVec /= std::sqrt(implicitCount);
119  regularizationError /= implicitCount;
120  }
121  userVec += parameters.col(user).subvec(0, rank - 1);
122 
123  double ratingError = rating - userBias - itemBias -
124  arma::dot(userVec, parameters.col(item).subvec(0, rank - 1));
125  double ratingErrorSquared = ratingError * ratingError;
126 
127  // Calculate the regularization penalty corresponding to the parameters.
128  double userVecNorm = arma::norm(parameters.col(user), 2);
129  double itemVecNorm = arma::norm(parameters.col(item), 2);
130  regularizationError += lambda * (userVecNorm * userVecNorm +
131  itemVecNorm * itemVecNorm);
132 
133  objective += (ratingErrorSquared + regularizationError);
134  }
135 
136  return objective;
137 }
138 
139 template <typename MatType>
140 void SVDPlusPlusFunction<MatType>::Gradient(const arma::mat& parameters,
141  arma::mat& gradient) const
142 {
143  // For an example with rating corresponding to user 'i' and item 'j', the
144  // gradients for the parameters is as follows:
145  // uservec = v(j) + sum(y(k))
146  // grad(u(i)) = 2 * (lambda * u(i) - error * v(j))
147  // grad(v(j)) = 2 * (lambda * v(j) - error * uservec)
148  // grad(p(i)) = 2 * (lambda * p(i) - error)
149  // grad(q(j)) = 2 * (lambda * q(j) - error)
150  // grad(y(k)) = 2 * (lambda * y(k) - error / sqrt(N(u)) * v(j))
151  // 'error' is the prediction error for that example, which is:
152  // rating(i, j) - p(i) - q(j) - u(i).t() * (v(j) + sum(y(k)))
153  // The full gradient is calculated by summing the contributions over all the
154  // training examples.
155 
156  gradient.zeros(rank + 1, numUsers + 2 * numItems);
157 
158  for (size_t i = 0; i < data.n_cols; ++i)
159  {
160  // Indices for accessing the the correct parameter columns.
161  const size_t user = data(0, i);
162  const size_t item = data(1, i) + numUsers;
163  const size_t implicitStart = numUsers + numItems;
164 
165  // Calculate the squared error in the prediction.
166  const double rating = data(2, i);
167  const double userBias = parameters(rank, user);
168  const double itemBias = parameters(rank, item);
169 
170  // Iterate through each item which the user interacted with to calculate
171  // user vector.
172  arma::vec userVec(rank, arma::fill::zeros);
173  arma::sp_mat::const_iterator it = implicitData.begin_col(user);
174  arma::sp_mat::const_iterator it_end = implicitData.end_col(user);
175  size_t implicitCount = 0;
176  for (; it != it_end; ++it)
177  {
178  userVec += parameters.col(implicitStart + it.row()).subvec(0, rank - 1);
179  implicitCount += 1;
180  }
181  if (implicitCount != 0)
182  userVec /= std::sqrt(implicitCount);
183  userVec += parameters.col(user).subvec(0, rank - 1);
184 
185  double ratingError = rating - userBias - itemBias -
186  arma::dot(userVec, parameters.col(item).subvec(0, rank - 1));
187 
188  // Gradient is non-zero only for the parameter columns corresponding to the
189  // example.
190  gradient.col(user).subvec(0, rank - 1) +=
191  2 * (lambda * parameters.col(user).subvec(0, rank - 1) -
192  ratingError * parameters.col(item).subvec(0, rank - 1));
193  gradient.col(item).subvec(0, rank - 1) +=
194  2 * (lambda * parameters.col(item).subvec(0, rank - 1) -
195  ratingError * userVec);
196  gradient(rank, user) +=
197  2 * (lambda * parameters(rank, user) - ratingError);
198  gradient(rank, item) +=
199  2 * (lambda * parameters(rank, item) - ratingError);
200  // Calculate gradients for item implicit vector.
201  it = implicitData.begin_col(user);
202  it_end = implicitData.end_col(user);
203  for (; it != it_end; ++it)
204  {
205  // Note that implicitCount != 0 if this loop is acutally executed.
206  gradient.col(implicitStart + it.row()).subvec(0, rank - 1) +=
207  2.0 * (lambda / implicitCount *
208  parameters.col(implicitStart + it.row()).subvec(0, rank - 1) -
209  ratingError / std::sqrt(implicitCount) *
210  parameters.col(item).subvec(0, rank - 1));
211  }
212  }
213 }
214 
215 template <typename MatType>
216 template <typename GradType>
217 void SVDPlusPlusFunction<MatType>::Gradient(const arma::mat& parameters,
218  const size_t start,
219  GradType& gradient,
220  const size_t batchSize) const
221 {
222  gradient.zeros(rank + 1, numUsers + 2 * numItems);
223 
224  // It's possible this could be SIMD-vectorized for additional speedup.
225  for (size_t i = start; i < start + batchSize; ++i)
226  {
227  // Indices for accessing the the correct parameter columns.
228  const size_t user = data(0, i);
229  const size_t item = data(1, i) + numUsers;
230  const size_t implicitStart = numUsers + numItems;
231 
232  // Calculate the squared error in the prediction.
233  const double rating = data(2, i);
234  const double userBias = parameters(rank, user);
235  const double itemBias = parameters(rank, item);
236 
237  // Iterate through each item which the user interacted with to calculate
238  // user vector.
239  arma::vec userVec(rank, arma::fill::zeros);
240  arma::sp_mat::const_iterator it = implicitData.begin_col(user);
241  arma::sp_mat::const_iterator it_end = implicitData.end_col(user);
242  size_t implicitCount = 0;
243  for (; it != it_end; ++it)
244  {
245  userVec += parameters.col(implicitStart + it.row()).subvec(0, rank - 1);
246  implicitCount += 1;
247  }
248  if (implicitCount != 0)
249  userVec /= std::sqrt(implicitCount);
250  userVec += parameters.col(user).subvec(0, rank - 1);
251 
252  double ratingError = rating - userBias - itemBias -
253  arma::dot(userVec, parameters.col(item).subvec(0, rank - 1));
254 
255  // Gradient is non-zero only for the parameter columns corresponding to the
256  // example.
257  for (size_t j = 0; j < rank; ++j)
258  {
259  gradient(j, user) +=
260  2 * (lambda * parameters(j, user) -
261  ratingError * parameters(j, item));
262  gradient(j, item) +=
263  2 * (lambda * parameters(j, item) -
264  ratingError * userVec(j));
265  }
266  gradient(rank, user) +=
267  2 * (lambda * parameters(rank, user) - ratingError);
268  gradient(rank, item) +=
269  2 * (lambda * parameters(rank, item) - ratingError);
270  // Calculate gradients for item implicit vector.
271  it = implicitData.begin_col(user);
272  it_end = implicitData.end_col(user);
273  for (; it != it_end; ++it)
274  {
275  // Note that implicitCount != 0 if this loop is acutally executed.
276  for (size_t j = 0; j < rank; ++j)
277  {
278  gradient(j, implicitStart + it.row()) +=
279  2.0 * (lambda / implicitCount *
280  parameters(j, implicitStart + it.row()) -
281  ratingError / std::sqrt(implicitCount) *
282  parameters(j, item));
283  }
284  }
285  }
286 }
287 
288 } // namespace svd
289 } // namespace mlpack
290 
291 // Template specialization for the SGD optimizer.
292 namespace ens {
293 
294 template <>
295 template <>
296 double StandardSGD::Optimize(
298  arma::mat& parameters)
299 {
300  // Find the number of functions to use.
301  const size_t numFunctions = function.NumFunctions();
302 
303  // To keep track of where we are and how things are going.
304  size_t currentFunction = 0;
305  double overallObjective = 0;
306 
307  // Calculate the first objective function.
308  for (size_t i = 0; i < numFunctions; ++i)
309  overallObjective += function.Evaluate(parameters, i);
310 
311  const arma::mat data = function.Dataset();
312  const arma::sp_mat implicitData = function.ImplicitDataset();
313  const size_t numUsers = function.NumUsers();
314  const size_t numItems = function.NumItems();
315  const double lambda = function.Lambda();
316 
317  // Rank of decomposition.
318  const size_t rank = function.Rank();
319 
320  // Now iterate!
321  for (size_t i = 1; i != maxIterations; ++i, currentFunction++)
322  {
323  // Is this iteration the start of a sequence?
324  if ((currentFunction % numFunctions) == 0)
325  {
326  const size_t epoch = i / numFunctions + 1;
327  mlpack::Log::Info << "Epoch " << epoch << "; " << "objective "
328  << overallObjective << "." << std::endl;
329 
330  // Reset the counter variables.
331  overallObjective = 0;
332  currentFunction = 0;
333  }
334 
335  // Indices for accessing the the correct parameter columns.
336  const size_t user = data(0, currentFunction);
337  const size_t item = data(1, currentFunction) + numUsers;
338  const size_t implicitStart = numUsers + numItems;
339 
340  // Calculate the squared error in the prediction.
341  const double rating = data(2, currentFunction);
342  const double userBias = parameters(rank, user);
343  const double itemBias = parameters(rank, item);
344 
345  // Iterate through each item which the user interacted with to calculate
346  // user vector.
347  arma::vec userVec(rank, arma::fill::zeros);
348  arma::sp_mat::const_iterator it = implicitData.begin_col(user);
349  arma::sp_mat::const_iterator it_end = implicitData.end_col(user);
350  size_t implicitCount = 0;
351  for (; it != it_end; ++it)
352  {
353  userVec += parameters.col(implicitStart + it.row()).subvec(0, rank - 1);
354  implicitCount += 1;
355  }
356  if (implicitCount != 0)
357  userVec /= std::sqrt(implicitCount);
358  userVec += parameters.col(user).subvec(0, rank - 1);
359 
360  double ratingError = rating - userBias - itemBias -
361  arma::dot(userVec, parameters.col(item).subvec(0, rank - 1));
362 
363  // Gradient is non-zero only for the parameter columns corresponding to the
364  // example.
365  parameters.col(user).subvec(0, rank - 1) -= stepSize * 2 * (
366  lambda * parameters.col(user).subvec(0, rank - 1) -
367  ratingError * parameters.col(item).subvec(0, rank - 1));
368  parameters.col(item).subvec(0, rank - 1) -= stepSize * 2 * (
369  lambda * parameters.col(item).subvec(0, rank - 1) -
370  ratingError * userVec);
371  parameters(rank, user) -= stepSize * 2 * (
372  lambda * parameters(rank, user) - ratingError);
373  parameters(rank, item) -= stepSize * 2 * (
374  lambda * parameters(rank, item) - ratingError);
375  // Update item implicit vectors.
376  it = implicitData.begin_col(user);
377  it_end = implicitData.end_col(user);
378  for (; it != it_end; ++it)
379  {
380  // Note that implicitCount != 0 if this loop is acutally executed.
381  parameters.col(implicitStart + it.row()).subvec(0, rank - 1) -=
382  stepSize * 2.0 * (lambda / implicitCount *
383  parameters.col(implicitStart + it.row()).subvec(0, rank - 1) -
384  ratingError / std::sqrt(implicitCount) *
385  parameters.col(item).subvec(0, rank - 1));
386  }
387 
388  // Now add that to the overall objective function.
389  overallObjective += function.Evaluate(parameters, currentFunction);
390  }
391 
392  return overallObjective;
393 }
394 
395 
396 template <>
397 template <>
398 inline double ParallelSGD<ExponentialBackoff>::Optimize(
400  arma::mat& iterate)
401 {
402  double overallObjective = DBL_MAX;
403  double lastObjective;
404 
405  // The order in which the functions will be visited.
406  arma::Col<size_t> visitationOrder = arma::linspace<arma::Col<size_t>>(0,
407  (function.NumFunctions() - 1), function.NumFunctions());
408 
409  const arma::mat data = function.Dataset();
410  const arma::sp_mat implicitData = function.ImplicitDataset();
411  const size_t numUsers = function.NumUsers();
412  const size_t numItems = function.NumItems();
413  const double lambda = function.Lambda();
414 
415  // Rank of decomposition.
416  const size_t rank = function.Rank();
417 
418  // Iterate till the objective is within tolerance or the maximum number of
419  // allowed iterations is reached. If maxIterations is 0, this will iterate
420  // till convergence.
421  for (size_t i = 1; i != maxIterations; ++i)
422  {
423  // Calculate the overall objective.
424  lastObjective = overallObjective;
425  overallObjective = 0;
426 
427  #pragma omp parallel for reduction(+:overallObjective)
428  for (omp_size_t j = 0; j < (omp_size_t) function.NumFunctions(); ++j)
429  {
430  overallObjective += function.Evaluate(iterate, j);
431  }
432 
433  // Output current objective function.
434  mlpack::Log::Info << "Parallel SGD: iteration " << i << ", objective "
435  << overallObjective << "." << std::endl;
436 
437  if (std::isnan(overallObjective) || std::isinf(overallObjective))
438  {
439  mlpack::Log::Warn << "Parallel SGD: converged to " << overallObjective
440  << "; terminating with failure. Try a smaller step size?"
441  << std::endl;
442  return overallObjective;
443  }
444 
445  if (std::abs(lastObjective - overallObjective) < tolerance)
446  {
447  mlpack::Log::Info << "SGD: minimized within tolerance " << tolerance
448  << "; terminating optimization." << std::endl;
449  return overallObjective;
450  }
451 
452  // Get the stepsize for this iteration
453  double stepSize = decayPolicy.StepSize(i);
454 
455  if (shuffle) // Determine order of visitation.
456  std::shuffle(visitationOrder.begin(), visitationOrder.end(),
458 
459  #pragma omp parallel
460  {
461  // Each processor gets a subset of the instances.
462  // Each subset is of size threadShareSize.
463  size_t threadId = 0;
464  #ifdef HAS_OPENMP
465  threadId = omp_get_thread_num();
466  #endif
467 
468  for (size_t j = threadId * threadShareSize;
469  j < (threadId + 1) * threadShareSize && j < visitationOrder.n_elem;
470  ++j)
471  {
472  // Indices for accessing the the correct parameter columns.
473  const size_t user = data(0, visitationOrder[j]);
474  const size_t item = data(1, visitationOrder[j]) + numUsers;
475  const size_t implicitStart = numUsers + numItems;
476 
477  // Prediction error for the example.
478  const double rating = data(2, visitationOrder[j]);
479  const double userBias = iterate(rank, user);
480  const double itemBias = iterate(rank, item);
481  // Iterate through each item which the user interacted with to calculate
482  // user vector.
483  arma::vec userVec(rank, arma::fill::zeros);
484  arma::sp_mat::const_iterator it = implicitData.begin_col(user);
485  arma::sp_mat::const_iterator it_end = implicitData.end_col(user);
486  size_t implicitCount = 0;
487  for (; it != it_end; ++it)
488  {
489  userVec += iterate.col(implicitStart + it.row()).subvec(0, rank - 1);
490  implicitCount += 1;
491  }
492  if (implicitCount != 0)
493  userVec /= std::sqrt(implicitCount);
494  userVec += iterate.col(user).subvec(0, rank - 1);
495 
496  double ratingError = rating - userBias - itemBias -
497  arma::dot(userVec, iterate.col(item).subvec(0, rank - 1));
498 
499  arma::mat userVecUpdate = stepSize * 2 * (
500  lambda * iterate.col(user).subvec(0, rank - 1) -
501  ratingError * iterate.col(item).subvec(0, rank - 1));
502  arma::mat itemVecUpdate = stepSize * 2 * (
503  lambda * iterate.col(item).subvec(0, rank - 1) -
504  ratingError * userVec);
505  double userBiasUpdate = stepSize * 2 * (
506  lambda * iterate(rank, user) - ratingError);
507  double itemBiasUpdate = stepSize * 2 * (
508  lambda * iterate(rank, item) - ratingError);
509 
510  // Update of item implicit vectors.
511  arma::mat itemImplicitUpdate(rank, implicitCount);
512  arma::Col<size_t> implicitItems(implicitCount);
513  it = implicitData.begin_col(user);
514  it_end = implicitData.end_col(user);
515  size_t implicitIndex = 0;
516  for (; it != it_end; ++it, ++implicitIndex)
517  {
518  itemImplicitUpdate.col(implicitIndex) =
519  stepSize * 2.0 * (lambda / implicitCount *
520  iterate.col(implicitStart + it.row()).subvec(0, rank - 1) -
521  ratingError / std::sqrt(implicitCount) *
522  iterate.col(item).subvec(0, rank - 1));
523  implicitItems(implicitIndex) = it.row();
524  }
525 
526  // Gradient is non-zero only for the parameter columns corresponding to
527  // the example.
528  for (size_t i = 0; i < rank; ++i)
529  {
530  #pragma omp atomic
531  iterate(i, user) -= userVecUpdate(i);
532  #pragma omp atomic
533  iterate(i, item) -= itemVecUpdate(i);
534  }
535  #pragma omp atomic
536  iterate(rank, user) -= userBiasUpdate;
537  #pragma omp atomic
538  iterate(rank, item) -= itemBiasUpdate;
539  for (size_t k = 0; k < implicitCount; ++k)
540  {
541  for (size_t i = 0; i < rank; ++i)
542  {
543  #pragma omp atomic
544  iterate(i, implicitStart + implicitItems(k)) -=
545  itemImplicitUpdate(i, k);
546  }
547  }
548  }
549  }
550  }
551  mlpack::Log::Info << "\n Parallel SGD terminated with objective : "
552  << overallObjective << std::endl;
553 
554  return overallObjective;
555 }
556 
557 } // namespace ens
558 
559 #endif
This class contains methods which are used to calculate the cost of SVD++&#39;s objective function...
Definition: svdplusplus_function.hpp:31
void Gradient(const arma::mat &parameters, arma::mat &gradient) const
Evaluates the full gradient of the cost function over all the training examples.
Definition: svdplusplus_function_impl.hpp:140
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
Definition: bias_svd_function_impl.hpp:185
SVDPlusPlusFunction(const MatType &data, const arma::sp_mat &implicitData, const size_t rank, const double lambda)
Constructor for SVDPlusPlusFunction class.
Definition: svdplusplus_function_impl.hpp:23
static MLPACK_EXPORT util::PrefixedOutStream Warn
Prints warning messages prefixed with [WARN ].
Definition: log.hpp:87
void Shuffle()
Shuffle the points in the dataset.
Definition: svdplusplus_function_impl.hpp:50
size_t NumFunctions() const
Return the number of training examples. Useful for SGD optimizer.
Definition: svdplusplus_function.hpp:116
double Evaluate(const arma::mat &parameters) const
Evaluates the cost function over all examples in the data.
Definition: svdplusplus_function_impl.hpp:57
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
arma::Cube< ElemType > MakeAlias(arma::Cube< ElemType > &input, const bool strict=true)
Make an alias of a dense cube.
Definition: make_alias.hpp:24
MLPACK_EXPORT std::mt19937 randGen
MLPACK_EXPORT is required for global variables; it exports the symbols correctly on Windows...
Definition: random.cpp:18