mlpack
bias_svd_function_impl.hpp
#ifndef MLPACK_METHODS_BIAS_SVD_BIAS_SVD_FUNCTION_IMPL_HPP
#define MLPACK_METHODS_BIAS_SVD_BIAS_SVD_FUNCTION_IMPL_HPP

#include "bias_svd_function.hpp"
#include <mlpack/core/math/make_alias.hpp>

namespace mlpack {
namespace svd {

template <typename MatType>
BiasSVDFunction<MatType>::BiasSVDFunction(const MatType& data,
                                          const size_t rank,
                                          const double lambda) :
    data(math::MakeAlias(const_cast<MatType&>(data), false)),
    rank(rank),
    lambda(lambda)
{
  // Number of users and items in the data.
  numUsers = arma::max(data.row(0)) + 1;
  numItems = arma::max(data.row(1)) + 1;

  // Initialize the parameters.
  // The last row in initialPoint stores the user/item biases.
  initialPoint.randu(rank + 1, numUsers + numItems);
}
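
// Example construction (a minimal sketch; the values are illustrative and
// assume the coordinate-list layout used above: row 0 holds user ids, row 1
// holds item ids, and row 2 holds ratings):
//
//   arma::mat data = { { 0, 0, 1, 1 },    // user ids
//                      { 0, 1, 0, 2 },    // item ids
//                      { 4, 3, 5, 1 } };  // ratings
//   BiasSVDFunction<arma::mat> function(data, 2 /* rank */, 0.01 /* lambda */);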

template <typename MatType>
void BiasSVDFunction<MatType>::Shuffle()
{
  // Shuffle whole columns so that each (user, item, rating) triplet stays
  // intact.
  data = data.cols(arma::shuffle(arma::linspace<arma::uvec>(0, data.n_cols - 1,
      data.n_cols)));
}

template <typename MatType>
double BiasSVDFunction<MatType>::Evaluate(const arma::mat& parameters) const
{
  return Evaluate(parameters, 0, data.n_cols);
}

template <typename MatType>
double BiasSVDFunction<MatType>::Evaluate(const arma::mat& parameters,
                                          const size_t start,
                                          const size_t batchSize) const
{
  // The cost for the optimization is as follows:
  //   f(u, v, p, q) = sum((rating(i, j) - p(i) - q(j) - u(i).t() * v(j))^2)
  // The sum is over all the ratings in the rating matrix.
  // 'i' points to the user and 'j' points to the item being considered.
  // The regularization term is added to the above cost; the vectors u(i) and
  // v(j) and the biases p(i) and q(j) are regularized for each rating they
  // contribute to.
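  //
  // Worked example with illustrative numbers (not from the source): if
  // rating(i, j) = 4, p(i) = 0.1, q(j) = -0.2 and u(i).t() * v(j) = 3.5, the
  // prediction error is 4 - 0.1 - (-0.2) - 3.5 = 0.6, so the squared term
  // contributes 0.36 to the sum (plus the lambda-scaled norms).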

  // It's possible this loop could be changed so that it's SIMD-vectorized.
  double objective = 0.0;
  for (size_t i = start; i < start + batchSize; ++i)
  {
    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Calculate the squared error in the prediction.
    const double rating = data(2, i);
    const double userBias = parameters(rank, user);
    const double itemBias = parameters(rank, item);
    double ratingError = rating - userBias - itemBias -
        arma::dot(parameters.col(user).subvec(0, rank - 1),
                  parameters.col(item).subvec(0, rank - 1));
    double ratingErrorSquared = ratingError * ratingError;

    // Calculate the regularization penalty corresponding to the parameters.
    double userVecNorm = arma::norm(parameters.col(user), 2);
    double itemVecNorm = arma::norm(parameters.col(item), 2);
    double regularizationError = lambda * (userVecNorm * userVecNorm +
        itemVecNorm * itemVecNorm);

    objective += (ratingErrorSquared + regularizationError);
  }

  return objective;
}
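
// Example call (a minimal sketch; the batch size is illustrative, and
// GetInitialPoint() is assumed to be the accessor from
// bias_svd_function.hpp):
//
//   arma::mat parameters = function.GetInitialPoint();
//   const double batchObjective = function.Evaluate(parameters, 0, 32);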

template <typename MatType>
void BiasSVDFunction<MatType>::Gradient(const arma::mat& parameters,
                                        arma::mat& gradient) const
{
  // For an example with rating corresponding to user 'i' and item 'j', the
  // gradients of the parameters are as follows:
  //   grad(u(i)) = 2 * (lambda * u(i) - error * v(j))
  //   grad(v(j)) = 2 * (lambda * v(j) - error * u(i))
  //   grad(p(i)) = 2 * (lambda * p(i) - error)
  //   grad(q(j)) = 2 * (lambda * q(j) - error)
  // 'error' is the prediction error for that example, which is:
  //   rating(i, j) - p(i) - q(j) - u(i).t() * v(j)
  // The full gradient is calculated by summing the contributions over all the
  // training examples.
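  //
  // Each formula is the derivative of (error^2 + lambda * ||params||^2) with
  // respect to one parameter of the example; e.g. for u(i),
  // d(error^2)/du(i) = -2 * error * v(j) and
  // d(lambda * ||u(i)||^2)/du(i) = 2 * lambda * u(i),
  // which together give grad(u(i)) above.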

  gradient.zeros(rank + 1, numUsers + numItems);

  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Prediction error for the example.
    const double rating = data(2, i);
    const double userBias = parameters(rank, user);
    const double itemBias = parameters(rank, item);
    double ratingError = rating - userBias - itemBias -
        arma::dot(parameters.col(user).subvec(0, rank - 1),
                  parameters.col(item).subvec(0, rank - 1));

    // Gradient is non-zero only for the parameter columns corresponding to
    // the example.
    gradient.col(user).subvec(0, rank - 1) +=
        2 * (lambda * parameters.col(user).subvec(0, rank - 1) -
             ratingError * parameters.col(item).subvec(0, rank - 1));
    gradient.col(item).subvec(0, rank - 1) +=
        2 * (lambda * parameters.col(item).subvec(0, rank - 1) -
             ratingError * parameters.col(user).subvec(0, rank - 1));
    gradient(rank, user) +=
        2 * (lambda * parameters(rank, user) - ratingError);
    gradient(rank, item) +=
        2 * (lambda * parameters(rank, item) - ratingError);
  }
}
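
// Example full-gradient descent step (a minimal sketch; the step size is
// illustrative):
//
//   arma::mat gradient;
//   function.Gradient(parameters, gradient);
//   parameters -= 0.01 * gradient;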

template <typename MatType>
template <typename GradType>
void BiasSVDFunction<MatType>::Gradient(const arma::mat& parameters,
                                        const size_t start,
                                        GradType& gradient,
                                        const size_t batchSize) const
{
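  // Unlike the matrix overload above, this overload writes elements in a
  // scalar loop rather than through subvec() views, so it works with any
  // GradType (e.g. a sparse matrix) that supports element access.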
  gradient.zeros(rank + 1, numUsers + numItems);

  // It's possible this could be SIMD-vectorized for additional speedup.
  for (size_t i = start; i < start + batchSize; ++i)
  {
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Prediction error for the example.
    const double rating = data(2, i);
    const double userBias = parameters(rank, user);
    const double itemBias = parameters(rank, item);
    double ratingError = rating - userBias - itemBias -
        arma::dot(parameters.col(user).subvec(0, rank - 1),
                  parameters.col(item).subvec(0, rank - 1));

    // Gradient is non-zero only for the parameter columns corresponding to
    // the example.
    for (size_t j = 0; j < rank; ++j)
    {
      gradient(j, user) +=
          2 * (lambda * parameters(j, user) -
               ratingError * parameters(j, item));
      gradient(j, item) +=
          2 * (lambda * parameters(j, item) -
               ratingError * parameters(j, user));
    }
    gradient(rank, user) +=
        2 * (lambda * parameters(rank, user) - ratingError);
    gradient(rank, item) +=
        2 * (lambda * parameters(rank, item) - ratingError);
  }
}

} // namespace svd
} // namespace mlpack

// Template specialization for the SGD optimizer.
namespace ens {

template <>
template <>
double StandardSGD::Optimize(
    svd::BiasSVDFunction<arma::mat>& function,
    arma::mat& parameters)
{
  // Find the number of functions to use.
  const size_t numFunctions = function.NumFunctions();

  // To keep track of where we are and how things are going.
  size_t currentFunction = 0;
  double overallObjective = 0;

  // Calculate the first objective function.
  for (size_t i = 0; i < numFunctions; ++i)
    overallObjective += function.Evaluate(parameters, i);

  const arma::mat data = function.Dataset();

  // Rank of decomposition.
  const size_t rank = function.Rank();

  // Now iterate!
  for (size_t i = 1; i != maxIterations; ++i, currentFunction++)
  {
    // Is this iteration the start of a sequence?
    if ((currentFunction % numFunctions) == 0)
    {
      const size_t epoch = i / numFunctions + 1;
      mlpack::Log::Info << "Epoch " << epoch << "; objective "
          << overallObjective << "." << std::endl;

      // Reset the counter variables.
      overallObjective = 0;
      currentFunction = 0;
    }

    const size_t numUsers = function.NumUsers();

    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, currentFunction);
    const size_t item = data(1, currentFunction) + numUsers;

    // Prediction error for the example.
    const double rating = data(2, currentFunction);
    const double userBias = parameters(rank, user);
    const double itemBias = parameters(rank, item);
    double ratingError = rating - userBias - itemBias -
        arma::dot(parameters.col(user).subvec(0, rank - 1),
                  parameters.col(item).subvec(0, rank - 1));

    const double lambda = function.Lambda();

    // Gradient is non-zero only for the parameter columns corresponding to
    // the example, so only those columns need updating.
    parameters.col(user).subvec(0, rank - 1) -= stepSize * 2 * (
        lambda * parameters.col(user).subvec(0, rank - 1) -
        ratingError * parameters.col(item).subvec(0, rank - 1));
    parameters.col(item).subvec(0, rank - 1) -= stepSize * 2 * (
        lambda * parameters.col(item).subvec(0, rank - 1) -
        ratingError * parameters.col(user).subvec(0, rank - 1));
    parameters(rank, user) -= stepSize * 2 * (
        lambda * parameters(rank, user) - ratingError);
    parameters(rank, item) -= stepSize * 2 * (
        lambda * parameters(rank, item) - ratingError);

    // Now add that to the overall objective function.
    overallObjective += function.Evaluate(parameters, currentFunction);
  }

  return overallObjective;
}
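
// Example use of this specialization (a minimal sketch; the optimizer
// settings are illustrative, and GetInitialPoint() is assumed from
// bias_svd_function.hpp):
//
//   mlpack::svd::BiasSVDFunction<arma::mat> function(data, 10, 0.01);
//   ens::StandardSGD sgd(0.001 /* step size */, 1 /* batch size */);
//   arma::mat parameters = function.GetInitialPoint();
//   sgd.Optimize(function, parameters);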

template <>
template <>
inline double ParallelSGD<ExponentialBackoff>::Optimize(
    svd::BiasSVDFunction<arma::mat>& function,
    arma::mat& iterate)
{
  double overallObjective = DBL_MAX;
  double lastObjective;

  // The order in which the functions will be visited.
  arma::Col<size_t> visitationOrder = arma::linspace<arma::Col<size_t>>(0,
      (function.NumFunctions() - 1), function.NumFunctions());

  const arma::mat data = function.Dataset();
  const size_t numUsers = function.NumUsers();
  const double lambda = function.Lambda();

  // Rank of decomposition.
  const size_t rank = function.Rank();

  // Iterate until the objective is within tolerance or the maximum number of
  // allowed iterations is reached. If maxIterations is 0, this will iterate
  // until convergence.
  for (size_t i = 1; i != maxIterations; ++i)
  {
    // Calculate the overall objective.
    lastObjective = overallObjective;
    overallObjective = 0;

    #pragma omp parallel for reduction(+:overallObjective)
    for (omp_size_t j = 0; j < (omp_size_t) function.NumFunctions(); ++j)
    {
      overallObjective += function.Evaluate(iterate, j);
    }

    // Output the current objective.
    mlpack::Log::Info << "Parallel SGD: iteration " << i << ", objective "
        << overallObjective << "." << std::endl;

    if (std::isnan(overallObjective) || std::isinf(overallObjective))
    {
      mlpack::Log::Warn << "Parallel SGD: converged to " << overallObjective
          << "; terminating with failure. Try a smaller step size?"
          << std::endl;
      return overallObjective;
    }

    if (std::abs(lastObjective - overallObjective) < tolerance)
    {
      mlpack::Log::Info << "SGD: minimized within tolerance " << tolerance
          << "; terminating optimization." << std::endl;
      return overallObjective;
    }

    // Get the step size for this iteration.
    double stepSize = decayPolicy.StepSize(i);

    if (shuffle) // Determine the order of visitation.
      std::shuffle(visitationOrder.begin(), visitationOrder.end(),
          mlpack::math::randGen);

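    // The updates below follow a Hogwild!-style scheme: each thread takes a
    // contiguous share of the shuffled examples and applies its updates to
    // the shared iterate with atomic element-wise writes rather than locks.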
    #pragma omp parallel
    {
      // Each thread gets a subset of the instances, of size threadShareSize.
      size_t threadId = 0;
      #ifdef HAS_OPENMP
      threadId = omp_get_thread_num();
      #endif

      for (size_t j = threadId * threadShareSize;
          j < (threadId + 1) * threadShareSize && j < visitationOrder.n_elem;
          ++j)
      {
        // Indices for accessing the correct parameter columns.
        const size_t user = data(0, visitationOrder[j]);
        const size_t item = data(1, visitationOrder[j]) + numUsers;

        // Prediction error for the example.
        const double rating = data(2, visitationOrder[j]);
        const double userBias = iterate(rank, user);
        const double itemBias = iterate(rank, item);
        double ratingError = rating - userBias - itemBias -
            arma::dot(iterate.col(user).subvec(0, rank - 1),
                      iterate.col(item).subvec(0, rank - 1));

        arma::mat userVecUpdate = stepSize * 2 * (
            lambda * iterate.col(user).subvec(0, rank - 1) -
            ratingError * iterate.col(item).subvec(0, rank - 1));
        arma::mat itemVecUpdate = stepSize * 2 * (
            lambda * iterate.col(item).subvec(0, rank - 1) -
            ratingError * iterate.col(user).subvec(0, rank - 1));
        double userBiasUpdate = stepSize * 2 * (
            lambda * iterate(rank, user) - ratingError);
        double itemBiasUpdate = stepSize * 2 * (
            lambda * iterate(rank, item) - ratingError);

        // Gradient is non-zero only for the parameter columns corresponding
        // to the example. (Loop variable 'k' avoids shadowing the outer 'i'.)
        for (size_t k = 0; k < rank; ++k)
        {
          #pragma omp atomic
          iterate(k, user) -= userVecUpdate(k);
          #pragma omp atomic
          iterate(k, item) -= itemVecUpdate(k);
        }
        #pragma omp atomic
        iterate(rank, user) -= userBiasUpdate;
        #pragma omp atomic
        iterate(rank, item) -= itemBiasUpdate;
      }
    }
  }
  mlpack::Log::Info << "\nParallel SGD terminated with objective: "
      << overallObjective << std::endl;

  return overallObjective;
}

} // namespace ens

#endif