/**
 * @file regularized_svd_function_impl.hpp
 *
 * An implementation of the RegularizedSVDFunction class.
 */
#ifndef MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_SVD_FUNCTION_IMPL_HPP
#define MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_SVD_FUNCTION_IMPL_HPP

// In case it hasn't been included yet.
#include "regularized_svd_function.hpp"

namespace mlpack {
namespace svd {

template <typename MatType>
RegularizedSVDFunction<MatType>::RegularizedSVDFunction(const MatType& data,
                                                        const size_t rank,
                                                        const double lambda) :
    data(math::MakeAlias(const_cast<MatType&>(data), false)),
    rank(rank),
    lambda(lambda)
{
  // Number of users and items in the data.
  numUsers = arma::max(data.row(0)) + 1;
  numItems = arma::max(data.row(1)) + 1;

  // Initialize the parameters.
  initialPoint.randu(rank, numUsers + numItems);
}
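
// A minimal usage sketch (not part of this file's implementation): `ratings`
// is assumed to be a 3 x N matrix whose rows hold user IDs, item IDs, and
// rating values, which is the layout the constructor above expects.
//
//   arma::mat ratings = { { 0, 0, 1, 1 },    // user IDs
//                         { 0, 1, 0, 2 },    // item IDs
//                         { 4, 3, 5, 2 } };  // ratings
//   RegularizedSVDFunction<arma::mat> f(ratings, /* rank */ 2,
//                                       /* lambda */ 0.01);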

template <typename MatType>
void RegularizedSVDFunction<MatType>::Shuffle()
{
  data = data.cols(arma::shuffle(arma::linspace<arma::uvec>(0, data.n_cols - 1,
      data.n_cols)));
}

template <typename MatType>
double RegularizedSVDFunction<MatType>::Evaluate(const arma::mat& parameters)
    const
{
  return Evaluate(parameters, 0, data.n_cols);
}

template <typename MatType>
double RegularizedSVDFunction<MatType>::Evaluate(const arma::mat& parameters,
                                                 const size_t start,
                                                 const size_t batchSize) const
{
  // The cost for the optimization is as follows:
  //   f(u, v) = sum((rating(i, j) - u(i).t() * v(j))^2)
  // where the sum is over all the ratings in the rating matrix; 'i' indexes
  // the user and 'j' indexes the item being considered.  A regularization
  // term is added to this cost, penalizing the vectors u(i) and v(j) for
  // each rating they contribute to.
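  //
  // Written out per example (a restatement of the cost above with the
  // regularization term included, added for clarity), the contribution of
  // the rating stored in column i of the data is:
  //   f_i = (rating - u(user).t() * v(item))^2
  //         + lambda * (||u(user)||^2 + ||v(item)||^2)
  // and this function returns the sum of f_i over the batch.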

  // It's possible this loop could be changed so that it's SIMD-vectorized.
  double objective = 0.0;
  for (size_t i = start; i < start + batchSize; ++i)
  {
    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Calculate the squared error in the prediction.
    const double rating = data(2, i);
    double ratingError = rating - arma::dot(parameters.col(user),
                                            parameters.col(item));
    double ratingErrorSquared = ratingError * ratingError;

    // Calculate the regularization penalty corresponding to the parameters.
    double userVecNorm = arma::norm(parameters.col(user), 2);
    double itemVecNorm = arma::norm(parameters.col(item), 2);
    double regularizationError = lambda * (userVecNorm * userVecNorm +
                                           itemVecNorm * itemVecNorm);

    objective += (ratingErrorSquared + regularizationError);
  }

  return objective;
}

template <typename MatType>
void RegularizedSVDFunction<MatType>::Gradient(const arma::mat& parameters,
                                               arma::mat& gradient) const
{
  // For an example with a rating corresponding to user 'i' and item 'j', the
  // gradients for the parameters are as follows:
  //   grad(u(i)) = lambda * u(i) - error * v(j)
  //   grad(v(j)) = lambda * v(j) - error * u(i)
  // where 'error' is the prediction error for that example:
  //   rating(i, j) - u(i).t() * v(j)
  // The full gradient is calculated by summing these contributions over all
  // the training examples.
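  //
  // These expressions follow from differentiating one term of the objective
  // (a short derivation added for clarity, matching the cost in Evaluate()):
  //   d/du(i) [(rating - u(i).t() * v(j))^2 + lambda * ||u(i)||^2]
  //     = -2 * error * v(j) + 2 * lambda * u(i)
  //     = 2 * (lambda * u(i) - error * v(j))
  // and symmetrically for v(j); the factor of 2 appears in the updates below.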

  gradient.zeros(rank, numUsers + numItems);

  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Prediction error for the example.
    const double rating = data(2, i);
    double ratingError = rating - arma::dot(parameters.col(user),
                                            parameters.col(item));

    // Gradient is non-zero only for the parameter columns corresponding to
    // the example.
    gradient.col(user) += 2 * (lambda * parameters.col(user) -
                               ratingError * parameters.col(item));
    gradient.col(item) += 2 * (lambda * parameters.col(item) -
                               ratingError * parameters.col(user));
  }
}
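
// A quick numerical sanity check for the gradient above (a sketch only; it
// assumes the accessors GetInitialPoint() and Evaluate() declared in
// regularized_svd_function.hpp, and is not part of this implementation):
//
//   RegularizedSVDFunction<arma::mat> f(ratings, rank, lambda);
//   arma::mat params = f.GetInitialPoint(), grad;
//   f.Gradient(params, grad);
//   const double eps = 1e-6;
//   arma::mat perturbed = params;
//   perturbed(0, 0) += eps;
//   // Forward difference; should be close to grad(0, 0).
//   const double numeric = (f.Evaluate(perturbed) - f.Evaluate(params)) / eps;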

template <typename MatType>
template <typename GradType>
void RegularizedSVDFunction<MatType>::Gradient(const arma::mat& parameters,
                                               const size_t start,
                                               GradType& gradient,
                                               const size_t batchSize) const
{
  gradient.zeros(rank, numUsers + numItems);

  // It's possible this could be SIMD-vectorized for additional speedup.
  for (size_t i = start; i < start + batchSize; ++i)
  {
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Prediction error for the example.
    const double rating = data(2, i);
    double ratingError = rating - arma::dot(parameters.col(user),
                                            parameters.col(item));

    // Gradient is non-zero only for the parameter columns corresponding to
    // the example.
    gradient.col(user) += 2 * (lambda * parameters.col(user) -
                               ratingError * parameters.col(item));
    gradient.col(item) += 2 * (lambda * parameters.col(item) -
                               ratingError * parameters.col(user));
  }
}

} // namespace svd
} // namespace mlpack

// Template specialization for the SGD optimizer.
namespace ens {

template <>
template <>
double StandardSGD::Optimize(
    mlpack::svd::RegularizedSVDFunction<arma::mat>& function,
    arma::mat& parameters)
{
  // Find the number of functions to use.
  const size_t numFunctions = function.NumFunctions();

  // To keep track of where we are and how things are going.
  size_t currentFunction = 0;
  double overallObjective = 0;

  // Calculate the first objective function.
  for (size_t i = 0; i < numFunctions; ++i)
    overallObjective += function.Evaluate(parameters, i);

  const arma::mat data = function.Dataset();

  // Now iterate!
  for (size_t i = 1; i != maxIterations; ++i, currentFunction++)
  {
    // Is this iteration the start of a sequence?
    if ((currentFunction % numFunctions) == 0)
    {
      const size_t epoch = i / numFunctions + 1;
      mlpack::Log::Info << "Epoch " << epoch << "; " << "objective "
          << overallObjective << "." << std::endl;

      // Reset the counter variables.
      overallObjective = 0;
      currentFunction = 0;
    }

    const size_t numUsers = function.NumUsers();

    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, currentFunction);
    const size_t item = data(1, currentFunction) + numUsers;

    // Prediction error for the example.
    const double rating = data(2, currentFunction);
    double ratingError = rating - arma::dot(parameters.col(user),
                                            parameters.col(item));

    double lambda = function.Lambda();

    // Gradient is non-zero only for the parameter columns corresponding to
    // the example.
    parameters.col(user) -= stepSize * (lambda * parameters.col(user) -
                                        ratingError * parameters.col(item));
    parameters.col(item) -= stepSize * (lambda * parameters.col(item) -
                                        ratingError * parameters.col(user));

    // Now add that to the overall objective function.
    overallObjective += function.Evaluate(parameters, currentFunction);
  }

  return overallObjective;
}
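
// How this specialization is typically reached (a sketch in the spirit of
// mlpack's RegularizedSVD class; the ens::StandardSGD constructor arguments
// shown — step size, batch size, maximum iterations — are assumptions here,
// not values prescribed by this file):
//
//   mlpack::svd::RegularizedSVDFunction<arma::mat> rSVDFunc(data, rank,
//       lambda);
//   ens::StandardSGD optimizer(/* stepSize */ 0.01, /* batchSize */ 1,
//       /* maxIterations */ iterations * data.n_cols);
//   arma::mat parameters = rSVDFunc.GetInitialPoint();
//   optimizer.Optimize(rSVDFunc, parameters);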

template <>
template <>
inline double ParallelSGD<ExponentialBackoff>::Optimize(
    mlpack::svd::RegularizedSVDFunction<arma::mat>& function,
    arma::mat& iterate)
{
  double overallObjective = DBL_MAX;
  double lastObjective;

  // The order in which the functions will be visited.
  arma::Col<size_t> visitationOrder = arma::linspace<arma::Col<size_t>>(0,
      (function.NumFunctions() - 1), function.NumFunctions());

  const arma::mat data = function.Dataset();

  // Iterate till the objective is within tolerance or the maximum number of
  // allowed iterations is reached. If maxIterations is 0, this will iterate
  // till convergence.
  for (size_t i = 1; i != maxIterations; ++i)
  {
    // Calculate the overall objective.
    lastObjective = overallObjective;
    overallObjective = 0;

    #pragma omp parallel for reduction(+:overallObjective)
    for (omp_size_t j = 0; j < (omp_size_t) function.NumFunctions(); ++j)
    {
      overallObjective += function.Evaluate(iterate, j);
    }

    // Output current objective function.
    mlpack::Log::Info << "Parallel SGD: iteration " << i << ", objective "
        << overallObjective << "." << std::endl;

    if (std::isnan(overallObjective) || std::isinf(overallObjective))
    {
      mlpack::Log::Warn << "Parallel SGD: converged to " << overallObjective
          << "; terminating with failure. Try a smaller step size?"
          << std::endl;
      return overallObjective;
    }

    if (std::abs(lastObjective - overallObjective) < tolerance)
    {
      mlpack::Log::Info << "SGD: minimized within tolerance " << tolerance
          << "; terminating optimization." << std::endl;
      return overallObjective;
    }

    // Get the step size for this iteration.
    double stepSize = decayPolicy.StepSize(i);

    if (shuffle) // Determine order of visitation.
      std::shuffle(visitationOrder.begin(), visitationOrder.end(),
          mlpack::math::randGen);

    #pragma omp parallel
    {
      // Each processor gets a subset of the instances.
      // Each subset is of size threadShareSize.
      size_t threadId = 0;
      #ifdef HAS_OPENMP
      threadId = omp_get_thread_num();
      #endif

      for (size_t j = threadId * threadShareSize;
          j < (threadId + 1) * threadShareSize && j < visitationOrder.n_elem;
          ++j)
      {
        const size_t numUsers = function.NumUsers();

        // Indices for accessing the correct parameter columns.
        const size_t user = data(0, visitationOrder[j]);
        const size_t item = data(1, visitationOrder[j]) + numUsers;

        // Prediction error for the example.
        const double rating = data(2, visitationOrder[j]);
        double ratingError = rating - arma::dot(iterate.col(user),
                                                iterate.col(item));

        double lambda = function.Lambda();

        arma::mat userUpdate = stepSize * (lambda * iterate.col(user) -
                                           ratingError * iterate.col(item));
        arma::mat itemUpdate = stepSize * (lambda * iterate.col(item) -
                                           ratingError * iterate.col(user));

        // Gradient is non-zero only for the parameter columns corresponding
        // to the example.  (The loop variable is 'k' to avoid shadowing the
        // outer iteration counter 'i'.)
        for (size_t k = 0; k < iterate.n_rows; ++k)
        {
          #pragma omp atomic
          iterate(k, user) -= userUpdate(k);
          #pragma omp atomic
          iterate(k, item) -= itemUpdate(k);
        }
      }
    }
  }

  mlpack::Log::Info << "\nParallel SGD terminated with objective: "
      << overallObjective << std::endl;

  return overallObjective;
}
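
// A hedged usage sketch for this specialization.  The constructor parameters
// for ens::ParallelSGD and ens::ExponentialBackoff follow ensmallen's
// documented interface, and the numeric values below are placeholders, not
// recommendations from this file:
//
//   mlpack::svd::RegularizedSVDFunction<arma::mat> rSVDFunc(data, rank,
//       lambda);
//   ens::ExponentialBackoff decay(/* epochInterval */ 100,
//       /* firstStepSize */ 0.01, /* backoff factor */ 1.05);
//   ens::ParallelSGD<ens::ExponentialBackoff> optimizer(
//       /* maxIterations */ 100000, /* threadShareSize */ 100,
//       /* tolerance */ 1e-5, /* shuffle */ true, decay);
//   arma::mat parameters = rSVDFunc.GetInitialPoint();
//   optimizer.Optimize(rSVDFunc, parameters);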

} // namespace ens

#endif