#ifndef MLPACK_METHODS_BIAS_SVD_BIAS_SVD_FUNCTION_IMPL_HPP
#define MLPACK_METHODS_BIAS_SVD_BIAS_SVD_FUNCTION_IMPL_HPP

#include "bias_svd_function.hpp"

// Constructor for the BiasSVDFunction class.
template <typename MatType>
BiasSVDFunction<MatType>::BiasSVDFunction(const MatType& data,
                                          const size_t rank,
                                          const double lambda) :
    data(math::MakeAlias(const_cast<MatType&>(data), false)),
    rank(rank),
    lambda(lambda)
{
  // Number of users and items in the data.
  numUsers = max(data.row(0)) + 1;
  numItems = max(data.row(1)) + 1;

  // Initialize the parameters with uniformly random values.
  initialPoint.randu(rank + 1, numUsers + numItems);
}
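// Layout of the parameter matrix used throughout this file: it has
// (rank + 1) rows and (numUsers + numItems) columns.  Columns 0 to
// numUsers - 1 belong to users and the remaining columns to items (item
// columns are offset by numUsers).  The first `rank` rows of a column hold
// the latent vector, and the last row holds that user's or item's bias term.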
// Shuffle the points in the dataset.
template <typename MatType>
void BiasSVDFunction<MatType>::Shuffle()
{
  data = data.cols(arma::shuffle(arma::linspace<arma::uvec>(0, data.n_cols - 1,
      data.n_cols)));
}
// Evaluate the cost function over all examples in the data.
template <typename MatType>
double BiasSVDFunction<MatType>::Evaluate(const arma::mat& parameters) const
{
  return Evaluate(parameters, 0, data.n_cols);
}
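// For a single example with user i and item j, the term accumulated below is
//
//   (rating(i, j) - p(i) - q(j) - u(i)^T v(j))^2
//       + lambda * (||[u(i); p(i)]||^2 + ||[v(j); q(j)]||^2),
//
// where u(i) and v(j) are the rank-length latent vectors and p(i), q(j) are
// the bias entries stored in the last row of the parameter matrix.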
// Evaluate the cost function for a batch of examples, starting at `start`.
template <typename MatType>
double BiasSVDFunction<MatType>::Evaluate(const arma::mat& parameters,
                                          const size_t start,
                                          const size_t batchSize) const
{
  double objective = 0.0;
  for (size_t i = start; i < start + batchSize; ++i)
  {
    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Calculate the squared error in the prediction.
    const double rating = data(2, i);
    const double userBias = parameters(rank, user);
    const double itemBias = parameters(rank, item);
    double ratingError = rating - userBias - itemBias -
        arma::dot(parameters.col(user).subvec(0, rank - 1),
                  parameters.col(item).subvec(0, rank - 1));
    double ratingErrorSquared = ratingError * ratingError;

    // Calculate the regularization penalty for this example's parameters.
    double userVecNorm = arma::norm(parameters.col(user), 2);
    double itemVecNorm = arma::norm(parameters.col(item), 2);
    double regularizationError = lambda * (userVecNorm * userVecNorm +
                                           itemVecNorm * itemVecNorm);

    objective += (ratingErrorSquared + regularizationError);
  }

  return objective;
}
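// For one example with user i and item j, the per-example gradients computed
// by both Gradient() overloads below are
//
//   grad(u(i)) = 2 * (lambda * u(i) - error * v(j))
//   grad(v(j)) = 2 * (lambda * v(j) - error * u(i))
//   grad(p(i)) = 2 * (lambda * p(i) - error)
//   grad(q(j)) = 2 * (lambda * q(j) - error)
//
// where error = rating(i, j) - p(i) - q(j) - u(i)^T v(j).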
// Evaluate the full gradient of the cost function over all training examples.
template <typename MatType>
void BiasSVDFunction<MatType>::Gradient(const arma::mat& parameters,
                                        arma::mat& gradient) const
{
  gradient.zeros(rank + 1, numUsers + numItems);

  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Prediction error for this example.
    const double rating = data(2, i);
    const double userBias = parameters(rank, user);
    const double itemBias = parameters(rank, item);
    double ratingError = rating - userBias - itemBias -
        arma::dot(parameters.col(user).subvec(0, rank - 1),
                  parameters.col(item).subvec(0, rank - 1));

    // The gradient is non-zero only for the parameter columns corresponding
    // to this example.
    gradient.col(user).subvec(0, rank - 1) +=
        2 * (lambda * parameters.col(user).subvec(0, rank - 1) -
             ratingError * parameters.col(item).subvec(0, rank - 1));
    gradient.col(item).subvec(0, rank - 1) +=
        2 * (lambda * parameters.col(item).subvec(0, rank - 1) -
             ratingError * parameters.col(user).subvec(0, rank - 1));
    gradient(rank, user) +=
        2 * (lambda * parameters(rank, user) - ratingError);
    gradient(rank, item) +=
        2 * (lambda * parameters(rank, item) - ratingError);
  }
}
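// The batched overload below is templated on the gradient type and updates
// single elements of `gradient` in its inner loop rather than whole
// subvectors; this keeps the method usable when GradType is something other
// than a dense arma::mat.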
// Evaluate the gradient of the cost function for a batch of examples.
template <typename MatType>
template <typename GradType>
void BiasSVDFunction<MatType>::Gradient(const arma::mat& parameters,
                                        const size_t start,
                                        GradType& gradient,
                                        const size_t batchSize) const
{
  gradient.zeros(rank + 1, numUsers + numItems);

  for (size_t i = start; i < start + batchSize; ++i)
  {
    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, i);
    const size_t item = data(1, i) + numUsers;

    // Prediction error for this example.
    const double rating = data(2, i);
    const double userBias = parameters(rank, user);
    const double itemBias = parameters(rank, item);
    double ratingError = rating - userBias - itemBias -
        arma::dot(parameters.col(user).subvec(0, rank - 1),
                  parameters.col(item).subvec(0, rank - 1));

    // The gradient is non-zero only for the parameter columns corresponding
    // to this example; update those elements individually.
    for (size_t j = 0; j < rank; ++j)
    {
      gradient(j, user) +=
          2 * (lambda * parameters(j, user) -
               ratingError * parameters(j, item));
      gradient(j, item) +=
          2 * (lambda * parameters(j, item) -
               ratingError * parameters(j, user));
    }
    gradient(rank, user) +=
        2 * (lambda * parameters(rank, user) - ratingError);
    gradient(rank, item) +=
        2 * (lambda * parameters(rank, item) - ratingError);
  }
}
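// A minimal usage sketch (illustrative only; the exact StandardSGD
// constructor arguments and the GetInitialPoint() accessor are assumptions
// about the surrounding mlpack API, not defined in this file):
//
//   arma::mat ratings;  // 3 x N matrix of (userID, itemID, rating) columns.
//   BiasSVDFunction<arma::mat> f(ratings, 10 /* rank */, 0.02 /* lambda */);
//   arma::mat parameters = f.GetInitialPoint();
//   StandardSGD optimizer(0.001 /* step size */, 1 /* batch size */);
//   optimizer.Optimize(f, parameters);  // Uses the specialization below.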
// Template specialization of the SGD optimizer for BiasSVDFunction.
template <>
template <>
double StandardSGD::Optimize(
    BiasSVDFunction<arma::mat>& function,
    arma::mat& parameters)
{
  // Find the number of functions to use.
  const size_t numFunctions = function.NumFunctions();

  // To keep track of where we are and how things are going.
  size_t currentFunction = 0;
  double overallObjective = 0;

  // Calculate the initial objective.
  for (size_t i = 0; i < numFunctions; ++i)
    overallObjective += function.Evaluate(parameters, i);

  const arma::mat data = function.Dataset();

  // Rank of the decomposition.
  const size_t rank = function.Rank();

  // Now iterate!
  for (size_t i = 1; i != maxIterations; ++i, currentFunction++)
  {
    // Is this iteration the start of a new epoch?
    if ((currentFunction % numFunctions) == 0)
    {
      const size_t epoch = i / numFunctions + 1;
      Log::Info << "Epoch " << epoch << "; objective " << overallObjective
          << "." << std::endl;

      // Reset the counter variables.
      overallObjective = 0;
      currentFunction = 0;
    }

    const size_t numUsers = function.NumUsers();

    // Indices for accessing the correct parameter columns.
    const size_t user = data(0, currentFunction);
    const size_t item = data(1, currentFunction) + numUsers;

    // Prediction error for this example.
    const double rating = data(2, currentFunction);
    const double userBias = parameters(rank, user);
    const double itemBias = parameters(rank, item);
    double ratingError = rating - userBias - itemBias -
        arma::dot(parameters.col(user).subvec(0, rank - 1),
                  parameters.col(item).subvec(0, rank - 1));

    double lambda = function.Lambda();

    // The gradient is non-zero only for the parameter columns corresponding
    // to this example, so update only those columns and bias entries.
    parameters.col(user).subvec(0, rank - 1) -= stepSize * 2 * (
        lambda * parameters.col(user).subvec(0, rank - 1) -
        ratingError * parameters.col(item).subvec(0, rank - 1));
    parameters.col(item).subvec(0, rank - 1) -= stepSize * 2 * (
        lambda * parameters.col(item).subvec(0, rank - 1) -
        ratingError * parameters.col(user).subvec(0, rank - 1));
    parameters(rank, user) -= stepSize * 2 * (
        lambda * parameters(rank, user) - ratingError);
    parameters(rank, item) -= stepSize * 2 * (
        lambda * parameters(rank, item) - ratingError);

    // Add this example's objective to the running total.
    overallObjective += function.Evaluate(parameters, currentFunction);
  }

  return overallObjective;
}
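// Both optimizer specializations exploit the structure of the problem: a
// single rating touches only the user's and the item's parameter columns, so
// the update step modifies just those two columns and bias entries instead of
// forming a full gradient matrix.  The parallel specialization below gives
// each thread a contiguous share of the shuffled visitation order and applies
// its updates directly to the shared iterate, element by element.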
// Template specialization of the parallel SGD optimizer for BiasSVDFunction.
template <>
template <>
inline double ParallelSGD<ExponentialBackoff>::Optimize(
    BiasSVDFunction<arma::mat>& function,
    arma::mat& iterate)
{
  double overallObjective = DBL_MAX;
  double lastObjective;

  // The order in which the functions will be visited.
  arma::Col<size_t> visitationOrder = arma::linspace<arma::Col<size_t>>(0,
      (function.NumFunctions() - 1), function.NumFunctions());

  const arma::mat data = function.Dataset();
  const size_t numUsers = function.NumUsers();
  const double lambda = function.Lambda();

  // Rank of the decomposition.
  const size_t rank = function.Rank();

  // Iterate until the objective is within tolerance or the maximum number of
  // allowed iterations is reached.
  for (size_t i = 1; i != maxIterations; ++i)
  {
    // Calculate the overall objective.
    lastObjective = overallObjective;
    overallObjective = 0;

    #pragma omp parallel for reduction(+:overallObjective)
    for (omp_size_t j = 0; j < (omp_size_t) function.NumFunctions(); ++j)
      overallObjective += function.Evaluate(iterate, j);

    // Output the current objective.
    Log::Info << "Parallel SGD: iteration " << i << ", objective "
        << overallObjective << "." << std::endl;

    if (std::isnan(overallObjective) || std::isinf(overallObjective))
    {
      Log::Warn << "Parallel SGD: converged to " << overallObjective
          << "; terminating with failure. Try a smaller step size?"
          << std::endl;
      return overallObjective;
    }

    if (std::abs(lastObjective - overallObjective) < tolerance)
    {
      Log::Info << "SGD: minimized within tolerance " << tolerance
          << "; terminating optimization." << std::endl;
      return overallObjective;
    }

    // Get the step size for this iteration.
    double stepSize = decayPolicy.StepSize(i);

    // Shuffle for uniform sampling of functions by each thread.
    if (shuffle)
    {
      std::shuffle(visitationOrder.begin(), visitationOrder.end(),
          math::randGen);
    }

    #pragma omp parallel
    {
      // Each thread gets a contiguous share of the visitation order, of size
      // threadShareSize.
      size_t threadId = 0;
      #ifdef HAS_OPENMP
      threadId = omp_get_thread_num();
      #endif

      for (size_t j = threadId * threadShareSize;
          j < (threadId + 1) * threadShareSize && j < visitationOrder.n_elem;
          ++j)
      {
        // Indices for accessing the correct parameter columns.
        const size_t user = data(0, visitationOrder[j]);
        const size_t item = data(1, visitationOrder[j]) + numUsers;

        // Prediction error for this example.
        const double rating = data(2, visitationOrder[j]);
        const double userBias = iterate(rank, user);
        const double itemBias = iterate(rank, item);
        double ratingError = rating - userBias - itemBias -
            arma::dot(iterate.col(user).subvec(0, rank - 1),
                      iterate.col(item).subvec(0, rank - 1));

        arma::mat userVecUpdate = stepSize * 2 * (
            lambda * iterate.col(user).subvec(0, rank - 1) -
            ratingError * iterate.col(item).subvec(0, rank - 1));
        arma::mat itemVecUpdate = stepSize * 2 * (
            lambda * iterate.col(item).subvec(0, rank - 1) -
            ratingError * iterate.col(user).subvec(0, rank - 1));
        double userBiasUpdate = stepSize * 2 * (
            lambda * iterate(rank, user) - ratingError);
        double itemBiasUpdate = stepSize * 2 * (
            lambda * iterate(rank, item) - ratingError);

        // Apply the updates to the shared iterate, element by element.
        for (size_t i = 0; i < rank; ++i)
        {
          #pragma omp atomic
          iterate(i, user) -= userVecUpdate(i);
          #pragma omp atomic
          iterate(i, item) -= itemVecUpdate(i);
        }
        #pragma omp atomic
        iterate(rank, user) -= userBiasUpdate;
        #pragma omp atomic
        iterate(rank, item) -= itemBiasUpdate;
      }
    }
  }

  Log::Info << "Parallel SGD terminated with objective : "
      << overallObjective << std::endl;

  return overallObjective;
}

#endif