// NOTE(review): this text looks like an extracted source listing -- the
// original line numbers are fused into the lines, and braces as well as some
// statements/signature lines are absent.  Comments below describe only what is
// visible.
//
// CFType constructor taking the neighbourhood size (any further parameters or
// member initializers are not visible in this listing).
16 #ifndef MLPACK_METHODS_CF_CF_IMPL_HPP 17 #define MLPACK_METHODS_CF_CF_IMPL_HPP 26 template<
typename DecompositionPolicy,
27 typename NormalizationType>
28 CFType<DecompositionPolicy,
30 CFType(
const size_t numUsersForSimilarity,
32 numUsersForSimilarity(numUsersForSimilarity),
// A neighbourhood size below 1 is not accepted: warn, then fall back to 5.
36 if (numUsersForSimilarity < 1)
38 Log::Warn <<
"CFType::CFType(): neighbourhood size should be > 0 (" 39 << numUsersForSimilarity <<
" given). Setting value to 5.\n";
// `this->` is required here because the parameter has the same name as the
// member it initializes.
41 this->numUsersForSimilarity = 5;
// CFType constructor, templated on MatType, that validates the neighbourhood
// size and then calls Train() with the given data and decomposition settings.
// NOTE(review): the `data` and `mit` parameters used in the Train() call are
// declared on lines that are not visible in this listing.
48 template<
typename DecompositionPolicy,
49 typename NormalizationType>
50 template<
typename MatType>
51 CFType<DecompositionPolicy,
54 const DecompositionPolicy& decomposition,
55 const size_t numUsersForSimilarity,
57 const size_t maxIterations,
58 const double minResidue,
60 numUsersForSimilarity(numUsersForSimilarity),
// A neighbourhood size below 1 is not accepted: warn, then fall back to 5.
64 if (numUsersForSimilarity < 1)
66 Log::Warn <<
"CFType::CFType(): neighbourhood size should be > 0 (" 67 << numUsersForSimilarity <<
" given). Setting value to 5.\n";
// `this->` disambiguates the member from the same-named parameter.
69 this->numUsersForSimilarity = 5;
// Train runs after the size has been validated.
72 Train(data, decomposition, maxIterations, minResidue, mit);
// Training routine (presumably Train(); the line carrying the function name
// and the `data` / `mit` parameters is not visible in this listing).
// Steps visible here: store the decomposition policy, normalize a copy of the
// input, build cleanedData from the normalized copy, pick a rank estimate, and
// run the decomposition under a timer.
76 template<
typename DecompositionPolicy,
77 typename NormalizationType>
78 void CFType<DecompositionPolicy,
81 const DecompositionPolicy& decomposition,
82 const size_t maxIterations,
83 const double minResidue,
86 this->decomposition = decomposition;
// Normalization is applied to a copy, so the caller's `data` is left as is.
89 arma::mat normalizedData(data);
90 normalization.Normalize(normalizedData);
91 CleanData(normalizedData, cleanedData);
// Density is the percentage of nonzero entries in cleanedData; the rank
// estimate is that percentage truncated to an integer, plus 5.
// NOTE(review): the condition guarding this heuristic is not visible here --
// judging by the log message it presumably applies only when no rank was
// given; confirm.
// NOTE(review): if cleanedData.n_elem were 0 this would divide by zero and the
// size_t conversion would not be meaningful -- confirm inputs are non-empty.
98 const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
99 const size_t rankEstimate = size_t(density) + 5;
102 Log::Info <<
"No rank given for decomposition; using rank of " 103 << rankEstimate <<
" calculated by density-based heuristic." 105 this->rank = rankEstimate;
// Only the Apply() call is wrapped by the "cf_factorization" timer.
110 Timer::Start(
"cf_factorization");
111 this->decomposition.Apply(
112 normalizedData, cleanedData, rank, maxIterations, minResidue, mit);
113 Timer::Stop(
"cf_factorization");
// Training routine variant (presumably a Train() overload; the line carrying
// the function name and the `data` / `mit` parameters is not visible here).
// Unlike the variant that calls CleanData(), this one normalizes cleanedData
// in place and passes the unmodified `data` to the decomposition.
// NOTE(review): the statement that populates cleanedData from `data` is not
// visible in this listing.
117 template<
typename DecompositionPolicy,
118 typename NormalizationType>
119 void CFType<DecompositionPolicy,
122 const DecompositionPolicy& decomposition,
123 const size_t maxIterations,
124 const double minResidue,
127 this->decomposition = decomposition;
132 normalization.Normalize(cleanedData);
// Density is the percentage of nonzero entries in cleanedData; the rank
// estimate is that percentage truncated to an integer, plus 5.
// NOTE(review): the condition guarding this heuristic is not visible here --
// judging by the log message it presumably applies only when no rank was
// given; confirm.
// NOTE(review): if cleanedData.n_elem were 0 this would divide by zero --
// confirm inputs are non-empty.
139 const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
140 const size_t rankEstimate = size_t(density) + 5;
143 Log::Info <<
"No rank given for decomposition; using rank of " 144 << rankEstimate <<
" calculated by density-based heuristic." 146 this->rank = rankEstimate;
// Only the Apply() call is wrapped by the "cf_factorization" timer.  Apply()
// receives the original `data` together with the normalized cleanedData.
151 Timer::Start(
"cf_factorization");
152 this->decomposition.Apply(
153 data, cleanedData, rank, maxIterations, minResidue, mit);
154 Timer::Stop(
"cf_factorization");
// GetRecommendations() for every user: builds the index list
// 0 .. cleanedData.n_cols - 1 (one entry per column of cleanedData) and
// forwards to the overload that takes an explicit user list.
//
// numRecs         - number of recommendations requested per user.
// recommendations - output matrix, filled by the forwarded call.
157 template<
typename DecompositionPolicy,
158 typename NormalizationType>
159 template<
typename NeighborSearchPolicy,
160 typename InterpolationPolicy>
161 void CFType<DecompositionPolicy,
163 GetRecommendations(
const size_t numRecs,
164 arma::Mat<size_t>& recommendations)
// NOTE(review): `n_cols - 1` presumably wraps around (unsigned arithmetic) if
// cleanedData has no columns -- confirm the model is always trained first.
170 arma::Col<size_t> users = arma::linspace<arma::Col<size_t> >(0,
171 cleanedData.n_cols - 1, cleanedData.n_cols);
174 GetRecommendations<NeighborSearchPolicy,
175 InterpolationPolicy>(numRecs, recommendations, users);
// GetRecommendations() for an explicit list of users.
//
// For each user in `users`, the predicted rating vector is accumulated as a
// weighted sum of the neighbours' rating vectors, and candidate items are
// ranked through a priority queue of (rating, item) pairs.
//
// numRecs         - number of recommendations per user.
// recommendations - output, resized to numRecs x users.n_elem.
// users           - user indices to generate recommendations for.
//
// NOTE(review): several statements (declaration of `ratings`, the Candidate /
// CandidateList typedef names, queue push/pop, `continue`) are not visible in
// this listing; comments on those steps are hedged accordingly.
178 template<
typename DecompositionPolicy,
179 typename NormalizationType>
180 template<
typename NeighborSearchPolicy,
181 typename InterpolationPolicy>
182 void CFType<DecompositionPolicy,
184 GetRecommendations(
const size_t numRecs,
185 arma::Mat<size_t>& recommendations,
186 const arma::Col<size_t>& users)
// Filled by GetNeighborhood(): column i is indexed below as the neighbours /
// similarities belonging to users(i).
189 arma::Mat<size_t> neighborhood;
191 arma::mat similarities;
198 decomposition.template GetNeighborhood<NeighborSearchPolicy>(
199 users, numUsersForSimilarity, neighborhood, similarities);
// Outputs start out filled with sentinel values (SIZE_MAX / DBL_MAX).
203 recommendations.set_size(numRecs, users.n_elem);
204 arma::mat values(numRecs, users.n_elem);
205 recommendations.fill(SIZE_MAX);
206 values.fill(DBL_MAX);
211 InterpolationPolicy interpolation(cleanedData);
213 for (
size_t i = 0; i < users.n_elem; ++i)
// One accumulated rating per row of cleanedData, starting from zero.
217 ratings.zeros(cleanedData.n_rows);
220 arma::vec weights(numUsersForSimilarity);
221 interpolation.GetWeights(weights, decomposition, users(i),
222 neighborhood.col(i), similarities.col(i), cleanedData);
// Weighted sum of the neighbours' rating vectors.
224 for (
size_t j = 0; j < neighborhood.n_rows; ++j)
226 arma::vec neighborRatings;
227 decomposition.GetRatingOfUser(neighborhood(j, i), neighborRatings);
228 ratings += weights(j) * neighborRatings;
// Placeholder candidate: rating -DBL_MAX and index cleanedData.n_rows, which is
// one past the last index of `ratings`.  The queue starts with numRecs of them.
233 const Candidate def = std::make_pair(-DBL_MAX, cleanedData.n_rows);
234 std::vector<Candidate> vect(numRecs, def);
235 typedef std::priority_queue<Candidate, std::vector<Candidate>, CandidateCmp>
237 CandidateList pqueue(CandidateCmp(), std::move(vect));
240 for (
size_t j = 0; j < ratings.n_rows; ++j)
// Nonzero entry in cleanedData for (item j, this user).  The statement this
// test controls is not visible; presumably such items are skipped -- confirm.
246 if (cleanedData(j, users(i)) != 0.0)
// Candidates are compared on the denormalized rating.
251 double realRating = normalization.Denormalize(users(i), j, ratings[j]);
// A candidate is only built when it beats the queue's current top entry
// (presumably the weakest retained one -- depends on CandidateCmp, not shown).
252 if (realRating > pqueue.top().first)
254 Candidate c = std::make_pair(realRating, j);
// Column i is written from its last row (numRecs - 1) up to row 0.
// NOTE(review): the pop() between iterations is not visible in this listing.
260 for (
size_t p = 1; p <= numRecs; p++)
262 recommendations(numRecs - p, i) = pqueue.top().second;
263 values(numRecs - p, i) = pqueue.top().first;
// If the last slot still holds the placeholder index, fewer than numRecs real
// candidates were found for this user.
// (The trailing `277 template<` on the final line begins the next definition.)
269 if (recommendations(numRecs - 1, i) == def.second)
270 Log::Warn <<
"Could not provide " << numRecs <<
" recommendations " 271 <<
"for user " << users(i) <<
" (not enough un-rated items)!" 277 template<
// Predict(user, item): predicts a single rating as the weighted sum, over the
// neighbourhood of `user`, of decomposition.GetRating(neighbour, item), and
// then denormalizes the result.
//
// NOTE(review): the declaration of `rating`, the assignment of `user` into
// `users`, and the return statement are not visible in this listing.
typename DecompositionPolicy,
278 typename NormalizationType>
279 template<
typename NeighborSearchPolicy,
280 typename InterpolationPolicy>
281 double CFType<DecompositionPolicy,
283 Predict(
const size_t user,
const size_t item)
const 289 arma::Mat<size_t> neighborhood;
291 arma::mat similarities;
// Single-element user list for the neighbourhood query; only column 0 of the
// results is used below.
298 arma::Col<size_t> users(1);
300 decomposition.template GetNeighborhood<NeighborSearchPolicy>(
301 users, numUsersForSimilarity, neighborhood, similarities);
303 arma::vec weights(numUsersForSimilarity);
306 InterpolationPolicy interpolation(cleanedData);
307 interpolation.GetWeights(weights, decomposition, user,
308 neighborhood.col(0), similarities.col(0), cleanedData);
// Accumulate each neighbour's rating of `item`, scaled by its weight.
312 for (
size_t j = 0; j < neighborhood.n_rows; ++j)
313 rating += weights(j) * decomposition.GetRating(neighborhood(j, 0), item);
// Convert the accumulated value back through the normalization policy.
316 double realRating = normalization.Denormalize(user, item, rating);
// Predict(combinations, predictions): batch prediction.
//
// combinations - one (user, item) pair per column; row 0 is used as the user
//                index and row 1 as the item index.
// predictions  - output, resized to combinations.n_cols; entry k corresponds
//                to column k of `combinations`.
//
// The pairs are sorted by user so that neighbourhoods and weights are computed
// once per distinct user and then reused for all of that user's pairs.
//
// NOTE(review): the declarations of `user` and `rating` and the body of the
// `while` loop are not visible in this listing.
321 template<
typename DecompositionPolicy,
322 typename NormalizationType>
323 template<
typename NeighborSearchPolicy,
324 typename InterpolationPolicy>
325 void CFType<DecompositionPolicy,
327 Predict(
const arma::Mat<size_t>& combinations,
328 arma::vec& predictions)
const 332 arma::Mat<size_t> sortedCombinations(combinations.n_rows,
333 combinations.n_cols);
// `ordering` maps a sorted position back to the original column index.
334 arma::uvec ordering = arma::sort_index(combinations.row(0).t());
335 for (
size_t i = 0; i < ordering.n_elem; ++i)
336 sortedCombinations.col(i) = combinations.col(ordering[i]);
// Distinct users appearing in the query.
339 arma::Col<size_t> users = arma::unique(combinations.row(0).t());
342 arma::Mat<size_t> neighborhood;
344 arma::mat similarities;
351 decomposition.template GetNeighborhood<NeighborSearchPolicy>(
352 users, numUsersForSimilarity, neighborhood, similarities);
// One column of interpolation weights per distinct user.
354 arma::mat weights(numUsersForSimilarity, users.n_elem);
357 InterpolationPolicy interpolation(cleanedData);
358 for (
size_t i = 0; i < users.n_elem; ++i)
360 interpolation.GetWeights(weights.col(i), decomposition, users[i],
361 neighborhood.col(i), similarities.col(i), cleanedData);
365 predictions.set_size(combinations.n_cols);
368 for (
size_t i = 0; i < sortedCombinations.n_cols; ++i)
// Moves the index into `users` forward until it reaches the user of the current
// pair (presumably by incrementing `user`; this relies on `users` being in
// ascending order -- confirm against arma::unique).
375 while (users[user] < sortedCombinations(0, i))
378 for (
size_t j = 0; j < neighborhood.n_rows; ++j)
380 rating += weights(j, user) * decomposition.GetRating(
381 neighborhood(j, user), sortedCombinations(1, i));
// Store at the pair's original column position, not its sorted position.
384 predictions(ordering[i]) = rating;
// Denormalization is applied to the whole batch at once, using the original
// (unsorted) combinations.
388 normalization.Denormalize(combinations, predictions);
// CleanData(): converts a coordinate-list matrix (one rating per column) into
// the sparse matrix `cleanedData`.
//
// data        - input; row 0 of each column becomes the sparse column index
//               (reported as the user in the warning below), row 1 becomes the
//               sparse row index (reported as the item), row 2 is the value.
// cleanedData - output; overwritten with the resulting sparse matrix.
391 template<
typename DecompositionPolicy,
392 typename NormalizationType>
393 void CFType<DecompositionPolicy,
395 CleanData(
const arma::mat&
data, arma::sp_mat& cleanedData)
// One (row, column) location and one value per input column.
399 arma::umat locations(2, data.n_cols);
400 arma::vec values(data.n_cols);
401 for (
size_t i = 0; i < data.n_cols; ++i)
// Note the swap: data row 0 goes to locations row 1 and data row 1 goes to
// locations row 0.
404 locations(1, i) = ((arma::uword) data(0, i));
405 locations(0, i) = ((arma::uword) data(1, i));
406 values(i) = data(2, i);
// NOTE(review): the condition guarding this warning is not visible in this
// listing; per the message it presumably fires when the rating is 0 -- confirm.
412 Log::Warn <<
"User rating of 0 ignored for user " << locations(1, i)
413 <<
", item " << locations(0, i) <<
"." << std::endl;
// Matrix dimensions are the largest index seen plus one (indices are used as
// zero-based positions).
417 const size_t maxItemID = (size_t) max(locations.row(0)) + 1;
418 const size_t maxUserID = (size_t) max(locations.row(1)) + 1;
// Sparse matrix with maxItemID rows and maxUserID columns.
421 cleanedData = arma::sp_mat(locations, values, maxItemID, maxUserID);
// serialize(): passes the five members below to the archive, in this order, as
// cereal name-value pairs.  The version argument is left unnamed and is not
// referenced in the visible body.
425 template<
typename DecompositionPolicy,
426 typename NormalizationType>
427 template<
typename Archive>
428 void CFType<DecompositionPolicy,
430 serialize(Archive& ar,
const uint32_t )
434 ar(CEREAL_NVP(numUsersForSimilarity));
435 ar(CEREAL_NVP(rank));
436 ar(CEREAL_NVP(decomposition));
437 ar(CEREAL_NVP(cleanedData));
438 ar(CEREAL_NVP(normalization));
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
Definition: hmm_train_main.cpp:300
This class implements Collaborative Filtering (CF).
Definition: cf.hpp:70