mlpack
z_score_normalization.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_CF_NORMALIZATION_Z_SCORE_NORMALIZATION_HPP
13 #define MLPACK_METHODS_CF_NORMALIZATION_Z_SCORE_NORMALIZATION_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 
17 namespace mlpack {
18 namespace cf {
19 
39 {
40  public:
41  // Empty constructor.
42  ZScoreNormalization() : mean(0), stddev(1) { }
43 
49  void Normalize(arma::mat& data)
50  {
51  mean = arma::mean(data.row(2));
52  stddev = arma::stddev(data.row(2));
53 
54  if (std::fabs(stddev) < 1e-14)
55  {
56  Log::Fatal << "Standard deviation of all existing ratings is 0! "
57  << "This may indicate that all existing ratings are the same."
58  << std::endl;
59  }
60 
61  data.row(2) = (data.row(2) - mean) / stddev;
62  // The algorithm omits rating of zero. If normalized rating equals zero,
63  // it is set to the smallest positive float value.
64  data.row(2).for_each([](double& x)
65  {
66  if (x == 0)
67  x = std::numeric_limits<float>::min();
68  });
69  }
70 
76  void Normalize(arma::sp_mat& cleanedData)
77  {
78  // Caculate mean and stdev of all non zero ratings.
79  arma::vec ratings = arma::nonzeros(cleanedData);
80  mean = arma::mean(ratings);
81  stddev = arma::stddev(ratings);
82 
83  if (std::fabs(stddev) < 1e-14)
84  {
85  Log::Fatal << "Standard deviation of all existing ratings is 0! "
86  << "This may indicate that all existing ratings are the same."
87  << std::endl;
88  }
89 
90  // Subtract mean from existing rating and divide it by stddev.
91  // TODO: consider using spmat::transform() instead of spmat iterators
92  // TODO: http://arma.sourceforge.net/docs.html#transform
93  arma::sp_mat::iterator it = cleanedData.begin();
94  arma::sp_mat::iterator it_end = cleanedData.end();
95  for (; it != it_end; ++it)
96  {
97  double tmp = (*it - mean) / stddev;
98 
99  // The algorithm omits rating of zero. If normalized rating equals zero,
100  // it is set to the smallest positive float value.
101  if (tmp == 0)
102  tmp = std::numeric_limits<float>::min();
103 
104  *it = tmp;
105  }
106  }
107 
115  double Denormalize(const size_t /* user */,
116  const size_t /* item */,
117  const double rating) const
118  {
119  return rating * stddev + mean;
120  }
121 
128  void Denormalize(const arma::Mat<size_t>& /* combinations */,
129  arma::vec& predictions) const
130  {
131  predictions = predictions * stddev + mean;
132  }
133 
137  double Mean() const
138  {
139  return mean;
140  }
141 
145  double Stddev() const
146  {
147  return stddev;
148  }
149 
153  template<typename Archive>
154  void serialize(Archive& ar, const uint32_t /* version */)
155  {
156  ar(CEREAL_NVP(mean));
157  ar(CEREAL_NVP(stddev));
158  }
159 
160  private:
162  double mean;
164  double stddev;
165 };
166 
167 } // namespace cf
168 } // namespace mlpack
169 
170 #endif
static MLPACK_EXPORT util::PrefixedOutStream Fatal
Prints fatal messages prefixed with [FATAL], then terminates the program.
Definition: log.hpp:90
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
double Denormalize(const size_t, const size_t, const double rating) const
Denormalize computed rating by multiplying by stddev and then adding mean.
Definition: z_score_normalization.hpp:115
void Normalize(arma::sp_mat &cleanedData)
Normalize the data to zero mean and one standard deviation.
Definition: z_score_normalization.hpp:76
void Normalize(arma::mat &data)
Normalize the data to zero mean and one standard deviation.
Definition: z_score_normalization.hpp:49
The core includes that mlpack expects; standard C++ includes and Armadillo.
void serialize(Archive &ar, const uint32_t)
Serialization.
Definition: z_score_normalization.hpp:154
double Mean() const
Return mean.
Definition: z_score_normalization.hpp:137
This normalization class performs z-score normalization on raw ratings.
Definition: z_score_normalization.hpp:38
void Denormalize(const arma::Mat< size_t > &, arma::vec &predictions) const
Denormalize computed ratings by multiplying by stddev and then adding mean.
Definition: z_score_normalization.hpp:128
double Stddev() const
Return stddev.
Definition: z_score_normalization.hpp:145