mlpack
logistic_regression_function_impl.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_LOGISTIC_REGRESSION_FUNCTION_IMPL_HPP
13 #define MLPACK_METHODS_LOGISTIC_REGRESSION_FUNCTION_IMPL_HPP
14 
15 // In case it hasn't been included yet.
17 
18 #include <mlpack/core.hpp>
19 
20 namespace mlpack {
21 namespace regression {
22 
// Constructor of LogisticRegressionFunction<MatType> (its signature line is
// elided from this listing; see the Doxygen index at the end of the file).
// Stores aliases of the caller's predictors and responses together with the
// regularization parameter lambda, then checks that there is exactly one
// response per predictor column.
23 template<typename MatType>
25  const MatType& predictors,
26  const arma::Row<size_t>& responses,
27  const double lambda) :
28  // We promise to be well-behaved... the elements won't be modified.
// The const_cast exists only so that MakeAlias() can be called on the const
// inputs; the second argument (false) is MakeAlias()'s `strict` flag.
29  predictors(math::MakeAlias(const_cast<MatType&>(predictors), false)),
30  responses(math::MakeAlias(const_cast<arma::Row<size_t>&>(responses),
31  false)),
32  lambda(lambda)
33 {
34  // Sanity check: each column of predictors is one point and needs a response.
// Inside this body the names refer to the constructor arguments, which shadow
// the members of the same name.
35  if (responses.n_elem != predictors.n_cols)
36  {
37  Log::Fatal << "LogisticRegressionFunction::LogisticRegressionFunction(): "
38  << "predictors matrix has " << predictors.n_cols << " points, but "
39  << "responses vector has " << responses.n_elem << " elements (should be"
40  << " " << predictors.n_cols << ")!" << std::endl;
41  }
42 }
43 
// Shuffle(): shuffle the order of function visitation (signature line elided
// from this listing; see the Doxygen index at the end of the file).  The
// predictors and responses are shuffled together into new objects, which then
// replace the members.
47 template<typename MatType>
49 {
50  MatType newPredictors;
51  arma::Row<size_t> newResponses;
52 
// Shuffle into the temporaries so the current members are only read from.
53  math::ShuffleData(predictors, responses, newPredictors, newResponses);
54 
55  // If we are an alias, make sure we don't write to the original data.
56  math::ClearAlias(predictors);
57  math::ClearAlias(responses);
58 
59  // Take ownership of the new data.
60  predictors = std::move(newPredictors);
61  responses = std::move(newResponses);
62 }
63 
// Evaluate(parameters): evaluate the regularized objective over all points
// (signature line elided from this listing; see the Doxygen index at the end
// of the file).  Returns the L2 penalty minus the log-likelihood.
// NOTE(review): the indexing below assumes `parameters` is a row vector whose
// first element is the intercept -- confirm against callers.
68 template<typename MatType>
70  const arma::mat& parameters) const
71 {
72  // The objective function is based on the log-likelihood (w is the parameters
73  // vector for the model; y is the responses; x is the predictors; sig() is the
74  // sigmoid function):
75  // f(w) = sum(y log(sig(w'x)) + (1 - y) log(1 - sig(w'x))).
76  // We minimize -f(w). L2-regularization is lambda times the squared
77  // l2-norm of the non-intercept parameters, then divided by two.
78 
79  // For the regularization, we ignore the first term, which is the intercept
80  // term, and take every remaining term in the decision variable.
81  const double regularization = 0.5 * lambda *
82  arma::dot(parameters.tail_cols(parameters.n_elem - 1),
83  parameters.tail_cols(parameters.n_elem - 1));
84 
85  // Calculate vectors of sigmoids. The intercept term is parameters(0, 0) and
86  // does not need to be multiplied by any of the predictors.
87  const arma::rowvec sigmoid = 1.0 / (1.0 + arma::exp(-(parameters(0, 0) +
88  parameters.tail_cols(parameters.n_elem - 1) * predictors)));
89 
90  // Assemble full objective function. Often the objective function and the
91  // regularization as given are divided by the number of features, but this
92  // doesn't actually affect the optimization result, so we'll just ignore those
93  // terms for computational efficiency. Note that the conversion causes some
94  // copy and slowdown, but this is so negligible compared to the rest of the
95  // calculation it is not worth optimizing for.
// With y equal to 0 or 1 (assumed -- TODO confirm), 1 - y + sig * (2y - 1) is
// sig when y = 1 and 1 - sig when y = 0, so the sum below is f(w).
96  const double result = arma::accu(arma::log(1.0 -
97  arma::conv_to<arma::rowvec>::from(responses) + sigmoid %
98  (2 * arma::conv_to<arma::rowvec>::from(responses) - 1.0)));
99 
100  // Negate the log-likelihood, because this is a minimization.
101  return regularization - result;
102 }
103 
// Batch evaluation over the `batchSize` points starting at column `begin`
// (signature line elided from this listing; presumably the batch overload of
// Evaluate() -- confirm against the class declaration).  Returns the scaled L2
// penalty minus the log-likelihood of the selected points.
108 template<typename MatType>
110  const arma::mat& parameters,
111  const size_t begin,
112  const size_t batchSize) const
113 {
114  // Calculate the regularization term, scaled by the fraction of all points
// (batchSize / predictors.n_cols) that this batch covers.
115  const double regularization = lambda *
116  (batchSize / (2.0 * predictors.n_cols)) *
117  arma::dot(parameters.tail_cols(parameters.n_elem - 1),
118  parameters.tail_cols(parameters.n_elem - 1));
119 
120  // Calculate the sigmoid function values for columns begin through
// begin + batchSize - 1 (inclusive); parameters(0, 0) is the intercept.
121  const arma::rowvec sigmoid = 1.0 / (1.0 + arma::exp(-(parameters(0, 0) +
122  parameters.tail_cols(parameters.n_elem - 1) *
123  predictors.cols(begin, begin + batchSize - 1))));
124 
125  // Compute the objective for the given batch size from a given point.
// The responses are converted to double first so they can be combined with
// the sigmoid values.
126  arma::rowvec respD = arma::conv_to<arma::rowvec>::from(responses.subvec(begin,
127  begin + batchSize - 1));
128  const double result = arma::accu(arma::log(1.0 - respD + sigmoid %
129  (2 * respD - 1.0)));
130 
131  // Negate the log-likelihood, because this is a minimization.
132  return regularization - result;
133 }
134 
// Gradient(parameters, gradient): evaluate the gradient of the objective over
// all points (signature line elided from this listing; see the Doxygen index
// at the end of the file).  `gradient` is resized to the size of `parameters`
// and overwritten.
136 template<typename MatType>
138  const arma::mat& parameters,
139  arma::mat& gradient) const
140 {
141  // Regularization term: lambda times the non-intercept parameters.
142  arma::mat regularization;
143  regularization = lambda * parameters.tail_cols(parameters.n_elem - 1);
144 
// Sigmoid of (intercept + weights * predictors) for every point.
145  const arma::rowvec sigmoids = (1 / (1 + arma::exp(-parameters(0, 0)
146  - parameters.tail_cols(parameters.n_elem - 1) * predictors)));
147 
148  gradient.set_size(arma::size(parameters));
// Element 0 is the derivative with respect to the intercept (no
// regularization); the remaining elements are the derivatives with respect to
// the other parameters, plus the regularization term.
149  gradient[0] = -arma::accu(responses - sigmoids);
150  gradient.tail_cols(parameters.n_elem - 1) = (sigmoids - responses) *
151  predictors.t() + regularization;
152 }
153 
// Batch gradient over the `batchSize` points starting at column `begin`
// (signature line elided from this listing; presumably the batch overload of
// Gradient() -- confirm against the class declaration).  `gradient` is resized
// to the shape of `parameters` and overwritten.
156 template<typename MatType>
157 template<typename GradType>
159  const arma::mat& parameters,
160  const size_t begin,
161  GradType& gradient,
162  const size_t batchSize) const
163 {
164  // Regularization term, scaled by batchSize / predictors.n_cols.
165  arma::mat regularization;
166  regularization = lambda * parameters.tail_cols(parameters.n_elem - 1)
167  / predictors.n_cols * batchSize;
168 
// Linear response (intercept + weights * predictors) for the batch columns.
169  const arma::rowvec exponents = parameters(0, 0) +
170  parameters.tail_cols(parameters.n_elem - 1) *
171  predictors.cols(begin, begin + batchSize - 1);
172  // Calculating the sigmoid function values.
173  const arma::rowvec sigmoids = 1.0 / (1.0 + arma::exp(-exponents));
174 
175  gradient.set_size(parameters.n_rows, parameters.n_cols);
// Element 0 is the intercept derivative; the remaining elements are the
// derivatives for the other parameters, plus the scaled regularization term.
176  gradient[0] = -arma::accu(responses.subvec(begin, begin + batchSize - 1) -
177  sigmoids);
178  gradient.tail_cols(parameters.n_elem - 1) = (sigmoids -
179  responses.subvec(begin, begin + batchSize - 1)) *
180  predictors.cols(begin, begin + batchSize - 1).t() + regularization;
181 }
182 
// PartialGradient(parameters, j, gradient): compute only element j of the
// gradient and store it in the sparse output (signature line elided from this
// listing; see the Doxygen index at the end of the file).
187 template <typename MatType>
189  const arma::mat& parameters,
190  const size_t j,
191  arma::sp_mat& gradient) const
192 {
// Per-point difference between the response and the sigmoid, computed over
// all points regardless of j.
193  const arma::rowvec diffs = responses - (1 / (1 + arma::exp(-parameters(0, 0)
194  - parameters.tail_cols(parameters.n_elem - 1) * predictors)));
195 
196  gradient.set_size(arma::size(parameters));
197 
// j == 0 is the intercept, which gets no regularization term.
198  if (j == 0)
199  {
200  gradient[j] = -arma::accu(diffs);
201  }
202  else
203  {
// Parameter j pairs with row j - 1 of the predictors; its regularization
// contribution is lambda * parameters(0, j).
204  gradient[j] = arma::dot(-predictors.row(j - 1), diffs) + lambda *
205  parameters(0, j);
206  }
207 }
208 
// EvaluateWithGradient(parameters, gradient): compute the objective and its
// gradient over all points, sharing one sigmoid computation (signature line
// elided from this listing; see the Doxygen index at the end of the file).
// `gradient` is resized to the size of `parameters` and overwritten; the
// return value is the L2 penalty minus the log-likelihood.
209 template<typename MatType>
210 template<typename GradType>
212  const arma::mat& parameters,
213  GradType& gradient) const
214 {
215  // Regularization term of the gradient (non-intercept parameters only).
216  arma::mat regularization = lambda *
217  parameters.tail_cols(parameters.n_elem - 1);
218 
// Regularization term of the objective: lambda / 2 times the squared l2-norm
// of the non-intercept parameters.
219  const double objectiveRegularization = lambda / 2.0 *
220  arma::dot(parameters.tail_cols(parameters.n_elem - 1),
221  parameters.tail_cols(parameters.n_elem - 1));
222 
223  // Calculate the sigmoid function values.
224  const arma::rowvec sigmoids = 1.0 / (1.0 + arma::exp(-(parameters(0, 0) +
225  parameters.tail_cols(parameters.n_elem - 1) * predictors)));
226 
227  gradient.set_size(arma::size(parameters));
// Element 0 is the intercept derivative; the rest include the regularization.
228  gradient[0] = -arma::accu(responses - sigmoids);
229  gradient.tail_cols(parameters.n_elem - 1) = (sigmoids - responses) *
230  predictors.t() + regularization;
231 
232  // Now compute the objective function using the sigmoids.
233  double result = arma::accu(arma::log(1.0 -
234  arma::conv_to<arma::rowvec>::from(responses) + sigmoids %
235  (2 * arma::conv_to<arma::rowvec>::from(responses) - 1.0)));
236 
237  // Negate the log-likelihood, because this is a minimization.
238  return objectiveRegularization - result;
239 }
240 
// Batch objective and gradient over the `batchSize` points starting at column
// `begin` (signature line elided from this listing; presumably the batch
// overload of EvaluateWithGradient() -- confirm against the class
// declaration).  `gradient` is resized to the shape of `parameters` and
// overwritten; the return value is the scaled L2 penalty minus the
// log-likelihood of the selected points.
241 template<typename MatType>
242 template<typename GradType>
244  const arma::mat& parameters,
245  const size_t begin,
246  GradType& gradient,
247  const size_t batchSize) const
248 {
249  // Regularization term of the gradient, scaled by batchSize / n_cols.
250  arma::mat regularization =
251  lambda * parameters.tail_cols(parameters.n_elem - 1) / predictors.n_cols *
252  batchSize;
253 
// Regularization term of the objective, scaled by the same fraction.
254  const double objectiveRegularization = lambda *
255  (batchSize / (2.0 * predictors.n_cols)) *
256  arma::dot(parameters.tail_cols(parameters.n_elem - 1),
257  parameters.tail_cols(parameters.n_elem - 1));
258 
259  // Calculate the sigmoid function values for the batch columns.
260  const arma::rowvec sigmoids = 1.0 / (1.0 + arma::exp(-(parameters(0, 0) +
261  parameters.tail_cols(parameters.n_elem - 1) *
262  predictors.cols(begin, begin + batchSize - 1))));
263 
264  gradient.set_size(parameters.n_rows, parameters.n_cols);
// Element 0 is the intercept derivative; the rest include the regularization.
265  gradient[0] = -arma::accu(responses.subvec(begin, begin + batchSize - 1) -
266  sigmoids);
267  gradient.tail_cols(parameters.n_elem - 1) = (sigmoids -
268  responses.subvec(begin, begin + batchSize - 1)) *
269  predictors.cols(begin, begin + batchSize - 1).t() + regularization;
270 
271  // Now compute the objective function using the sigmoids.
272  arma::rowvec respD = arma::conv_to<arma::rowvec>::from(responses.subvec(begin,
273  begin + batchSize - 1));
274  const double result = arma::accu(arma::log(1.0 - respD + sigmoids %
275  (2 * respD - 1.0)));
276 
277  // Negate the log-likelihood, because this is a minimization.
278  return objectiveRegularization - result;
279 }
280 
281 } // namespace regression
282 } // namespace mlpack
283 
284 #endif
void Gradient(const arma::mat &parameters, arma::mat &gradient) const
Evaluate the gradient of the logistic regression log-likelihood function with the given parameters...
Definition: logistic_regression_function_impl.hpp:137
static MLPACK_EXPORT util::PrefixedOutStream Fatal
Prints fatal messages prefixed with [FATAL], then terminates the program.
Definition: log.hpp:90
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
void PartialGradient(const arma::mat &parameters, const size_t j, arma::sp_mat &gradient) const
Evaluate the gradient of the logistic regression log-likelihood function with the given parameters...
Definition: logistic_regression_function_impl.hpp:188
double Evaluate(const arma::mat &parameters) const
Evaluate the logistic regression log-likelihood function with the given parameters.
Definition: logistic_regression_function_impl.hpp:69
double EvaluateWithGradient(const arma::mat &parameters, GradType &gradient) const
Evaluate the objective function and gradient of the logistic regression log-likelihood function simultaneously.
Definition: logistic_regression_function_impl.hpp:211
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen documentation.
void Shuffle()
Shuffle the order of function visitation.
Definition: logistic_regression_function_impl.hpp:48
void ShuffleData(const MatType &inputPoints, const LabelsType &inputLabels, MatType &outputPoints, LabelsType &outputLabels, const std::enable_if_t<!arma::is_SpMat< MatType >::value > *=0, const std::enable_if_t<!arma::is_Cube< MatType >::value > *=0)
Shuffle a dataset and associated labels (or responses).
Definition: shuffle_data.hpp:28
void ClearAlias(arma::Mat< ElemType > &mat)
Clear an alias so that no data is overwritten.
Definition: make_alias.hpp:110
arma::Cube< ElemType > MakeAlias(arma::Cube< ElemType > &input, const bool strict=true)
Make an alias of a dense cube.
Definition: make_alias.hpp:24
LogisticRegressionFunction(const MatType &predictors, const arma::Row< size_t > &responses, const double lambda=0)
Creates the LogisticRegressionFunction.
Definition: logistic_regression_function_impl.hpp:24