/**
 * @file nca_softmax_error_function_impl.hpp
 *
 * Implementation of the Softmax stochastic neighbor assignment probability
 * (error) function for Neighborhood Components Analysis (NCA).
 * (Header reconstructed from generated documentation.)
 */
#ifndef MLPACK_METHODS_NCA_NCA_SOFTMAX_ERROR_FUNCTION_IMPL_H
#define MLPACK_METHODS_NCA_NCA_SOFTMAX_ERROR_FUNCTION_IMPL_H

// In case it hasn't been included already.
#include "nca_softmax_error_function.hpp"

#include <mlpack/core.hpp>
20 namespace mlpack {
21 namespace nca {
22 
23 // Initialize with the given kernel.
24 template<typename MetricType>
26  const arma::mat& dataset,
27  const arma::Row<size_t>& labels,
28  MetricType metric) :
29  dataset(math::MakeAlias(const_cast<arma::mat&>(dataset), false)),
30  labels(math::MakeAlias(const_cast<arma::Row<size_t>&>(labels), false)),
31  metric(metric),
32  precalculated(false)
33 { /* nothing to do */ }
34 
36 template<typename MetricType>
38 {
39  arma::mat newDataset;
40  arma::Row<size_t> newLabels;
41 
42  math::ShuffleData(dataset, labels, newDataset, newLabels);
43 
44  math::ClearAlias(dataset);
45  math::ClearAlias(labels);
46 
47  dataset = std::move(newDataset);
48  labels = std::move(newLabels);
49 }
50 
52 template<typename MetricType>
53 double SoftmaxErrorFunction<MetricType>::Evaluate(const arma::mat& coordinates)
54 {
55  // Calculate the denominators and numerators, if necessary.
56  Precalculate(coordinates);
57 
58  return -accu(p); // Sum of p_i for all i. We negate because our solver
59  // minimizes, not maximizes.
60 };
61 
64 template<typename MetricType>
65 double SoftmaxErrorFunction<MetricType>::Evaluate(const arma::mat& coordinates,
66  const size_t begin,
67  const size_t batchSize)
68 {
69  // Unfortunately each evaluation will take O(N) time because it requires a
70  // scan over all points in the dataset. Our objective is to compute p_i.
71  double denominator = 0;
72  double numerator = 0;
73  double result = 0;
74 
75  // It's quicker to do this now than one point at a time later.
76  stretchedDataset = coordinates * dataset;
77  for (size_t i = begin; i < begin + batchSize; ++i)
78  {
79  for (size_t k = 0; k < dataset.n_cols; ++k)
80  {
81  // Don't consider the case where the points are the same.
82  if (k == i)
83  continue;
84 
85  // We want to evaluate exp(-D(A x_i, A x_k)).
86  double eval = std::exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
87  stretchedDataset.unsafe_col(k)));
88 
89  // If they are in the same class, update the numerator.
90  if (labels[i] == labels[k])
91  numerator += eval;
92 
93  denominator += eval;
94  }
95 
96  // Now the result is just a simple division, but we have to be sure that the
97  // denominator is not 0.
98  if (denominator == 0.0)
99  {
100  Log::Warn << "Denominator of p_" << i << " is 0!" << std::endl;
101  continue;
102  }
103 
104  result += -(numerator / denominator); // Negate because the optimizer is a
105  // minimizer.
106  }
107  return result;
108 }
109 
111 template<typename MetricType>
112 void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
113  arma::mat& gradient)
114 {
115  // Calculate the denominators and numerators, if necessary.
116  Precalculate(coordinates);
117 
118  // Now, we handle the summation over i:
119  // sum_i (p_i sum_k (p_ik x_ik x_ik^T) -
120  // sum_{j in class of i} (p_ij x_ij x_ij^T)
121  // We can algebraically manipulate the whole thing to produce a more
122  // memory-friendly way to calculate this. Looping over each i and k (again
123  // O((n * (n + 1)) / 2) as with the last step, we can add the following to the
124  // sum:
125  //
126  // if class of i is the same as the class of k, add
127  // (((p_i - (1 / p_i)) p_ik) + ((p_k - (1 / p_k)) p_ki)) x_ik x_ik^T
128  // otherwise, add
129  // (p_i p_ik + p_k p_ki) x_ik x_ik^T
130  arma::mat sum;
131  sum.zeros(stretchedDataset.n_rows, stretchedDataset.n_rows);
132  for (size_t i = 0; i < stretchedDataset.n_cols; ++i)
133  {
134  for (size_t k = (i + 1); k < stretchedDataset.n_cols; ++k)
135  {
136  // Calculate p_ik and p_ki first.
137  double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
138  stretchedDataset.unsafe_col(k)));
139  double p_ik = 0, p_ki = 0;
140  p_ik = eval / denominators(i);
141  p_ki = eval / denominators(k);
142 
143  // Subtract x_i from x_k. We are not using stretched points here.
144  arma::vec x_ik = dataset.col(i) - dataset.col(k);
145  arma::mat secondTerm = (x_ik * trans(x_ik));
146 
147  if (labels[i] == labels[k])
148  sum += ((p[i] - 1) * p_ik + (p[k] - 1) * p_ki) * secondTerm;
149  else
150  sum += (p[i] * p_ik + p[k] * p_ki) * secondTerm;
151  }
152  }
153 
154  // Assemble the final gradient.
155  gradient = -2 * coordinates * sum;
156 }
157 
159 template <typename MetricType>
160 template <typename GradType>
161 void SoftmaxErrorFunction<MetricType>::Gradient(const arma::mat& coordinates,
162  const size_t begin,
163  GradType& gradient,
164  const size_t batchSize)
165 {
166  // The gradient involves two matrix terms which are eventually combined into
167  // one.
168  GradType firstTerm, secondTerm;
169  // We will need to calculate p_i before this evaluation is done, so
170  // these two variables will hold the information necessary for that.
171  double numerator, denominator;
172 
173  gradient.zeros(coordinates.n_rows, coordinates.n_rows);
174 
175  // Compute the stretched dataset.
176  stretchedDataset = coordinates * dataset;
177  for (size_t i = begin; i < begin + batchSize; ++i)
178  {
179  numerator = 0;
180  denominator = 0;
181 
182  firstTerm.zeros(coordinates.n_rows, coordinates.n_cols);
183  secondTerm.zeros(coordinates.n_rows, coordinates.n_cols);
184 
185  for (size_t k = 0; k < dataset.n_cols; ++k)
186  {
187  // Don't consider the case where the points are the same.
188  if (i == k)
189  continue;
190 
191  // Calculate the numerator of p_ik.
192  double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
193  stretchedDataset.unsafe_col(k)));
194 
195  // If the points are in the same class, we must add to the second term of
196  // the gradient as well as the numerator of p_i. We will divide by the
197  // denominator of p_ik later. For x_ik we are not using stretched points.
198  GradType x_ik = dataset.col(i) - dataset.col(k);
199  if (labels[i] == labels[k])
200  {
201  numerator += eval;
202  secondTerm += eval * x_ik * trans(x_ik);
203  }
204 
205  // We always have to add to the denominator of p_i
206  // and the first term of the gradient computation.
207  // We will divide by the denominator of p_ik later.
208  denominator += eval;
209  firstTerm += eval * x_ik * trans(x_ik);
210  }
211 
212  // Calculate p_i.
213  double p = 0;
214  if (denominator == 0)
215  {
216  Log::Warn << "Denominator of p_" << i << " is 0!" << std::endl;
217  // If the denominator is zero, then all p_ik should be zero and there is
218  // no gradient contribution from this point.
219  continue;
220  }
221  else
222  {
223  p = numerator / denominator;
224  firstTerm /= denominator;
225  secondTerm /= denominator;
226  }
227 
228  // Now multiply the first term by p_i, and add the two together and multiply
229  // all by 2 * A. We negate it though, because our optimizer is a minimizer.
230  gradient += -2 * coordinates * (p * firstTerm - secondTerm);
231  }
232 }
233 
234 template<typename MetricType>
236 {
237  return arma::eye<arma::mat>(dataset.n_rows, dataset.n_rows);
238 }
239 
240 template<typename MetricType>
242  const arma::mat& coordinates)
243 {
244  // Ensure it is the right size.
245  if (lastCoordinates.n_rows != coordinates.n_rows ||
246  lastCoordinates.n_cols != coordinates.n_cols)
247  {
248  lastCoordinates.set_size(coordinates.n_rows, coordinates.n_cols);
249  }
250  else if ((accu(coordinates == lastCoordinates) == coordinates.n_elem) &&
251  precalculated)
252  {
253  return; // No need to calculate; we already have this stuff saved.
254  }
255 
256  // Coordinates are different; save the new ones, and stretch the dataset.
257  lastCoordinates = coordinates;
258  stretchedDataset = coordinates * dataset;
259 
260  // For each point i, we must evaluate the softmax function:
261  // p_ij = exp( -K(x_i, x_j) ) / ( sum_{k != i} ( exp( -K(x_i, x_k) )))
262  // p_i = sum_{j in class of i} p_ij
263  // We will do this by keeping track of the denominators for each i as well as
264  // the numerators (the sum for all j in class of i). This will be on the
265  // order of O((n * (n + 1)) / 2), which really isn't all that great.
266  p.zeros(stretchedDataset.n_cols);
267  denominators.zeros(stretchedDataset.n_cols);
268  for (size_t i = 0; i < stretchedDataset.n_cols; ++i)
269  {
270  for (size_t j = (i + 1); j < stretchedDataset.n_cols; ++j)
271  {
272  // Evaluate exp(-d(x_i, x_j)).
273  double eval = exp(-metric.Evaluate(stretchedDataset.unsafe_col(i),
274  stretchedDataset.unsafe_col(j)));
275 
276  // Add this to the denominators of both p_i and p_j: K(i, j) = K(j, i).
277  denominators[i] += eval;
278  denominators[j] += eval;
279 
280  // If i and j are the same class, add to numerator of both.
281  if (labels[i] == labels[j])
282  {
283  p[i] += eval;
284  p[j] += eval;
285  }
286  }
287  }
288 
289  // Divide p_i by their denominators.
290  p /= denominators;
291 
292  // Clean up any bad values.
293  for (size_t i = 0; i < stretchedDataset.n_cols; ++i)
294  {
295  if (denominators[i] == 0.0)
296  {
297  Log::Debug << "Denominator of p_{" << i << ", j} is 0." << std::endl;
298 
299  // Set to usable values.
300  denominators[i] = std::numeric_limits<double>::infinity();
301  p[i] = 0;
302  }
303  }
304 
305  // We've done a precalculation. Mark it as done.
306  precalculated = true;
307 }
308 
309 } // namespace nca
310 } // namespace mlpack
311 
312 #endif
/* Doxygen cross-reference notes carried over from the generated
 * documentation (not part of the original source file):
 *
 * static MLPACK_EXPORT util::NullOutStream Debug
 *   MLPACK_EXPORT is required for global variables, so that they are properly
 *   exported by the Windows compiler.  Definition: log.hpp:79
 * SoftmaxErrorFunction
 *   The "softmax" stochastic neighbor assignment probability function.
 *   Definition: nca_softmax_error_function.hpp:45
 * namespace mlpack
 *   Linear algebra utility functions, generally performed on matrices or
 *   vectors.  Definition: cv.hpp:1
 * const arma::mat GetInitialPoint() const
 *   Get the initial point.
 *   Definition: nca_softmax_error_function_impl.hpp:235
 * void Gradient(const arma::mat& covariance, arma::mat& gradient)
 *   Evaluate the gradient of the softmax function for the given covariance
 *   matrix.  Definition: nca_softmax_error_function_impl.hpp:112
 * static MLPACK_EXPORT util::PrefixedOutStream Warn
 *   Prints warning messages prefixed with [WARN ].  Definition: log.hpp:87
 * SoftmaxErrorFunction(const arma::mat& dataset,
 *                      const arma::Row<size_t>& labels,
 *                      MetricType metric = MetricType())
 *   Initialize with the given kernel; useful when the kernel has some state
 *   to store, which is set elsewhere.
 *   Definition: nca_softmax_error_function_impl.hpp:25
 * void Shuffle()
 *   Shuffle the dataset.
 *   Definition: nca_softmax_error_function_impl.hpp:37
 * <mlpack/core.hpp>
 *   Include all of the base components required to write mlpack methods, and
 *   the main mlpack Doxygen documentation.
 * void ShuffleData(const MatType& inputPoints, const LabelsType& inputLabels,
 *                  MatType& outputPoints, LabelsType& outputLabels, ...)
 *   Shuffle a dataset and associated labels (or responses).
 *   Definition: shuffle_data.hpp:28
 * double Evaluate(const arma::mat& covariance)
 *   Evaluate the softmax function for the given covariance matrix.
 *   Definition: nca_softmax_error_function_impl.hpp:53
 * void ClearAlias(arma::Mat<ElemType>& mat)
 *   Clear an alias so that no data is overwritten.
 *   Definition: make_alias.hpp:110
 * arma::Cube<ElemType> MakeAlias(arma::Cube<ElemType>& input,
 *                                const bool strict = true)
 *   Make an alias of a dense cube.
 *   Definition: make_alias.hpp:24
 */