/**
 * @file methods/ann/rnn_impl.hpp
 *
 * Definition of the RNN class, which implements recurrent neural networks.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_ANN_RNN_IMPL_HPP
#define MLPACK_METHODS_ANN_RNN_IMPL_HPP

// In case it hasn't been included yet.
#include "rnn.hpp"

// Visitor and utility headers used below (paths assume mlpack's source
// layout).
#include "visitor/load_output_parameter_visitor.hpp"
#include "visitor/save_output_parameter_visitor.hpp"
#include "visitor/forward_visitor.hpp"
#include "visitor/backward_visitor.hpp"
#include "visitor/deterministic_set_visitor.hpp"
#include "visitor/gradient_set_visitor.hpp"
#include "visitor/gradient_visitor.hpp"
#include "visitor/reset_cell_visitor.hpp"
#include "visitor/weight_set_visitor.hpp"

#include "util/check_input_shape.hpp"
namespace mlpack {
namespace ann {

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::RNN(
    const size_t rho,
    const bool single,
    OutputLayerType outputLayer,
    InitializationRuleType initializeRule) :
    rho(rho),
    outputLayer(std::move(outputLayer)),
    initializeRule(std::move(initializeRule)),
    inputSize(0),
    outputSize(0),
    targetSize(0),
    reset(false),
    single(single),
    numFunctions(0),
    deterministic(true)
{
  /* Nothing to do here */
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::RNN(
    const RNN& network) :
    rho(network.rho),
    outputLayer(network.outputLayer),
    initializeRule(network.initializeRule),
    inputSize(network.inputSize),
    outputSize(network.outputSize),
    targetSize(network.targetSize),
    reset(network.reset),
    single(network.single),
    parameter(network.parameter),
    numFunctions(network.numFunctions),
    deterministic(network.deterministic)
{
  // Deep-copy the layers so that this network owns its own modules.
  for (size_t i = 0; i < network.network.size(); ++i)
  {
    this->network.push_back(boost::apply_visitor(copyVisitor,
        network.network[i]));
    boost::apply_visitor(resetVisitor, this->network.back());
  }
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::RNN(
    RNN&& network) :
    rho(std::move(network.rho)),
    outputLayer(std::move(network.outputLayer)),
    initializeRule(std::move(network.initializeRule)),
    inputSize(std::move(network.inputSize)),
    outputSize(std::move(network.outputSize)),
    targetSize(std::move(network.targetSize)),
    reset(std::move(network.reset)),
    single(std::move(network.single)),
    network(std::move(network.network)),
    parameter(std::move(network.parameter)),
    numFunctions(std::move(network.numFunctions)),
    deterministic(std::move(network.deterministic))
{
  // Nothing to do here.
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::~RNN()
{
  // The network owns its layers; free each one.
  for (LayerTypes<CustomLayers...>& layer : network)
  {
    boost::apply_visitor(deleteVisitor, layer);
  }
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
template<typename OptimizerType>
typename std::enable_if<
    HasMaxIterations<OptimizerType, size_t&(OptimizerType::*)()>
    ::value, void>::type
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::
WarnMessageMaxIterations(OptimizerType& optimizer, size_t samples) const
{
  if (optimizer.MaxIterations() < samples &&
      optimizer.MaxIterations() != 0)
  {
    Log::Warn << "The optimizer's maximum number of iterations is less than "
        << "the size of the dataset; the optimizer will not pass over the "
        << "entire dataset. To fix this, modify the maximum number of "
        << "iterations to be at least equal to the number of points of your "
        << "dataset (" << samples << ")." << std::endl;
  }
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
template<typename OptimizerType>
typename std::enable_if<
    !HasMaxIterations<OptimizerType, size_t&(OptimizerType::*)()>
    ::value, void>::type
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::
WarnMessageMaxIterations(OptimizerType& /* optimizer */,
                         size_t /* samples */) const
{
  // Nothing to do: this optimizer has no MaxIterations() to check.
}
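
// The two overloads of WarnMessageMaxIterations() above use SFINAE to
// dispatch on whether OptimizerType exposes a MaxIterations() member:
// optimizers that have one are checked against the dataset size, and all
// others fall through to the no-op overload.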

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
template<typename OptimizerType, typename... CallbackTypes>
double RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Train(
    arma::cube predictors,
    arma::cube responses,
    OptimizerType& optimizer,
    CallbackTypes&&... callbacks)
{
  CheckInputShape<std::vector<LayerTypes<CustomLayers...> > >(
      network, predictors.n_rows, "RNN<>::Train()");

  numFunctions = responses.n_cols;

  this->predictors = std::move(predictors);
  this->responses = std::move(responses);

  this->deterministic = true;
  ResetDeterministic();

  if (!reset)
  {
    ResetParameters();
  }

  WarnMessageMaxIterations<OptimizerType>(optimizer, this->predictors.n_cols);

  // Train the model.
  Timer::Start("rnn_optimization");
  const double out = optimizer.Optimize(*this, parameter, callbacks...);
  Timer::Stop("rnn_optimization");

  Log::Info << "RNN::Train(): final objective of trained model is " << out
      << "." << std::endl;
  return out;
}
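
// A minimal training sketch (illustrative only: the layer choice, sizes, and
// Adam hyperparameters below are assumptions, not part of this file):
//
//   RNN<NegativeLogLikelihood<> > model(rho);
//   model.Add<LSTM<> >(inputSize, hiddenSize, rho);
//   model.Add<Linear<> >(hiddenSize, outputSize);
//   model.Add<LogSoftMax<> >();
//
//   // predictors/responses are cubes of shape
//   // (dimensionality x sequences x rho), i.e. one slice per time step.
//   ens::Adam opt(0.01, 32);
//   model.Train(predictors, responses, opt);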

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::ResetCells()
{
  for (size_t i = 1; i < network.size(); ++i)
  {
    boost::apply_visitor(ResetCellVisitor(rho), network[i]);
  }
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
template<typename OptimizerType, typename... CallbackTypes>
double RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Train(
    arma::cube predictors,
    arma::cube responses,
    CallbackTypes&&... callbacks)
{
  CheckInputShape<std::vector<LayerTypes<CustomLayers...> > >(
      network, predictors.n_rows, "RNN<>::Train()");

  numFunctions = responses.n_cols;

  this->predictors = std::move(predictors);
  this->responses = std::move(responses);

  this->deterministic = true;
  ResetDeterministic();

  if (!reset)
  {
    ResetParameters();
  }

  // This overload constructs a default optimizer of the given type.
  OptimizerType optimizer;

  WarnMessageMaxIterations<OptimizerType>(optimizer, this->predictors.n_cols);

  // Train the model.
  Timer::Start("rnn_optimization");
  const double out = optimizer.Optimize(*this, parameter, callbacks...);
  Timer::Stop("rnn_optimization");

  Log::Info << "RNN::Train(): final objective of trained model is " << out
      << "." << std::endl;
  return out;
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Predict(
    arma::cube predictors, arma::cube& results, const size_t batchSize)
{
  CheckInputShape<std::vector<LayerTypes<CustomLayers...> > >(
      network, predictors.n_rows, "RNN<>::Predict()");

  ResetCells();

  if (parameter.is_empty())
  {
    ResetParameters();
  }

  if (!deterministic)
  {
    deterministic = true;
    ResetDeterministic();
  }

  // Run the first time step of the first batch to determine the output size.
  const size_t effectiveBatchSize = std::min(batchSize,
      size_t(predictors.n_cols));

  Forward(arma::mat(predictors.slice(0).colptr(0), predictors.n_rows,
      effectiveBatchSize, false, true));
  arma::mat resultsTemp = boost::apply_visitor(outputParameterVisitor,
      network.back());

  outputSize = resultsTemp.n_rows;
  results = arma::zeros<arma::cube>(outputSize, predictors.n_cols, rho);
  results.slice(0).submat(0, 0, results.n_rows - 1,
      effectiveBatchSize - 1) = resultsTemp;

  // Process in accordance with the given batch size.
  for (size_t begin = 0; begin < predictors.n_cols; begin += batchSize)
  {
    const size_t effectiveBatchSize = std::min(batchSize,
        size_t(predictors.n_cols - begin));
    // The first batch already ran its first time step above, so start at
    // seqNum = !begin (1 for the first batch, 0 for all later batches).
    for (size_t seqNum = !begin; seqNum < rho; ++seqNum)
    {
      Forward(arma::mat(predictors.slice(seqNum).colptr(begin),
          predictors.n_rows, effectiveBatchSize, false, true));

      results.slice(seqNum).submat(0, begin, results.n_rows - 1, begin +
          effectiveBatchSize - 1) = boost::apply_visitor(
          outputParameterVisitor, network.back());
    }
  }
}
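
// Prediction sketch (hypothetical names; `model` is a trained RNN):
//
//   arma::cube testData(inputSize, numSequences, rho);
//   arma::cube output;
//   model.Predict(testData, output);
//   // output has shape (outputSize x numSequences x rho).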

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
double RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Evaluate(
    const arma::mat& /* parameters */,
    const size_t begin,
    const size_t batchSize,
    const bool deterministic)
{
  if (parameter.is_empty())
  {
    ResetParameters();
  }

  if (deterministic != this->deterministic)
  {
    this->deterministic = deterministic;
    ResetDeterministic();
  }

  // Set the input and target dimensions from the data on the first call.
  if (!inputSize)
  {
    inputSize = predictors.n_rows;
    targetSize = responses.n_rows;
  }
  else if (targetSize == 0)
  {
    targetSize = responses.n_rows;
  }

  ResetCells();

  double performance = 0;
  size_t responseSeq = 0;

  for (size_t seqNum = 0; seqNum < rho; ++seqNum)
  {
    // Wrap a matrix around our data to avoid a copy.
    arma::mat stepData(predictors.slice(seqNum).colptr(begin),
        predictors.n_rows, batchSize, false, true);
    Forward(stepData);
    if (!single)
    {
      responseSeq = seqNum;
    }

    performance += outputLayer.Forward(boost::apply_visitor(
        outputParameterVisitor, network.back()),
        arma::mat(responses.slice(responseSeq).colptr(begin),
        responses.n_rows, batchSize, false, true));
  }

  if (outputSize == 0)
  {
    outputSize = boost::apply_visitor(outputParameterVisitor,
        network.back()).n_elem / batchSize;
  }

  return performance;
}
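
// Together with the three-argument overload below, Evaluate() implements the
// separable-objective interface used by ensmallen's SGD-family optimizers:
// it returns the loss of the batchSize sequences starting at column `begin`,
// summed over the time steps of each sequence.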

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
double RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Evaluate(
    const arma::mat& parameters,
    const size_t begin,
    const size_t batchSize)
{
  // By default, evaluate deterministically.
  return Evaluate(parameters, begin, batchSize, true);
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
template<typename GradType>
double RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::
EvaluateWithGradient(const arma::mat& /* parameters */,
                     const size_t begin,
                     GradType& gradient,
                     const size_t batchSize)
{
  // Initialize passed gradient.
  if (gradient.is_empty())
  {
    if (parameter.is_empty())
    {
      ResetParameters();
    }

    gradient = arma::zeros<arma::mat>(parameter.n_rows, parameter.n_cols);
  }
  else
  {
    gradient.zeros();
  }

  if (this->deterministic)
  {
    this->deterministic = false;
    ResetDeterministic();
  }

  // Set the input and target dimensions from the data on the first call.
  if (!inputSize)
  {
    inputSize = predictors.n_rows;
    targetSize = responses.n_rows;
  }
  else if (targetSize == 0)
  {
    targetSize = responses.n_rows;
  }

  ResetCells();

  double performance = 0;
  size_t responseSeq = 0;
  // Don't unroll past the number of time steps the responses actually hold.
  const size_t effectiveRho = std::min(rho, size_t(responses.n_slices));

  // Forward pass: step through the sequence, saving every layer's output at
  // each step so the backward pass can restore the per-step state.
  for (size_t seqNum = 0; seqNum < effectiveRho; ++seqNum)
  {
    // Wrap a matrix around our data to avoid a copy.
    arma::mat stepData(predictors.slice(seqNum).colptr(begin),
        predictors.n_rows, batchSize, false, true);
    Forward(stepData);
    if (!single)
    {
      responseSeq = seqNum;
    }

    for (size_t l = 0; l < network.size(); ++l)
    {
      boost::apply_visitor(SaveOutputParameterVisitor(moduleOutputParameter),
          network[l]);
    }

    performance += outputLayer.Forward(boost::apply_visitor(
        outputParameterVisitor, network.back()),
        arma::mat(responses.slice(responseSeq).colptr(begin),
        responses.n_rows, batchSize, false, true));
  }

  if (outputSize == 0)
  {
    outputSize = boost::apply_visitor(outputParameterVisitor,
        network.back()).n_elem / batchSize;
  }

  // Initialize current/working gradient.
  if (currentGradient.is_empty())
  {
    currentGradient = arma::zeros<arma::mat>(parameter.n_rows,
        parameter.n_cols);
  }

  ResetGradients(currentGradient);

  // Backward pass: replay the time steps in reverse, restoring the saved
  // layer outputs, and accumulate the per-step gradients.
  for (size_t seqNum = 0; seqNum < effectiveRho; ++seqNum)
  {
    currentGradient.zeros();
    for (size_t l = 0; l < network.size(); ++l)
    {
      boost::apply_visitor(LoadOutputParameterVisitor(moduleOutputParameter),
          network[network.size() - 1 - l]);
    }

    if (single && seqNum > 0)
    {
      // In single-response mode only the last time step carries an error.
      error.zeros();
    }
    else if (single && seqNum == 0)
    {
      outputLayer.Backward(boost::apply_visitor(
          outputParameterVisitor, network.back()),
          arma::mat(responses.slice(0).colptr(begin),
          responses.n_rows, batchSize, false, true), error);
    }
    else
    {
      outputLayer.Backward(boost::apply_visitor(
          outputParameterVisitor, network.back()),
          arma::mat(responses.slice(effectiveRho - seqNum - 1).colptr(begin),
          responses.n_rows, batchSize, false, true), error);
    }

    Backward();
    Gradient(
        arma::mat(predictors.slice(effectiveRho - seqNum - 1).colptr(begin),
        predictors.n_rows, batchSize, false, true));
    gradient += currentGradient;
  }

  return performance;
}
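
// EvaluateWithGradient() above performs truncated backpropagation through
// time: gradients are accumulated over at most rho (effectiveRho) steps, so
// dependencies longer than the unroll horizon do not contribute to the
// update.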

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Gradient(
    const arma::mat& parameters,
    const size_t begin,
    arma::mat& gradient,
    const size_t batchSize)
{
  this->EvaluateWithGradient(parameters, begin, gradient, batchSize);
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Shuffle()
{
  arma::cube newPredictors, newResponses;
  math::ShuffleData(predictors, responses, newPredictors, newResponses);

  predictors = std::move(newPredictors);
  responses = std::move(newResponses);
}
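
// Shuffle() is invoked by ensmallen optimizers between passes over the data
// when shuffling is enabled; it permutes whole sequences (cube columns) so
// that predictors and responses stay aligned.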

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::ResetParameters()
{
  ResetDeterministic();

  // Reset the network parameter with the given initialization rule.
  NetworkInitialization<InitializationRuleType,
      CustomLayers...> networkInit(initializeRule);
  networkInit.Initialize(network, parameter);

  reset = true;
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Reset()
{
  ResetParameters();
  ResetCells();
  currentGradient.zeros();
  ResetGradients(currentGradient);
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::ResetDeterministic()
{
  DeterministicSetVisitor deterministicSetVisitor(deterministic);
  std::for_each(network.begin(), network.end(),
      boost::apply_visitor(deterministicSetVisitor));
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::ResetGradients(
    arma::mat& gradient)
{
  size_t offset = 0;
  for (LayerTypes<CustomLayers...>& layer : network)
  {
    offset += boost::apply_visitor(GradientSetVisitor(gradient, offset),
        layer);
  }
}
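
// After ResetGradients(), each layer's gradient object is an alias into a
// contiguous region of `gradient`, so per-layer gradient updates write
// directly into the shared matrix.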

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
template<typename InputType>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::Forward(const InputType& input)
{
  boost::apply_visitor(ForwardVisitor(input,
      boost::apply_visitor(outputParameterVisitor, network.front())),
      network.front());

  for (size_t i = 1; i < network.size(); ++i)
  {
    boost::apply_visitor(ForwardVisitor(
        boost::apply_visitor(outputParameterVisitor, network[i - 1]),
        boost::apply_visitor(outputParameterVisitor, network[i])),
        network[i]);
  }
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Backward()
{
  boost::apply_visitor(BackwardVisitor(
      boost::apply_visitor(outputParameterVisitor, network.back()),
      error, boost::apply_visitor(deltaVisitor,
      network.back())), network.back());

  for (size_t i = 2; i < network.size(); ++i)
  {
    boost::apply_visitor(BackwardVisitor(
        boost::apply_visitor(outputParameterVisitor,
        network[network.size() - i]), boost::apply_visitor(
        deltaVisitor, network[network.size() - i + 1]),
        boost::apply_visitor(deltaVisitor, network[network.size() - i])),
        network[network.size() - i]);
  }
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
template<typename InputType>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::Gradient(const InputType& input)
{
  boost::apply_visitor(GradientVisitor(input,
      boost::apply_visitor(deltaVisitor, network[1])), network.front());

  for (size_t i = 1; i < network.size() - 1; ++i)
  {
    boost::apply_visitor(GradientVisitor(
        boost::apply_visitor(outputParameterVisitor, network[i - 1]),
        boost::apply_visitor(deltaVisitor, network[i + 1])),
        network[i]);
  }
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
template<typename Archive>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::serialize(
    Archive& ar, const uint32_t /* version */)
{
  ar(CEREAL_NVP(parameter));
  ar(CEREAL_NVP(rho));
  ar(CEREAL_NVP(single));
  ar(CEREAL_NVP(inputSize));
  ar(CEREAL_NVP(outputSize));
  ar(CEREAL_NVP(targetSize));
  ar(CEREAL_NVP(reset));

  // If we are loading, delete any existing layers before deserializing.
  if (cereal::is_loading<Archive>())
  {
    std::for_each(network.begin(), network.end(),
        boost::apply_visitor(deleteVisitor));
    network.clear();
  }

  ar(CEREAL_VECTOR_VARIANT_POINTER(network));

  // If we are loading, we need to initialize the weights.
  if (cereal::is_loading<Archive>())
  {
    size_t offset = 0;
    for (LayerTypes<CustomLayers...>& layer : network)
    {
      offset += boost::apply_visitor(WeightSetVisitor(parameter, offset),
          layer);

      boost::apply_visitor(resetVisitor, layer);
    }

    deterministic = true;
    ResetDeterministic();
  }
}
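
// Serialization sketch (a hedged example; `model`, `model2`, and "model.bin"
// are placeholders, and data::Save()/data::Load() drive serialize() above):
//
//   mlpack::data::Save("model.bin", "rnn", model);
//
//   RNN<NegativeLogLikelihood<> > model2(rho);
//   mlpack::data::Load("model.bin", "rnn", model2);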

} // namespace ann
} // namespace mlpack

#endif