#ifndef MLPACK_METHODS_ANN_RNN_IMPL_HPP
#define MLPACK_METHODS_ANN_RNN_IMPL_HPP

// In case it hasn't been included yet.
#include "rnn.hpp"

#include "visitor/load_output_parameter_visitor.hpp"
#include "visitor/save_output_parameter_visitor.hpp"
#include "visitor/forward_visitor.hpp"
#include "visitor/backward_visitor.hpp"
#include "visitor/reset_cell_visitor.hpp"
#include "visitor/deterministic_set_visitor.hpp"
#include "visitor/gradient_set_visitor.hpp"
#include "visitor/gradient_visitor.hpp"
#include "visitor/weight_set_visitor.hpp"

#include "util/check_input_shape.hpp"

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::RNN(
    const size_t rho,
    const bool single,
    OutputLayerType outputLayer,
    InitializationRuleType initializeRule) :
    rho(rho),
    outputLayer(std::move(outputLayer)),
    initializeRule(std::move(initializeRule)),
    inputSize(0),
    outputSize(0),
    targetSize(0),
    reset(false),
    single(single),
    numFunctions(0),
    deterministic(true)
{
  /* Nothing to do here. */
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::RNN(
    const RNN& network) :
    rho(network.rho),
    outputLayer(network.outputLayer),
    initializeRule(network.initializeRule),
    inputSize(network.inputSize),
    outputSize(network.outputSize),
    targetSize(network.targetSize),
    reset(network.reset),
    single(network.single),
    parameter(network.parameter),
    numFunctions(network.numFunctions),
    deterministic(network.deterministic)
{
  // Deep-copy each layer of the other network, then reset the copied layer.
  for (size_t i = 0; i < network.network.size(); ++i)
  {
    this->network.push_back(boost::apply_visitor(copyVisitor,
        network.network[i]));
    boost::apply_visitor(resetVisitor, this->network.back());
  }
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::RNN(
    RNN&& network) :
    rho(std::move(network.rho)),
    outputLayer(std::move(network.outputLayer)),
    initializeRule(std::move(network.initializeRule)),
    inputSize(std::move(network.inputSize)),
    outputSize(std::move(network.outputSize)),
    targetSize(std::move(network.targetSize)),
    reset(std::move(network.reset)),
    single(std::move(network.single)),
    network(std::move(network.network)),
    parameter(std::move(network.parameter)),
    numFunctions(std::move(network.numFunctions)),
    deterministic(std::move(network.deterministic))
{
  /* Nothing to do here. */
}

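/*
 * Construction sketch (illustrative, not part of the original file): with the
 * default template arguments declared in rnn.hpp, a network that produces one
 * output per time step of a length-`rho` sequence can be created as
 *
 * @code
 * RNN<> model(rho); // single = false: one response per time step.
 * @endcode
 *
 * Passing `single = true` instead trains against a single response per
 * sequence.
 */
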
template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::~RNN()
{
  // Free the memory held by each layer.
  for (LayerTypes<CustomLayers...>& layer : network)
  {
    boost::apply_visitor(deleteVisitor, layer);
  }
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
template<typename OptimizerType>
typename std::enable_if<
    HasMaxIterations<OptimizerType, size_t&(OptimizerType::*)()>::value,
    void>::type
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::
WarnMessageMaxIterations(OptimizerType& optimizer, size_t samples) const
{
  if (optimizer.MaxIterations() < samples &&
      optimizer.MaxIterations() != 0)
  {
    Log::Warn << "The optimizer's maximum number of iterations "
        << "is less than the size of the dataset; the "
        << "optimizer will not pass over the entire "
        << "dataset. To fix this, modify the maximum "
        << "number of iterations to be at least equal "
        << "to the number of points of your dataset "
        << "(" << samples << ")." << std::endl;
  }
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
template<typename OptimizerType>
typename std::enable_if<
    !HasMaxIterations<OptimizerType, size_t&(OptimizerType::*)()>::value,
    void>::type
RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::
WarnMessageMaxIterations(OptimizerType& /* optimizer */,
                         size_t /* samples */) const
{
  // This overload is selected for optimizers that do not expose
  // MaxIterations(), so there is nothing to check.
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
template<typename OptimizerType, typename... CallbackTypes>
double RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Train(
    arma::cube predictors,
    arma::cube responses,
    OptimizerType& optimizer,
    CallbackTypes&&... callbacks)
{
  CheckInputShape<std::vector<LayerTypes<CustomLayers...> > >(
      network, predictors.n_rows, "RNN<>::Train()");

  numFunctions = responses.n_cols;

  this->predictors = std::move(predictors);
  this->responses = std::move(responses);

  this->deterministic = true;
  ResetDeterministic();

  if (!reset)
  {
    ResetParameters();
  }

  WarnMessageMaxIterations<OptimizerType>(optimizer, this->predictors.n_cols);

  // Train the model.
  Timer::Start("rnn_optimization");
  const double out = optimizer.Optimize(*this, parameter, callbacks...);
  Timer::Stop("rnn_optimization");

  Log::Info << "RNN::Train(): final objective of trained model is " << out
      << "." << std::endl;
  return out;
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::ResetCells()
{
  for (size_t i = 1; i < network.size(); ++i)
  {
    boost::apply_visitor(ResetCellVisitor(rho), network[i]);
  }
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
template<typename OptimizerType, typename... CallbackTypes>
double RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Train(
    arma::cube predictors,
    arma::cube responses,
    CallbackTypes&&... callbacks)
{
  CheckInputShape<std::vector<LayerTypes<CustomLayers...> > >(
      network, predictors.n_rows, "RNN<>::Train()");

  numFunctions = responses.n_cols;

  this->predictors = std::move(predictors);
  this->responses = std::move(responses);

  this->deterministic = true;
  ResetDeterministic();

  if (!reset)
  {
    ResetParameters();
  }

  // No optimizer was given, so default-construct one.
  OptimizerType optimizer;

  WarnMessageMaxIterations<OptimizerType>(optimizer, this->predictors.n_cols);

  // Train the model.
  Timer::Start("rnn_optimization");
  const double out = optimizer.Optimize(*this, parameter, callbacks...);
  Timer::Stop("rnn_optimization");

  Log::Info << "RNN::Train(): final objective of trained model is " << out
      << "." << std::endl;
  return out;
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Predict(
    arma::cube predictors, arma::cube& results, const size_t batchSize)
{
  CheckInputShape<std::vector<LayerTypes<CustomLayers...> > >(
      network, predictors.n_rows, "RNN<>::Predict()");

  ResetCells();

  if (parameter.is_empty())
  {
    ResetParameters();
  }

  if (!deterministic)
  {
    deterministic = true;
    ResetDeterministic();
  }

  const size_t effectiveBatchSize = std::min(batchSize,
      size_t(predictors.n_cols));

  // Run one forward pass to determine the output size of the network.
  Forward(arma::mat(predictors.slice(0).colptr(0), predictors.n_rows,
      effectiveBatchSize, false, true));
  arma::mat resultsTemp = boost::apply_visitor(outputParameterVisitor,
      network.back());

  outputSize = resultsTemp.n_rows;
  results = arma::zeros<arma::cube>(outputSize, predictors.n_cols, rho);
  results.slice(0).submat(0, 0, results.n_rows - 1,
      effectiveBatchSize - 1) = resultsTemp;

  // Process the data in accordance with the given batch size.
  for (size_t begin = 0; begin < predictors.n_cols; begin += batchSize)
  {
    const size_t effectiveBatchSize = std::min(batchSize,
        size_t(predictors.n_cols - begin));
    // The first time step of the first batch was already computed above.
    for (size_t seqNum = !begin; seqNum < rho; ++seqNum)
    {
      Forward(arma::mat(predictors.slice(seqNum).colptr(begin),
          predictors.n_rows, effectiveBatchSize, false, true));

      results.slice(seqNum).submat(0, begin, results.n_rows - 1, begin +
          effectiveBatchSize - 1) = boost::apply_visitor(
          outputParameterVisitor, network.back());
    }
  }
}

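/*
 * Data-layout sketch (illustrative, not part of the original file): each
 * slice of the cube is one time step, so predictors has shape
 * (inputSize x nSequences x rho) and Predict() fills results with shape
 * (outputSize x nSequences x rho):
 *
 * @code
 * arma::cube predictors(inputSize, nSequences, rho, arma::fill::randu);
 * arma::cube results;
 * model.Predict(predictors, results); // batchSize defaults per rnn.hpp.
 * @endcode
 */
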
template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
double RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Evaluate(
    const arma::mat& /* parameters */,
    const size_t begin,
    const size_t batchSize,
    const bool deterministic)
{
  if (parameter.is_empty())
  {
    ResetParameters();
  }

  if (deterministic != this->deterministic)
  {
    this->deterministic = deterministic;
    ResetDeterministic();
  }

  if (!inputSize)
  {
    inputSize = predictors.n_rows;
    targetSize = responses.n_rows;
  }
  else if (targetSize == 0)
  {
    targetSize = responses.n_rows;
  }

  ResetCells();

  double performance = 0;
  size_t responseSeq = 0;

  for (size_t seqNum = 0; seqNum < rho; ++seqNum)
  {
    // Wrap a matrix around our data to avoid a copy.
    arma::mat stepData(predictors.slice(seqNum).colptr(begin),
        predictors.n_rows, batchSize, false, true);
    Forward(stepData);

    if (!single)
    {
      responseSeq = seqNum;
    }

    performance += outputLayer.Forward(boost::apply_visitor(
        outputParameterVisitor, network.back()),
        arma::mat(responses.slice(responseSeq).colptr(begin),
        responses.n_rows, batchSize, false, true));
  }

  if (outputSize == 0)
  {
    outputSize = boost::apply_visitor(outputParameterVisitor,
        network.back()).n_elem / batchSize;
  }

  return performance;
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
double RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Evaluate(
    const arma::mat& parameters,
    const size_t begin,
    const size_t batchSize)
{
  return Evaluate(parameters, begin, batchSize, true);
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
template<typename GradType>
double RNN<OutputLayerType, InitializationRuleType,
           CustomLayers...>::EvaluateWithGradient(
    const arma::mat& /* parameters */,
    const size_t begin,
    GradType& gradient,
    const size_t batchSize)
{
  // Initialize passed gradient.
  if (gradient.is_empty())
  {
    if (parameter.is_empty())
    {
      ResetParameters();
    }
    gradient = arma::zeros<arma::mat>(parameter.n_rows, parameter.n_cols);
  }
  else
  {
    gradient.zeros();
  }

  if (this->deterministic)
  {
    this->deterministic = false;
    ResetDeterministic();
  }

  if (!inputSize)
  {
    inputSize = predictors.n_rows;
    targetSize = responses.n_rows;
  }
  else if (targetSize == 0)
  {
    targetSize = responses.n_rows;
  }

  ResetCells();

  double performance = 0;
  size_t responseSeq = 0;
  const size_t effectiveRho = std::min(rho, size_t(responses.size()));

  // Forward pass: step through the sequence, saving the output parameter of
  // every layer at every time step so the backward pass can restore it.
  for (size_t seqNum = 0; seqNum < effectiveRho; ++seqNum)
  {
    // Wrap a matrix around our data to avoid a copy.
    arma::mat stepData(predictors.slice(seqNum).colptr(begin),
        predictors.n_rows, batchSize, false, true);
    Forward(stepData);

    if (!single)
    {
      responseSeq = seqNum;
    }

    for (size_t l = 0; l < network.size(); ++l)
    {
      boost::apply_visitor(SaveOutputParameterVisitor(moduleOutputParameter),
          network[l]);
    }

    performance += outputLayer.Forward(boost::apply_visitor(
        outputParameterVisitor, network.back()),
        arma::mat(responses.slice(responseSeq).colptr(begin),
        responses.n_rows, batchSize, false, true));
  }

  if (outputSize == 0)
  {
    outputSize = boost::apply_visitor(outputParameterVisitor,
        network.back()).n_elem / batchSize;
  }

  // Initialize the working gradient for a single time step.
  if (currentGradient.is_empty())
  {
    currentGradient = arma::zeros<arma::mat>(parameter.n_rows,
        parameter.n_cols);
  }

  ResetGradients(currentGradient);

  // Backward pass (backpropagation through time): step backwards through the
  // sequence, restoring the saved layer outputs for each time step.
  for (size_t seqNum = 0; seqNum < effectiveRho; ++seqNum)
  {
    currentGradient.zeros();
    for (size_t l = 0; l < network.size(); ++l)
    {
      boost::apply_visitor(LoadOutputParameterVisitor(moduleOutputParameter),
          network[network.size() - 1 - l]);
    }

    if (single && seqNum > 0)
    {
      error.zeros();
    }
    else if (single && seqNum == 0)
    {
      outputLayer.Backward(boost::apply_visitor(
          outputParameterVisitor, network.back()),
          arma::mat(responses.slice(0).colptr(begin),
          responses.n_rows, batchSize, false, true), error);
    }
    else
    {
      outputLayer.Backward(boost::apply_visitor(
          outputParameterVisitor, network.back()),
          arma::mat(responses.slice(effectiveRho - seqNum - 1).colptr(begin),
          responses.n_rows, batchSize, false, true), error);
    }

    Backward();
    Gradient(
        arma::mat(predictors.slice(effectiveRho - seqNum - 1).colptr(begin),
        predictors.n_rows, batchSize, false, true));
    gradient += currentGradient;
  }

  return performance;
}

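/*
 * Sketch of how an ensmallen optimizer drives this function (illustrative;
 * ens::Adam stands in for any differentiable separable optimizer): for each
 * batch offset it calls
 *
 * @code
 * const double objective =
 *     model.EvaluateWithGradient(parameter, begin, gradient, batchSize);
 * @endcode
 *
 * then applies its update rule to `parameter` using `gradient`, and calls
 * Shuffle() between epochs when shuffling is enabled.
 */
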
template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Gradient(
    const arma::mat& parameters,
    const size_t begin,
    arma::mat& gradient,
    const size_t batchSize)
{
  this->EvaluateWithGradient(parameters, begin, gradient, batchSize);
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Shuffle()
{
  arma::cube newPredictors, newResponses;
  math::ShuffleData(predictors, responses, newPredictors, newResponses);

  predictors = std::move(newPredictors);
  responses = std::move(newResponses);
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::ResetParameters()
{
  ResetDeterministic();

  // Reset the network parameters with the given initialization rule.
  NetworkInitialization<InitializationRuleType,
                        CustomLayers...> networkInit(initializeRule);
  networkInit.Initialize(network, parameter);

  reset = true;
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Reset()
{
  ResetParameters();
  ResetCells();
  currentGradient.zeros();
  ResetGradients(currentGradient);
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::ResetDeterministic()
{
  DeterministicSetVisitor deterministicSetVisitor(deterministic);
  std::for_each(network.begin(), network.end(),
      boost::apply_visitor(deterministicSetVisitor));
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::ResetGradients(arma::mat& gradient)
{
  size_t offset = 0;
  for (LayerTypes<CustomLayers...>& layer : network)
  {
    offset += boost::apply_visitor(GradientSetVisitor(gradient, offset),
        layer);
  }
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
template<typename InputType>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::Forward(const InputType& input)
{
  boost::apply_visitor(ForwardVisitor(input,
      boost::apply_visitor(outputParameterVisitor, network.front())),
      network.front());

  for (size_t i = 1; i < network.size(); ++i)
  {
    boost::apply_visitor(ForwardVisitor(
        boost::apply_visitor(outputParameterVisitor, network[i - 1]),
        boost::apply_visitor(outputParameterVisitor, network[i])),
        network[i]);
  }
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::Backward()
{
  boost::apply_visitor(BackwardVisitor(
      boost::apply_visitor(outputParameterVisitor, network.back()),
      error, boost::apply_visitor(deltaVisitor,
      network.back())), network.back());

  for (size_t i = 2; i < network.size(); ++i)
  {
    boost::apply_visitor(BackwardVisitor(
        boost::apply_visitor(outputParameterVisitor,
        network[network.size() - i]), boost::apply_visitor(
        deltaVisitor, network[network.size() - i + 1]),
        boost::apply_visitor(deltaVisitor, network[network.size() - i])),
        network[network.size() - i]);
  }
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
template<typename InputType>
void RNN<OutputLayerType, InitializationRuleType,
         CustomLayers...>::Gradient(const InputType& input)
{
  boost::apply_visitor(GradientVisitor(input,
      boost::apply_visitor(deltaVisitor, network[1])), network.front());

  for (size_t i = 1; i < network.size() - 1; ++i)
  {
    boost::apply_visitor(GradientVisitor(
        boost::apply_visitor(outputParameterVisitor, network[i - 1]),
        boost::apply_visitor(deltaVisitor, network[i + 1])),
        network[i]);
  }
}

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayers>
template<typename Archive>
void RNN<OutputLayerType, InitializationRuleType, CustomLayers...>::serialize(
    Archive& ar, const uint32_t /* version */)
{
  ar(CEREAL_NVP(parameter));
  ar(CEREAL_NVP(rho));
  ar(CEREAL_NVP(single));
  ar(CEREAL_NVP(inputSize));
  ar(CEREAL_NVP(outputSize));
  ar(CEREAL_NVP(targetSize));
  ar(CEREAL_NVP(reset));

  // If loading, delete the old layers before deserializing the new ones.
  if (cereal::is_loading<Archive>())
  {
    std::for_each(network.begin(), network.end(),
        boost::apply_visitor(deleteVisitor));
    network.clear();
  }

  ar(CEREAL_VECTOR_VARIANT_POINTER(network));

  // If we are loading, we need to initialize the weights of each layer.
  if (cereal::is_loading<Archive>())
  {
    size_t offset = 0;
    for (LayerTypes<CustomLayers...>& layer : network)
    {
      offset += boost::apply_visitor(WeightSetVisitor(parameter, offset),
          layer);
      boost::apply_visitor(resetVisitor, layer);
    }

    deterministic = true;
    ResetDeterministic();
  }
}

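/*
 * Save/load sketch (illustrative, not part of the original file; data::Save
 * and data::Load are mlpack's serialization helpers, and "model.bin" is a
 * hypothetical file name):
 *
 * @code
 * RNN<> model(rho);
 * // ... add layers, train ...
 * data::Save("model.bin", "model", model);
 *
 * RNN<> model2(rho);
 * data::Load("model.bin", "model", model2);
 * @endcode
 */
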
} // namespace ann
} // namespace mlpack

#endif