Aakash-kaushik/mlpack/q__learning_8hpp_source.html

 #ifndef MLPACK_METHODS_RL_Q_LEARNING_HPP
 #define MLPACK_METHODS_RL_Q_LEARNING_HPP

 #include <mlpack/prereqs.hpp>
 #include <ensmallen.hpp>

 #include "replay/random_replay.hpp"
 #include "replay/prioritized_replay.hpp"
 #include "training_config.hpp"

 namespace mlpack {
 namespace rl {

 /**
  * Implementation of various Q-Learning algorithms, such as DQN, double DQN.
  *
  * @tparam EnvironmentType The environment the agent acts in. Must expose the
  *     nested types `State` and `Action`, and must be default-constructible
  *     for the constructor's default argument to be usable.
  * @tparam NetworkType The type of the learning (and target) network.
  * @tparam UpdaterType The optimizer used to update the network. Must be
  *     default-constructible for the constructor's default argument to be
  *     usable; with ensmallen >= 2 it must also expose a nested
  *     `Policy<arma::mat, arma::mat>` template.
  * @tparam PolicyType The behavior policy of the agent.
  * @tparam ReplayType The experience replay method; defaults to
  *     RandomReplay<EnvironmentType>.
  */
 template <
   typename EnvironmentType,
   typename NetworkType,
   typename UpdaterType,
   typename PolicyType,
   typename ReplayType = RandomReplay<EnvironmentType>
 >
 class QLearning
 {
  public:
   //! Convenient typedef for state.
   using StateType = typename EnvironmentType::State;

   //! Convenient typedef for action.
   using ActionType = typename EnvironmentType::Action;

   /**
    * Create the QLearning object with given settings.
    *
    * The config, network, policy and replay method are taken by reference and
    * stored as reference members, so the caller must keep them alive for as
    * long as this object is used. The updater and environment are taken by
    * value.
    *
    * @param config Hyper-parameters for training.
    * @param network The network to be trained.
    * @param policy Behavior policy of the agent.
    * @param replayMethod Experience replay method.
    * @param updater How to apply gradients when training.
    * @param environment The environment the agent acts in.
    */
   QLearning(TrainingConfig& config,
             NetworkType& network,
             PolicyType& policy,
             ReplayType& replayMethod,
             UpdaterType updater = UpdaterType(),
             EnvironmentType environment = EnvironmentType());

   //! Clean memory.
   ~QLearning();

   #if ENS_VERSION_MAJOR >= 2
   // This class holds a raw `updatePolicy` pointer and declares a destructor
   // that cleans up memory (presumably freeing that pointer; see
   // q_learning_impl.hpp). The implicitly generated copy constructor would
   // duplicate the pointer, so destroying both copies would release it twice.
   // Copying is therefore disabled. Copy-assignment is already implicitly
   // deleted because of the reference members; it is spelled out for clarity.
   QLearning(const QLearning&) = delete;
   QLearning& operator=(const QLearning&) = delete;
   #endif

   //! Trains the DQN agent (non-categorical).
   void TrainAgent();

   //! Trains the DQN agent of categorical type.
   void TrainCategoricalAgent();

   //! Select an action, given an agent.
   void SelectAction();

   /**
    * Execute an episode.
    *
    * @return A double computed by the implementation; presumably the total
    *     return of the episode (confirm in q_learning_impl.hpp).
    */
   double Episode();

   //! Modify total steps from beginning.
   size_t& TotalSteps() { return totalSteps; }
   //! Get total steps from beginning.
   const size_t& TotalSteps() const { return totalSteps; }

   //! Modify the state of the agent.
   StateType& State() { return state; }
   //! Get the state of the agent.
   const StateType& State() const { return state; }

   //! Get the action of the agent.
   const ActionType& Action() const { return action; }

   //! Modify the environment in which the agent is.
   EnvironmentType& Environment() { return environment; }
   //! Get the environment in which the agent is.
   const EnvironmentType& Environment() const { return environment; }

   //! Modify the training mode / test mode indicator.
   bool& Deterministic() { return deterministic; }
   //! Get the indicator of training mode / test mode.
   const bool& Deterministic() const { return deterministic; }

   //! Return the learning network.
   const NetworkType& Network() const { return learningNetwork; }
   //! Modify the learning network.
   NetworkType& Network() { return learningNetwork; }

  private:
   /**
    * Pick the best action(s) from a matrix of action values.
    *
    * NOTE(review): presumably returns, for each column of actionValues, the
    * index of the highest-valued action; confirm in q_learning_impl.hpp.
    *
    * @param actionValues Matrix of action values.
    * @return Column vector of action indices.
    */
   arma::Col<size_t> BestAction(const arma::mat& actionValues);

   //! Reference to the caller-owned training configuration.
   TrainingConfig& config;

   //! Reference to the caller-owned learning network (see Network()).
   NetworkType& learningNetwork;

   //! Target network, held by value (a separate object from learningNetwork).
   NetworkType targetNetwork;

   //! Reference to the caller-owned behavior policy.
   PolicyType& policy;

   //! Reference to the caller-owned experience replay method.
   ReplayType& replayMethod;

   //! Updater (optimizer) instance, held by value.
   UpdaterType updater;
   #if ENS_VERSION_MAJOR >= 2
   //! Raw pointer to the updater's policy instance (ensmallen >= 2 only).
   typename UpdaterType::template Policy<arma::mat, arma::mat>* updatePolicy;
   #endif

   //! Environment instance, held by value (see Environment()).
   EnvironmentType environment;

   //! Total steps from the beginning of the task.
   size_t totalSteps;

   //! Current state of the agent.
   StateType state;

   //! Current action of the agent.
   ActionType action;

   //! Indicator of training mode / test mode.
   bool deterministic;
 };

 } // namespace rl
 } // namespace mlpack

 // Include implementation
 #include "q_learning_impl.hpp"
 #endif
mlpack::rl::QLearning::Environment
EnvironmentType & Environment()
Modify the environment in which the agent is.
Definition: q_learning.hpp:128

mlpack::rl::QLearning::StateType
typename EnvironmentType::State StateType
Convenient typedef for state.
Definition: q_learning.hpp:63

mlpack
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1

random_replay.hpp

mlpack::rl::QLearning::Action
const ActionType & Action() const
Get the action of the agent.
Definition: q_learning.hpp:125

mlpack::rl::QLearning::QLearning
QLearning(TrainingConfig &config, NetworkType &network, PolicyType &policy, ReplayType &replayMethod, UpdaterType updater=UpdaterType(), EnvironmentType environment=EnvironmentType())
Create the QLearning object with given settings.
Definition: q_learning_impl.hpp:33

prereqs.hpp
The core includes that mlpack expects; standard C++ includes and Armadillo.

mlpack::rl::QLearning::~QLearning
~QLearning()
Clean memory.
Definition: q_learning_impl.hpp:87

mlpack::rl::QLearning::SelectAction
void SelectAction()
Select an action, given an agent.
Definition: q_learning_impl.hpp:331

mlpack::rl::QLearning::Deterministic
const bool & Deterministic() const
Get the indicator of training mode / test mode.
Definition: q_learning.hpp:135

mlpack::rl::QLearning::TotalSteps
const size_t & TotalSteps() const
Get total steps from beginning.
Definition: q_learning.hpp:117

mlpack::rl::QLearning::State
const StateType & State() const
Get the state of the agent.
Definition: q_learning.hpp:122

q_learning_impl.hpp

mlpack::rl::QLearning::Episode
double Episode()
Execute an episode.
Definition: q_learning_impl.hpp:354

mlpack::rl::QLearning::TrainAgent
void TrainAgent()
Trains the DQN agent (non-categorical).
Definition: q_learning_impl.hpp:133

mlpack::rl::QLearning::Deterministic
bool & Deterministic()
Modify the training mode / test mode indicator.
Definition: q_learning.hpp:133

mlpack::rl::QLearning::Network
const NetworkType & Network() const
Return the learning network.
Definition: q_learning.hpp:138

mlpack::rl::QLearning::TotalSteps
size_t & TotalSteps()
Modify total steps from beginning.
Definition: q_learning.hpp:115

mlpack::rl::TrainingConfig
Definition: training_config.hpp:19

prioritized_replay.hpp

mlpack::rl::QLearning::Environment
const EnvironmentType & Environment() const
Get the environment in which the agent is.
Definition: q_learning.hpp:130

mlpack::rl::QLearning
Implementation of various Q-Learning algorithms, such as DQN, double DQN.
Definition: q_learning.hpp:59

training_config.hpp

mlpack::rl::QLearning::Network
NetworkType & Network()
Modify the learning network.
Definition: q_learning.hpp:140

mlpack::rl::QLearning::ActionType
typename EnvironmentType::Action ActionType
Convenient typedef for action.
Definition: q_learning.hpp:66

mlpack::rl::QLearning::State
StateType & State()
Modify the state of the agent.
Definition: q_learning.hpp:120

mlpack::rl::QLearning::TrainCategoricalAgent
void TrainCategoricalAgent()
Trains the DQN agent of categorical type.
Definition: q_learning_impl.hpp:220