Aakash-kaushik/mlpack/async__learning__impl_8hpp_source.html

 #ifndef MLPACK_METHODS_RL_ASYNC_LEARNING_IMPL_HPP
 #define MLPACK_METHODS_RL_ASYNC_LEARNING_IMPL_HPP

 #include <mlpack/prereqs.hpp>
 #include "queue"

 namespace mlpack {
 namespace rl {

 /**
  * Construct the AsyncLearning wrapper by taking ownership of its components.
  *
  * Every argument is received by value and then moved into the member of the
  * same name, so callers may pass either copies or rvalues.
  *
  * @param config Training configuration; stored in `config`.
  * @param network Network to be trained; stored in `learningNetwork`.
  * @param policy Policy object; stored in `policy`.
  * @param updater Updater object; stored in `updater`.
  * @param environment Environment object; stored in `environment`.
  */
 template <
   typename WorkerType,
   typename EnvironmentType,
   typename NetworkType,
   typename UpdaterType,
   typename PolicyType
 >
 AsyncLearning<
   WorkerType,
   EnvironmentType,
   NetworkType,
   UpdaterType,
   PolicyType
 >::AsyncLearning(
     TrainingConfig config,
     NetworkType network,
     PolicyType policy,
     UpdaterType updater,
     EnvironmentType environment):
     // Inside each initializer the parenthesized name resolves to the
     // constructor parameter, the name before it to the member.
     config(std::move(config)),
     learningNetwork(std::move(network)),
     policy(std::move(policy)),
     updater(std::move(updater)),
     environment(std::move(environment))
 { /* Nothing to do here. */ }

 /**
  * Run asynchronous training until the given measure requests a stop.
  *
  * The stored learning network is moved into a local object for the duration
  * of the call (its parameters are reset first if they are empty), a copy of
  * it is used as the target network, and the local network is moved back into
  * the member once the parallel loop has finished.
  *
  * config.NumWorkers() + 1 workers are created.  Worker 0 is constructed with
  * its last constructor argument set to true and is the only worker whose
  * finished episodes are reported to `measure`.
  *
  * @tparam Measure Callable invoked as measure(double); its result is
  *     converted to bool and a true value ends the training.
  * @param measure Called with the episode return each time Step() of worker 0
  *     returns true.
  */
 template <
   typename WorkerType,
   typename EnvironmentType,
   typename NetworkType,
   typename UpdaterType,
   typename PolicyType
 >
 template <typename Measure>
 void AsyncLearning<
   WorkerType,
   EnvironmentType,
   NetworkType,
   UpdaterType,
   PolicyType
 >::Train(Measure& measure)
 {
   NetworkType learningNetwork = std::move(this->learningNetwork);
   if (learningNetwork.Parameters().is_empty())
     learningNetwork.ResetParameters();
   NetworkType targetNetwork = learningNetwork;
   size_t totalSteps = 0;
   PolicyType policy = this->policy;
   // Shared stop flag; it is only read or written inside an unnamed
   // `omp critical` region so that accesses from different threads never
   // conflict.
   bool stop = false;

   // Set up worker pool, worker 0 will be deterministic for evaluation.
   std::vector<WorkerType> workers;
   for (size_t i = 0; i <= config.NumWorkers(); ++i)
   {
     workers.push_back(WorkerType(updater, environment, config, !i));
     workers.back().Initialize(learningNetwork);
   }
   // Set up task queue corresponding to worker pool.
   std::queue<size_t> tasks;
   for (size_t i = 0; i <= config.NumWorkers(); ++i)
     tasks.push(i);

   // Count the threads of a parallel region: each thread adds one to its
   // private copy and the reduction sums them up.
   size_t numThreads = 0;
   #pragma omp parallel reduction(+:numThreads)
   numThreads++;
   Log::Debug << numThreads << " threads will be used in total." << std::endl;

   // omp_size_t is declared elsewhere in the project; converting the bound
   // once keeps the loop comparison within a single type whatever its
   // signedness is.
   const omp_size_t threadCount = static_cast<omp_size_t>(numThreads);

   #pragma omp parallel for shared(stop, workers, tasks, learningNetwork, \
       targetNetwork, totalSteps, policy)
   for (omp_size_t i = 0; i < threadCount; ++i)
   {
     #pragma omp critical
     {
       #ifdef HAS_OPENMP
         Log::Debug << "Thread " << omp_get_thread_num() <<
             " started." << std::endl;
       #endif
     }

     // Sentinel meaning "this thread currently holds no worker".
     const size_t noTask = std::numeric_limits<size_t>::max();
     size_t task = noTask;
     // Thread-local snapshot of `stop`, refreshed under the lock each round.
     bool stopRequested = false;
     while (!stopRequested)
     {
       // Assign task to current thread from queue.  The stop flag is sampled
       // in the same critical region that guards writes to it.
       #pragma omp critical
       {
         stopRequested = stop;
         if (!stopRequested)
         {
           // Hand the previously held worker back before taking the next one.
           if (task != noTask)
             tasks.push(task);

           if (!tasks.empty())
           {
             task = tasks.front();
             tasks.pop();
           }
         }
       }

       // Leave the loop once a stop was requested.  Otherwise, having no
       // task may happen when threads are more than workers; such a thread
       // keeps polling the queue.
       if (stopRequested || task == noTask)
         continue;

       // Get corresponding worker.
       WorkerType& worker = workers[task];
       // Written by Step(); initialized so that it never holds an
       // indeterminate value.
       double episodeReturn = 0.0;
       if (worker.Step(learningNetwork, targetNetwork, totalSteps,
           policy, episodeReturn) && !task)
       {
         // Only worker 0 reaches this point.  The measure is evaluated
         // outside the lock; only the flag update itself is serialized.
         const bool shouldStop = measure(episodeReturn);
         #pragma omp critical
         {
           stop = shouldStop;
         }
       }
     }
   }

   // Write back the learning network.
   this->learningNetwork = std::move(learningNetwork);
 }

 } // namespace rl
 } // namespace mlpack

 #endif

mlpack::Log::Debug
static MLPACK_EXPORT util::NullOutStream Debug
MLPACK_EXPORT is required for global variables, so that they are properly exported by the Windows com...
Definition: log.hpp:79

mlpack
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1

mlpack::rl::TrainingConfig::NumWorkers
size_t NumWorkers() const
Get the number of workers.
Definition: training_config.hpp:74

prereqs.hpp
The core includes that mlpack expects; standard C++ includes and Armadillo.

std
Definition: pointer_wrapper.hpp:23

mlpack::rl::TrainingConfig
Definition: training_config.hpp:19

mlpack::rl::AsyncLearning::Train
void Train(Measure &measure)
Starting async training.
Definition: async_learning_impl.hpp:62

mlpack::rl::AsyncLearning
Wrapper of various asynchronous learning algorithms, e.g.
Definition: async_learning.hpp:57