12 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP 13 #define MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP 29 template <
typename EnvironmentType>
34 using State =
typename EnvironmentType::State;
37 using Action =
typename EnvironmentType::Action;
48 const double minReward = -1.0,
49 const double maxReward = 1.0) :
50 environment(environment),
64 return environment.InitialSample();
76 return environment.IsTerminal(state);
93 double unclippedReward = environment.Sample(state, action, nextState);
109 return Sample(state, action, nextState);
129 EnvironmentType environment;
State InitialSample()
The InitialSample method is called by the environment to initialize the starting state.
Definition: reward_clipping.hpp:62
double MinReward() const
Get the minimum reward value.
Definition: reward_clipping.hpp:118
bool IsTerminal(const State &state) const
Checks whether given state is a terminal state.
Definition: reward_clipping.hpp:74
EnvironmentType & Environment() const
Get the environment.
Definition: reward_clipping.hpp:113
typename EnvironmentType::State State
Convenient typedef for state.
Definition: reward_clipping.hpp:34
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Environment.
Definition: reward_clipping.hpp:88
The core includes that mlpack expects; standard C++ includes and Armadillo.
double & MaxReward()
Modify the maximum reward value.
Definition: reward_clipping.hpp:125
EnvironmentType & Environment()
Modify the environment.
Definition: reward_clipping.hpp:115
Miscellaneous math clamping routines.
RewardClipping(EnvironmentType &environment, const double minReward=-1.0, const double maxReward=1.0)
Constructor for creating a RewardClipping instance.
Definition: reward_clipping.hpp:47
double Sample(const State &state, const Action &action)
Dynamics of Environment.
Definition: reward_clipping.hpp:106
typename EnvironmentType::Action Action
Convenient typedef for action.
Definition: reward_clipping.hpp:37
double & MinReward()
Modify the minimum reward value.
Definition: reward_clipping.hpp:120
Interface for clipping the reward to some value between the specified maximum and minimum value (clipping is done by clamping the wrapped environment's reward into that range).
Definition: reward_clipping.hpp:30
double MaxReward() const
Get the maximum reward value.
Definition: reward_clipping.hpp:123
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53