mlpack: reward_clipping.hpp
#ifndef MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
#define MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/core/math/clamp.hpp>

namespace mlpack {
namespace rl {

/**
 * Interface for clipping the reward to some value between the specified
 * maximum and minimum value.
 *
 * @tparam EnvironmentType The type of environment being wrapped.
 */
template <typename EnvironmentType>
class RewardClipping
{
 public:
  //! Convenient typedef for state.
  using State = typename EnvironmentType::State;

  //! Convenient typedef for action.
  using Action = typename EnvironmentType::Action;

  /**
   * Constructor for creating a RewardClipping instance.
   *
   * @param environment An instance of the environment to wrap.
   * @param minReward The minimum possible value of the clipped reward.
   * @param maxReward The maximum possible value of the clipped reward.
   */
  RewardClipping(EnvironmentType& environment,
                 const double minReward = -1.0,
                 const double maxReward = 1.0) :
      environment(environment),
      minReward(minReward),
      maxReward(maxReward)
  {
    // Nothing to do here.
  }

  //! The InitialSample method is called by the environment to initialize the
  //! starting state; it returns whatever the wrapped environment returns.
  State InitialSample()
  {
    return environment.InitialSample();
  }

  //! Checks whether the given state is a terminal state.
  bool IsTerminal(const State& state) const
  {
    return environment.IsTerminal(state);
  }

  /**
   * Dynamics of the environment: take an action in the given state, fill in
   * the next state, and return the reward clipped to [minReward, maxReward].
   */
  double Sample(const State& state,
                const Action& action,
                State& nextState)
  {
    // Get the original, unclipped reward from the base environment.
    double unclippedReward = environment.Sample(state, action, nextState);
    // Clip the reward according to the min and max limits, and return it.
    return math::ClampRange(unclippedReward, minReward, maxReward);
  }

  /**
   * Dynamics of the environment: take an action in the given state and
   * return the clipped reward, discarding the next state.
   */
  double Sample(const State& state, const Action& action)
  {
    State nextState;
    return Sample(state, action, nextState);
  }

  //! Get the environment.
  const EnvironmentType& Environment() const { return environment; }
  //! Modify the environment.
  EnvironmentType& Environment() { return environment; }

  //! Get the minimum reward value.
  double MinReward() const { return minReward; }
  //! Modify the minimum reward value.
  double& MinReward() { return minReward; }

  //! Get the maximum reward value.
  double MaxReward() const { return maxReward; }
  //! Modify the maximum reward value.
  double& MaxReward() { return maxReward; }

 private:
  //! An instance of the environment being wrapped.
  EnvironmentType environment;

  //! The minimum reward value.
  double minReward;

  //! The maximum reward value.
  double maxReward;
};

} // namespace rl
} // namespace mlpack

#endif
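
For illustration, here is a minimal, self-contained sketch of how RewardClipping can wrap an environment. The ToyEnvironment type below is hypothetical (it is not part of mlpack) and exists only to satisfy the interface RewardClipping expects: nested State and Action types, InitialSample(), IsTerminal(), and Sample(). The include path is an assumption based on the header guard above.

// Hypothetical example, not from the mlpack sources: a toy environment whose
// raw rewards can be large, wrapped so every sampled reward lands in [-1, 1].
#include <mlpack/methods/reinforcement_learning/environment/reward_clipping.hpp>
#include <iostream>

// A minimal environment satisfying the interface RewardClipping expects.
struct ToyEnvironment
{
  struct State { double position = 0.0; };
  struct Action { double step = 0.0; };

  // Start every episode at position 0.
  State InitialSample() { return State(); }

  // Episodes end once the position passes 10.
  bool IsTerminal(const State& state) const { return state.position >= 10.0; }

  // The raw reward is the (possibly large) step just taken.
  double Sample(const State& state, const Action& action, State& nextState)
  {
    nextState.position = state.position + action.step;
    return action.step;
  }
};

int main()
{
  ToyEnvironment env;
  mlpack::rl::RewardClipping<ToyEnvironment> clippedEnv(env, -1.0, 1.0);

  ToyEnvironment::State state = clippedEnv.InitialSample();
  ToyEnvironment::Action action;
  action.step = 5.0;  // The raw reward would be 5.0...

  ToyEnvironment::State nextState;
  std::cout << clippedEnv.Sample(state, action, nextState) << "\n";  // ...but 1 is printed.
}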
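
For reference, math::ClampRange() (declared in mlpack's clamp.hpp, included in the listing above) clamps a value into the range [rangeMin, rangeMax]. A behaviorally equivalent one-liner, shown as a sketch rather than mlpack's actual implementation:

#include <algorithm>

// Sketch of ClampRange-style behavior: pin value into [rangeMin, rangeMax].
double ClampRangeSketch(const double value,
                        const double rangeMin,
                        const double rangeMax)
{
  return std::min(std::max(value, rangeMin), rangeMax);
}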