13 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP 14 #define MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP 98 Action::actions action;
101 static const size_t size = 3;
123 const double gravity = 9.81,
124 const double linkLength1 = 1.0,
125 const double linkLength2 = 1.0,
126 const double linkMass1 = 1.0,
127 const double linkMass2 = 1.0,
128 const double linkCom1 = 0.5,
129 const double linkCom2 = 0.5,
130 const double linkMoi = 1.0,
131 const double maxVel1 = 4 * M_PI,
132 const double maxVel2 = 9 * M_PI,
133 const double dt = 0.2,
134 const double doneReward = 0) :
137 linkLength1(linkLength1),
138 linkLength2(linkLength2),
139 linkMass1(linkMass1),
140 linkMass2(linkMass2),
147 doneReward(doneReward),
168 arma::colvec currentState = {state.
Theta1(), state.
Theta2(),
171 arma::colvec currentNextState =
Rk4(currentState,
Torque(action));
173 nextState.
Theta1() =
Wrap(currentNextState[0], -M_PI, M_PI);
175 nextState.
Theta2() =
Wrap(currentNextState[1], -M_PI, M_PI);
187 if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
207 return Sample(state, action, nextState);
216 return State((arma::randu<arma::colvec>(4) - 0.5) / 5.0);
227 if (maxSteps != 0 && stepsPerformed >= maxSteps)
229 Log::Info <<
"Episode terminated due to the maximum number of steps" 233 else if (-std::cos(state.
Theta1()) - std::cos(state.
Theta1() +
236 Log::Info <<
"Episode terminated due to agent succeeding.";
249 arma::colvec
Dsdt(arma::colvec state,
const double torque)
const 251 const double m1 = linkMass1;
252 const double m2 = linkMass2;
253 const double l1 = linkLength1;
254 const double lc1 = linkCom1;
255 const double lc2 = linkCom2;
256 const double I1 = linkMoi;
257 const double I2 = linkMoi;
258 const double g = gravity;
259 const double a = torque;
260 const double theta1 = state[0];
261 const double theta2 = state[1];
263 arma::colvec values(4);
264 values[0] = state[2];
265 values[1] = state[3];
267 const double d1 = m1 * std::pow(lc1, 2) + m2 * (std::pow(l1, 2) +
268 std::pow(lc2, 2) + 2 * l1 * lc2 * std::cos(theta2)) + I1 + I2;
270 const double d2 = m2 * (std::pow(lc2, 2) + l1 * lc2 * std::cos(theta2)) +
273 const double phi2 = m2 * lc2 * g * std::cos(theta1 + theta2 - M_PI / 2.);
275 const double phi1 = - m2 * l1 * lc2 * std::pow(values[1], 2) *
276 std::sin(theta2) - 2 * m2 * l1 * lc2 * values[1] * values[0] *
277 std::sin(theta2) + (m1 * lc1 + m2 * l1) * g *
278 std::cos(theta1 - M_PI / 2) + phi2;
280 values[3] = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * std::pow(values[0], 2) *
281 std::sin(theta2) - phi2) / (m2 * std::pow(lc2, 2) + I2 -
282 std::pow(d2, 2) / d1);
284 values[2] = -(d2 * values[3] + phi1) / d1;
299 const double minimum,
300 const double maximum)
const 302 const double diff = maximum - minimum;
306 value = value - diff;
308 else if (value < minimum)
310 value = value + diff;
335 arma::colvec
Rk4(
const arma::colvec state,
const double torque)
const 337 arma::colvec k1 =
Dsdt(state, torque);
338 arma::colvec k2 =
Dsdt(state + dt * k1 / 2, torque);
339 arma::colvec k3 =
Dsdt(state + dt * k2 / 2, torque);
340 arma::colvec k4 =
Dsdt(state + dt * k3, torque);
341 arma::colvec nextState = state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6;
395 size_t stepsPerformed;
size_t StepsPerformed() const
Get the number of steps performed.
Definition: acrobot.hpp:347
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of the Acrobot System.
Definition: acrobot.hpp:160
Acrobot(const size_t maxSteps=500, const double gravity=9.81, const double linkLength1=1.0, const double linkLength2=1.0, const double linkMass1=1.0, const double linkMass2=1.0, const double linkCom1=0.5, const double linkCom2=0.5, const double linkMoi=1.0, const double maxVel1=4 *M_PI, const double maxVel2=9 *M_PI, const double dt=0.2, const double doneReward=0)
Construct a Acrobot instance using the given constants.
Definition: acrobot.hpp:122
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
State InitialSample()
This function does random initialization of state space.
Definition: acrobot.hpp:213
arma::colvec & Data()
Modify the state representation.
Definition: acrobot.hpp:52
double Wrap(double value, const double minimum, const double maximum) const
Wrap funtion is required to truncate the angle value from -180 to 180.
Definition: acrobot.hpp:298
arma::colvec Rk4(const arma::colvec state, const double torque) const
This function calls the RK4 iterative method to estimate the next state based on given ordinary diffe...
Definition: acrobot.hpp:335
double & Theta2()
Modify value of theta (two).
Definition: acrobot.hpp:62
double Theta2() const
Get value of theta (two).
Definition: acrobot.hpp:60
const arma::colvec & Encode() const
Encode the state to a column vector.
Definition: acrobot.hpp:75
double & AngularVelocity2()
Modify the angular velocity (two).
Definition: acrobot.hpp:72
double Theta1() const
Get value of theta (one).
Definition: acrobot.hpp:55
Definition: acrobot.hpp:88
double & AngularVelocity1()
Modify the angular velocity (one).
Definition: acrobot.hpp:67
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: acrobot.hpp:352
static constexpr size_t dimension
Dimension of the encoded state.
Definition: acrobot.hpp:78
double Torque(const Action &action) const
This function calculates the torque for a particular action.
Definition: acrobot.hpp:322
Implementation of Acrobot game.
Definition: acrobot.hpp:28
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
double AngularVelocity2() const
Get value of Angular velocity (two).
Definition: acrobot.hpp:70
double AngularVelocity1() const
Get value of Angular velocity (one).
Definition: acrobot.hpp:65
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:83
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: acrobot.hpp:350
double Sample(const State &state, const Action &action)
Dynamics of the Acrobot System.
Definition: acrobot.hpp:204
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
double & Theta1()
Modify value of theta (one).
Definition: acrobot.hpp:57
arma::colvec Dsdt(arma::colvec state, const double torque) const
This is the ordinary differential equations required for estimation of nextState through RK4 method...
Definition: acrobot.hpp:249
bool IsTerminal(const State &state) const
This function checks if the acrobot has reached the terminal state.
Definition: acrobot.hpp:225
State()
Construct a state instance.
Definition: acrobot.hpp:41
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53
Definition: acrobot.hpp:35
State(const arma::colvec &data)
Construct a state instance from given data.
Definition: acrobot.hpp:48