13 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP 14 #define MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP 48 State(
const arma::colvec& data) : data(data)
52 arma::colvec&
Data() {
return data; }
55 double Theta1()
const {
return data[0]; }
60 double Theta2()
const {
return data[1]; }
75 const arma::colvec&
Encode()
const {
return data; }
117 const double linkLength1 = 1.0,
118 const double linkLength2 = 1.0,
119 const double linkMass1 = 1.0,
120 const double linkMass2 = 1.0,
121 const double linkCom1 = 0.5,
122 const double linkCom2 = 0.5,
123 const double linkMoi = 1.0,
124 const double maxVel1 = 4 *
M_PI,
125 const double maxVel2 = 9 *
M_PI,
126 const double dt = 0.2,
127 const double doneReward = 0,
128 const size_t maxSteps = 0) :
130 linkLength1(linkLength1),
131 linkLength2(linkLength2),
132 linkMass1(linkMass1),
133 linkMass2(linkMass2),
140 doneReward(doneReward),
162 arma::colvec currentState = {state.
Theta1(), state.
Theta2(),
165 arma::colvec currentNextState =
Rk4(currentState,
Torque(action));
181 if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
201 return Sample(state, action, nextState);
210 return State((arma::randu<arma::colvec>(4) - 0.5) / 5.0);
221 if (maxSteps != 0 && stepsPerformed >= maxSteps)
223 Log::Info <<
"Episode terminated due to the maximum number of steps" 227 else if (-std::cos(state.
Theta1()) - std::cos(state.
Theta1() +
230 Log::Info <<
"Episode terminated due to agent succeeding.";
243 arma::colvec
Dsdt(arma::colvec state,
const double torque)
const 245 const double m1 = linkMass1;
246 const double m2 = linkMass2;
247 const double l1 = linkLength1;
248 const double lc1 = linkCom1;
249 const double lc2 = linkCom2;
250 const double I1 = linkMoi;
251 const double I2 = linkMoi;
252 const double g = gravity;
253 const double a = torque;
254 const double theta1 = state[0];
255 const double theta2 = state[1];
257 arma::colvec values(4);
258 values[0] = state[2];
259 values[1] = state[3];
261 const double d1 = m1 * std::pow(lc1, 2) + m2 * (std::pow(l1, 2) +
262 std::pow(lc2, 2) + 2 * l1 * lc2 * std::cos(theta2)) + I1 + I2;
264 const double d2 = m2 * (std::pow(lc2, 2) + l1 * lc2 * std::cos(theta2)) +
267 const double phi2 = m2 * lc2 * g * std::cos(theta1 + theta2 -
M_PI / 2.);
269 const double phi1 = - m2 * l1 * lc2 * std::pow(values[1], 2) *
270 std::sin(theta2) - 2 * m2 * l1 * lc2 * values[1] * values[0] *
271 std::sin(theta2) + (m1 * lc1 + m2 * l1) * g *
272 std::cos(theta1 -
M_PI / 2) + phi2;
274 values[3] = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * std::pow(values[0], 2) *
275 std::sin(theta2) - phi2) / (m2 * std::pow(lc2, 2) + I2 -
276 std::pow(d2, 2) / d1);
278 values[2] = -(d2 * values[3] + phi1) / d1;
293 const double minimum,
294 const double maximum)
const 296 const double diff = maximum - minimum;
300 value = value - diff;
302 else if (value < minimum)
304 value = value + diff;
329 arma::colvec
Rk4(
const arma::colvec state,
const double torque)
const 331 arma::colvec k1 =
Dsdt(state, torque);
332 arma::colvec k2 =
Dsdt(state + dt * k1 / 2, torque);
333 arma::colvec k3 =
Dsdt(state + dt * k2 / 2, torque);
334 arma::colvec k4 =
Dsdt(state + dt * k3, torque);
335 arma::colvec nextState = state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6;
389 size_t stepsPerformed;
arma::colvec & Data()
Modify the state representation.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of the Acrobot System.
bool IsTerminal(const State &state) const
This function checks if the acrobot has reached the terminal state.
Linear algebra utility functions, generally performed on matrices or vectors.
double & AngularVelocity1()
Modify the angular velocity (one).
double AngularVelocity1() const
Get value of Angular velocity (one).
size_t MaxSteps() const
Get the maximum number of steps allowed.
const arma::colvec & Encode() const
Encode the state to a column vector.
size_t StepsPerformed() const
Get the number of steps performed.
double & Theta1()
Modify value of theta (one).
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
double Torque(const Action &action) const
This function calculates the torque for a particular action.
double & Theta2()
Modify value of theta (two).
double & AngularVelocity2()
Modify the angular velocity (two).
static constexpr size_t dimension
Dimension of the encoded state.
Implementation of Acrobot game.
size_t & MaxSteps()
Set the maximum number of steps allowed.
arma::colvec Dsdt(arma::colvec state, const double torque) const
This is the ordinary differential equations required for estimation of nextState through RK4 method...
double AngularVelocity2() const
Get value of Angular velocity (two).
double Sample(const State &state, const Action &action)
Dynamics of the Acrobot System.
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
double Theta2() const
Get value of theta (two).
double Random()
Generates a uniform random number between 0 and 1.
double Theta1() const
Get value of theta (one).
State()
Construct a state instance.
State(const arma::colvec &data)
Construct a state instance from given data.
double Wrap(double value, const double minimum, const double maximum) const
Wrap funtion is required to truncate the angle value from -180 to 180.
State InitialSample()
This function does random initialization of state space.
arma::colvec Rk4(const arma::colvec state, const double torque) const
This function calls the RK4 iterative method to estimate the next state based on given ordinary diffe...
Acrobot(const double gravity=9.81, const double linkLength1=1.0, const double linkLength2=1.0, const double linkMass1=1.0, const double linkMass2=1.0, const double linkCom1=0.5, const double linkCom2=0.5, const double linkMoi=1.0, const double maxVel1=4 *M_PI, const double maxVel2=9 *M_PI, const double dt=0.2, const double doneReward=0, const size_t maxSteps=0)
Construct a Acrobot instance using the given constants.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.