pendulum.hpp
Go to the documentation of this file.
1 
16 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
17 #define MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
18 
19 #include <mlpack/prereqs.hpp>
21 
22 namespace mlpack {
23 namespace rl {
24 
31 class Pendulum
32 {
33  public:
38  class State
39  {
40  public:
44  State() : data(dimension, arma::fill::zeros)
45  { /* Nothing to do here. */ }
46 
52  State(const arma::colvec& data): data(data)
53  { /* Nothing to do here. */ }
54 
56  arma::colvec& Data() { return data; }
57 
59  double Theta() const { return data[0]; }
61  double& Theta() { return data[0]; }
62 
64  double AngularVelocity() const { return data[1]; }
66  double& AngularVelocity() { return data[1]; }
67 
69  const arma::colvec& Encode() const { return data; }
70 
72  static constexpr size_t dimension = 2;
73 
74  private:
76  arma::colvec data;
77  };
78 
84  struct Action
85  {
86  double action[1];
87  // Storing degree of freedom
88  const int size = 1;
89  };
90 
101  Pendulum(const double maxAngularVelocity = 8,
102  const double maxTorque = 2.0,
103  const double dt = 0.05,
104  const double doneReward = 0.0,
105  const size_t maxSteps = 200) :
106  maxAngularVelocity(maxAngularVelocity),
107  maxTorque(maxTorque),
108  dt(dt),
109  doneReward(doneReward),
110  maxSteps(maxSteps),
111  stepsPerformed(0)
112  { /* Nothing to do here */ }
113 
123  double Sample(const State& state,
124  const Action& action,
125  State& nextState)
126  {
127  // Update the number of steps performed.
128  stepsPerformed++;
129 
130  // Get current state.
131  double theta = state.Theta();
132  double angularVelocity = state.AngularVelocity();
133 
134  // Define constants which specify our pendulum.
135  const double gravity = 10.0;
136  const double mass = 1.0;
137  const double length = 1.0;
138 
139  // Get action and clip the values between max and min limits.
140  double torque = math::ClampRange(action.action[0], -maxTorque, maxTorque);
141 
142  // Calculate costs of taking this action in the current state.
143  double costs = std::pow(AngleNormalize(theta), 2) + 0.1 *
144  std::pow(angularVelocity, 2) + 0.001 * std::pow(torque, 2);
145 
146  // Calculate new state values and assign to the next state.
147  double newAngularVelocity = angularVelocity + (-3.0 * gravity / (2 *
148  length) * std::sin(theta + M_PI) + 3.0 / (mass * std::pow(length, 2)) *
149  torque) * dt;
150  nextState.Theta() = theta + newAngularVelocity * dt;
151  nextState.AngularVelocity() = math::ClampRange(newAngularVelocity,
152  -maxAngularVelocity, maxAngularVelocity);
153 
154  // Return the reward of taking the action in current state.
155  // The reward is simply the negative of cost incurred for the action.
156  return -costs;
157  }
158 
166  double Sample(const State& state, const Action& action)
167  {
168  State nextState;
169  return Sample(state, action, nextState);
170  }
171 
179  {
180  State state;
181  state.Theta() = math::Random(-M_PI, M_PI);
182  state.AngularVelocity() = math::Random(-1.0, 1.0);
183  stepsPerformed = 0;
184  return state;
185  }
186 
192  double AngleNormalize(double theta) const
193  {
194  // Scale angle within [-pi, pi).
195  double x = fmod(theta + M_PI, 2 * M_PI);
196  if (x < 0)
197  x += 2 * M_PI;
198  return x - M_PI;
199  }
200 
207  bool IsTerminal(const State& state) const
208  {
209  if (maxSteps != 0 && stepsPerformed >= maxSteps)
210  {
211  Log::Info << "Episode terminated due to the maximum number of steps"
212  "being taken.";
213  return true;
214  }
215  return false;
216  }
217 
219  size_t StepsPerformed() const { return stepsPerformed; }
220 
222  size_t MaxSteps() const { return maxSteps; }
224  size_t& MaxSteps() { return maxSteps; }
225 
226  private:
228  double maxAngularVelocity;
229 
231  double maxTorque;
232 
234  double dt;
235 
237  double doneReward;
238 
240  size_t maxSteps;
241 
243  size_t stepsPerformed;
244 };
245 
246 } // namespace rl
247 } // namespace mlpack
248 
249 #endif
double Theta() const
Get the theta.
Definition: pendulum.hpp:59
double & Theta()
Modify the value of theta.
Definition: pendulum.hpp:61
Implementation of Pendulum task.
Definition: pendulum.hpp:31
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: add_to_po.hpp:21
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: pendulum.hpp:222
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Pendulum.
Definition: pendulum.hpp:123
Pendulum(const double maxAngularVelocity=8, const double maxTorque=2.0, const double dt=0.05, const double doneReward=0.0, const size_t maxSteps=200)
Construct a Pendulum instance using the given values.
Definition: pendulum.hpp:101
The core includes that mlpack expects; standard C++ includes and Armadillo.
Implementation of action of Pendulum.
Definition: pendulum.hpp:84
arma::colvec & Data()
Modify the internal representation of the state.
Definition: pendulum.hpp:56
State(const arma::colvec &data)
Construct a state based on the given data.
Definition: pendulum.hpp:52
double AngleNormalize(double theta) const
This function calculates the normalized angle for a particular theta.
Definition: pendulum.hpp:192
#define M_PI
Definition: prereqs.hpp:39
bool IsTerminal(const State &state) const
This function checks if the pendulum has reached a terminal state.
Definition: pendulum.hpp:207
double AngularVelocity() const
Get the angular velocity.
Definition: pendulum.hpp:64
Miscellaneous math clamping routines.
State()
Construct a state instance.
Definition: pendulum.hpp:44
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
size_t StepsPerformed() const
Get the number of steps performed.
Definition: pendulum.hpp:219
double & AngularVelocity()
Modify the value of angular velocity.
Definition: pendulum.hpp:66
static constexpr size_t dimension
Dimension of the encoded state.
Definition: pendulum.hpp:72
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: pendulum.hpp:224
Implementation of state of Pendulum.
Definition: pendulum.hpp:38
double Sample(const State &state, const Action &action)
Dynamics of Pendulum.
Definition: pendulum.hpp:166
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:78
State InitialSample()
Initial theta is randomly generated within [-pi, pi].
Definition: pendulum.hpp:178
const arma::colvec & Encode() const
Encode the state to a column vector.
Definition: pendulum.hpp:69
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53