mlpack: a scalable C++ machine learning library
mlpack  2.0.2
discrete_distribution.hpp
Go to the documentation of this file.
1 
15 #ifndef mlpack_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
16 #define mlpack_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
17 
18 #include <mlpack/core.hpp>
19 
20 namespace mlpack {
21 namespace distribution {
22 
46 {
47  public:
51  DiscreteDistribution() { /* nothing to do */ }
52 
61  DiscreteDistribution(const size_t numObservations) :
62  probabilities(arma::ones<arma::vec>(numObservations) / numObservations)
63  { /* nothing to do */ }
64 
72  {
73  // We must be sure that our distribution is normalized.
74  double sum = accu(probabilities);
75  if (sum > 0)
76  this->probabilities = probabilities / sum;
77  else
78  {
79  this->probabilities.set_size(probabilities.n_elem);
80  this->probabilities.fill(1 / probabilities.n_elem);
81  }
82  }
83 
/**
 * Get the dimensionality of the distribution.
 *
 * @return Always 1.
 */
static size_t Dimensionality()
{
  return 1;
}
88 
97  double Probability(const arma::vec& observation) const
98  {
99  // Adding 0.5 helps ensure that we cast the floating point to a size_t
100  // correctly.
101  const size_t obs = size_t(observation[0] + 0.5);
102 
103  // Ensure that the observation is within the bounds.
104  if (obs >= probabilities.n_elem)
105  {
106  Log::Debug << "DiscreteDistribution::Probability(): received observation "
107  << obs << "; observation must be in [0, " << probabilities.n_elem
108  << "] for this distribution." << std::endl;
109  }
110 
111  return probabilities(obs);
112  }
113 
122  double LogProbability(const arma::vec& observation) const
123  {
124  // TODO: consider storing log_probabilities instead
125  return log(Probability(observation));
126  }
127 
135  arma::vec Random() const;
136 
144  void Train(const arma::mat& observations);
145 
155  void Train(const arma::mat& observations,
156  const arma::vec& probabilities);
157 
159  const arma::vec& Probabilities() const { return probabilities; }
161  arma::vec& Probabilities() { return probabilities; }
162 
166  template<typename Archive>
167  void Serialize(Archive& ar, const unsigned int /* version */)
168  {
169  // We only need to save the probabilities, since that's all we hold.
170  ar & data::CreateNVP(probabilities, "probabilities");
171  }
172 
173  private:
174  arma::vec probabilities;
175 };
176 
177 } // namespace distribution
178 } // namespace mlpack
179 
180 #endif
DiscreteDistribution(const arma::vec &probabilities)
Define the discrete distribution as having the given probabilities for each observation.
double Probability(const arma::vec &observation) const
Return the probability of the given observation.
Linear algebra utility functions, generally performed on matrices or vectors.
FirstShim< T > CreateNVP(T &t, const std::string &name, typename boost::enable_if< HasSerialize< T >>::type *=0)
Call this function to produce a name-value pair; this is similar to BOOST_SERIALIZATION_NVP(), but should be used for types that have a Serialize() function (or contain a type that has a Serialize() function) instead of a serialize() function.
void Serialize(Archive &ar, const unsigned int)
Serialize the distribution.
A discrete distribution where the only observations are discrete observations.
arma::vec Random() const
Return a randomly generated observation (one-dimensional vector; one observation) according to the pr...
DiscreteDistribution()
Default constructor, which creates a distribution that has no observations.
arma::vec & Probabilities()
Modify the vector of probabilities.
double LogProbability(const arma::vec &observation) const
Return the log probability of the given observation.
void Train(const arma::mat &observations)
Estimate the probability distribution directly from the given observations.
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
static mlpack_EXPORT util::NullOutStream Debug
mlpack_EXPORT is required for global variables, so that they are properly exported by the Windows com...
Definition: log.hpp:81
const arma::vec & Probabilities() const
Return the vector of probabilities.
static size_t Dimensionality()
Get the dimensionality of the distribution.
DiscreteDistribution(const size_t numObservations)
Define the discrete distribution as having numObservations possible observations. ...