mlpack: a scalable c++ machine learning library
mlpack  2.0.2
hoeffding_categorical_split.hpp
Go to the documentation of this file.
1 
15 #ifndef mlpack_METHODS_HOEFFDING_TREES_HOEFFDING_CATEGORICAL_SPLIT_HPP
16 #define mlpack_METHODS_HOEFFDING_TREES_HOEFFDING_CATEGORICAL_SPLIT_HPP
17 
18 #include <mlpack/core.hpp>
20 
21 namespace mlpack {
22 namespace tree {
23 
// Categorical-feature split for Hoeffding trees.  The file's own summary
// describes this as "the standard Hoeffding-bound categorical feature"; the
// paper citation is truncated in this listing.
//
// NOTE(review): the class-head line (listing line 46) is absent from this
// rendering.  The constructor names below indicate the class is
// HoeffdingCategoricalSplit -- confirm against the original header.
//
// FitnessFunction: template parameter; presumably the fitness (gain) measure
// evaluated by EvaluateFitnessFunction() -- its use is not visible here.
45 template<typename FitnessFunction>
47 {
48  public:
    // NOTE(review): Split() below refers to a SplitInfo type.  The generated
    // summary lists "CategoricalSplitInfo SplitInfo" ("the type of split
    // information required by the HoeffdingCategoricalSplit"), so a typedef
    // most likely sits on listing line 50, which is missing from this view.
51 
    // Create the HoeffdingCategoricalSplit given a number of categories for
    // this dimension and a number of classes (per the numClasses parameter).
59  HoeffdingCategoricalSplit(const size_t numCategories,
60  const size_t numClasses);
61 
    // Same as above, but additionally takes another HoeffdingCategoricalSplit.
    // NOTE(review): how `other` is used is defined in the _impl.hpp file and
    // is not visible here.
68  HoeffdingCategoricalSplit(const size_t numCategories,
69  const size_t numClasses,
70  const HoeffdingCategoricalSplit& other);
71 
    // Train on the given value with the given label.
78  template<typename eT>
79  void Train(eT value, const size_t label);
80 
    // Given the points seen so far, evaluate the fitness function.  Results
    // are returned through the two output references; judging by their names
    // these are the best and second-best gains.
91  void EvaluateFitnessFunction(double& bestFitness, double& secondBestFitness)
92  const;
93 
    // Return the number of children, if the node were to split.  This is the
    // number of columns of the sufficient-statistics matrix.
95  size_t NumChildren() const { return sufficientStatistics.n_cols; }
96 
    // Gather the information for a split: get the labels of the child
    // majorities (childMajorities) and initialize the split information
    // (splitInfo).
104  void Split(arma::Col<size_t>& childMajorities, SplitInfo& splitInfo);
105 
    // Get the majority class seen so far.
107  size_t MajorityClass() const;
    // Get the probability of the majority class given the points seen so far.
109  double MajorityProbability() const;
110 
    // Serialize the categorical split.  Only sufficientStatistics is
    // archived; the version argument is unused.
112  template<typename Archive>
113  void Serialize(Archive& ar, const unsigned int /* version */)
114  {
115  ar & data::CreateNVP(sufficientStatistics, "sufficientStatistics");
116  }
117 
118  private:
    // The sufficient statistics for all points seen so far.
    // NOTE(review): NumChildren() returns n_cols, so columns presumably
    // correspond to categories (and rows to classes) -- confirm in the
    // implementation file.
122  arma::Mat<size_t> sufficientStatistics;
123 };
124 
125 } // namespace tree
126 } // namespace mlpack
127 
128 // Include implementation.
129 #include "hoeffding_categorical_split_impl.hpp"
130 
131 #endif
void Serialize(Archive &ar, const unsigned int)
Serialize the categorical split.
Linear algebra utility functions, generally performed on matrices or vectors.
FirstShim< T > CreateNVP(T &t, const std::string &name, typename boost::enable_if< HasSerialize< T >>::type *=0)
Call this function to produce a name-value pair; this is similar to BOOST_SERIALIZATION_NVP(), but should be used for types that have a Serialize() function (or contain a type that has a Serialize() function) instead of a serialize() function.
void EvaluateFitnessFunction(double &bestFitness, double &secondBestFitness) const
Given the points seen so far, evaluate the fitness function, returning the gain for the best possible split in bestFitness and the gain for the second-best possible split in secondBestFitness.
HoeffdingCategoricalSplit(const size_t numCategories, const size_t numClasses)
Create the HoeffdingCategoricalSplit given a number of categories for this dimension and a number of classes.
arma::Mat< size_t > sufficientStatistics
The sufficient statistics for all points seen so far.
size_t MajorityClass() const
Get the majority class seen so far.
void Split(arma::Col< size_t > &childMajorities, SplitInfo &splitInfo)
Gather the information for a split: get the labels of the child majorities, and initialize the SplitInfo object.
CategoricalSplitInfo SplitInfo
The type of split information required by the HoeffdingCategoricalSplit.
size_t NumChildren() const
Return the number of children, if the node were to split.
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen documentation.
double MajorityProbability() const
Get the probability of the majority class given the points seen so far.
This is the standard Hoeffding-bound categorical feature proposed in the paper below: ...
void Train(eT value, const size_t label)
Train on the given value with the given label.