mlpack: src/mlpack/core/data/dataset_info.hpp Source File
dataset_info.hpp
Go to the documentation of this file.
1 
16 #ifndef mlpack_CORE_DATA_DATASET_INFO_HPP
17 #define mlpack_CORE_DATA_DATASET_INFO_HPP
18 
19 #include <mlpack/core.hpp>
20 #include <unordered_map>
21 #include <boost/bimap.hpp>
22 
23 namespace mlpack {
24 namespace data {
25 
/**
 * The Datatype enum specifies the types of data mlpack algorithms can use.
 *
 * A dimension is either numeric or categorical; the underlying type is fixed to
 * bool because only these two values exist.
 */
enum Datatype : bool /* bool is all the precision we need for two types */
{
  numeric = 0,
  categorical = 1
};
37 
46 {
47  public:
53  DatasetInfo(const size_t dimensionality = 0);
54 
64  size_t MapString(const std::string& string, const size_t dimension);
65 
74  const std::string& UnmapString(const size_t value, const size_t dimension);
75 
77  Datatype Type(const size_t dimension) const;
79  Datatype& Type(const size_t dimension);
80 
85  size_t NumMappings(const size_t dimension) const;
86 
93  size_t Dimensionality() const;
94 
98  template<typename Archive>
99  void Serialize(Archive& ar, const unsigned int /* version */)
100  {
101  ar & data::CreateNVP(types, "types");
102  ar & data::CreateNVP(maps, "maps");
103  }
104 
105  private:
107  std::vector<Datatype> types;
108 
111  std::unordered_map<size_t, std::pair<boost::bimap<std::string, size_t>,
112  size_t>> maps;
113 
114 };
115 
116 } // namespace data
117 } // namespace mlpack
118 
119 #include "dataset_info_impl.hpp"
120 
121 #endif
std::unordered_map< size_t, std::pair< boost::bimap< std::string, size_t >, size_t > > maps
Mappings from strings to integers.
DatasetInfo(const size_t dimensionality=0)
Create the DatasetInfo object with the given dimensionality.
Linear algebra utility functions, generally performed on matrices or vectors.
FirstShim< T > CreateNVP(T &t, const std::string &name, typename boost::enable_if< HasSerialize< T >>::type *=0)
Call this function to produce a name-value pair; this is similar to BOOST_SERIALIZATION_NVP(), but should be used for types that have a Serialize() function (or contain a type that has a Serialize() function) instead of a serialize() function.
Datatype
The Datatype enum specifies the types of data mlpack algorithms can use.
Auxiliary information for a dataset, including mappings to/from strings and the datatype of each dimension.
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen documentation.
size_t MapString(const std::string &string, const size_t dimension)
Given the string and the dimension to which it belongs, return its numeric mapping.
void Serialize(Archive &ar, const unsigned int)
Serialize the dataset information.
const std::string & UnmapString(const size_t value, const size_t dimension)
Return the string that corresponds to a given value in a given dimension.
Datatype Type(const size_t dimension) const
Return the type of a given dimension (numeric or categorical).
size_t Dimensionality() const
Get the dimensionality of the DatasetInfo object (that is, how many dimensions it has information for).
std::vector< Datatype > types
Types of each dimension.
size_t NumMappings(const size_t dimension) const
Get the number of mappings for a particular dimension.