mock_categorical_data.hpp
Go to the documentation of this file.
1 
11 #ifndef MLPACK_TESTS_MOCK_CATEGORICAL_DATA_HPP
12 #define MLPACK_TESTS_MOCK_CATEGORICAL_DATA_HPP
13 
#include <mlpack/prereqs.hpp>
#include <mlpack/core/dists/discrete_distribution.hpp>
16 
20 inline void MockCategoricalData(arma::mat& d,
21  arma::Row<size_t>& l,
22  mlpack::data::DatasetInfo& datasetInfo)
23 {
24  // We'll build a spiral dataset plus two noisy categorical features. We need
25  // to build the distributions for the categorical features (they'll be
26  // discrete distributions).
28  // The distribution will be automatically normalized.
29  for (size_t i = 0; i < 5; ++i)
30  {
31  std::vector<arma::vec> probs;
32  probs.push_back(arma::vec(4, arma::fill::randu));
34  }
35 
37  for (size_t i = 0; i < 5; ++i)
38  {
39  std::vector<arma::vec> probs;
40  probs.push_back(arma::vec(2, arma::fill::randu));
42  }
43 
44  arma::mat spiralDataset(4, 4000);
45  arma::Row<size_t> labels(4000);
46  for (size_t i = 0; i < 4000; ++i)
47  {
48  // One circle every 2000 samples. Plus some noise.
49  const double magnitude = 2.0 + (double(i) / 200.0) +
50  0.5 * mlpack::math::Random();
51  const double angle = (i % 200) * (2 * M_PI) + mlpack::math::Random();
52 
53  const double x = magnitude * cos(angle);
54  const double y = magnitude * sin(angle);
55 
56  spiralDataset(0, i) = x;
57  spiralDataset(1, i) = y;
58 
59  // Set categorical features c1 and c2.
60  if (i < 800)
61  {
62  spiralDataset(2, i) = c1[1].Random()[0];
63  spiralDataset(3, i) = c2[1].Random()[0];
64  labels[i] = 1;
65  }
66  else if (i < 1600)
67  {
68  spiralDataset(2, i) = c1[3].Random()[0];
69  spiralDataset(3, i) = c2[3].Random()[0];
70  labels[i] = 3;
71  }
72  else if (i < 2400)
73  {
74  spiralDataset(2, i) = c1[2].Random()[0];
75  spiralDataset(3, i) = c2[2].Random()[0];
76  labels[i] = 2;
77  }
78  else if (i < 3200)
79  {
80  spiralDataset(2, i) = c1[0].Random()[0];
81  spiralDataset(3, i) = c2[0].Random()[0];
82  labels[i] = 0;
83  }
84  else
85  {
86  spiralDataset(2, i) = c1[4].Random()[0];
87  spiralDataset(3, i) = c2[4].Random()[0];
88  labels[i] = 4;
89  }
90  }
91 
92  // Now create the dataset info.
93  datasetInfo = mlpack::data::DatasetInfo(4);
96  // Set mappings.
97  datasetInfo.MapString<double>("0", 2);
98  datasetInfo.MapString<double>("1", 2);
99  datasetInfo.MapString<double>("2", 2);
100  datasetInfo.MapString<double>("3", 2);
101  datasetInfo.MapString<double>("0", 3);
102  datasetInfo.MapString<double>("1", 3);
103 
104  // Now shuffle the dataset.
105  arma::uvec indices = arma::shuffle(arma::linspace<arma::uvec>(0, 3999,
106  4000));
107  d = arma::mat(4, 4000);
108  l = arma::Row<size_t>(4000);
109  for (size_t i = 0; i < 4000; ++i)
110  {
111  d.col(i) = spiralDataset.col(indices[i]);
112  l[i] = labels[indices[i]];
113  }
114 }
115 
116 #endif
T MapString(const InputType &input, const size_t dimension)
Given the input and the dimension to which it belongs, return its numeric mapping.
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the data...
A discrete distribution where the only observations are discrete observations.
The core includes that mlpack expects; standard C++ includes and Armadillo.
#define M_PI
Definition: prereqs.hpp:39
arma::vec Random() const
Return a randomly generated observation (one-dimensional vector; one observation) according to the pr...
Datatype Type(const size_t dimension) const
Return the type of a given dimension (numeric or categorical).
void MockCategoricalData(arma::mat &d, arma::Row< size_t > &l, mlpack::data::DatasetInfo &datasetInfo)
Create a mock categorical dataset for testing.
DatasetMapper< data::IncrementPolicy > DatasetInfo
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:71