mlpack
test_function_tools.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_TESTS_TEST_FUNCTION_TOOLS_HPP
13 #define MLPACK_TESTS_TEST_FUNCTION_TOOLS_HPP
14 
15 #include <mlpack/core.hpp>
16 
19 
20 using namespace mlpack;
21 using namespace mlpack::distribution;
22 using namespace mlpack::regression;
23 
34 inline void LogisticRegressionTestData(arma::mat& data,
35  arma::mat& testData,
36  arma::mat& shuffledData,
37  arma::Row<size_t>& responses,
38  arma::Row<size_t>& testResponses,
39  arma::Row<size_t>& shuffledResponses)
40 {
41  // Generate a two-Gaussian dataset.
42  GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
43  GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
44 
45  data = arma::mat(3, 1000);
46  responses = arma::Row<size_t>(1000);
47  for (size_t i = 0; i < 500; ++i)
48  {
49  data.col(i) = g1.Random();
50  responses[i] = 0;
51  }
52  for (size_t i = 500; i < 1000; ++i)
53  {
54  data.col(i) = g2.Random();
55  responses[i] = 1;
56  }
57 
58  // Shuffle the dataset.
59  arma::uvec indices = arma::shuffle(arma::linspace<arma::uvec>(0,
60  data.n_cols - 1, data.n_cols));
61  shuffledData = arma::mat(3, 1000);
62  shuffledResponses = arma::Row<size_t>(1000);
63  for (size_t i = 0; i < data.n_cols; ++i)
64  {
65  shuffledData.col(i) = data.col(indices[i]);
66  shuffledResponses[i] = responses[indices[i]];
67  }
68 
69  // Create a test set.
70  testData = arma::mat(3, 1000);
71  testResponses = arma::Row<size_t>(1000);
72  for (size_t i = 0; i < 500; ++i)
73  {
74  testData.col(i) = g1.Random();
75  testResponses[i] = 0;
76  }
77  for (size_t i = 500; i < 1000; ++i)
78  {
79  testData.col(i) = g2.Random();
80  testResponses[i] = 1;
81  }
82 }
83 
84 template<typename MatType>
85 void LoadBostonHousingDataset(MatType& trainData,
86  MatType& testData,
87  arma::rowvec& trainResponses,
88  arma::rowvec& testResponses,
89  data::DatasetInfo& info)
90 {
91  MatType dataset;
92  arma::rowvec responses;
93 
94  // Defining categorical deimensions.
95  info.SetDimensionality(13);
96  info.Type(3) = data::Datatype::categorical;
97  info.Type(8) = data::Datatype::categorical;
98 
99  if (!data::Load("boston_housing_price.csv", dataset, info))
100  FAIL("Cannot load test dataset boston_housing_price.csv!");
101  if (!data::Load("boston_housing_price_responses.csv", responses))
102  FAIL("Cannot load test dataset boston_housing_price_responses.csv!");
103 
104  data::Split(dataset, responses, trainData, testData,
105  trainResponses, testResponses, 0.3);
106 }
107 
108 inline double RMSE(const arma::Row<double>& predictions,
109  const arma::Row<double>& trueResponses)
110 {
111  double mse = arma::accu(arma::square(predictions - trueResponses)) /
112  predictions.n_elem;
113  return sqrt(mse);
114 }
115 
116 #endif
arma::vec Random() const
Return a randomly generated observation according to the probability distribution defined by this object.
Definition: gaussian_distribution.cpp:79
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the data...
Definition: dataset_mapper.hpp:41
A single multivariate Gaussian distribution.
Definition: gaussian_distribution.hpp:24
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
void SetDimensionality(const size_t dimensionality)
Set the dimensionality of an existing DatasetMapper object.
Definition: dataset_mapper_impl.hpp:41
void Split(const arma::Mat< T > &input, const LabelsType &inputLabel, arma::Mat< T > &trainData, arma::Mat< T > &testData, LabelsType &trainLabel, LabelsType &testLabel, const double testRatio, const bool shuffleData=true)
Given an input dataset and labels, split into a training set and test set.
Definition: split_data.hpp:255
Probability distributions.
Definition: diagonal_gaussian_distribution.hpp:18
void LogisticRegressionTestData(arma::mat &data, arma::mat &testData, arma::mat &shuffledData, arma::Row< size_t > &responses, arma::Row< size_t > &testResponses, arma::Row< size_t > &shuffledResponses)
Create the data for a logistic regression test.
Definition: test_function_tools.hpp:34
Datatype Type(const size_t dimension) const
Return the type of a given dimension (numeric or categorical).
Definition: dataset_mapper_impl.hpp:196
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
bool Load(const std::string &filename, arma::Mat< eT > &matrix, const bool fatal=false, const bool transpose=true, const arma::file_type inputLoadType=arma::auto_detect)
Loads a matrix from file, guessing the filetype from the extension.
Definition: load_impl.hpp:89
Regression methods.
Definition: bayesian_linear_regression.hpp:21