mlpack
dataset_mapper_impl.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_CORE_DATA_DATASET_INFO_IMPL_HPP
14 #define MLPACK_CORE_DATA_DATASET_INFO_IMPL_HPP
15 
16 // In case it hasn't already been included.
17 #include "dataset_mapper.hpp"
18 
19 namespace mlpack {
20 namespace data {
21 
22 // Default constructor.
23 template<typename PolicyType, typename InputType>
25  const size_t dimensionality) :
26  types(dimensionality, Datatype::numeric)
27 {
28  // Nothing to initialize here.
29 }
30 
31 template<typename PolicyType, typename InputType>
33  const size_t dimensionality) :
34  types(dimensionality, Datatype::numeric),
35  policy(std::move(policy))
36 {
37  // Nothing to initialize here.
38 }
39 
40 template<typename PolicyType, typename InputType>
42  const size_t dimensionality)
43 {
44  types = std::vector<Datatype>(dimensionality, Datatype::numeric);
45  maps.clear();
46 }
47 
48 // Utility helper function to call MapFirstPass.
49 template<typename PolicyType, typename InputType, typename T>
50 void CallMapFirstPass(
51  PolicyType& policy,
52  const InputType& input,
53  const size_t dimension,
54  std::vector<Datatype>& types,
55  const typename std::enable_if<PolicyType::NeedsFirstPass>::type* = 0)
56 {
57  policy.template MapFirstPass<T>(input, dimension, types);
58 }
59 
60 // Utility helper function that doesn't call anything.
61 template<typename PolicyType, typename InputType, typename T>
62 void CallMapFirstPass(
63  PolicyType& /* policy */,
64  const InputType& /* input */,
65  const size_t /* dimension */,
66  std::vector<Datatype>& /* types */,
67  const typename std::enable_if<!PolicyType::NeedsFirstPass>::type* = 0)
68 {
69  // Nothing to do here.
70 }
71 
72 template<typename PolicyType, typename InputType>
73 template<typename T>
75  const size_t dimension)
76 {
77  // Call the correct overload (via SFINAE).
78  CallMapFirstPass<PolicyType, InputType, T>(policy, input, dimension, types);
79 }
80 
81 // When we want to insert value into the map, we use the policy to map the
82 // input.
83 template<typename PolicyType, typename InputType>
84 template<typename T>
86  const InputType& input,
87  const size_t dimension)
88 {
89  return policy.template MapString<MapType, T>(input, dimension, maps, types);
90 }
91 
/**
 * A NaN test that can be called with any type.
 *
 * The primary template always reports false, so types for which no
 * specialization exists are never considered NaN.  The specializations for
 * float, double and long double defer to std::isnan().
 *
 * @tparam T Type of the value being tested.
 * @return Always false for the primary template.
 */
template<typename T>
inline bool isnanSafe(const T& /* t */)
{
  return false;
}

//! Specialization for double: true if and only if t is NaN.
template<>
inline bool isnanSafe(const double& t)
{
  return std::isnan(t);
}

//! Specialization for float: true if and only if t is NaN.
template<>
inline bool isnanSafe(const float& t)
{
  return std::isnan(t);
}

//! Specialization for long double: true if and only if t is NaN.
template<>
inline bool isnanSafe(const long double& t)
{
  return std::isnan(t);
}
120 
121 
122 // Return the input corresponding to a value in a given dimension.
123 template<typename PolicyType, typename InputType>
124 template<typename T>
126  const T value,
127  const size_t dimension,
128  const size_t unmappingIndex) const
129 {
130  // NaN cannot serve as a map key, so a NaN value is looked up under a
131  // stand-in key instead: the representable value just below max().
132  const T usedValue = isnanSafe(value) ?
133  std::nexttoward(std::numeric_limits<T>::max(), T(0)) :
134  value;
135 
136  // Throw an exception if the value doesn't exist.
137  if (maps.at(dimension).second.count(usedValue) == 0)
138  {
139  std::ostringstream oss;
140  oss << "DatasetMapper<PolicyType, InputType>::UnmapString(): value '"
141  << value << "' unknown for dimension " << dimension;
142  throw std::invalid_argument(oss.str());
143  }
144 
145  if (unmappingIndex >= maps.at(dimension).second.at(usedValue).size())
146  {
147  std::ostringstream oss;
148  oss << "DatasetMapper<PolicyType, InputType>::UnmapString(): value '"
149  << value << "' only has "
150  << maps.at(dimension).second.at(usedValue).size()
151  << " unmappings, but unmappingIndex is " << unmappingIndex << "!";
152  throw std::invalid_argument(oss.str());
153  }
154 
155  return maps.at(dimension).second.at(usedValue)[unmappingIndex];
156 }
157 
158 template<typename PolicyType, typename InputType>
159 template<typename T>
161  const T value,
162  const size_t dimension) const
163 {
164  // NaN cannot serve as a map key, so a NaN value is looked up under a
165  // stand-in key instead: the representable value just below max().
166  if (isnanSafe(value))
167  {
168  const T newValue = std::nexttoward(std::numeric_limits<T>::max(), T(0));
169  return maps.at(dimension).second.at(newValue).size();
170  }
171 
172  return maps.at(dimension).second.at(value).size();
173 }
174 
175 // Return the value corresponding to an input in a given dimension.
176 template<typename PolicyType, typename InputType>
177 inline typename PolicyType::MappedType
179  const InputType& input,
180  const size_t dimension)
181 {
182  // Throw an exception if the value doesn't exist.
183  if (maps[dimension].first.count(input) == 0)
184  {
185  std::ostringstream oss;
186  oss << "DatasetMapper<PolicyType, InputType>::UnmapValue(): input '"
187  << input << "' unknown for dimension " << dimension;
188  throw std::invalid_argument(oss.str());
189  }
190 
191  return maps[dimension].first.at(input);
192 }
193 
194 // Get the type of a particular dimension.
195 template<typename PolicyType, typename InputType>
197  const size_t dimension) const
198 {
199  if (dimension >= types.size())
200  {
201  std::ostringstream oss;
202  oss << "requested type of dimension " << dimension << ", but dataset only "
203  << "has " << types.size() << " dimensions";
204  throw std::invalid_argument(oss.str());
205  }
206 
207  return types[dimension];
208 }
209 
210 template<typename PolicyType, typename InputType>
212  const size_t dimension)
213 {
214  if (dimension >= types.size())
215  types.resize(dimension + 1, Datatype::numeric);
216 
217  return types[dimension];
218 }
219 
220 template<typename PolicyType, typename InputType>
221 inline size_t
223 {
224  return (maps.count(dimension) == 0) ? 0 : maps.at(dimension).first.size();
225 }
226 
227 template<typename PolicyType, typename InputType>
229 {
230  return types.size();
231 }
232 
233 template<typename PolicyType, typename InputType>
234 inline const PolicyType& DatasetMapper<PolicyType, InputType>::Policy() const
235 {
236  return this->policy;
237 }
238 
239 template<typename PolicyType, typename InputType>
241 {
242  return this->policy;
243 }
244 
245 template<typename PolicyType, typename InputType>
246 inline void DatasetMapper<PolicyType, InputType>::Policy(PolicyType&& policy)
247 {
248  this->policy = std::forward<PolicyType>(policy);
249 }
250 
251 } // namespace data
252 } // namespace mlpack
253 
254 #endif
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the data...
Definition: dataset_mapper.hpp:41
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
Datatype
The Datatype enum specifies the types of data mlpack algorithms can use.
Definition: datatype.hpp:24
Definition: pointer_wrapper.hpp:23
DatasetMapper(const size_t dimensionality=0)
Create the DatasetMapper object with the given dimensionality.
Definition: dataset_mapper_impl.hpp:24
bool isnanSafe(const T &)
A safe version of isnan() that only gets called when the type has a NaN at all.
Definition: dataset_mapper_impl.hpp:98