// One-hot encoding of a sequence of labels. The reference entry further down
// in this listing gives the signature as
//   void OneHotEncoding(const RowType& labelsIn, MatType& output).
// NOTE(review): this listing omits several original lines (the signature, the
// braces, the declaration and update of curLabel, and the else branch), so the
// comments below describe only the statements that are visible.
13 #ifndef MLPACK_CORE_DATA_ONE_HOT_ENCODING_IMPL_HPP 14 #define MLPACK_CORE_DATA_ONE_HOT_ENCODING_IMPL_HPP 32 template<
typename RowType,
typename MatType>
// Label index assigned to each element of labelsIn (one entry per element).
36 arma::Row<size_t> labels;
37 labels.set_size(labelsIn.n_elem);
// Maps each distinct input value to its label index plus one.
41 std::unordered_map<typename MatType::elem_type, size_t> labelMap;
// First pass: walk labelsIn and look every element up in labelMap.
43 for (
size_t i = 0; i < labelsIn.n_elem; ++i)
// Value seen before: reuse its index, undoing the +1 stored in the map.
46 if (labelMap.count(labelsIn[i]) != 0)
48 labels[i] = labelMap[labelsIn[i]] - 1;
// Otherwise the value is recorded under curLabel + 1.
// NOTE(review): the lines that set labels[i] for a new value and advance
// curLabel are not shown in this listing -- confirm against the full source.
53 labelMap[labelsIn[i]] = curLabel + 1;
// Zero-fill output with dimensions (curLabel, labelsIn.n_elem).
59 output.zeros(curLabel, labelsIn.n_elem);
// Second pass: for element i, write a 1 at position (labels[i], i).
61 for (
size_t i = 0; i < labelsIn.n_elem; ++i)
63 output(labels[i], i) = 1;
// Fragment of an overload that one-hot encodes selected rows of a matrix.
// `indices` lists the rows to encode; `output` receives the result. The code
// below also reads `input`, whose declaration is on a line not shown here.
// NOTE(review): the start of the signature, all braces and the else branches
// are missing from this listing; comments cover only the visible statements.
79 const arma::Col<size_t>& indices,
80 arma::Mat<eT>& output)
// Special case for an empty index list (the body of this branch is not shown).
83 if (indices.n_elem == 0)
// Number of output rows each input row will occupy; starts at one per row.
94 arma::Col<size_t> dimensionOffsets(input.n_rows, arma::fill::ones);
// For each row to encode: map from a value in that row to its slot number.
97 std::unordered_map<size_t, std::unordered_map<eT, size_t>> mappings;
98 for (
size_t i = 0; i < indices.n_elem; ++i)
// Rows to encode start at zero; their count grows as distinct values are found.
100 dimensionOffsets[indices[i]] = 0;
// Pair of (row index, empty value-to-slot map). The call that receives it
// begins on a line not shown -- presumably an insertion into `mappings`.
102 std::make_pair(indices[i], std::unordered_map<eT, size_t>()));
// First pass over the data: discover the distinct values of every mapped row.
105 for (
size_t col = 0; col < input.n_cols; ++col)
107 for (
size_t row = 0; row < input.n_rows; ++row)
// Only rows registered in `mappings` are scanned for values.
109 if (mappings.count(row) != 0)
// A value not seen before in this row takes the next free slot, and the row's
// count is incremented.
112 if (mappings[row].count(input(row, col)) == 0)
113 mappings[row][input(row, col)] = dimensionOffsets[row]++;
// Turn the per-row counts into an inclusive running total, so that
// dimensionOffsets[i] is the number of output rows used by input rows 0..i.
121 for (
size_t i = 1; i < dimensionOffsets.n_elem; ++i)
122 dimensionOffsets[i] += dimensionOffsets[i - 1];
// Zero-filled output: the last running total gives the row count, with one
// column per input column.
125 output.zeros(dimensionOffsets[dimensionOffsets.n_elem - 1], input.n_cols);
// Second pass over the data: fill in the output.
128 for (
size_t col = 0; col < input.n_cols; ++col)
130 for (
size_t row = 0; row < input.n_rows; ++row)
// First output row belonging to this input row (the previous running total).
132 const size_t dimOffset = (row == 0) ? 0 : dimensionOffsets[row - 1];
133 if (mappings.count(row) != 0)
// Mapped row: write a one at the slot assigned to this value.
135 output(dimOffset + mappings[row][input(row, col)], col) = eT(1);
// Copies the input value through unchanged.
// NOTE(review): presumably the else branch for unmapped rows -- the branch
// structure is not visible in this listing.
140 output(dimOffset, col) = input(row, col);
// Fragment of an overload that selects the dimensions to encode from
// `datasetInfo` instead of taking an explicit index list.
// NOTE(review): the rest of the signature, the loop that defines `i`, and the
// code that consumes `indices` afterwards are not shown in this listing.
156 template<
typename eT>
158 arma::Mat<eT>& output,
// Indices of the dimensions whose type is categorical.
161 std::vector<size_t> indices;
// Keep dimension i only when datasetInfo reports it as categorical.
164 if (datasetInfo.
Type(i) == data::Datatype::categorical)
166 indices.push_back(i);
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the datatype of each dimension.
Definition: dataset_mapper.hpp:41
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
void OneHotEncoding(const RowType &labelsIn, MatType &output)
Given a set of labels of a particular datatype, convert them to binary vectors.
Definition: one_hot_encoding_impl.hpp:33
size_t Dimensionality() const
Get the dimensionality of the DatasetMapper object (that is, how many dimensions it has information for).
Definition: dataset_mapper_impl.hpp:228
Datatype Type(const size_t dimension) const
Return the type of a given dimension (numeric or categorical).
Definition: dataset_mapper_impl.hpp:196