13 #ifndef MLPACK_CORE_DATA_STRING_ENCODING_HPP 14 #define MLPACK_CORE_DATA_STRING_ENCODING_HPP 33 template<
typename EncodingPolicyType,
34 typename DictionaryType>
42 template<
typename ... ArgTypes>
//! Default copy assignment operator: the compiler-generated member-wise copy
//! is used (explicitly defaulted).
61 StringEncoding&
operator=(
const StringEncoding&) =
default;
//! Default move assignment operator: the compiler-generated member-wise move
//! is used (explicitly defaulted).
67 StringEncoding&
operator=(StringEncoding&&) =
default;
84 template<
typename TokenizerType>
86 const TokenizerType& tokenizer);
118 template<
typename OutputType,
typename TokenizerType>
119 void Encode(
const std::vector<std::string>& input,
121 const TokenizerType& tokenizer);
//! Return the dictionary (read-only reference to the `dictionary` member).
124 const DictionaryType&
Dictionary()
const {
return dictionary; }
//! Serialize the class to the given archive.
//!
//! @tparam Archive Type of the archive being read from or written to.
//! @param ar Archive to serialize the object with.
//!
//! NOTE(review): the second parameter is an unnamed `const uint32_t`;
//! presumably it is the serialization version number and is unused — confirm
//! against the definition in string_encoding_impl.hpp.
136 template<
typename Archive>
137 void serialize(Archive& ar,
const uint32_t );
168 template<
typename OutputType,
typename TokenizerType,
typename PolicyType>
169 void EncodeHelper(
const std::vector<std::string>& input,
171 const TokenizerType& tokenizer,
197 template<
typename TokenizerType,
typename PolicyType,
typename ElemType>
198 void EncodeHelper(
const std::vector<std::string>& input,
199 std::vector<std::vector<ElemType>>& output,
200 const TokenizerType& tokenizer,
203 PolicyType>::onePassEncoding>::type* = 0);
//! The encoding policy object, of the EncodingPolicyType template parameter
//! type.
207 EncodingPolicyType encodingPolicy;
//! The dictionary; this is the object returned by Dictionary().
209 DictionaryType dictionary;
This is a template struct that provides some information about various encoding policies.
Definition: policy_traits.hpp:27
DictionaryType & Dictionary()
Modify the dictionary.
Definition: string_encoding.hpp:126
void CreateMap(const std::string &input, const TokenizerType &tokenizer)
Initialize the dictionary using the given corpus.
Definition: string_encoding_impl.hpp:65
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.
EncodingPolicyType & EncodingPolicy()
Modify the encoding policy object.
Definition: string_encoding.hpp:131
The class translates a set of strings into numbers using various encoding algorithms.
Definition: string_encoding.hpp:35
const DictionaryType & Dictionary() const
Return the dictionary.
Definition: string_encoding.hpp:124
void serialize(Archive &ar, const uint32_t)
Serialize the class to the given archive.
Definition: string_encoding_impl.hpp:201
StringEncoding & operator=(const StringEncoding &)=default
Default copy assignment operator.
StringEncoding(ArgTypes &&... args)
Pass the given arguments to the policy constructor and create the StringEncoding object using the policy.
Definition: string_encoding_impl.hpp:25
const EncodingPolicyType & EncodingPolicy() const
Return the encoding policy object.
Definition: string_encoding.hpp:129
void Clear()
Clear the dictionary.
Definition: string_encoding_impl.hpp:58
void Encode(const std::vector< std::string > &input, OutputType &output, const TokenizerType &tokenizer)
Encode the given text and write the result to the given output.
Definition: string_encoding_impl.hpp:91