31 #ifndef STRUCT_CORPUSPROPERTIES_HPP_ 32 #define STRUCT_CORPUSPROPERTIES_HPP_ 157 std::uint16_t setSourceType,
158 const std::string& setSourceTable,
159 const std::string& setSourceColumn,
160 const std::vector<std::uint16_t>& setManipulators,
161 const std::vector<std::string>& setModels,
162 const std::vector<std::string>& setDictionaries,
163 const std::vector<std::string>& setLanguages,
164 const std::vector<std::uint16_t>& setSavePoints,
165 std::uint64_t setFreeMemoryEvery
167 sourceTable{setSourceTable},
168 sourceColumn{setSourceColumn},
169 manipulators{setManipulators},
171 dictionaries{setDictionaries},
172 languages{setLanguages},
198 std::uint16_t setSourceType,
199 const std::string& setSourceTable,
200 const std::string& setSourceColumn,
201 std::uint64_t setFreeMemoryEvery
203 sourceTable{setSourceTable},
204 sourceColumn{setSourceColumn},
CorpusProperties(std::uint16_t setSourceType, const std::string &setSourceTable, const std::string &setSourceColumn, std::uint64_t setFreeMemoryEvery)
Constructor setting properties for a continuous corpus.
Definition: CorpusProperties.hpp:197
CorpusProperties()=default
Default constructor.
std::uint64_t freeMemoryEvery
Number of processed bytes in a continuous corpus after which memory will be freed.
Definition: CorpusProperties.hpp:90
bool tokenize
Indicates whether the corpus is tokenized (as opposed to continuous).
Definition: CorpusProperties.hpp:97
CorpusProperties(std::uint16_t setSourceType, const std::string &setSourceTable, const std::string &setSourceColumn, const std::vector< std::uint16_t > &setManipulators, const std::vector< std::string > &setModels, const std::vector< std::string > &setDictionaries, const std::vector< std::string > &setLanguages, const std::vector< std::uint16_t > &setSavePoints, std::uint64_t setFreeMemoryEvery)
Constructor setting properties for a tokenized corpus.
Definition: CorpusProperties.hpp:156
Corpus properties containing the type, table, and column name of its source.
Definition: CorpusProperties.hpp:41
std::uint16_t sourceType
The type of the source from which the corpus is created (see below).
Definition: CorpusProperties.hpp:52
std::vector< std::string > models
The models used by the manipulators with the same array index.
Definition: CorpusProperties.hpp:64
std::vector< std::uint16_t > savePoints
List of savepoints.
Definition: CorpusProperties.hpp:83
Namespace for data structures.
Definition: AlgoThreadProperties.hpp:43
std::string sourceColumn
The name of the table column from which the corpus is created.
Definition: CorpusProperties.hpp:58
std::vector< std::uint16_t > manipulators
The IDs of manipulators for preprocessing the corpus.
Definition: CorpusProperties.hpp:61
std::vector< std::string > languages
The languages used by the manipulators with the same array index.
Definition: CorpusProperties.hpp:70
std::string sourceTable
The name of the table from which the corpus is created.
Definition: CorpusProperties.hpp:55
std::vector< std::string > dictionaries
The dictionaries used by the manipulators with the same array index.
Definition: CorpusProperties.hpp:67