35 #ifndef MODULE_ANALYZER_ALGO_CORPUSGENERATOR_HPP_
36 #define MODULE_ANALYZER_ALGO_CORPUSGENERATOR_HPP_
38 #include "../Thread.hpp"
40 #include "../../../Data/Corpus.hpp"
41 #include "../../../Helper/Math.hpp"
42 #include "../../../Helper/Memory.hpp"
43 #include "../../../Helper/Utf8.hpp"
44 #include "../../../Main/Database.hpp"
45 #include "../../../Struct/StatusSetter.hpp"
46 #include "../../../Struct/TextMap.hpp"
47 #include "../../../Struct/ThreadOptions.hpp"
48 #include "../../../Struct/ThreadStatus.hpp"
54 #include <string_view>
92 using StringString = std::pair<std::string, std::string>;
112 std::string_view getName() const override;
140 static bool isSentenceEmpty(
141 const std::pair<std::size_t, std::size_t>& sentence,
142 const std::vector<std::string>& tokens
CorpusGenerator(Main::Database &dbBase, const ThreadOptions &threadOptions, const ThreadStatus &threadStatus)
Continues a previously interrupted algorithm run.
Definition: CorpusGenerator.cpp:47
void onAlgoInitTarget() override
Initializes the target table for the algorithm.
Definition: CorpusGenerator.cpp:92
Namespace for algorithm classes.
Definition: All.cpp:52
void onAlgoUnpause() override
Does nothing.
Definition: CorpusGenerator.cpp:352
Abstract class providing thread functionality to algorithm (child) classes.
Definition: Thread.hpp:84
Algorithm building a text corpus and creating corpus statistics from the input data.
Definition: CorpusGenerator.hpp:80
Thread status containing its ID, status message, pause state, and progress.
Definition: ThreadStatus.hpp:54
void onAlgoClear() override
Does nothing.
Definition: CorpusGenerator.cpp:355
Text map entry.
Definition: TextMap.hpp:49
Thread options containing the name of the module run, as well as the IDs of the website, URL list, and configuration used.
Definition: ThreadOptions.hpp:40
Class handling database access for the command-and-control and its threads.
Definition: Database.hpp:366
Type
Data types.
Definition: Data.hpp:66
void parseAlgoOption() override
Does nothing.
Definition: CorpusGenerator.cpp:362
Class for analyzer exceptions to be used by algorithms.
Definition: Thread.hpp:242
constexpr auto corpusNumFields
Number of target fields.
Definition: CorpusGenerator.hpp:68
Structure containing all the data needed to keep the status of a thread updated.
Definition: StatusSetter.hpp:57
std::string_view getName() const override
Returns the name of the algorithm.
Definition: CorpusGenerator.cpp:79
void onAlgoTick() override
Sleeps until the thread is terminated.
Definition: CorpusGenerator.cpp:340
A generic value.
Definition: Data.hpp:96
void onAlgoPause() override
Does nothing.
Definition: CorpusGenerator.cpp:349
void checkAlgoOptions() override
Does nothing.
Definition: CorpusGenerator.cpp:365
void resetAlgo() override
Resets the algorithm.
Definition: CorpusGenerator.cpp:373
void onAlgoInit() override
Generates the corpus.
Definition: CorpusGenerator.cpp:119