33 #ifndef MODULE_ANALYZER_ALGO_ALLTOKENS_HPP_ 34 #define MODULE_ANALYZER_ALGO_ALLTOKENS_HPP_ 36 #include "../Thread.hpp" 38 #include "../../../Data/Corpus.hpp" 39 #include "../../../Data/Data.hpp" 40 #include "../../../Helper/Memory.hpp" 41 #include "../../../Main/Database.hpp" 42 #include "../../../Struct/StatusSetter.hpp" 43 #include "../../../Struct/TextMap.hpp" 44 #include "../../../Struct/ThreadOptions.hpp" 45 #include "../../../Struct/ThreadStatus.hpp" 46 #include "../../../Timer/Simple.hpp" 52 #include <string_view> 98 using StringString = std::pair<std::string, std::string>;
100 using TokenMap = std::map<std::string, std::size_t>;
101 using TokenCounts = std::map<std::size_t, std::size_t>;
102 using SingleMap = std::map<std::string, TokenCounts>;
103 using DoubleMap = std::map<std::string, SingleMap>;
123 std::string_view
getName()
const override;
149 std::string countTable;
160 std::size_t articleCount{};
162 std::size_t updateCount{};
163 std::size_t countsTable{};
165 bool hasArticles{
false};
167 bool firstTick{
true};
171 TokenCounts tokenCounts;
183 void updateProgress(std::uint32_t every);
187 void saveSingle(
const std::string& typeName);
188 void saveTokenCounts();
189 void initCountsTable();
192 static void processSingle(
193 const std::vector<std::string>& corpusTokens,
198 static void processDouble(
203 static void processToken(
204 const std::string& token,
208 static void addTokenCounts(
209 const TokenCounts& from,
214 template<
typename T>
bool isDone(
const T& container) {
215 if(this->count >= container.size()) {
void checkAlgoOptions() override
Checks the configuration options for the algorithm.
Definition: AllTokens.cpp:233
Namespace for algorithm classes.
Definition: All.cpp:52
void onAlgoPause() override
Does nothing.
Definition: AllTokens.cpp:214
constexpr auto allTokensUpdateEveryDate
Indicates after how many dates the status will be updated, if a date map is available.
Definition: AllTokens.hpp:68
Abstract class providing thread functionality to algorithm (child) classes.
Definition: Thread.hpp:84
Thread status containing its ID, status message, pause state, and progress.
Definition: ThreadStatus.hpp:54
Text map entry.
Definition: TextMap.hpp:49
void parseAlgoOption() override
Parses a configuration option for the algorithm.
Definition: AllTokens.cpp:222
Thread options containing the name of the module run, as well as the IDs of the website, URL list, and configuration used.
Definition: ThreadOptions.hpp:40
void onAlgoClear() override
Does nothing.
Definition: AllTokens.cpp:219
void resetAlgo() override
Resets the algorithm.
Definition: AllTokens.cpp:248
Class handling database access for the command-and-control and its threads.
Definition: Database.hpp:366
constexpr auto allTokensUpdateEveryArticle
Indicates after how many articles the status will be updated, if no date map, but an article map is available.
Definition: AllTokens.hpp:71
Class for analyzer exceptions to be used by algorithms.
Definition: Thread.hpp:242
void onAlgoInitTarget() override
Initializes the target table for the algorithm.
Definition: AllTokens.cpp:90
Structure for inserting multiple values of different types into a row.
Definition: Data.hpp:360
void onAlgoTick() override
Counts tokens in the current date, article, or token.
Definition: AllTokens.cpp:169
constexpr auto allTokensColumns
The number of columns in the tokens table.
Definition: AllTokens.hpp:65
Structure containing all the data needed to keep the status of a thread updated.
Definition: StatusSetter.hpp:57
constexpr auto allTokensUpdateEveryRow
Indicates after how many rows the status will be updated while saving the results to the database.
Definition: AllTokens.hpp:77
std::string_view getName() const override
Returns the name of the algorithm.
Definition: AllTokens.cpp:76
void onAlgoInit() override
Initializes the algorithm and processes its input.
Definition: AllTokens.cpp:106
void onAlgoUnpause() override
Unpauses the algorithm.
Definition: AllTokens.cpp:216
Counts all tokens in a corpus.
Definition: AllTokens.hpp:89
AllTokens(Main::Database &dbBase, const ThreadOptions &threadOptions, const ThreadStatus &threadStatus)
Continues a previously interrupted algorithm run.
Definition: AllTokens.cpp:45
constexpr auto allTokensUpdateEveryToken
Indicates after how many tokens the status will be updated, if no date and no article map is available.
Definition: AllTokens.hpp:74