32 #ifndef MODULE_ANALYZER_ALGO_ASSOCOVERTIME_HPP_ 33 #define MODULE_ANALYZER_ALGO_ASSOCOVERTIME_HPP_ 35 #include "../Thread.hpp" 37 #include "../../../Data/Data.hpp" 38 #include "../../../Helper/DateTime.hpp" 39 #include "../../../Helper/Memory.hpp" 40 #include "../../../Main/Database.hpp" 41 #include "../../../Struct/QueryStruct.hpp" 42 #include "../../../Struct/StatusSetter.hpp" 43 #include "../../../Struct/TextMap.hpp" 44 #include "../../../Struct/ThreadOptions.hpp" 45 #include "../../../Struct/ThreadStatus.hpp" 53 #include <string_view> 54 #include <unordered_map> 98 using StringString = std::pair<std::string, std::string>;
99 using Results = std::vector<std::pair<std::string, std::vector<std::uint64_t>>>;
119 std::string_view
getName()
const override;
144 struct Associations {
145 std::vector<std::uint64_t> keywordPositions;
146 std::vector<std::vector<std::uint64_t>> categoriesPositions;
147 std::uint64_t offset{};
150 using DateAssociationMap = std::unordered_map<std::string, std::unordered_map<std::string, Associations>>;
151 using DateAssociation = std::pair<std::string, std::unordered_map<std::string, Associations>>;
152 using ArticleAssociationMap = std::unordered_map<std::string, Associations>;
153 using ArticleAssociation = std::pair<std::string, Associations>;
157 std::vector<std::string> categoryLabels;
158 std::vector<std::uint64_t> categoryQueries;
159 bool combineSources{
true};
160 bool ignoreEmptyDate{
true};
161 std::uint64_t keyWordQuery{};
162 std::uint16_t windowSize{1};
166 QueryStruct queryKeyWord;
168 std::vector<QueryStruct> queriesCategories;
171 DateAssociationMap associations;
173 std::string previousDate;
175 std::size_t currentCorpus{};
176 std::size_t dateCounter{};
177 std::size_t firstDatePos{};
178 std::size_t dateMapSize{};
179 std::size_t articleIndex{};
180 std::size_t tokenIndex{};
181 std::size_t processedDates{};
183 bool dateSaved{
false};
187 void saveAssociations();
188 [[nodiscard]] Results processDates();
189 void saveResults(
const Results& results);
192 void initQueries()
override;
193 void deleteQueries()
override;
196 void addArticlesForDate(
198 DateAssociationMap::iterator& dateIt,
199 const TextMap& articleMap,
200 const std::vector<std::string>& tokens,
201 std::queue<std::string>& warningsTo
203 DateAssociationMap::iterator addDate(
const std::string& date);
204 ArticleAssociationMap::iterator addArticleToDate(
205 const std::string& article,
206 DateAssociationMap::iterator date
209 const std::string& token,
210 Associations& associationsTo,
211 std::queue<std::string>& warningsTo
214 const DateAssociation& date,
218 const ArticleAssociation& article,
219 std::size_t& occurrencesTo,
220 std::vector<std::uint64_t>& catsCountersTo
222 void processTermOccurrence(
223 const ArticleAssociation& article,
224 std::uint64_t occurrence,
225 std::size_t& occurrencesTo,
226 std::vector<std::uint64_t>& catsCountersTo
228 void processCategory(
229 const ArticleAssociation& article,
230 std::uint64_t termOccurrence,
232 std::vector<std::uint64_t>& catsCountersTo
234 bool processCategoryOccurrence(
235 std::uint64_t termOccurrence,
236 std::uint64_t catOccurrence,
237 std::size_t catIndex,
238 std::vector<std::uint64_t>& catsCountersTo
241 void fillGap(
const std::string& table,
const std::string& date, std::size_t numColumns);
243 const std::string& table,
244 const std::string& date,
245 const std::vector<std::uint64_t>& dataSet,
246 std::size_t numColumns
void onAlgoPause() override
Does nothing.
Definition: AssocOverTime.cpp:191
constexpr auto assocOverTimeAddColumns
Number of extra columns included in a dataset (except date).
Definition: AssocOverTime.hpp:71
std::string_view getName() const override
Returns the name of the algorithm.
Definition: AssocOverTime.cpp:75
constexpr auto assocOverTimeMinColumns
Minimum number of columns included in a dataset (including date).
Definition: AssocOverTime.hpp:74
Namespace for algorithm classes.
Definition: All.cpp:52
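Algorithm class calculating associations in the text corpus over time (the generated brief read "Empty algorithm template.", apparently a stale placeholder).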
Definition: AssocOverTime.hpp:87
Abstract class providing thread functionality to algorithm (child) classes.
Definition: Thread.hpp:84
Thread status containing its ID, status message, pause state, and progress.
Definition: ThreadStatus.hpp:54
void onAlgoUnpause() override
Does nothing.
Definition: AssocOverTime.cpp:194
void onAlgoInitTarget() override
Initializes the target table for the algorithm.
Definition: AssocOverTime.cpp:89
Text map entry.
Definition: TextMap.hpp:49
Thread options containing the name of the module run, as well as the IDs of the website, URL list, and configuration used.
Definition: ThreadOptions.hpp:40
Class handling database access for the command-and-control and its threads.
Definition: Database.hpp:366
Class for analyzer exceptions to be used by algorithms.
Definition: Thread.hpp:242
AssocOverTime(Main::Database &dbBase, const ThreadOptions &threadOptions, const ThreadStatus &threadStatus)
Constructor. Continues a previously interrupted algorithm run.
Definition: AssocOverTime.cpp:44
Structure containing all the data needed to keep the status of a thread updated.
Definition: StatusSetter.hpp:57
std::vector< TextMapEntry > TextMap
A text map is defined as a vector of text map entries.
Definition: TextMap.hpp:280
void parseAlgoOption() override
Parses a configuration option for the algorithm.
Definition: AssocOverTime.cpp:204
void onAlgoClear() override
Does nothing.
Definition: AssocOverTime.cpp:197
void onAlgoInit() override
Generates the corpus.
Definition: AssocOverTime.cpp:125
void onAlgoTick() override
Calculates the associations in the text corpus.
Definition: AssocOverTime.cpp:177
void checkAlgoOptions() override
Checks the configuration options for the algorithm.
Definition: AssocOverTime.cpp:220
Structure to identify a query including its type and result type(s).
Definition: QueryStruct.hpp:40
constexpr auto assocOverTimeUpdateProgressEvery
Number of rows after which the progress of the thread is updated while saving.
Definition: AssocOverTime.hpp:68
void resetAlgo() override
Resets the algorithm.
Definition: AssocOverTime.cpp:299