32 #ifndef MODULE_ANALYZER_THREAD_HPP_ 33 #define MODULE_ANALYZER_THREAD_HPP_ 38 #include "../Config.hpp" 39 #include "../Thread.hpp" 41 #include "../../Data/Corpus.hpp" 42 #include "../../Data/Data.hpp" 43 #include "../../Helper/CommaLocale.hpp" 44 #include "../../Helper/DateTime.hpp" 45 #include "../../Helper/Json.hpp" 46 #include "../../Helper/Memory.hpp" 47 #include "../../Main/Exception.hpp" 48 #include "../../Network/FTPUpload.hpp" 49 #include "../../Query/Container.hpp" 50 #include "../../Struct/CorpusProperties.hpp" 51 #include "../../Struct/QueryProperties.hpp" 52 #include "../../Struct/QueryStruct.hpp" 53 #include "../../Struct/StatusSetter.hpp" 54 #include "../../Struct/ThreadOptions.hpp" 55 #include "../../Struct/ThreadStatus.hpp" 56 #include "../../Timer/Simple.hpp" 58 #include "../../_extern/rapidjson/include/rapidjson/document.h" 69 #include <string_view> 135 virtual std::string_view
getName()
const = 0;
156 const std::vector<std::uint64_t>& queryIds,
157 std::vector<QueryStruct>& propertiesTo
250 std::vector<QueryStruct> queryFilterQueries;
253 std::chrono::time_point<std::chrono::steady_clock> idleStart{};
256 void setUpConfig(std::queue<std::string>& warningsTo);
258 void setUpDatabase();
260 void setUpSqlStatements();
262 void setUpAlgorithm();
263 void logWarnings(std::queue<std::string>& warnings);
266 void addCorpus(std::size_t index,
StatusSetter& statusSetter);
268 void filterCorpusByQuery(std::size_t index,
StatusSetter& statusSetter);
// Builds a rapidjson::Value from the member of `value` that holds the data.
// NOTE(review): this listing elides lines (the remaining parameters, the type
//  dispatch and the return statement) — presumably exactly one setter below runs
//  per data type; confirm against the complete header.
// Throws Thread::Exception naming `originalType` if the data type is unknown.
271 template<
typename Allocator>
272 [[nodiscard]]
static rapidjson::Value createJSONValue(
275 const std::string& originalType,
278 rapidjson::Value result;
282 result.SetBool(value.
_b);
287 result.SetInt(value.
_i32);
292 result.SetUint(value.
_ui32);
// 64-bit signed: SetUint() takes a 32-bit unsigned and would truncate the value
//  and lose its sign, so the dedicated 64-bit setter is required.
297 result.SetInt64(value.
_i64);
// 64-bit unsigned: SetUint() would silently drop the upper 32 bits.
302 result.SetUint64(value.
_ui64);
307 result.SetDouble(value.
_d);
// The allocator overload copies the string into the JSON value's own storage.
312 result.SetString(value.
_s, allocator);
317 throw Thread::Exception(
"Cannot write unknown data type '" + originalType +
"' to JSON");
32-bit integer.
Definition: Data.hpp:74
bool addCorpora(bool isCombine, StatusSetter &statusSetter)
Gets the contents of all corpora, filters and combines them if necessary.
Definition: Thread.cpp:318
Query properties containing its name, text, type, and result type(s).
Definition: QueryProperties.hpp:39
std::uint32_t _ui32
Unsigned 32-bit integer value.
Definition: Data.hpp:106
Namespace for analyzer classes.
virtual void onAlgoInit()=0
Initializes the algorithm.
virtual void onAlgoTick()=0
Performs an algorithm tick.
virtual void onAlgoUnpause()=0
Unpauses the algorithm.
virtual void onAlgoPause()=0
Pauses the algorithm.
virtual void onAlgoClear()=0
Clears the algorithm.
String.
Definition: Data.hpp:89
Boolean value.
Definition: Data.hpp:71
std::vector< Corpus > corpora
Vector of corpora for the analyzer thread.
Definition: Thread.hpp:121
Query container.
Definition: Container.hpp:76
std::string getTargetTableName() const
Gets the full name of the target table.
Definition: Thread.cpp:294
Abstract class providing thread functionality to algorithm (child) classes.
Definition: Thread.hpp:84
void onTick() override
Performs an algorithm tick.
Definition: Thread.cpp:100
void cleanUpQueries()
Cleans up all queries and frees their memory.
Definition: Thread.cpp:486
Thread status containing its ID, status message, pause state, and progress.
Definition: ThreadStatus.hpp:54
std::int64_t _i64
64-bit integer value.
Definition: Data.hpp:109
void onReset() override
Resets the algorithm.
Definition: Thread.cpp:163
void onInit() override
Initializes the analyzer, the target table, and the algorithm.
Definition: Thread.cpp:76
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
constexpr auto combineUpdateStatusEvery
The number of tokens after which the status will be updated when combining corpora.
Definition: Thread.hpp:79
virtual std::string_view getName() const =0
Returns the name of the algorithm.
Corpus properties containing the type, table, and column name of its source.
Definition: CorpusProperties.hpp:41
Unsigned 32-bit integer.
Definition: Data.hpp:77
void deleteQueries() override
Does nothing.
Definition: Thread.cpp:190
Thread options containing the name of the module run, as well as the IDs of the website, URL list, and configuration used.
Definition: ThreadOptions.hpp:40
Abstract class providing module-independent thread functionality.
Definition: Thread.hpp:93
Class representing a text corpus.
Definition: Corpus.hpp:165
Class handling database access for the command-and-control and its threads.
Definition: Database.hpp:366
Type
Data types.
Definition: Data.hpp:66
std::uint64_t _ui64
Unsigned 64-bit integer value.
Definition: Data.hpp:112
Class for analyzer exceptions to be used by algorithms.
Definition: Thread.hpp:242
void onPause() override
Pauses the analyzer.
Definition: Thread.cpp:132
Structure containing all the data needed to keep the status of a thread updated.
Definition: StatusSetter.hpp:57
Unsigned 64-bit integer.
Definition: Data.hpp:83
double _d
Floating point value (with double precision).
Definition: Data.hpp:115
Floating point value (with double precision).
Definition: Data.hpp:86
Class providing database functionality for analyzer threads by implementing Wrapper::Database.
Definition: Database.hpp:188
std::string _s
String value.
Definition: Data.hpp:119
Database database
Database connection for the analyzer thread.
Definition: Thread.hpp:114
virtual void onAlgoInitTarget()=0
Initializes the target table for the algorithm.
void checkCorpusSources(StatusSetter &statusSetter)
Checks the specified sources for creating the corpus.
Definition: Thread.cpp:359
std::int32_t _i32
32-bit integer value.
Definition: Data.hpp:103
void onUnpause() override
Unpauses the analyzer.
Definition: Thread.cpp:141
void addOptionalQuery(std::uint64_t queryId, QueryStruct &propertiesTo)
Adds an optional query.
Definition: Thread.cpp:205
void uploadResult()
Uploads the specified result via FTP.
Definition: Thread.cpp:374
Structure to identify a query including its type and result type(s).
Definition: QueryStruct.hpp:40
bool _b
Boolean value.
Definition: Data.hpp:100
Abstract configuration for analyzers, to be implemented by algorithm classes.
Definition: Config.hpp:103
64-bit integer.
Definition: Data.hpp:80
Thread(Main::Database &dbBase, const ThreadOptions &threadOptions, const ThreadStatus &threadStatus)
Constructor initializing a previously interrupted analyzer thread.
Definition: Thread.cpp:44
void initQueries() override
Does nothing.
Definition: Thread.cpp:183
A generic value.
Definition: Data.hpp:96
void onClear() override
Clears the algorithm.
Definition: Thread.cpp:150
void addQueries(const std::vector< std::uint64_t > &queryIds, std::vector< QueryStruct > &propertiesTo)
Adds multiple queries at once, ignoring empty ones.
Definition: Thread.cpp:227
void cleanUpCorpora()
Cleans up all corpora and frees their memory.
Definition: Thread.cpp:481
void pause()
Pauses the thread.
Definition: Thread.cpp:281
void finished()
Sets the status of the analyzer to finished.
Definition: Thread.cpp:257