31 #ifndef MODULE_PARSER_THREAD_HPP_ 32 #define MODULE_PARSER_THREAD_HPP_ 37 #include "../Thread.hpp" 39 #include "../../Helper/CommaLocale.hpp" 40 #include "../../Helper/DateTime.hpp" 41 #include "../../Helper/DotLocale.hpp" 42 #include "../../Helper/Json.hpp" 43 #include "../../Helper/Strings.hpp" 44 #include "../../Main/Exception.hpp" 45 #include "../../Query/Container.hpp" 46 #include "../../Struct/DataEntry.hpp" 47 #include "../../Struct/QueryProperties.hpp" 48 #include "../../Struct/QueryStruct.hpp" 49 #include "../../Struct/StatusSetter.hpp" 50 #include "../../Struct/ThreadOptions.hpp" 51 #include "../../Struct/ThreadStatus.hpp" 52 #include "../../Timer/Simple.hpp" 64 #include <string_view> 104 using IdString = std::pair<std::uint64_t, std::string>;
// Two table names kept as strings.
// NOTE(review): judging by the names, these are the table holding parsing
// state and the table receiving parsed data; presumably both are filled in by
// setUpTableNames() — confirm in Thread.cpp.
167 std::string parsingTable;
168 std::string targetTable;
// Five lists of QueryStruct entries, one list per purpose.
// NOTE(review): the purposes are inferred from the member names only
// (skip, content-ignore, ID, date/time, fields); presumably populated by
// initQueries() and emptied by deleteQueries() — confirm in Thread.cpp.
175 std::vector<QueryStruct> queriesSkip;
176 std::vector<QueryStruct> queriesContentIgnore;
177 std::vector<QueryStruct> queriesId;
178 std::vector<QueryStruct> queriesDateTime;
179 std::vector<QueryStruct> queriesFields;
// Tick counter (value-initialized to zero) and three steady-clock time points.
// Each time point starts at steady_clock::time_point::min().
// NOTE(review): min() presumably serves as a "not set yet" sentinel — confirm
// how Thread.cpp compares against it.
182 std::uint64_t tickCounter{};
183 std::chrono::steady_clock::time_point startTime{std::chrono::steady_clock::time_point::min()};
184 std::chrono::steady_clock::time_point pauseTime{std::chrono::steady_clock::time_point::min()};
185 std::chrono::steady_clock::time_point idleTime{std::chrono::steady_clock::time_point::min()};
// Flag that defaults to false.
// NOTE(review): the name suggests it records whether IDs are taken from the
// URL alone — confirm where it is set.
189 bool idFromUrlOnly{
false};
// Unsigned 64-bit value, zero by default.
// NOTE(review): presumably the ID of the last URL handled — confirm.
190 std::uint64_t lastUrl{};
// Lock time kept as a string; empty by default.
// NOTE(review): format and meaning are not visible here — confirm.
191 std::string lockTime;
// Four unsigned 64-bit values, all value-initialized to zero.
// NOTE(review): the names suggest they feed progress calculation (first ID,
// ID distance, position distance, total count) — confirm against the code
// that updates the thread status.
194 std::uint64_t idFirst{};
195 std::uint64_t idDist{};
197 std::uint64_t posDist{};
198 std::uint64_t total{};
// Set-up helpers; definitions are not part of this header excerpt.
// NOTE(review): presumably called from onInit() — confirm the call order in
// Thread.cpp.

// Takes a queue of strings by non-const reference; the parameter name
// indicates that warnings are appended to it.
201 void setUpConfig(std::queue<std::string>& warningsTo);
203 void setUpContainer();
204 void setUpDatabase();
205 void setUpTableNames();
207 void setUpSqlStatements();
209 void checkParsingTable();
// Takes the warnings queue by non-const reference.
// NOTE(review): presumably logs the entries and drains the queue — confirm.
212 void logWarnings(std::queue<std::string>& warnings);
// Overrides of virtual functions declared in a base class, as the `override`
// specifier requires.
// NOTE(review): presumably declared by the query container base and used to
// create / release the thread's queries — confirm in Query/Container.hpp.
215 void initQueries()
override;
216 void deleteQueries()
override;
218 const std::vector<std::uint64_t>& queryIds,
219 std::vector<QueryStruct>& propertiesTo
222 const std::vector<std::uint64_t>& queryIds,
223 std::vector<QueryStruct>& propertiesTo
226 std::string_view type,
227 const std::vector<std::string>& names,
228 const std::vector<std::uint64_t>& queryIds,
229 std::vector<QueryStruct>& propertiesTo
// Declarations of the parsing helpers; definitions are not visible in this
// excerpt.
// NOTE(review): presumably driven from onTick() — confirm in Thread.cpp.
233 void parsingUrlSelection();
234 void parsingFetchUrls();
235 void parsingCheckUrls();
// Returns a std::size_t.
// NOTE(review): meaning of the returned count is not visible here — confirm.
236 std::size_t parsingNext();
// Takes one content entry as an IdString (a pair of a 64-bit unsigned ID and
// a string) plus an already-known ID passed as a string view; returns a bool.
// NOTE(review): presumably true means the content was parsed successfully —
// confirm.
237 bool parsingContent(
const IdString& content, std::string_view parsedId);
238 void parsingUrlFinished(
bool success);
// NOTE(review): semantics of `warped` are not visible here — confirm.
239 void parsingSaveResults(
bool warped);
240 void parsingFieldWarning(
241 std::string_view error,
242 std::string_view name,
void onReset() override
Resets the parser.
Definition: Thread.cpp:398
Query properties containing its name, text, type, and result type(s).
Definition: QueryProperties.hpp:39
A data entry containing either parsed or extracted data.
Definition: DataEntry.hpp:45
Parser thread.
Definition: Thread.hpp:87
Class for query container exceptions.
Definition: Container.hpp:148
Query container.
Definition: Container.hpp:76
std::queue< DataEntry > results
Parsed data that has not yet been written to the database.
Definition: Thread.hpp:147
Thread status containing its ID, status message, pause state, and progress.
Definition: ThreadStatus.hpp:54
Namespace for parser classes.
Definition: Config.hpp:43
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
Configuration for parsers.
Definition: Config.hpp:92
Class providing database functionality for parser threads by implementing Wrapper::Database.
Definition: Database.hpp:139
Thread options containing the name of the module run, as well as the IDs of the website, URL list, and configuration used.
Definition: ThreadOptions.hpp:40
Abstract class providing module-independent thread functionality.
Definition: Thread.hpp:93
Class for JSON exceptions.
Definition: Json.hpp:136
void onUnpause() override
Unpauses the parser.
Definition: Thread.cpp:319
Class handling database access for the command-and-control and its threads.
Definition: Database.hpp:366
Thread(Main::Database &dbBase, const ThreadOptions &threadOptions, const ThreadStatus &threadStatus)
Constructor initializing a previously interrupted parser thread.
Definition: Thread.cpp:50
void onInit() override
Initializes the parser.
Definition: Thread.cpp:83
Structure containing all the data needed to keep the status of a thread updated.
Definition: StatusSetter.hpp:57
Class for date/time locale exceptions.
Definition: DateTime.hpp:337
std::queue< IdString > finished
Queue of URLs in the cache that have been finished.
Definition: Thread.hpp:150
void onPause() override
Pauses the parser.
Definition: Thread.cpp:307
Structure to identify a query including its type and result type(s).
Definition: QueryStruct.hpp:40
std::queue< IdString > urls
Queue of URLs in the cache that are still to be processed, together with their IDs.
Definition: Thread.hpp:138
std::string cacheLockTime
The time until which the URLs in the cache are locked, as string.
Definition: Thread.hpp:144
void onClear() override
Clears the parser.
Definition: Thread.cpp:332
void onTick() override
Performs a parser tick.
Definition: Thread.cpp:120
Class for date/time exceptions.
Definition: DateTime.hpp:330
Template class for safe in-scope database locks.
Definition: DatabaseLock.hpp:54
Database database
Database connection for the parser thread.
Definition: Thread.hpp:131
constexpr std::uint8_t updateContentCounterEvery
The number of processed contents after which the thread status will be updated.
Definition: Thread.hpp:78