32 #ifndef MODULE_ANALYZER_DATABASE_HPP_ 33 #define MODULE_ANALYZER_DATABASE_HPP_ 37 #include "../../Data/Corpus.hpp" 38 #include "../../Data/Data.hpp" 39 #include "../../Helper/CommaLocale.hpp" 40 #include "../../Helper/Container.hpp" 41 #include "../../Helper/Json.hpp" 42 #include "../../Helper/Memory.hpp" 43 #include "../../Helper/Portability/mysqlcppconn.h" 44 #include "../../Main/Exception.hpp" 45 #include "../../Struct/CorpusProperties.hpp" 46 #include "../../Struct/StatusSetter.hpp" 47 #include "../../Struct/TableColumn.hpp" 48 #include "../../Struct/TargetTableProperties.hpp" 49 #include "../../Struct/TextMap.hpp" 50 #include "../../Timer/Simple.hpp" 51 #include "../../Wrapper/Database.hpp" 52 #include "../../Wrapper/DatabaseLock.hpp" 54 #include "../../_extern/rapidjson/include/rapidjson/document.h" 56 #include <cppconn/exception.h> 57 #include <cppconn/prepared_statement.h> 58 #include <cppconn/resultset.h> 59 #include <cppconn/statement.h> 60 #include <mysql_connection.h> 69 #include <unordered_map> 202 using IsRunningCallback = std::function<bool()>;
/// Vector of pairs of two std::size_t values.
/// NOTE(review): the meaning of the two values in each pair is not visible in this listing — confirm before relying on it.
203 using SentenceMap = std::vector<std::pair<std::size_t, std::size_t>>;
/// Owning (std::unique_ptr) handle to a sql::ResultSet.
204 using SqlResultSetPtr = std::unique_ptr<sql::ResultSet>;
/// Pair of two strings; used, among others, for the names and types of the target fields.
205 using StringString = std::pair<std::string, std::string>;
235 const std::string& name,
236 const std::vector<StringString>& fields,
255 const std::string& filterDateFrom,
256 const std::string& filterDateTo,
258 std::size_t& sourcesTo,
/// Public helper function getting the full name of a source table.
///
/// Takes a 16-bit unsigned `type` and the `name` of the table and returns the
/// resulting name by value. The function is const-qualified and its result is
/// marked [[nodiscard]].
/// NOTE(review): which values of `type` are valid and how the full name is put
/// together is decided by the implementation, which is not visible here — confirm there.
267 [[nodiscard]] std::string
getSourceTableName(std::uint16_t type,
const std::string& name)
const;
/// Public helper function getting the full name of a source column.
///
/// Static: takes a 16-bit unsigned `type` and the `name` of the column and
/// returns the resulting name by value; the result is marked [[nodiscard]].
/// NOTE(review): which values of `type` are valid and how the full name is put
/// together is decided by the implementation, which is not visible here — confirm there.
268 [[nodiscard]]
static std::string
getSourceColumnName(std::uint16_t type,
const std::string& name);
270 std::vector<std::uint8_t>& types,
271 std::vector<std::string>& tables,
272 std::vector<std::string>& columns
// One std::size_t member per database operation, each value-initialized to zero.
// NOTE(review): judging by their names, these presumably hold the IDs of the
// prepared SQL statements set up by prepare(), with zero meaning "not prepared
// yet" — the enclosing declaration is not visible here, confirm against it.
312 std::size_t getCorpusInfo{};
313 std::size_t checkCorpusSavePoint{};
314 std::size_t getCorpusFirst{};
315 std::size_t getCorpusSavePoint{};
316 std::size_t getCorpusNext{};
317 std::size_t isCorpusChanged{};
318 std::size_t isCorpusChangedParsing{};
319 std::size_t isCorpusChangedExtracting{};
320 std::size_t isCorpusChangedAnalyzing{};
321 std::size_t deleteCorpus{};
322 std::size_t addChunkContinuous{};
323 std::size_t addChunkTokenized{};
324 std::size_t measureChunk{};
325 std::size_t measureCorpus{};
326 std::size_t updateTargetTable{};
327 std::size_t getTargetTableUpdated{};
328 std::size_t updateAdditionalTable{};
// Callback returning bool (IsRunningCallback, i.e. std::function<bool()>).
// NOTE(review): presumably the callback stored by setIsRunningCallback() for
// checking whether the thread is still running — confirm in the implementation.
332 IsRunningCallback isRunning;
// NOTE(review): presumably holds the last update date/time over the sources of
// all corpora (cf. getCorporaLastUpdated()) — confirm in the implementation.
335 std::string corporaLastUpdated;
340 const std::string& table,
341 const std::string& column
345 [[nodiscard]]
bool corpusIsChanged(
351 std::size_t& sourcesTo,
357 std::size_t& sourcesTo,
360 [[nodiscard]] std::string corpusFindSavePoint(
362 const std::string& corpusCreationTime
364 [[nodiscard]]
bool corpusManipulate(
367 std::size_t numSources,
370 void corpusSaveSavePoint(
373 std::size_t numSources,
374 const std::string& savePoint,
constexpr auto sqlArg12
Twelfth argument in a SQL query.
Definition: Database.hpp:160
std::string getSourceTableName(std::uint16_t type, const std::string &name) const
Public helper function getting the full name of a source table.
Definition: Database.cpp:1051
std::uint64_t targetTableId
The ID of the target table to be written to.
Definition: Database.hpp:291
constexpr auto column2
Second column in a table.
Definition: Database.hpp:170
Namespace for analyzer classes.
constexpr auto corpusSlicingFactor
The factor used for corpus slicing percentage points (1/100).
Definition: Database.hpp:86
std::string targetTableFull
The full name of the target table to be written to, including prefixes.
Definition: Database.hpp:294
constexpr auto maxNumCorpusColumns
The maximum number of columns used when creating a text corpus.
Definition: Database.hpp:89
std::string getCorporaLastUpdated() const
Gets the last update date/time over the sources of all corpora.
Definition: Database.cpp:1022
void prepare()
Prepares the SQL statements for the analyzer.
Definition: Database.cpp:554
constexpr auto progressCreatedCorpus
The progress with creating a corpus after the server created the corpus.
Definition: Database.hpp:101
constexpr auto sqlArg2
Second argument in a SQL query.
Definition: Database.hpp:130
constexpr auto progressGeneratedSavePoint
The progress of saving a savepoint after generating it.
Definition: Database.hpp:115
constexpr auto progressMovedData
The progress with creating a corpus after the data has been moved.
Definition: Database.hpp:98
std::string targetTableName
The name of the target table to be written to.
Definition: Database.hpp:288
Database(Module::Database &dbThread)
Constructor setting the database connection for the thread.
Definition: Database.cpp:44
constexpr auto sqlArg6
Sixth argument in a SQL query.
Definition: Database.hpp:142
Target table properties containing its type, website, URL list, table names, columns, and compression.
Definition: TargetTableProperties.hpp:44
void setTargetTable(const std::string &table)
Sets the name of the target table.
Definition: Database.cpp:57
Class handling database access for threads.
Definition: Database.hpp:91
std::unordered_map< std::size_t, std::string > additionalTables
The IDs and full names of additional tables to write data to.
Definition: Database.hpp:307
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
Corpus properties containing the type, table, and column name of its source.
Definition: CorpusProperties.hpp:41
Class for JSON exceptions.
Definition: Json.hpp:136
void initTargetTable(bool isCompressed, bool isDelete)
Creates the target table, or adds the field columns, if they do not exist already.
Definition: Database.cpp:136
constexpr auto sqlArg8
Eighth argument in a SQL query.
Definition: Database.hpp:148
constexpr auto defaultCorpusSlicing
The default percentage of the maximum package size allowed by the MySQL server to be used for the max...
Definition: Database.hpp:83
void setTargetFields(const std::vector< StringString > &fields)
Sets the fields of the target table and their types.
Definition: Database.cpp:82
Class representing a text corpus.
Definition: Corpus.hpp:165
bool getCorpus(const CorpusProperties &corpusProperties, const std::string &filterDateFrom, const std::string &filterDateTo, Data::Corpus &corpusTo, std::size_t &sourcesTo, StatusSetter &statusSetter)
Gets the text corpus after creating it if it is out-of-date or does not yet exist.
Definition: Database.cpp:922
Type
Data types.
Definition: Data.hpp:66
constexpr auto progressReceivedSources
The progress with creating a corpus after the source texts have been received.
Definition: Database.hpp:95
constexpr auto numColumns2
Two table columns.
Definition: Database.hpp:179
Structure for table columns containing its name, type, reference, and indexing.
Definition: TableColumn.hpp:39
constexpr auto sqlArg4
Fourth argument in a SQL query.
Definition: Database.hpp:136
Structure containing all the data needed to keep the status of a thread updated.
Definition: StatusSetter.hpp:57
std::string getTargetTableUpdated()
Gets the date/time when the target table has been updated last.
Definition: Database.cpp:278
Wrapper class providing the database functionality of Module::Database to its child classes...
Definition: Database.hpp:72
std::vector< TextMapEntry > TextMap
A text map is defined as a vector of text map entries.
Definition: TextMap.hpp:280
void checkSources(std::vector< std::uint8_t > &types, std::vector< std::string > &tables, std::vector< std::string > &columns)
Public helper function checking the given data sources.
Definition: Database.cpp:1152
std::string tablePrefix
The prefix used for tables in the MySQL database.
Definition: Database.hpp:285
Class providing database functionality for analyzer threads by implementing Wrapper::Database.
Definition: Database.hpp:188
void setIsRunningCallback(const IsRunningCallback &isRunningCallback)
Sets the callback function for checking whether the thread is still running.
Definition: Database.cpp:104
constexpr auto progressDeletedCorpus
The progress with creating a corpus after the old corpus has been deleted.
Definition: Database.hpp:92
constexpr auto sqlArg1
First argument in a SQL query.
Definition: Database.hpp:127
constexpr auto progressAddingCorpus
The remaining progress, attributed to adding the corpus to the database.
Definition: Database.hpp:107
std::vector< StringString > targetFields
The names and types of the target fields, i.e. the columns in the target table to be written to...
Definition: Database.hpp:297
static std::string getSourceColumnName(std::uint16_t type, const std::string &name)
Public helper function getting the full name of a source column.
Definition: Database.cpp:1093
constexpr auto numColumns1
One table column.
Definition: Database.hpp:176
constexpr auto sqlArg9
Ninth argument in a SQL query.
Definition: Database.hpp:151
constexpr auto sqlArg3
Third argument in a SQL query.
Definition: Database.hpp:133
constexpr auto progressReceivedCorpus
The progress with getting an existing corpus after its contents have been received from the database...
Definition: Database.hpp:112
const std::string & getAdditionalTableName(std::size_t id) const
Gets the full name of an additional table.
Definition: Database.cpp:465
constexpr auto column1
First column in a table.
Definition: Database.hpp:167
void updateTargetTable()
Updates the target table.
Definition: Database.cpp:243
constexpr auto column3
Third column in a table.
Definition: Database.hpp:173
void updateAdditionalTable(std::size_t id)
Updates an additional table.
Definition: Database.cpp:500
constexpr auto progressSlicedCorpus
The progress with creating a corpus after the corpus has been sliced.
Definition: Database.hpp:104
constexpr auto sqlArg5
Fifth argument in a SQL query.
Definition: Database.hpp:139
Template class for safe in-scope database locks.
Definition: DatabaseLock.hpp:54
void setCorpusSlicing(std::uint8_t percentageOfMaxAllowedPackageSize)
Sets the size of corpus chunks, in percentage of the maximum package size allowed by the MySQL server...
Definition: Database.cpp:94
constexpr auto sqlArg10
Tenth argument in a SQL query.
Definition: Database.hpp:154
constexpr auto sqlArg7
Seventh argument in a SQL query.
Definition: Database.hpp:145
std::uint8_t corpusSlicing
The maximum size of the text corpus chunks, in percentage of the maximum package size allowed by the MySQL server.
Definition: Database.hpp:304
constexpr auto progressSavingSavePoint
The remaining progress, attributed to saving a savepoint to the database.
Definition: Database.hpp:118
std::size_t addAdditionalTable(const std::string &name, const std::vector< StringString > &fields, bool isCompressed, bool isDelete)
Creates an additional table, or adds its field columns, if they do not exist already.
Definition: Database.cpp:348
constexpr auto sqlArg11
Eleventh argument in a SQL query.
Definition: Database.hpp:157