crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
Database.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2023 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * Database.hpp
24  *
25  * Class handling database access for the command-and-control server and its threads.
26  * Thread-specific functionality is not implemented in this (parent) class.
27  *
28  * NOT THREAD-SAFE!
29  * Use only one instance per thread.
30  * Use instances of the child class Module::Database for module-specific functionality instead.
31  *
32  * Created on: Sep 29, 2018
33  * Author: ans
34  */
35 
36 #ifndef MAIN_DATABASE_HPP_
37 #define MAIN_DATABASE_HPP_
38 
39 // optional debugging options
40 //#define MAIN_DATABASE_DEBUG_REQUEST_COUNTER // enable database request counter for debugging purposes
41 //#define MAIN_DATABASE_DEBUG_DEADLOCKS // enable documentation of deadlocks by writing hashes ('#') to stdout
42 #define MAIN_DATABASE_LOG_MOVING // log the moving of websites from one data directory to another to stdout
43 
44 #include "Exception.hpp"
45 #include "Version.hpp"
46 
47 #include "../Data/Data.hpp"
48 #include "../Helper/CommaLocale.hpp"
49 #include "../Helper/Container.hpp"
50 #include "../Helper/FileSystem.hpp"
51 #include "../Helper/Json.hpp"
52 #include "../Helper/Portability/locale.h"
53 #include "../Helper/Portability/mysqlcppconn.h"
54 #include "../Helper/Strings.hpp"
55 #include "../Helper/Utf8.hpp"
56 #include "../Helper/Versions.hpp"
57 #include "../Struct/ConfigProperties.hpp"
58 #include "../Struct/DatabaseSettings.hpp"
59 #include "../Struct/QueryProperties.hpp"
60 #include "../Struct/TableColumn.hpp"
61 #include "../Struct/TableProperties.hpp"
62 #include "../Struct/TargetTableProperties.hpp"
63 #include "../Struct/ThreadDatabaseEntry.hpp"
64 #include "../Struct/ThreadOptions.hpp"
65 #include "../Struct/ThreadStatus.hpp"
66 #include "../Struct/UrlListProperties.hpp"
67 #include "../Struct/WebsiteProperties.hpp"
68 #include "../Timer/Simple.hpp"
69 #include "../Wrapper/DatabaseLock.hpp"
70 #include "../Wrapper/DatabaseTryLock.hpp"
71 #include "../Wrapper/PreparedSqlStatement.hpp"
72 
73 #include "../_extern/rapidjson/include/rapidjson/document.h"
74 
75 #include <cppconn/driver.h>
76 #include <cppconn/exception.h>
77 #include <cppconn/prepared_statement.h>
78 #include <cppconn/resultset.h>
79 #include <cppconn/statement.h>
80 #include <mysql_connection.h>
81 
82 #include <algorithm> // std::find, std::find_if, std::for_each, std::remove, std::sort, std::transform, std::unique
83 #include <cctype> // std::tolower
84 #include <chrono> // std::chrono
85 #include <cmath> // std::isnan
86 #include <cstddef> // std::size_t
87 #include <cstdint> // std::uint8_t, std::uint32_t, std::uint64_t
88 #include <fstream> // std::ifstream
89 #include <functional> // std::function
90 #include <iostream> // std::cout, std::endl, std::flush
91 #include <memory> // std::unique_ptr
92 #include <mutex> // std::lock_guard, std::mutex
93 #include <queue> // std::queue
94 #include <sstream> // std::istringstream, std::ostringstream
95 #include <stdexcept> // std::logic_error
96 #include <string> // std::getline, std::stoull, std::string, std::to_string
97 #include <string_view> // std::string_view, std::string_view_literals
98 #include <thread> // std::this_thread
99 #include <tuple> // std::get<...>
100 #include <utility> // std::pair, std::swap
101 #include <vector> // std::vector
102 
103 // optional header
104 #ifdef MAIN_DATABASE_DEBUG_REQUEST_COUNTER
105 #include <atomic> // std::atomic
106 #endif
107 
// Forward declaration of the wrapper class, so that Main::Database can name
//  it in a friend declaration without including the wrapper's header.
namespace crawlservpp::Wrapper {

	class Database;

} /* namespace crawlservpp::Wrapper */
114 
115 namespace crawlservpp::Main {
116 
117  /*
118  * CONSTANTS
119  */
120 
// Make the `sv` literal suffix available for the string constants below.
using std::string_view_literals::operator""sv;

/*
 * General constants
 */

/// (Sub-)Directory for .sql files.
inline constexpr auto sqlDir{"sql"sv};

/// File extension for .sql files.
inline constexpr auto sqlExtension{".sql"sv};

/// Time-out on table lock in seconds.
inline constexpr auto lockTimeOutSec{300};

/// Idle time in milliseconds after which a re-connect to the database will be enforced.
inline constexpr auto reconnectAfterIdleMs{600000};

/// Presumably the time to sleep, in milliseconds, while waiting for a lock (usage not visible in this header).
inline constexpr auto sleepOnLockMs{250};

/// Maximum content size (1073741824 = 2^30, i.e. 1 GiB when counted in bytes).
inline constexpr auto maxContentSize{1073741824};

/// Human-readable form of the maximum content size.
inline constexpr auto maxContentSizeString{"1 GiB"sv};

/// The "www." prefix (presumably of domain names; usage not visible in this header).
inline constexpr auto wwwPrefix{"www."sv};

/// Presumably the number of tables belonging to one URL list (usage not visible in this header).
inline constexpr auto numUrlListTables{6};

/// The MySQL keyword for a constraint, including the trailing space.
inline constexpr auto sqlConstraint{"CONSTRAINT "sv};

/// The factor for converting seconds to milliseconds and vice versa.
inline constexpr auto secToMs{1000};

/// Time to sleep, in milliseconds, before a statement is retried after a deadlock.
inline constexpr auto sleepOnDeadLockMs{250};

/// Maximum number of columns in all tables associated with a URL list.
inline constexpr auto maxColumnsUrlList{6};

/// Number of arguments for adding one URL.
inline constexpr auto numArgsAddUrl{4};

/*
 * Batch sizes and SQL argument indices
 */

/// Process ten values at once.
inline constexpr auto nAtOnce10{10};

/// Process one hundred values at once.
inline constexpr auto nAtOnce100{100};

/// Process five hundred values at once.
inline constexpr auto nAtOnce500{500};

/// First argument in a SQL query.
inline constexpr auto sqlArg1{1};

/// Second argument in a SQL query.
inline constexpr auto sqlArg2{2};

/// Third argument in a SQL query.
inline constexpr auto sqlArg3{3};

/// Fourth argument in a SQL query.
inline constexpr auto sqlArg4{4};

/// Fifth argument in a SQL query.
inline constexpr auto sqlArg5{5};

/// Sixth argument in a SQL query.
inline constexpr auto sqlArg6{6};

/// Seventh argument in a SQL query.
inline constexpr auto sqlArg7{7};

/// Eighth argument in a SQL query.
inline constexpr auto sqlArg8{8};

/// Ninth argument in a SQL query.
inline constexpr auto sqlArg9{9};

/*
 * MySQL error codes (1xxx: reported by the server, 2xxx: reported by the client library)
 */

/// Sort aborted.
inline constexpr auto sqlSortAborted{1027};

/// Too many connections.
inline constexpr auto sqlTooManyConnections{1040};

/// Cannot get host name.
inline constexpr auto sqlCannotGetHostName{1042};

/// Bad handshake.
inline constexpr auto sqlBadHandShake{1043};

/// Server shutdown.
inline constexpr auto sqlServerShutDown{1053};

/// Normal shutdown.
inline constexpr auto sqlNormalShutdown{1077};

/// Got signal.
inline constexpr auto sqlGotSignal{1078};

/// Shutdown complete.
inline constexpr auto sqlShutDownComplete{1079};

/// Forcing close of thread.
inline constexpr auto sqlForcingCloseOfThread{1080};

/// Cannot create IP socket.
inline constexpr auto sqlCannotCreateIPSocket{1081};

/// Aborted connection.
inline constexpr auto sqlAbortedConnection{1152};

/// Read error from connection pipe.
inline constexpr auto sqlReadErrorFromConnectionPipe{1154};

/// Packets out of order.
inline constexpr auto sqlPacketsOutOfOrder{1156};

/// Could not uncompress packets.
inline constexpr auto sqlCouldNotUncompressPackets{1157};

/// Error reading packets.
inline constexpr auto sqlErrorReadingPackets{1158};

/// Timeout reading packets.
inline constexpr auto sqlTimeOutReadingPackets{1159};

/// Error writing packets.
inline constexpr auto sqlErrorWritingPackets{1160};

/// Timeout writing packets.
inline constexpr auto sqlTimeOutWritingPackets{1161};

/// New aborted connection.
inline constexpr auto sqlNewAbortedConnection{1184};

/// Network error reading from master.
inline constexpr auto sqlNetErrorReadingFromMaster{1189};

/// Network error writing to master.
inline constexpr auto sqlNetErrorWritingToMaster{1190};

/// More than the maximum number of user connections.
inline constexpr auto sqlMoreThanMaxUserConnections{1203};

/// Lock wait timeout exceeded.
inline constexpr auto sqlLockWaitTimeOutExceeded{1205};

/// Number of locks exceeds lock table size.
inline constexpr auto sqlNumOfLocksExceedsLockTableSize{1206};

/// Deadlock.
inline constexpr auto sqlDeadLock{1213};

/// Server error connecting to master.
inline constexpr auto sqlServerErrorConnectingToMaster{1218};

/// Query execution interrupted.
inline constexpr auto sqlQueryExecutionInterrupted{1317};

/// Unable to connect to foreign data source.
inline constexpr auto sqlUnableToConnectToForeignDataSource{1429};

/// Cannot connect to server through socket.
inline constexpr auto sqlCannotConnectToServerThroughSocket{2002};

/// Cannot connect to server.
inline constexpr auto sqlCannotConnectToServer{2003};

/// Unknown server host.
inline constexpr auto sqlUnknownServerHost{2005};

/// Server has gone away.
inline constexpr auto sqlServerHasGoneAway{2006};

/// TCP error.
inline constexpr auto sqlTCPError{2011};

/// Error in server handshake.
inline constexpr auto sqlErrorInServerHandshake{2012};

/// Lost connection during query.
inline constexpr auto sqlLostConnectionDuringQuery{2013};

/// Client error connecting to slave.
inline constexpr auto sqlClientErrorConnectingToSlave{2024};

/// Client error connecting to master.
inline constexpr auto sqlClientErrorConnectingToMaster{2025};

/// SSL connection error.
inline constexpr auto sqlSSLConnectionError{2026};

/// Malformed packet.
inline constexpr auto sqlMalformedPacket{2027};

/// Invalid connection handle.
inline constexpr auto sqlInvalidConnectionHandle{2048};

/*
 * Further MySQL error codes
 */

/// Storage engine error.
inline constexpr auto sqlStorageEngineError{1030};

/// Insufficient privileges.
inline constexpr auto sqlInsufficientPrivileges{1045};

/// Wrong arguments.
inline constexpr auto sqlWrongArguments{1210};

/// Incorrect path.
inline constexpr auto sqlIncorrectPath{1525};
347 
349 
350  /*
351  * DECLARATION
352  */
353 
355 
366  class Database {
368  friend class Wrapper::Database;
369 
371  template<class DB> friend class Wrapper::DatabaseLock;
372 
374  template<class DB> friend class Wrapper::DatabaseTryLock;
375 
376  // for convenience
378 
390 
391  using IdPairs = std::vector<std::pair<std::uint64_t, std::uint64_t>>;
392  using IdString = std::pair<std::uint64_t, std::string>;
393  using IsRunningCallback = std::function<bool()>;
394  using SqlPreparedStatementPtr = std::unique_ptr<sql::PreparedStatement>;
395  using SqlResultSetPtr = std::unique_ptr<sql::ResultSet>;
396  using SqlStatementPtr = std::unique_ptr<sql::Statement>;
397  using StringString = std::pair<std::string, std::string>;
398  using StringQueueOfStrings = std::pair<std::string, std::queue<std::string>>;
399  using TableNameWriteAccess = std::pair<std::string, bool>;
400  using Queries = std::vector<std::pair<std::string, std::vector<StringString>>>;
401 
402  public:
405 
406  Database(const DatabaseSettings& dbSettings, const std::string& dbModule);
407  virtual ~Database();
408 
412 
413  void setSleepOnError(std::uint64_t seconds);
414  void setTimeOut(std::uint64_t milliseconds);
415 
419 
420  [[nodiscard]] const DatabaseSettings& getSettings() const;
421  [[nodiscard]] const std::string& getDriverVersion() const;
422  [[nodiscard]] const std::string& getDataDir() const;
423  [[nodiscard]] std::uint64_t getMaxAllowedPacketSize() const;
424  [[nodiscard]] std::uint64_t getConnectionId() const;
425 
429 
430  void connect();
431  void initializeSql();
432  void prepare();
433  void update();
434 
438 
439  void log(const std::string& logEntry);
440  void log(const std::string& logModule, const std::string& logEntry);
441  [[nodiscard]] std::uint64_t getNumberOfLogEntries(const std::string& logModule);
442  void clearLogs(const std::string& logModule);
443 
447 
448  [[nodiscard]] std::vector<ThreadDatabaseEntry> getThreads();
449  std::uint64_t addThread(const ThreadOptions& threadOptions);
450  [[nodiscard]] std::uint64_t getThreadRunTime(std::uint64_t threadId);
451  [[nodiscard]] std::uint64_t getThreadPauseTime(std::uint64_t threadId);
452  void setThreadStatus(
453  std::uint64_t threadId,
454  bool threadPaused,
455  const std::string& threadStatusMessage
456  );
457  void setThreadStatus(std::uint64_t threadId, const std::string& threadStatusMessage);
458  void setThreadRunTime(std::uint64_t threadId, std::uint64_t threadRunTime);
459  void setThreadPauseTime(std::uint64_t threadId, std::uint64_t threadPauseTime);
460  void deleteThread(std::uint64_t threadId);
461 
465 
466  std::uint64_t addWebsite(const WebsiteProperties& websiteProperties);
467  [[nodiscard]] std::string getWebsiteDomain(std::uint64_t id);
468  [[nodiscard]] std::string getWebsiteNamespace(std::uint64_t websiteId);
469  [[nodiscard]] std::uint64_t getWebsiteFromUrlList(std::uint64_t listId);
470  [[nodiscard]] IdString getWebsiteNamespaceFromUrlList(std::uint64_t listId);
471  [[nodiscard]] IdString getWebsiteNamespaceFromConfig(std::uint64_t configId);
472  [[nodiscard]] IdString getWebsiteNamespaceFromTargetTable(
473  const std::string& type,
474  std::uint64_t tableId
475  );
476  [[nodiscard]] bool isWebsiteNamespace(const std::string& nameSpace);
477  [[nodiscard]] std::string duplicateWebsiteNamespace(const std::string& websiteNamespace);
478  [[nodiscard]] std::string getWebsiteDataDirectory(std::uint64_t websiteId);
479  [[nodiscard]] std::uint64_t getChangedUrlsByWebsiteUpdate(
480  std::uint64_t websiteId,
481  const WebsiteProperties& websiteProperties
482  );
483  [[nodiscard]] std::uint64_t getLostUrlsByWebsiteUpdate(
484  std::uint64_t websiteId,
485  const WebsiteProperties& websiteProperties
486  );
487  void updateWebsite(std::uint64_t websiteId, const WebsiteProperties& websiteProperties);
488  void deleteWebsite(std::uint64_t websiteId);
489  std::uint64_t duplicateWebsite(std::uint64_t websiteId, const Queries& queries);
490  void moveWebsite(std::uint64_t websiteId, const WebsiteProperties& websiteProperties);
491 
495 
496  std::uint64_t addUrlList(std::uint64_t websiteId, const UrlListProperties& listProperties);
497  [[nodiscard]] std::queue<IdString> getUrlLists(std::uint64_t websiteId);
498  std::size_t mergeUrls(std::uint64_t listId, std::queue<std::string>& urls);
499  [[nodiscard]] std::queue<std::string> getUrls(std::uint64_t listId);
500  [[nodiscard]] std::queue<IdString> getUrlsWithIds(std::uint64_t listId);
501  [[nodiscard]] std::string getUrlListNamespace(std::uint64_t listId);
502  [[nodiscard]] IdString getUrlListNamespaceFromTargetTable(
503  const std::string& type,
504  std::uint64_t tableId
505  );
506  [[nodiscard]] bool isUrlListNamespace(std::uint64_t websiteId, const std::string& nameSpace);
507  void updateUrlList(std::uint64_t listId, const UrlListProperties& listProperties);
508  void deleteUrlList(std::uint64_t listId);
509  std::size_t deleteUrls(std::uint64_t listId, std::queue<uint64_t>& urlIds);
510  void resetParsingStatus(std::uint64_t listId);
511  void resetExtractingStatus(std::uint64_t listId);
512  void resetAnalyzingStatus(std::uint64_t listId);
513 
517 
518  std::uint64_t addQuery(std::uint64_t websiteId, const QueryProperties& queryProperties);
519  void getQueryProperties(std::uint64_t queryId, QueryProperties& queryPropertiesTo);
520  void updateQuery(std::uint64_t queryId, const QueryProperties& queryProperties);
521  void moveQuery(std::uint64_t queryId, std::uint64_t toWebsiteId);
522  void deleteQuery(std::uint64_t queryId);
523  std::uint64_t duplicateQuery(std::uint64_t queryId);
524 
528 
529  std::uint64_t addConfiguration(
530  std::uint64_t websiteId,
531  const ConfigProperties& configProperties
532  );
533  [[nodiscard]] std::string getConfiguration(std::uint64_t configId);
534  void updateConfiguration(std::uint64_t configId, const ConfigProperties& configProperties);
535  void deleteConfiguration(std::uint64_t configId);
536  std::uint64_t duplicateConfiguration(std::uint64_t configId);
537 
541 
542  std::uint64_t addOrUpdateTargetTable(const TargetTableProperties& properties);
543  [[nodiscard]] std::queue<IdString> getTargetTables(
544  const std::string& type,
545  std::uint64_t listId
546  );
547  [[nodiscard]] std::uint64_t getTargetTableId(
548  const std::string& type,
549  std::uint64_t listId,
550  const std::string& tableName
551  );
552  [[nodiscard]] std::string getTargetTableName(std::string_view type, std::uint64_t tableId);
553  void deleteTargetTable(const std::string& type, std::uint64_t tableId);
554 
558 
559  void checkConnection();
560  [[nodiscard]] bool isWebsite(std::uint64_t websiteId);
561  [[nodiscard]] bool isUrlList(std::uint64_t urlListId);
562  [[nodiscard]] bool isUrlList(std::uint64_t websiteId, std::uint64_t urlListId);
563  [[nodiscard]] bool isQuery(std::uint64_t queryId);
564  [[nodiscard]] bool isQuery(std::uint64_t websiteId, std::uint64_t queryId);
565  [[nodiscard]] bool isCorpus(std::uint64_t firstId, bool requireArticles, bool requireDates);
566  [[nodiscard]] bool isConfiguration(std::uint64_t configId);
567  [[nodiscard]] bool isConfiguration(std::uint64_t websiteId, std::uint64_t configId);
568  [[nodiscard]] bool isTargetTable(
569  std::string_view type,
570  std::uint64_t websiteId,
571  std::uint64_t urlListId,
572  std::uint64_t tableID
573  );
574  [[nodiscard]] bool checkDataDir(const std::string& dir);
575 
579 
580  void beginNoLock();
581  void endNoLock();
582 
586 
587  [[nodiscard]] bool isTableEmpty(const std::string& tableName);
588  [[nodiscard]] bool isTableExists(const std::string& tableName);
589  [[nodiscard]] bool isColumnExists(
590  const std::string& tableName,
591  const std::string& columnName
592  );
593  [[nodiscard]] std::string getColumnType(
594  const std::string& tableName,
595  const std::string& columnName
596  );
597  void readTableAsStrings(
598  const std::string& tableName,
599  std::vector<std::vector<std::string>>& contentsTo,
600  bool includeColumnNames
601  );
602  void readColumnAsStrings(
603  const std::string& tableName,
604  const std::string& columnName,
605  const std::string& condition,
606  std::vector<std::string>& contentsTo
607  );
608  void lockTables(std::queue<TableNameWriteAccess>& tableLocks);
609  void unlockTables();
610  void startTransaction(const std::string& isolationLevel);
611  void endTransaction(bool success);
612 
616 
617  void getCustomData(Data::GetValue& data);
618  void getCustomData(Data::GetFields& data);
619  void getCustomData(Data::GetFieldsMixed& data);
620  void getCustomData(Data::GetColumn& data);
621  void getCustomData(Data::GetColumns& data);
622  void getCustomData(Data::GetColumnsMixed& data);
623  void insertCustomData(const Data::InsertValue& data);
624  void insertCustomData(const Data::InsertFields& data);
625  void insertCustomData(const Data::InsertFieldsMixed& data);
626  void updateCustomData(const Data::UpdateValue& data);
627  void updateCustomData(const Data::UpdateFields& data);
628  void updateCustomData(const Data::UpdateFieldsMixed& data);
629 
633 
#ifdef MAIN_DATABASE_DEBUG_REQUEST_COUNTER
/// Gets the number of SQL requests performed since the start of the application.
/**
 * \returns The current value of the static request counter,
 *   which the sqlExecute... helpers increment before each request.
 */
static std::uint64_t getRequestCounter() {
	return Database::requestCounter.load();
}
#else
/// Gets the number of SQL requests performed since the start of the application.
/**
 * \returns Always zero, because the request counter is only compiled in
 *   when MAIN_DATABASE_DEBUG_REQUEST_COUNTER is defined.
 */
static std::uint64_t getRequestCounter() {
	return 0;
}
#endif
655 
657 
659  class Transaction {
660  public:
663 
665 
673  Main::Database& db,
674  const std::string& isolationLevel
675  ) : ref(db), active(false), successful(false) {
676  this->ref.startTransaction(isolationLevel);
677 
678  this->active = true;
679  }
680 
682 
686  explicit Transaction(
687  Main::Database& db
688  ) : ref(db), active(false), successful(false) {
689  this->ref.startTransaction("");
690 
691  this->active = true;
692  }
693 
695  virtual ~Transaction() {
696  if(this->active) {
697  try {
698  this->ref.endTransaction(this->successful);
699  }
700  catch(...) {} // ignore exceptions
701 
702  this->active = false;
703  }
704  }
705 
709 
711 
715  void success() {
716  this->successful = true;
717  }
718 
720 
723 
726  Transaction(Transaction&) = delete;
727 
729  Transaction& operator=(Transaction&) = delete;
730 
732  Transaction(Transaction&&) = delete;
733 
735  Transaction& operator=(Transaction&&) = delete;
736 
738 
739  private:
740  Main::Database& ref; // reference to database
741  bool active; // transaction is active
742  bool successful; // transaction was successful
743  };
744 
747 
750 
753 
756 
759 
762 
766 
769  Database(Database&) = delete;
770 
772  Database& operator=(Database&) = delete;
773 
775  Database(Database&&) = delete;
776 
778  Database& operator=(Database&&) = delete;
779 
781 
782  protected:
785 
787  std::unique_ptr<sql::Connection> connection;
788 
790  static sql::Driver * driver;
791 
795 
796  void reserveForPreparedStatements(std::size_t n);
797  void addPreparedStatement(const std::string& sqlQuery, std::size_t& id);
798  void clearPreparedStatement(std::size_t& id);
799  [[nodiscard]] sql::PreparedStatement& getPreparedStatement(std::size_t id);
800 
804 
805  [[nodiscard]] std::uint64_t getLastInsertedId();
806  void resetAutoIncrement(const std::string& tableName);
807  static void addDatabaseLock(
808  const std::string& name,
809  const IsRunningCallback& isRunningCallback
810  );
811  static bool tryDatabaseLock(const std::string& name);
812  static void removeDatabaseLock(const std::string& name);
813  void checkDirectory(const std::string& dir);
814 
818 
819  void createTable(const TableProperties& properties);
820  void clearTable(const std::string& tableName);
821  void dropTable(const std::string& tableName);
822  void addColumn(const std::string& tableName, const TableColumn& column);
823  void compressTable(const std::string& tableName);
824  std::queue<std::string> cloneTable(
825  const std::string& tableName,
826  const std::string& destDir
827  );
828 
832 
833  [[nodiscard]] bool isUrlListCaseSensitive(std::uint64_t listId);
834  void setUrlListCaseSensitive(std::uint64_t listId, bool isCaseSensitive);
835 
839 
840  static void sqlException(const std::string& function, const sql::SQLException& e);
841 
845 
847 
861  template<class T, class... Args>
862  static bool sqlExecute(T& statement, Args... args) {
863  bool result{false};
864 
865  while(true) { // retry on deadlock
866  try {
867 #ifdef MAIN_DATABASE_DEBUG_REQUEST_COUNTER
868  ++Database::requestCounter;
869 #endif
870 
871  result = statement.execute(args...);
872 
873  break;
874  }
875  catch(const sql::SQLException &e) {
876  if(e.getErrorCode() != sqlDeadLock) {
877  throw; // no deadlock: re-throw exception
878  }
879  }
880 
881 #ifdef MAIN_DATABASE_DEBUG_DEADLOCKS
882  const auto counter{Database::getRequestCounter()};
883 
884  std::cout << "#";
885 
886  if(counter > 0) {
887  std::cout << counter;
888  }
889 
890  std::cout << std::flush;
891 #endif
892 
893  if(sleepOnDeadLockMs > 0) {
894  std::this_thread::sleep_for(
895  std::chrono::milliseconds(
897  )
898  );
899  }
900  }
901 
902  return result;
903  }
904 
906 
919  template<class T, class... Args>
920  static sql::ResultSet * sqlExecuteQuery(T& statement, Args... args) {
921  sql::ResultSet * resultPtr{nullptr};
922 
923  while(true) { // retry on deadlock
924  try {
925 #ifdef MAIN_DATABASE_DEBUG_REQUEST_COUNTER
926  ++Database::requestCounter;
927 #endif
928 
929  resultPtr = statement.executeQuery(args...);
930 
931  break;
932  }
933  catch(const sql::SQLException &e) {
934  if(e.getErrorCode() != sqlDeadLock) {
935  throw; // no deadlock: re-throw exception
936  }
937  }
938 
939 #ifdef MAIN_DATABASE_DEBUG_DEADLOCKS
940  const auto counter{Database::getRequestCounter()};
941 
942  std::cout << "#";
943 
944  if(counter > 0) {
945  std::cout << counter;
946  }
947 
948  std::cout << std::flush;
949 #endif
950 
951  if(sleepOnDeadLockMs > 0) {
952  std::this_thread::sleep_for(
953  std::chrono::milliseconds(
955  )
956  );
957  }
958  }
959 
960  return resultPtr;
961  }
962 
964 
977  template<class T, class... Args>
978  static int sqlExecuteUpdate(T& statement, Args... args) {
979  int result{};
980 
981  while(true) { // retry on deadlock
982  try {
983 #ifdef MAIN_DATABASE_DEBUG_REQUEST_COUNTER
984  ++Database::requestCounter;
985 #endif
986 
987  result = statement.executeUpdate(args...);
988 
989  break;
990  }
991  catch(const sql::SQLException &e) {
992  if(e.getErrorCode() != sqlDeadLock) {
993  throw; // no deadlock: re-throw exception
994  }
995  }
996 
997 #ifdef MAIN_DATABASE_DEBUG_DEADLOCKS
998  const auto counter{Database::getRequestCounter()};
999 
1000  std::cout << "#";
1001 
1002  if(counter > 0) {
1003  std::cout << counter;
1004  }
1005 
1006  std::cout << std::flush;
1007 #endif
1008 
1009  if(sleepOnDeadLockMs > 0) {
1010  std::this_thread::sleep_for(
1011  std::chrono::milliseconds(
1013  )
1014  );
1015  }
1016  }
1017 
1018  return result;
1019  }
1020 
1022 
/// Template function for executing a SQL query by unique pointer.
/**
 * Dereferences the pointer and forwards to the overload taking a
 *  reference to the statement. The pointer is not checked for
 *  \c nullptr, i.e. it needs to own a statement.
 *
 * \param statement Reference to the unique pointer owning the statement.
 * \param args Arguments passed on to the statement's `execute` function.
 *
 * \returns The value returned by the statement's `execute` function.
 */
template<class T, class... Args>
static bool sqlExecute(std::unique_ptr<T>& statement, Args... args) {
	return sqlExecute(*statement, args...);
}
1039 
1041 
1053  template<class T, class... Args>
1054  static sql::ResultSet * sqlExecuteQuery(std::unique_ptr<T>& statement, Args... args) {
1055  return sqlExecuteQuery(*statement, args...);
1056  }
1057 
1059 
/// Template function for executing a SQL query by unique pointer and returning the number of affected rows.
/**
 * Dereferences the pointer and forwards to the overload taking a
 *  reference to the statement. The pointer is not checked for
 *  \c nullptr, i.e. it needs to own a statement.
 *
 * \param statement Reference to the unique pointer owning the statement.
 * \param args Arguments passed on to the statement's `executeUpdate` function.
 *
 * \returns The value returned by the statement's `executeUpdate` function.
 */
template<class T, class... Args>
static int sqlExecuteUpdate(std::unique_ptr<T>& statement, Args... args) {
	return sqlExecuteUpdate(*statement, args...);
}
1075 
1077 
1078  private:
1079  // private connection information
1080  const DatabaseSettings settings; // database settings
1081  std::uint64_t connectionId{}; // MySQL connection ID
1082  std::uint64_t maxAllowedPacketSize{}; // maximum packet size
1083  std::uint64_t sleepOnError{}; // number of seconds to sleep on database error
1084  std::string driverVersion; // MySQL Connector/C++ version
1085  std::string dataDir; // main data directory
1086  std::vector<std::string> dirs; // all known data directories
1087  std::string module; // module for which the database connection was established
1088  Timer::Simple reconnectTimer; // timer for reconnecting to the database
1089 
1090  // optional private variables
1091 #ifdef MAIN_DATABASE_DEBUG_REQUEST_COUNTER
1092  static std::atomic<std::uint64_t> requestCounter; // MySQL request counter
1093 #endif
1094 
1095  // locking state
1096  static std::mutex lockAccess;
1097  static std::vector<std::string> locks;
1098 
1099  // prepared SQL statements
1100  std::vector<Wrapper::PreparedSqlStatement> preparedStatements;
1101 
1102  // internal helper functions
1103  void run(const std::string& sqlFile);
1104  void execute(const std::string& sqlQuery);
1105  int executeUpdate(const std::string& sqlQuery);
1106  [[nodiscard]] std::string sqlEscapeString(const std::string& in);
1107 
1108  // IDs of prepared SQL statements
// IDs of the prepared SQL statements used by this class, all
//  value-initialized to zero (presumably they are set when the
//  statements are prepared and refer to entries of preparedStatements
//  - confirm against the implementation)
struct _ps {
	std::size_t log{};
	std::size_t lastId{};
	std::size_t setThreadStatus{};
	std::size_t setThreadStatusMessage{};
} ps;
1115  };
1116 
1117 } /* namespace crawlservpp::Main */
1118 
1119 #endif /* MAIN_DATABASE_HPP_ */
Class for storage engine exceptions.
Definition: Database.hpp:755
constexpr auto sqlMalformedPacket
Malformed packet.
Definition: Database.hpp:327
constexpr auto lockTimeOutSec
Time-out on table lock in seconds.
Definition: Database.hpp:133
constexpr auto sqlInsufficientPrivileges
Insufficient privileges.
Definition: Database.hpp:340
Query properties containing its name, text, type, and result type(s).
Definition: QueryProperties.hpp:39
constexpr auto sqlNumOfLocksExceedsLockTableSize
Number of locks exceeds lock table size.
Definition: Database.hpp:282
constexpr auto sqlServerShutDown
Server shutdown.
Definition: Database.hpp:225
constexpr auto maxColumnsUrlList
Maximum number of columns in all tables associated with a URL list.
Definition: Database.hpp:163
Transaction(Main::Database &db)
Constructor starting the transaction using the default isolation level.
Definition: Database.hpp:686
constexpr auto sqlTimeOutWritingPackets
Timeout writing packets.
Definition: Database.hpp:264
Class for insufficient privileges exceptions.
Definition: Database.hpp:758
constexpr auto sqlNetErrorReadingFromMaster
Network error reading from master.
Definition: Database.hpp:270
std::unique_ptr< sql::Connection > connection
Database connection.
Definition: Database.hpp:787
constexpr auto sqlSortAborted
Sort aborted.
Definition: Database.hpp:213
static sql::ResultSet * sqlExecuteQuery(std::unique_ptr< T > &statement, Args... args)
Template function for executing a SQL query by unique pointer and returning the resulting set...
Definition: Database.hpp:1054
virtual ~Transaction()
Destructor committing the transaction on success.
Definition: Database.hpp:695
constexpr auto sqlPacketsOutOfOrder
Packets out of order.
Definition: Database.hpp:249
constexpr auto sqlMoreThanMaxUserConnections
More than the maximum number of user connections.
Definition: Database.hpp:276
constexpr auto sqlNetErrorWritingToMaster
Network error writing to master.
Definition: Database.hpp:273
constexpr auto sqlTooManyConnections
Too many connections.
Definition: Database.hpp:216
constexpr auto numArgsAddUrl
Number of arguments for adding one URL.
Definition: Database.hpp:110
Structure for retrieving multiple table columns of different types.
Definition: Data.hpp:310
constexpr auto sqlArg2
Second argument in a SQL query.
Definition: Database.hpp:97
constexpr auto sqlInvalidConnectionHandle
Invalid connection handle.
Definition: Database.hpp:330
constexpr auto sqlLockWaitTimeOutExceeded
Lock wait timeout exceeded.
Definition: Database.hpp:279
constexpr auto reconnectAfterIdleMs
Idle time in milliseconds after which a re-connect to the database will be enforced.
Definition: Database.hpp:136
Thread status containing its ID, status message, pause state, and progress.
Definition: ThreadStatus.hpp:54
constexpr auto sqlWrongArguments
Wrong arguments.
Definition: Database.hpp:343
Target table properties containing its type, website, URL list, table names, columns, and compression.
Definition: TargetTableProperties.hpp:44
constexpr auto sqlDeadLock
Deadlock.
Definition: Database.hpp:285
constexpr auto sqlClientErrorConnectingToSlave
Client error connecting to slave.
Definition: Database.hpp:318
constexpr auto sqlExtension
File extension for .sql files.
Definition: Database.hpp:130
constexpr auto secToMs
The factor for converting seconds to milliseconds and vice versa.
Definition: Database.hpp:157
Properties of a URL list containing its namespace and name.
Definition: UrlListProperties.hpp:39
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
constexpr auto sqlCannotConnectToServerThroughSocket
Cannot connect to server through socket.
Definition: Database.hpp:297
constexpr auto sqlCannotConnectToServer
Cannot connect to server.
Definition: Database.hpp:300
Structure for getting multiple values of different types from a table column.
Definition: Data.hpp:243
Thread options containing the name of the module run, as well as the IDs of the website, URL list, and configuration used.
Definition: ThreadOptions.hpp:40
constexpr auto sqlQueryExecutionInterrupted
Query execution interrupted.
Definition: Database.hpp:291
Structure for updating multiple values of different types in a table.
Definition: Data.hpp:408
Class for JSON exceptions.
Definition: Json.hpp:136
Class for incorrect path exceptions.
Definition: Database.hpp:752
static std::uint64_t getRequestCounter()
Gets the number of SQL requests performed since the start of the application.
Definition: Database.hpp:514
constexpr auto sqlStorageEngineError
Storage engine error.
Definition: Database.hpp:337
static bool sqlExecute(T &statement, Args... args)
Template function for executing a SQL query.
Definition: Database.hpp:862
constexpr auto sqlErrorReadingPackets
Error reading packets.
Definition: Database.hpp:255
constexpr auto sqlArg8
Eighth argument in a SQL query.
Definition: Database.hpp:148
static sql::Driver * driver
Pointer to the MySQL database driver.
Definition: Database.hpp:790
constexpr auto sqlTimeOutReadingPackets
Timeout reading packets.
Definition: Database.hpp:258
Structure for updating one value in a table.
Definition: Data.hpp:369
constexpr auto sqlCouldNotUncompressPackets
Could not uncompress packets.
Definition: Database.hpp:252
static std::uint64_t getRequestCounter()
Gets the number of SQL requests performed since the start of the application.
Definition: Database.hpp:651
Class handling database access for the command-and-control server and its threads.
Definition: Database.hpp:366
constexpr auto sqlConstraint
The MySQL keyword for a constraint, including the trailing space.
Definition: Database.hpp:154
Table properties containing its name, columns, data directory, and compression.
Definition: TableProperties.hpp:42
constexpr auto sqlIncorrectPath
Incorrect path.
Definition: Database.hpp:346
Structure for inserting multiple values of different types into a row.
Definition: Data.hpp:360
constexpr auto sqlUnableToConnectToForeignDataSource
Unable to connect to foreign data source.
Definition: Database.hpp:294
Structure for retrieving one value from a table column.
Definition: Data.hpp:207
Structure for table columns containing its name, type, reference, and indexing.
Definition: TableColumn.hpp:39
constexpr auto sqlBadHandShake
Bad handshake.
Definition: Database.hpp:222
constexpr auto sqlArg6
Sixth argument in a SQL query.
Definition: Database.hpp:109
static int sqlExecuteUpdate(std::unique_ptr< T > &statement, Args... args)
Template function for executing a SQL query by unique pointer and returning the number of affected rows.
Definition: Database.hpp:1072
constexpr auto sqlArg3
Third argument in a SQL query.
Definition: Database.hpp:100
constexpr auto sqlLostConnectionDuringQuery
Lost connection during query.
Definition: Database.hpp:315
Wrapper class providing the database functionality of Module::Database to its child classes...
Definition: Database.hpp:72
constexpr auto sqlNewAbortedConnection
New aborted connection.
Definition: Database.hpp:267
constexpr auto sqlErrorInServerHandshake
Error in server handshake.
Definition: Database.hpp:312
Database settings containing its host, port, user, password, schema, and compression.
Definition: DatabaseSettings.hpp:48
constexpr auto sqlCannotGetHostName
Cannot get host name.
Definition: Database.hpp:219
Structure for updating multiple values of the same type in a table.
Definition: Data.hpp:390
A simple timer.
Definition: Simple.hpp:53
Information about a thread as stored in the database, containing both the options for and the status ...
Definition: ThreadDatabaseEntry.hpp:40
Class for wrong arguments exceptions.
Definition: Database.hpp:761
static bool sqlExecute(std::unique_ptr< T > &statement, Args... args)
Template function for executing a SQL query by unique pointer.
Definition: Database.hpp:1036
constexpr auto sqlDir
(Sub-)Directory for .sql files.
Definition: Database.hpp:127
constexpr auto sqlForcingCloseOfThread
Forcing close of thread.
Definition: Database.hpp:237
Template class for safe in-scope database locks.
Definition: DatabaseTryLock.hpp:51
constexpr auto sqlNormalShutdown
Normal shutdown.
Definition: Database.hpp:228
constexpr auto nAtOnce500
Process five hundred values at once.
Definition: Database.hpp:179
constexpr auto nAtOnce10
Process ten values at once.
Definition: Database.hpp:88
Wrapper class for in-scope transactions.
Definition: Database.hpp:659
void success()
Sets the state of the transaction to successful.
Definition: Database.hpp:715
constexpr auto sqlGotSignal
Got signal.
Definition: Database.hpp:231
Structure for retrieving multiple table columns of the same type.
Definition: Data.hpp:284
constexpr auto sqlArg5
Fifth argument in a SQL query.
Definition: Database.hpp:106
Namespace for RAII wrappers and Wrapper::Database.
Definition: Database.hpp:109
constexpr auto sqlSSLConnectionError
SSL connection error.
Definition: Database.hpp:324
constexpr auto maxContentSize
Maximum size of database content (= 1 GiB).
Definition: Database.hpp:75
constexpr auto sqlArg1
First argument in a SQL query.
Definition: Database.hpp:94
static sql::ResultSet * sqlExecuteQuery(T &statement, Args... args)
Template function for executing a SQL query and returning the resulting set.
Definition: Database.hpp:920
constexpr auto sqlArg9
Ninth argument in a SQL query.
Definition: Database.hpp:151
constexpr auto sqlReadErrorFromConnectionPipe
Read error from connection pipe.
Definition: Database.hpp:246
static int sqlExecuteUpdate(T &statement, Args... args)
Template function for executing a SQL query and returning the number of affected rows.
Definition: Database.hpp:978
constexpr auto sqlCannotCreateIPSocket
Cannot create IP socket.
Definition: Database.hpp:240
Structure for retrieving multiple values of the same type from a table column.
Definition: Data.hpp:225
constexpr auto sqlClientErrorConnectingToMaster
Client error connecting to master.
Definition: Database.hpp:321
constexpr auto sqlServerHasGoneAway
Server has gone away.
Definition: Database.hpp:306
Class for database connection exceptions.
Definition: Database.hpp:749
constexpr auto sqlTCPError
TCP error.
Definition: Database.hpp:309
constexpr auto sqlShutDownComplete
Shutdown complete.
Definition: Database.hpp:234
Structure for retrieving the values in a table column.
Definition: Data.hpp:258
constexpr auto sleepOnLockMs
Sleep time in milliseconds before re-attempting to add a database lock.
Definition: Database.hpp:139
constexpr auto sqlUnknownServerHost
Unknown server host.
Definition: Database.hpp:303
constexpr auto sqlArg4
Fourth argument in a SQL query.
Definition: Database.hpp:103
#define MAIN_EXCEPTION_SUBCLASS(NAME)
Macro used to easily define classes for specific exceptions.
Definition: Exception.hpp:65
Transaction(Main::Database &db, const std::string &isolationLevel)
Constructor starting the transaction using the specified isolation level.
Definition: Database.hpp:672
Namespace for the main classes of the program.
Definition: App.cpp:34
Structure for inserting one value into a table.
Definition: Data.hpp:333
constexpr auto sqlServerErrorConnectingToMaster
Server error connecting to master.
Definition: Database.hpp:288
Configuration properties containing its module, name, and JSON string.
Definition: ConfigProperties.hpp:40
constexpr auto sqlAbortedConnection
Aborted connection.
Definition: Database.hpp:243
constexpr auto numUrlListTables
The minimum number of tables per URL list.
Definition: Database.hpp:151
Template class for safe in-scope database locks.
Definition: DatabaseLock.hpp:54
constexpr auto wwwPrefix
"www." prefix to be ignored when checking for a domain.
Definition: Database.hpp:148
constexpr auto nAtOnce100
Process one hundred values at once.
Definition: Database.hpp:91
Structure for inserting multiple values of the same type into a table.
Definition: Data.hpp:348
constexpr auto sqlArg7
Seventh argument in a SQL query.
Definition: Database.hpp:145
constexpr auto sqlErrorWritingPackets
Error writing packets.
Definition: Database.hpp:261
constexpr auto maxContentSizeString
Maximum size of database content as string.
Definition: Database.hpp:78
Website properties containing its domain, namespace, name, and data directory.
Definition: WebsiteProperties.hpp:39
constexpr auto sleepOnDeadLockMs
Time (in ms) to sleep on SQL deadlock.
Definition: Database.hpp:160