crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
crawlservpp::Module::Crawler::Database Class Reference final

Class providing database functionality for crawler threads by implementing Wrapper::Database. More...

#include <Database.hpp>

Inheritance diagram for crawlservpp::Module::Crawler::Database:
Collaboration diagram for crawlservpp::Module::Crawler::Database:

Classes

class  Exception
 Class for crawler-specific database exceptions. More...
 

Construction

 Database (Module::Database &dbThread)
 Constructor setting the database connection for the thread. More...
 

Crawler-specific Setters

void setMaxBatchSize (std::uint16_t setMaxBatchSize)
 Sets the maximum number of URLs to be processed at once. More...
 
void setRecrawl (bool isRecrawl)
 Sets whether all URLs will be recrawled. More...
 
void setUrlCaseSensitive (bool isUrlCaseSensitive)
 Sets whether the current URL list is case-sensitive. More...
 
void setUrlDebug (bool isUrlDebug)
 Sets whether to enable URL debugging. More...
 
void setUrlStartupCheck (bool isUrlStartupCheck)
 Sets whether to check URLs on startup. More...
 

Prepared SQL Statements

void prepare ()
 Prepares the SQL statements for the crawler. More...
 

URLs

std::uint64_t getUrlId (const std::string &url)
 Gets the ID of a URL from the database. More...
 
IdString getNextUrl (std::uint64_t currentUrlId)
 Gets the ID of the next URL to crawl from the database. More...
 
bool addUrlIfNotExists (const std::string &urlString, bool manual)
 Adds a URL to the database, if it doesn't exist already. More...
 
std::size_t addUrlsIfNotExist (std::queue< std::string > &urls, bool manual)
 Adds URLs to the database, if they do not exist already. More...
 
std::uint64_t getUrlPosition (std::uint64_t urlId)
 Gets the position of a URL in the current URL list. More...
 
std::uint64_t getNumberOfUrls ()
 Gets the number of URLs in the current URL list. More...
 

URL Checking

void urlDuplicationCheck ()
 Checks the current URL list for duplicates. More...
 
void urlHashCheck ()
 Checks the hash values in the current URL list. More...
 
void urlEmptyCheck ()
 Checks for empty URLs in the current URL list. More...
 
void urlUtf8Check ()
 Checks for URLs containing invalid UTF-8 characters in the current URL list. More...
 

URL Locking

std::string getUrlLockTime (std::uint64_t urlId)
 Gets the time, until which a URL has been locked. More...
 
bool isUrlCrawled (std::uint64_t urlId)
 Gets whether a URL has been crawled. More...
 
std::string lockUrlIfOk (std::uint64_t urlId, const std::string &lockTime, std::uint32_t lockTimeout)
 Locks a URL if it is lockable or still locked by the current thread. More...
 
void unLockUrlIfOk (std::uint64_t urlId, const std::string &lockTime)
 Unlocks a URL in the database. More...
 
void setUrlFinishedIfOk (std::uint64_t urlId, const std::string &lockTime)
 Sets the URL to crawled in the database, if it is still locked by the thread. More...
 

Crawling

void saveContent (std::uint64_t urlId, std::uint32_t response, const std::string &type, const std::string &content)
 Saves crawled content to the database. More...
 
void saveArchivedContent (std::uint64_t urlId, const std::string &timeStamp, std::uint32_t response, const std::string &type, const std::string &content)
 Saves archived content to the database. More...
 
bool isArchivedContentExists (std::uint64_t urlId, const std::string &timeStamp)
 Checks whether archived content for a URL with a specific timestamp already exists in the database. More...
 

Setters

void setLogging (std::uint8_t level, std::uint8_t min, std::uint8_t verbose)
 Sets the current, minimal, and verbose logging levels. More...
 
void setSleepOnError (std::uint64_t seconds)
 Sets the number of seconds to sleep before trying to reconnect after connection loss. More...
 
void setTimeOut (std::uint64_t milliseconds)
 Sets the maximum execution time for MySQL queries, in milliseconds. More...
 

Logging

void log (std::uint8_t level, const std::string &logEntry)
 Writes a thread-specific log entry to the database. More...
 
void log (std::uint8_t level, std::queue< std::string > &logEntries)
 Writes multiple thread-specific log entries to the database. More...
 

Websites

std::string getWebsiteDomain (std::uint64_t websiteId)
 Gets the domain of a website from the database. More...
 

Queries

void getQueryProperties (std::uint64_t queryId, QueryProperties &queryPropertiesTo)
 Gets the properties of a query from the database. More...
 

Configurations

std::string getConfiguration (std::uint64_t configId)
 Gets a configuration from the database. More...
 

Target Tables

std::uint64_t addOrUpdateTargetTable (const TargetTableProperties &properties)
 Adds a new target table or updates an existing target table in the database. More...
 
std::queue< IdString > getTargetTables (const std::string &type, std::uint64_t listId)
 Gets the target tables of the specified type for a URL list from the database. More...
 
std::uint64_t getTargetTableId (const std::string &type, std::uint64_t listId, const std::string &tableName)
 Gets the ID of a target table from the database. More...
 
std::string getTargetTableName (const std::string &type, std::uint64_t tableId)
 Gets the name of a target table from the database. More...
 
void addTargetColumn (const std::string &tableName, const TableColumn &column)
 Adds a column to the target table, if it does not exist already. More...
 
void deleteTargetTable (const std::string &type, std::uint64_t tableId)
 Deletes a target table from the database. More...
 

Locking

void beginNoLock ()
 Disables database locking by starting a new SQL transaction. More...
 
void endNoLock ()
 Re-enables database locking by ending the previous SQL transaction. More...
 

Tables

bool isTableEmpty (const std::string &tableName)
 Checks whether a table in the database is empty. More...
 
bool isTableExists (const std::string &tableName)
 Checks whether a table exists in the database. More...
 
bool isColumnExists (const std::string &tableName, const std::string &columnName)
 Checks whether a table in the database contains a specific column. More...
 
std::string getColumnType (const std::string &tableName, const std::string &columnName)
 Gets the type of a specific table column from the database. More...
 

Custom Data

void getCustomData (Data::GetValue &data)
 Gets a custom value from one column from a table row in the database. More...
 
void getCustomData (Data::GetFields &data)
 Gets custom values from multiple columns of the same type from a table row. More...
 
void getCustomData (Data::GetFieldsMixed &data)
 Gets custom values from multiple columns of different types from a table row. More...
 
void getCustomData (Data::GetColumn &data)
 Gets custom values from a table column in the database. More...
 
void getCustomData (Data::GetColumns &data)
 Gets custom values from multiple table columns of the same type. More...
 
void getCustomData (Data::GetColumnsMixed &data)
 Gets custom values from multiple table columns of different types. More...
 
void insertCustomData (const Data::InsertValue &data)
 Inserts a custom value into a table row in the database. More...
 
void insertCustomData (const Data::InsertFields &data)
 Inserts custom values into multiple table columns of the same type. More...
 
void insertCustomData (const Data::InsertFieldsMixed &data)
 Inserts custom values into multiple table columns of different types. More...
 
void updateCustomData (const Data::UpdateValue &data)
 Updates a custom value in a table row. More...
 
void updateCustomData (const Data::UpdateFields &data)
 Updates custom values in multiple table columns of the same type. More...
 
void updateCustomData (const Data::UpdateFieldsMixed &data)
 Updates custom values in multiple table columns of different types. More...
 

Request Counter

static std::uint64_t getRequestCounter ()
 Gets the number of SQL requests performed since the start of the application. More...
 

Database Connection

Module::Database & database
 Reference to the database connection for the thread. More...
 

Getters

const ModuleOptions & getOptions () const
 Gets the options of the module. More...
 
const std::string & getWebsiteIdString () const
 Gets the ID of the website used by the thread as string. More...
 
const std::string & getUrlListIdString () const
 Gets the ID of the URL list used by the thread as string. More...
 
std::uint8_t getLoggingMin () const
 Gets the minimal logging level. More...
 
std::uint8_t getLoggingVerbose () const
 Gets the level for verbose logging. More...
 
std::uint64_t getMaxAllowedPacketSize () const
 Gets the maximum allowed packet size for communicating with the MySQL server. More...
 

Validation

void checkConnection ()
 Checks whether the connection to the database is still valid and tries to reconnect if necessary. More...
 

Helper Functions for Prepared SQL Statements

void reserveForPreparedStatements (std::size_t n)
 Reserves memory for a specific number of additional prepared SQL statements. More...
 
void addPreparedStatement (const std::string &sqlQuery, std::size_t &id)
 Prepares an additional SQL statement and sets its ID. More...
 
void clearPreparedStatement (std::size_t &id)
 Clears a prepared SQL statement. More...
 
sql::PreparedStatement & getPreparedStatement (std::size_t id)
 Gets a reference to a prepared SQL statement. More...
 

Database Helper Functions

std::uint64_t getLastInsertedId ()
 Gets the last inserted ID from the database. More...
 
void createTable (const TableProperties &properties)
 Adds a table to the database. More...
 
void addColumn (const std::string &tableName, const TableColumn &column)
 Adds a column to a table in the database. More...
 
void dropTable (const std::string &tableName)
 Deletes a table from the database. More...
 
void compressTable (const std::string &tableName)
 Compresses a table in the database. More...
 
static void addDatabaseLock (const std::string &name, const IsRunningCallback &isRunningCallback)
 Adds a lock to the database class, blocking execution. More...
 
static bool tryDatabaseLock (const std::string &name)
 Tries to add a lock to the database class, not blocking execution. More...
 
static void removeDatabaseLock (const std::string &name)
 Removes a lock from the database class. More...
 

URL List Helper Function

void setUrlListCaseSensitive (std::uint64_t listId, bool isCaseSensitive)
 Sets whether the specified URL list is case-sensitive. More...
 

Exception Helper Function

static void sqlException (const std::string &function, const sql::SQLException &e)
 Catches a SQL exception and re-throws it as a specific or a generic Database::Exception. More...
 

Helper Functions for Executing SQL Queries

static bool sqlExecute (sql::PreparedStatement &sqlPreparedStatement)
 Executes a prepared SQL statement. More...
 
static sql::ResultSet * sqlExecuteQuery (sql::PreparedStatement &sqlPreparedStatement)
 Executes a prepared SQL statement and returns the resulting set. More...
 
static int sqlExecuteUpdate (sql::PreparedStatement &sqlPreparedStatement)
 Executes a prepared SQL statement and returns the number of affected rows. More...
 

Detailed Description

Class providing database functionality for crawler threads by implementing Wrapper::Database.

Constructor & Destructor Documentation

◆ Database()

crawlservpp::Module::Crawler::Database::Database ( Module::Database & dbThread)
explicit

Constructor setting the database connection for the thread.

Parameters
dbThread Reference to the database connection used by the crawler thread.

Member Function Documentation

◆ addColumn()

void crawlservpp::Wrapper::Database::addColumn ( const std::string &  tableName,
const TableColumn &  column 
)
inlineprotectedinherited

Adds a column to a table in the database.

Parameters
tableNameConstant reference to a string containing the name of the table to which the column will be added.
columnConstant reference to a structure containing the properties of the column to be added to the table.
Exceptions
Main::Database::Exception if no table, column, or column type has been specified, i.e. if one of the strings containing the name of the table, the name of the column, and the type of the column is empty, or if a column reference is incomplete.
Main::Database::Exception if a MySQL error occurred while adding the column to the given table in the database.
See also
Struct::TableColumn

References crawlservpp::Main::Database::addColumn(), and crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Wrapper::Database::addTargetColumn().

◆ addDatabaseLock()

void crawlservpp::Wrapper::Database::addDatabaseLock ( const std::string &  name,
const IsRunningCallback &  isRunningCallback 
)
inlinestaticprotectedinherited

Adds a lock to the database class, blocking execution.

If a lock with the same name already exists, the function will block execution until this lock has been released, or the specified callback function returns false.

Parameters
nameConstant reference to a string containing the name of the lock to be waited for and added to the database class.
isRunningCallbackConstant reference to a function that will be regularly called during a block, to enquire whether the thread (or application) is still running. As soon as this function returns false, execution will no longer be blocked, even if the lock could not be added.

References crawlservpp::Main::Database::addDatabaseLock().

◆ addOrUpdateTargetTable()

std::uint64_t crawlservpp::Wrapper::Database::addOrUpdateTargetTable ( const TargetTableProperties & properties)
inlineinherited

Adds a new target table or updates an existing target table in the database.

Parameters
propertiesConstant reference to the properties of the new target table, or the existing target table to be updated.
Returns
If no target table with the specified type and name already exists, a unique ID identifying the new target table in the database. The ID of the new table is, however, only unique among all target tables of the same type. If a target table with the specified type and name already exists, its ID will be returned instead.
Exceptions
Main::Database::Exception if no type, website, URL list, name, or columns have been specified in the given properties of the new target table, or if a column of the already existing target table cannot be overwritten due to incompatibilities between the respective data types.
Main::Database::Exception if a MySQL error occurred while adding the new target table, or updating the existing target table in the database.

References crawlservpp::Main::Database::addOrUpdateTargetTable(), and crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Analyzer::Database::addAdditionalTable(), crawlservpp::Module::Parser::Database::initTargetTable(), crawlservpp::Module::Analyzer::Database::initTargetTable(), and crawlservpp::Module::Extractor::Database::initTargetTables().

◆ addPreparedStatement()

void crawlservpp::Wrapper::Database::addPreparedStatement ( const std::string &  sqlQuery,
std::size_t &  id 
)
inlineprotectedinherited

Prepares an additional SQL statement and sets its ID.

If the current ID is not zero, the old prepared statement will be removed.

Parameters
sqlQueryConstant reference to a string containing the SQL query for the prepared SQL statement.
idReference to the current ID or zero, which will be set to the new unique ID identifying the prepared SQL query in-class.
Exceptions
Main::Database::Exception if a MySQL error occurred while preparing and adding the SQL statement.
std::out_of_range if id contains neither zero nor a valid ID.

References crawlservpp::Main::Database::addPreparedStatement(), and crawlservpp::Wrapper::Database::database.

Referenced by prepare(), crawlservpp::Module::Parser::Database::prepare(), crawlservpp::Module::Extractor::Database::prepare(), crawlservpp::Module::Analyzer::Database::prepare(), crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), and crawlservpp::Module::Extractor::Database::unLockUrlsIfOk().

◆ addTargetColumn()

void crawlservpp::Wrapper::Database::addTargetColumn ( const std::string &  tableName,
const TableColumn &  column 
)
inlineinherited

Adds a column to the target table, if it does not exist already.

Does nothing if the column already exists.

Parameters
tableNameConstant reference to a string containing the name of the target table to which to add the specified column.
columnConstant reference to a structure containing the properties of the column to be added to the table.
Warning
The data type of the column will not be validated if the column already exists.

References crawlservpp::Wrapper::Database::addColumn(), crawlservpp::Wrapper::Database::isColumnExists(), and crawlservpp::Struct::TableColumn::name.

Referenced by crawlservpp::Module::Analyzer::Algo::TopicModelling::resetAlgo().

◆ addUrlIfNotExists()

bool crawlservpp::Module::Crawler::Database::addUrlIfNotExists ( const std::string &  urlString,
bool  manual 
)

Adds a URL to the database, if it doesn't exist already.

Parameters
urlStringConstant reference to a string containing the URL to be added to the current URL list in the database.
manualSpecifies whether the URL is a custom URL, i.e. has been manually added.
Returns
True if the URL has been added. False, if the URL had already existed.
Exceptions
Module::Crawler::Database::Exception if no URL has been specified, or the prepared SQL statement for adding a URL to the database is missing.
Main::Database::Exception if a MySQL error occurred while adding the URL to the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Module::Crawler::sqlArg2, crawlservpp::Module::Crawler::sqlArg3, crawlservpp::Module::Crawler::sqlArg4, crawlservpp::Module::Crawler::sqlArg5, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteUpdate().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ addUrlsIfNotExist()

std::size_t crawlservpp::Module::Crawler::Database::addUrlsIfNotExist ( std::queue< std::string > &  urls,
bool  manual 
)

Adds URLs to the database, if they do not exist already.

Adds the given URLs to the database in batches of the maximum batch size, of 100, and of 10, where possible, to considerably speed up the process.

Parameters
urls Reference to a queue containing the URLs to be added to the current URL list in the database. The queue will be cleared after a successful call to the function, even if some or all of the given URLs have not been added, because they already existed in the database.
manual Specifies whether the URLs are custom URLs, i.e. have been manually added.
Returns
The number of given URLs that did not yet exist and have been added to the database.
Exceptions
Module::Crawler::Database::Exception if one of the prepared SQL statements for adding URLs to the database is missing.
Main::Database::Exception if a MySQL error occurred while adding the URLs to the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Crawler::nAtOnce10, crawlservpp::Module::Crawler::nAtOnce100, crawlservpp::Module::Crawler::numArgsAddUrl, crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Module::Crawler::sqlArg2, crawlservpp::Module::Crawler::sqlArg3, crawlservpp::Module::Crawler::sqlArg4, crawlservpp::Module::Crawler::sqlArg5, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteUpdate().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ beginNoLock()

void crawlservpp::Wrapper::Database::beginNoLock ( )
inlineinherited

Disables database locking by starting a new SQL transaction.

Exceptions
Main::Database::Exception if a MySQL error occurred while starting a new SQL transaction in the database.

References crawlservpp::Main::Database::beginNoLock(), and crawlservpp::Wrapper::Database::database.

Referenced by getNumberOfUrls(), and getUrlPosition().

◆ checkConnection()

void crawlservpp::Wrapper::Database::checkConnection ( )
inlineprotectedinherited

Checks whether the connection to the database is still valid and tries to reconnect if necessary.

Warning
Afterwards, old references to prepared SQL statements might be invalid, because the connection to the database might have been reset.
Exceptions
Main::Database::Exception if the MySQL driver is not initialized.
Main::Database::Exception if a MySQL error occurred while attempting to reconnect to the database.

References crawlservpp::Main::Database::checkConnection(), and crawlservpp::Wrapper::Database::database.

Referenced by addUrlIfNotExists(), addUrlsIfNotExist(), crawlservpp::Module::Extractor::Database::checkExtractingTable(), crawlservpp::Module::Parser::Database::checkParsingTable(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Parser::Database::fetchUrls(), crawlservpp::Module::Extractor::Database::fetchUrls(), crawlservpp::Module::Parser::Database::getAllContents(), crawlservpp::Module::Extractor::Database::getContent(), crawlservpp::Module::Parser::Database::getContentIdFromParsedId(), crawlservpp::Module::Parser::Database::getLatestContent(), crawlservpp::Module::Extractor::Database::getLatestParsedData(), crawlservpp::Module::Parser::Database::getLockTime(), crawlservpp::Module::Extractor::Database::getLockTime(), getNextUrl(), crawlservpp::Module::Parser::Database::getNumberOfContents(), getNumberOfUrls(), crawlservpp::Module::Parser::Database::getNumberOfUrls(), crawlservpp::Module::Extractor::Database::getNumberOfUrls(), crawlservpp::Module::Analyzer::Database::getTargetTableUpdated(), getUrlId(), getUrlLockTime(), crawlservpp::Module::Parser::Database::getUrlLockTime(), crawlservpp::Module::Extractor::Database::getUrlLockTime(), getUrlPosition(), crawlservpp::Module::Parser::Database::getUrlPosition(), crawlservpp::Module::Extractor::Database::getUrlPosition(), isArchivedContentExists(), isUrlCrawled(), lockUrlIfOk(), prepare(), crawlservpp::Module::Parser::Database::prepare(), crawlservpp::Module::Extractor::Database::prepare(), crawlservpp::Module::Analyzer::Database::prepare(), crawlservpp::Module::Parser::Database::renewUrlLockIfOk(), crawlservpp::Module::Extractor::Database::renewUrlLockIfOk(), saveArchivedContent(), saveContent(), setUrlFinishedIfOk(), crawlservpp::Module::Parser::Database::setUrlsFinishedIfLockOk(), crawlservpp::Module::Extractor::Database::setUrlsFinishedIfLockOk(), unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlIfOk(), crawlservpp::Module::Extractor::Database::unLockUrlIfOk(), 
crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), crawlservpp::Module::Extractor::Database::unLockUrlsIfOk(), crawlservpp::Module::Analyzer::Database::updateAdditionalTable(), crawlservpp::Module::Parser::Database::updateOrAddEntries(), crawlservpp::Module::Extractor::Database::updateOrAddEntries(), crawlservpp::Module::Extractor::Database::updateOrAddLinked(), crawlservpp::Module::Parser::Database::updateTargetTable(), crawlservpp::Module::Analyzer::Database::updateTargetTable(), crawlservpp::Module::Extractor::Database::updateTargetTable(), urlDuplicationCheck(), urlEmptyCheck(), and urlHashCheck().

◆ clearPreparedStatement()

void crawlservpp::Wrapper::Database::clearPreparedStatement ( std::size_t &  id)
inlineprotectedinherited

Clears a prepared SQL statement.

Parameters
idReference to the current ID, which will be set to zero after the corresponding prepared SQL statement has been cleared.
Exceptions
Main::Database::Exception if a MySQL error occurred while clearing the prepared SQL statement.
std::out_of_range if id contains zero or an invalid ID.

References crawlservpp::Main::Database::clearPreparedStatement(), and crawlservpp::Wrapper::Database::database.

Referenced by prepare(), crawlservpp::Module::Extractor::Database::prepare(), crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), and crawlservpp::Module::Extractor::Database::unLockUrlsIfOk().

◆ compressTable()

void crawlservpp::Wrapper::Database::compressTable ( const std::string &  tableName)
inlineprotectedinherited

Compresses a table in the database.

The function will have no effect on the table, if the table is already compressed.

Parameters
tableNameConstant reference to a string containing the name of the table to be compressed.
Exceptions
Main::Database::Exception if no table is specified, i.e. if the string containing the name of the table is empty, or if a row format could not be determined.
Main::Database::Exception if a MySQL error occurred while compressing the table in the database.

References crawlservpp::Main::Database::compressTable(), and crawlservpp::Wrapper::Database::database.

◆ createTable()

void crawlservpp::Wrapper::Database::createTable ( const TableProperties & properties)
inlineprotectedinherited

Adds a table to the database.

Note
A column for the primary key named id will be created automatically.
Parameters
propertiesConstant reference to a structure containing the properties of the table to be created.
Exceptions
Main::Database::Exception if no name or columns are specified in the given properties structure, if one of the columns defined there is missing its name or data type, or if a column reference is incomplete.
Main::Database::Exception if a MySQL error occurred while adding the table to the database.
See also
Struct::TableProperties

References crawlservpp::Main::Database::createTable(), and crawlservpp::Wrapper::Database::database.

◆ deleteTargetTable()

void crawlservpp::Wrapper::Database::deleteTargetTable ( const std::string &  type,
std::uint64_t  tableId 
)
inlineinherited

Deletes a target table from the database.

Parameters
type Constant reference to a string containing the type of the target table to be deleted.
tableId The ID of the target table to be deleted.
Exceptions
Main::Database::Exception if no target table has been specified, i.e. the string containing the type is empty or the target table ID is zero.
Main::Database::Exception if a MySQL error occurred while deleting the target table from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::deleteTargetTable().

◆ dropTable()

void crawlservpp::Wrapper::Database::dropTable ( const std::string &  tableName)
inlineprotectedinherited

Deletes a table from the database.

If the table does not exist in the database, the database will not be changed.

Parameters
tableNameConstant reference to a string containing the name of the table to be deleted, if it exists.
Exceptions
Main::Database::Exception if no table has been specified, i.e. if the string containing the name of the table is empty.
Main::Database::Exception if a MySQL error occurred while removing the table from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::dropTable().

Referenced by crawlservpp::Module::Analyzer::Database::addAdditionalTable(), and crawlservpp::Module::Analyzer::Database::initTargetTable().

◆ endNoLock()

void crawlservpp::Wrapper::Database::endNoLock ( )
inlineinherited

Re-enables database locking by ending the previous SQL transaction.

Exceptions
Main::Database::Exception if a MySQL error occurred while ending the previous SQL transaction by committing the changes to the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::endNoLock().

Referenced by getNumberOfUrls(), and getUrlPosition().

◆ getColumnType()

std::string crawlservpp::Wrapper::Database::getColumnType ( const std::string &  tableName,
const std::string &  columnName 
)
inlineinherited

Gets the type of a specific table column from the database.

Parameters
tableNameConstant reference to a string containing the name of the table in the database from which the type of the column will be retrieved.
columnNameConstant reference to a string containing the name of the column whose type will be retrieved.
Returns
A copy of the name of the given table column's data type, without specifiers like 'UNSIGNED'.
Exceptions
Main::Database::Exception if no table or column has been specified, i.e. one of the strings containing the name and the column is empty.
Main::Database::Exception if a MySQL error occurred while retrieving the type of the given column, e.g. if the specified table does not exist.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getColumnType().

Referenced by crawlservpp::Module::Analyzer::Thread::uploadResult().

◆ getConfiguration()

std::string crawlservpp::Wrapper::Database::getConfiguration ( std::uint64_t  configId)
inlineinherited

Gets a configuration from the database.

Parameters
configIdThe ID of the configuration to be retrieved from the database.
Returns
A copy of the configuration's JSON string as stored in the database, or an empty string if the given configuration does not exist in the database.
Exceptions
Main::Database::Exception if no configuration has been specified, i.e. the configuration ID is zero.
Main::Database::Exception if a MySQL error occurred while retrieving the configuration from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getConfiguration().

Referenced by crawlservpp::Module::Analyzer::Thread::cleanUpQueries(), crawlservpp::Module::Parser::Thread::onReset(), crawlservpp::Module::Extractor::Thread::onReset(), and crawlservpp::Module::Crawler::Thread::onReset().

◆ getCustomData() [1/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetValue & data)
inlineinherited

Gets a custom value from one column from a table row in the database.

Parameters
dataReference to the data structure that identifies the column, and to which the result will be written.
Exceptions
Main::Database::Exception if no column name or no column type is specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exception if a MySQL error occurred while retrieving the data.
See also
Data::GetValue

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

Referenced by crawlservpp::Module::Analyzer::Database::checkSources(), and crawlservpp::Module::Analyzer::Thread::uploadResult().

◆ getCustomData() [2/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetFields & data)
inlineinherited

Gets custom values from multiple columns of the same type from a table row.

Parameters
dataReference to the data structure that identifies the columns, and to which the result will be written.
Exceptions
Main::Database::Exception if no column names or no column type are specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exception if a MySQL error occurred while retrieving the data.
See also
Data::GetFields

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getCustomData() [3/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetFieldsMixed & data)
inlineinherited

Gets custom values from multiple columns of different types from a table row.

Parameters
dataReference to the data structure that identifies the columns and their types, and to which the result will be written.
Exceptions
Main::Database::Exception if no columns are specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exception if a MySQL error occurred while retrieving the data.
See also
Data::GetFieldsMixed

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getCustomData() [4/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetColumn data)
inlineinherited

Gets custom values from a table column in the database.

Parameters
dataReference to the data structure that identifies the column, and to which the result will be written.
Exceptions
Main::Database::Exceptionif no column or column type is specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while retrieving the data.
See also
Data::GetColumn

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getCustomData() [5/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetColumns data)
inlineinherited

Gets custom values from multiple table columns of the same type.

Parameters
dataReference to the data structure that identifies the columns, and to which the result will be written.
Exceptions
Main::Database::Exceptionif no column or column type is specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while retrieving the data.
See also
Data::GetColumns

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getCustomData() [6/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetColumnsMixed data)
inlineinherited

Gets custom values from multiple table columns of different types.

Parameters
dataReference to the data structure that identifies the columns and their types, and to which the result will be written.
Exceptions
Main::Database::Exceptionif no columns have been specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while retrieving the data.
See also
Data::GetColumnsMixed

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getLastInsertedId()

std::uint64_t crawlservpp::Wrapper::Database::getLastInsertedId ( )
inlineprotectedinherited

Gets the last inserted ID from the database.

Returns
The last inserted ID from the database.
Exceptions
Main::Database::Exceptionif the prepared SQL statement for retrieving the last inserted ID from the database is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the last inserted ID from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getLastInsertedId().

Referenced by crawlservpp::Module::Analyzer::Database::checkSources().

◆ getLoggingMin()

◆ getLoggingVerbose()

std::uint8_t crawlservpp::Wrapper::Database::getLoggingVerbose ( ) const
inlineprotectedinherited

Gets the level for verbose logging.

Returns
The logging level at which verbose logging is activated.

References crawlservpp::Wrapper::Database::database.

Referenced by prepare(), crawlservpp::Module::Parser::Database::prepare(), crawlservpp::Module::Extractor::Database::prepare(), and crawlservpp::Module::Analyzer::Database::prepare().

◆ getMaxAllowedPacketSize()

std::uint64_t crawlservpp::Wrapper::Database::getMaxAllowedPacketSize ( ) const
inlineprotectedinherited

Gets the maximum allowed packet size for communicating with the MySQL server.

Returns
The maximum allowed packet size for communicating with the MySQL server, in bytes, or zero if not connected to the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getMaxAllowedPacketSize().

Referenced by crawlservpp::Module::Analyzer::Database::checkSources(), saveArchivedContent(), saveContent(), crawlservpp::Module::Parser::Database::updateTargetTable(), and crawlservpp::Module::Extractor::Database::updateTargetTable().

◆ getNextUrl()

Database::IdString crawlservpp::Module::Crawler::Database::getNextUrl ( std::uint64_t  currentUrlId)

Gets the ID of the next URL to crawl from the database.

Parameters
currentUrlIdThe ID of the URL that has been crawled last.
Returns
A pair of the ID and a string containing the next URL to crawl, or an empty pair if there are no more URLs to crawl.
Exceptions
Module::Crawler::Database::Exceptionif the prepared SQL statement for retrieving the next URL to crawl is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the next URL to crawl from the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ getNumberOfUrls()

std::uint64_t crawlservpp::Module::Crawler::Database::getNumberOfUrls ( )

Gets the number of URLs in the current URL list.

Returns
The total number of URLs in the current URL list.
Exceptions
Module::Crawler::Database::Exceptionif the prepared SQL statement for retrieving the number of URLs is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the number of URLs from the database.

References crawlservpp::Wrapper::Database::beginNoLock(), crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::endNoLock(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ getOptions()

◆ getPreparedStatement()

sql::PreparedStatement & crawlservpp::Wrapper::Database::getPreparedStatement ( std::size_t  id)
inlineprotectedinherited

Gets a reference to a prepared SQL statement.

Warning
Do not run checkConnection while using this reference, because the references will be invalidated when reconnecting to the database!
Parameters
idThe ID of the prepared SQL statement to retrieve.
Returns
A reference to the prepared SQL statement.
Exceptions
Main::Database::Exceptionif a MySQL error occurred while retrieving the prepared SQL statement.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getPreparedStatement().

Referenced by addUrlIfNotExists(), addUrlsIfNotExist(), crawlservpp::Module::Extractor::Database::checkExtractingTable(), crawlservpp::Module::Parser::Database::checkParsingTable(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Parser::Database::fetchUrls(), crawlservpp::Module::Extractor::Database::fetchUrls(), crawlservpp::Module::Parser::Database::getAllContents(), crawlservpp::Module::Extractor::Database::getContent(), crawlservpp::Module::Parser::Database::getContentIdFromParsedId(), crawlservpp::Module::Parser::Database::getLatestContent(), crawlservpp::Module::Extractor::Database::getLatestParsedData(), crawlservpp::Module::Parser::Database::getLockTime(), crawlservpp::Module::Extractor::Database::getLockTime(), getNextUrl(), crawlservpp::Module::Parser::Database::getNumberOfContents(), getNumberOfUrls(), crawlservpp::Module::Parser::Database::getNumberOfUrls(), crawlservpp::Module::Extractor::Database::getNumberOfUrls(), crawlservpp::Module::Analyzer::Database::getTargetTableUpdated(), getUrlId(), getUrlLockTime(), crawlservpp::Module::Parser::Database::getUrlLockTime(), crawlservpp::Module::Extractor::Database::getUrlLockTime(), getUrlPosition(), crawlservpp::Module::Parser::Database::getUrlPosition(), crawlservpp::Module::Extractor::Database::getUrlPosition(), isArchivedContentExists(), isUrlCrawled(), lockUrlIfOk(), crawlservpp::Module::Parser::Database::renewUrlLockIfOk(), crawlservpp::Module::Extractor::Database::renewUrlLockIfOk(), saveArchivedContent(), saveContent(), setUrlFinishedIfOk(), crawlservpp::Module::Parser::Database::setUrlsFinishedIfLockOk(), crawlservpp::Module::Extractor::Database::setUrlsFinishedIfLockOk(), unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlIfOk(), crawlservpp::Module::Extractor::Database::unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), crawlservpp::Module::Extractor::Database::unLockUrlsIfOk(), 
crawlservpp::Module::Analyzer::Database::updateAdditionalTable(), crawlservpp::Module::Parser::Database::updateOrAddEntries(), crawlservpp::Module::Extractor::Database::updateOrAddEntries(), crawlservpp::Module::Extractor::Database::updateOrAddLinked(), crawlservpp::Module::Parser::Database::updateTargetTable(), crawlservpp::Module::Analyzer::Database::updateTargetTable(), crawlservpp::Module::Extractor::Database::updateTargetTable(), urlDuplicationCheck(), urlEmptyCheck(), and urlHashCheck().

◆ getQueryProperties()

void crawlservpp::Wrapper::Database::getQueryProperties ( std::uint64_t  queryId,
QueryProperties queryPropertiesTo 
)
inlineinherited

Gets the properties of a query from the database.

Parameters
queryIdThe ID of the query for which the properties will be retrieved from the database.
queryPropertiesToReference to the structure to which the retrieved properties of the query will be written.
Exceptions
Main::Database::Exceptionif no query ID has been specified, i.e. the query ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the properties of the given query from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getQueryProperties().

Referenced by crawlservpp::Module::Analyzer::Thread::addOptionalQuery(), crawlservpp::Module::Analyzer::Thread::addQueries(), crawlservpp::Module::Parser::Thread::onReset(), crawlservpp::Module::Extractor::Thread::onReset(), and crawlservpp::Module::Crawler::Thread::onReset().

◆ getRequestCounter()

std::uint64_t crawlservpp::Wrapper::Database::getRequestCounter ( )
inlinestaticinherited

Gets the number of SQL requests performed since the start of the application.

Note
By default, the request counter should be deactivated and the function always return zero.
Returns
The number of SQL requests performed since the start of the application or zero, if the request counter had not been activated on compilation.

References crawlservpp::Main::Database::getRequestCounter().

Referenced by crawlservpp::Main::Database::sqlExecute(), crawlservpp::Main::Database::sqlExecuteQuery(), and crawlservpp::Main::Database::sqlExecuteUpdate().

◆ getTargetTableId()

std::uint64_t crawlservpp::Wrapper::Database::getTargetTableId ( const std::string &  type,
std::uint64_t  listId,
const std::string &  tableName 
)
inlineinherited

Gets the ID of a target table from the database.

Parameters
typeConstant reference to a string containing the type of the target table for which to retrieve its ID.
listIdThe ID of the URL list associated with the target table for which to retrieve its ID.
tableNameConstant reference to a string containing the name of the target table for which to retrieve its ID.
Returns
The ID of the specified target table as stored in the database.
Exceptions
Main::Database::Exceptionif no target table or URL list has been specified, i.e. if the string containing the type is empty, or the target table or the URL list ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the ID of the target table from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getTargetTableId().

◆ getTargetTableName()

std::string crawlservpp::Wrapper::Database::getTargetTableName ( const std::string &  type,
std::uint64_t  tableId 
)
inlineinherited

Gets the name of a target table from the database.

Parameters
typeConstant reference to a string containing the type of the target table for which to retrieve its name.
tableIdThe ID of the target table for which to retrieve its name.
Returns
A copy of the name of the specified target table as stored in the database.
Exceptions
Main::Database::Exceptionif no target table has been specified, i.e. the string containing the type is empty or the target table ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the name of the target table from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getTargetTableName().

◆ getTargetTables()

std::queue< Database::IdString > crawlservpp::Wrapper::Database::getTargetTables ( const std::string &  type,
std::uint64_t  listId 
)
inlineinherited

Gets the target tables of the specified type for a URL list from the database.

Parameters
typeConstant reference to a string containing the type of the target tables to retrieve.
listIdThe ID of the URL list for which to retrieve the target tables.
Returns
A queue containing the IDs and names of the target tables of the given type for the specified URL list.
Exceptions
Main::Database::Exceptionif no target table type or URL list has been specified, i.e. the string containing the type is empty or the URL list ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the target tables from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getTargetTables().

◆ getUrlId()

std::uint64_t crawlservpp::Module::Crawler::Database::getUrlId ( const std::string &  url)

Gets the ID of a URL from the database.

Uses a hash check for first checking the probable existence of the URL.

Parameters
urlConstant reference to a string containing the URL to be checked.
Returns
The ID of the given URL, or zero if the URL does not exist in the current URL list.
Exceptions
Module::Crawler::Database::Exceptionif no URL has been specified, or the prepared SQL statement for retrieving the ID of a URL is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the ID of the URL from the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Module::Crawler::sqlArg2, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by isArchivedContentExists(), and crawlservpp::Module::Crawler::Thread::onReset().

◆ getUrlListIdString()

const std::string & crawlservpp::Wrapper::Database::getUrlListIdString ( ) const
inlineprotectedinherited

Gets the ID of the URL list used by the thread as string.

Returns
A reference to the string containing the ID of the URL list used by the thread.

References crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Analyzer::Database::prepare().

◆ getUrlLockTime()

std::string crawlservpp::Module::Crawler::Database::getUrlLockTime ( std::uint64_t  urlId)

Gets the time, until which a URL has been locked.

Parameters
urlIdThe ID of the URL whose lock time will be retrieved.
Returns
The time, until which the URL has been locked, in the format YYYY-MM-DD HH:MM:SS.
Exceptions
Module::Crawler::Database::Exceptionif the prepared SQL statement for retrieving the lock time is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the lock time of the URL.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by lockUrlIfOk().

◆ getUrlPosition()

std::uint64_t crawlservpp::Module::Crawler::Database::getUrlPosition ( std::uint64_t  urlId)

Gets the position of a URL in the current URL list.

Parameters
urlIdID of the URL whose position in the current URL list will be retrieved from the database.
Returns
The position of the given URL in the current URL list.
Exceptions
Module::Crawler::Database::Exceptionif the prepared SQL statement for retrieving the position of a URL is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the position of the URL from the database.

References crawlservpp::Wrapper::Database::beginNoLock(), crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::endNoLock(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ getWebsiteDomain()

std::string crawlservpp::Wrapper::Database::getWebsiteDomain ( std::uint64_t  websiteId)
inlineinherited

Gets the domain of a website from the database.

Parameters
websiteIdThe ID of the website for which the domain will be retrieved from the database.
Returns
A copy of the domain name of the given website.
Exceptions
Main::Database::Exceptionif no website has been specified, i.e. the website ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the domain name of the given website from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getWebsiteDomain().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ getWebsiteIdString()

const std::string & crawlservpp::Wrapper::Database::getWebsiteIdString ( ) const
inlineprotectedinherited

Gets the ID of the website used by the thread as string.

Returns
A reference to the string containing the ID of the website used by the thread.

References crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Analyzer::Database::prepare().

◆ insertCustomData() [1/3]

void crawlservpp::Wrapper::Database::insertCustomData ( const Data::InsertValue data)
inlineinherited

◆ insertCustomData() [2/3]

void crawlservpp::Wrapper::Database::insertCustomData ( const Data::InsertFields data)
inlineinherited

Inserts custom values into multiple table columns of the same type.

Parameters
dataConstant reference to a structure containing the data to be inserted.
Exceptions
Main::Database::Exceptionif no table, columns, or column type have been specified in the given data structure, if the given data is too large, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while inserting the data.
See also
Data::InsertFields

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::insertCustomData().

◆ insertCustomData() [3/3]

void crawlservpp::Wrapper::Database::insertCustomData ( const Data::InsertFieldsMixed data)
inlineinherited

Inserts custom values into multiple table columns of different types.

Parameters
dataConstant reference to a structure containing the data to be inserted.
Exceptions
Main::Database::Exceptionif no table or columns have been specified in the given data structure, if the given data is too large, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while inserting the data.
See also
Data::InsertFieldsMixed

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::insertCustomData().

◆ isArchivedContentExists()

bool crawlservpp::Module::Crawler::Database::isArchivedContentExists ( std::uint64_t  urlId,
const std::string &  timeStamp 
)

Checks whether archived content for a URL with a specific timestamp already exists in the database.

Parameters
urlIdThe ID of the URL whose archived version has been crawled.
timeStampThe time stamp of the archived content, i.e. when it has been archived by the crawled archive.
Returns
True, if archived content for the specified URL with the given timestamp already exists in the database. False, if no such content has yet been saved to the database.
Exceptions
Module::Crawler::Database::Exceptionif no URL has been specified, i.e. the given URL ID is zero, or if the prepared SQL statement for checking for archived content in the database is missing.
Main::Database::Exceptionif a MySQL error occurred while checking for archived content in the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Helper::Bytes::first, crawlservpp::Wrapper::Database::getPreparedStatement(), getUrlId(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Module::Crawler::sqlArg2, crawlservpp::Module::Crawler::sqlArg3, crawlservpp::Wrapper::Database::sqlException(), crawlservpp::Wrapper::Database::sqlExecuteQuery(), crawlservpp::Wrapper::Database::sqlExecuteUpdate(), and crawlservpp::Module::Crawler::urlListTableAlias.

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ isColumnExists()

bool crawlservpp::Wrapper::Database::isColumnExists ( const std::string &  tableName,
const std::string &  columnName 
)
inlineinherited

Checks whether a table in the database contains a specific column.

Parameters
tableNameConstant reference to a string containing the name of the table in the database in which the existence of the column will be checked.
columnNameConstant reference to a string containing the name of the column to be checked for in the given table.
Returns
True, if the given column exists in the specified table. False otherwise.
Exceptions
Main::Database::Exceptionif no table or columns have been specified, i.e. one of the strings containing the name and the column is empty.
Main::Database::Exceptionif a MySQL error occurred while checking the existence of the given column, e.g. if the specified table does not exist.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::isColumnExists().

Referenced by crawlservpp::Wrapper::Database::addTargetColumn(), and crawlservpp::Module::Analyzer::Thread::uploadResult().

◆ isTableEmpty()

bool crawlservpp::Wrapper::Database::isTableEmpty ( const std::string &  tableName)
inlineinherited

Checks whether a table in the database is empty.

Parameters
tableNameConstant reference to a string containing the name of the table whose contents will be checked in the database.
Returns
True, if the given table is empty. False if it contains data.
Exceptions
Main::Database::Exceptionif no table has been specified, i.e. the string containing the name is empty.
Main::Database::Exceptionif a MySQL error occurred while checking the content of the given table in the database, e.g. if the table does not exist.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::isTableEmpty().

◆ isTableExists()

bool crawlservpp::Wrapper::Database::isTableExists ( const std::string &  tableName)
inlineinherited

Checks whether a table exists in the database.

Parameters
tableNameConstant reference to a string containing the name of the table whose existence in the database will be checked.
Returns
True, if the given table exists in the database. False otherwise.
Exceptions
Main::Database::Exceptionif no table has been specified, i.e. the string containing the name is empty.
Main::Database::Exceptionif a MySQL error occurred while checking the existence of the given table in the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::isTableExists().

◆ isUrlCrawled()

bool crawlservpp::Module::Crawler::Database::isUrlCrawled ( std::uint64_t  urlId)

Gets whether a URL has been crawled.

Parameters
urlIdThe ID of the URL for which to check whether it has been crawled.
Returns
True, if the URL has been crawled. False, if the URL does not exist, or has not yet been crawled.
Exceptions
Module::Crawler::Database::Exceptionif no URL has been specified, i.e. the given URL ID is zero, or if the prepared SQL statement for checking whether a URL has been crawled is missing.
Main::Database::Exceptionif a MySQL error occurred while checking whether the URL has been crawled.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ lockUrlIfOk()

std::string crawlservpp::Module::Crawler::Database::lockUrlIfOk ( std::uint64_t  urlId,
const std::string &  lockTime,
std::uint32_t  lockTimeout 
)

Locks a URL if it is lockable or still locked by the current thread.

Parameters
urlIdThe ID of the URL to lock.
lockTimeConstant reference to a string containing the time at which the current lock by the thread for this URL will end (or has ended). Constant reference to an empty string, if the URL has not yet been locked by the current thread.
lockTimeoutThe time for which to lock the URL for the current thread, in seconds.
Returns
A copy of a string containing the time until which the URL has been locked for the current thread, in the format YYYY-MM-DD HH:MM:SS. A copy of an empty string, if the URL could not be locked, or its lock could not be renewed for the current thread, e.g. because it has already been locked by another thread since the current URL lock expired.
Exceptions
Module::Crawler::Database::Exceptionif no URL has been specified, i.e. the given URL ID is zero, or if one of the prepared SQL statements for locking a URL, or for renewing a URL lock is missing.
Main::Database::Exceptionif a MySQL error occurred while locking the URL, or renewing its URL lock.
See also
getLockTime

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), getUrlLockTime(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Module::Crawler::sqlArg2, crawlservpp::Module::Crawler::sqlArg3, crawlservpp::Module::Crawler::sqlArg4, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteUpdate().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ log() [1/2]

void crawlservpp::Wrapper::Database::log ( std::uint8_t  level,
const std::string &  logEntry 
)
inlineinherited

Writes a thread-specific log entry to the database.

Removes invalid UTF-8 characters if necessary.

If debug logging is active, the entry will be written to the logging file as well.

The log entry will not be written to the database, if the current logging level is lower than the specified logging level. The logging level does not affect the writing of logging entries to the logging file when debug logging is active.

Note
String views cannot be used, because they are not supported by the API for the MySQL database.
Parameters
levelThe logging level for the entry. The entry will only be written to the database, if the current logging level is at least the logging level for the entry.
logEntryConstant reference to a string containing the log entry.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Module::Database::log().

Referenced by crawlservpp::Module::Analyzer::Database::addAdditionalTable(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Analyzer::Database::getCorpus(), crawlservpp::Module::Analyzer::Database::initTargetTable(), prepare(), crawlservpp::Module::Parser::Database::prepare(), crawlservpp::Module::Extractor::Database::prepare(), crawlservpp::Module::Analyzer::Database::prepare(), saveArchivedContent(), saveContent(), crawlservpp::Module::Parser::Database::setTargetTable(), crawlservpp::Module::Extractor::Database::setTargetTable(), crawlservpp::Module::Analyzer::Database::setTargetTable(), crawlservpp::Module::Analyzer::Database::updateAdditionalTable(), crawlservpp::Module::Parser::Database::updateTargetTable(), crawlservpp::Module::Analyzer::Database::updateTargetTable(), crawlservpp::Module::Extractor::Database::updateTargetTable(), and urlHashCheck().

◆ log() [2/2]

void crawlservpp::Wrapper::Database::log ( std::uint8_t  level,
std::queue< std::string > &  logEntries 
)
inlineinherited

Writes multiple thread-specific log entries to the database.

Removes invalid UTF-8 characters if necessary.

If debug logging is active, the entries will be written to the logging file as well.

The log entries will not be written to the database, if the current logging level is lower than the specified logging level. The logging level does not affect the writing of logging entries to the logging file when debug logging is active.

Note
String views cannot be used, because they are not supported by the API for the MySQL database.
Parameters
levelThe logging level for the entries. The entries will only be written to the database, if the current logging level is at least the logging level for the entry.
logEntriesReference to a queue of strings containing the log entries to be written. It will be emptied regardless of whether the log entries will be written to the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Module::Database::log().

◆ prepare()

◆ removeDatabaseLock()

void crawlservpp::Wrapper::Database::removeDatabaseLock ( const std::string &  name)
inlinestaticprotectedinherited

Removes a lock from the database class.

Does nothing if a lock with the given name does not exist in the database class.

Parameters
nameConstant reference to a string containing the name of the lock to be removed from the database class.

References crawlservpp::Main::Database::removeDatabaseLock().

◆ reserveForPreparedStatements()

void crawlservpp::Wrapper::Database::reserveForPreparedStatements ( std::size_t  n)
inlineprotectedinherited

Reserves memory for a specific number of additional prepared SQL statements.

Parameters
nNumber of prepared SQL statements for which memory should be reserved.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::reserveForPreparedStatements().

Referenced by prepare(), crawlservpp::Module::Parser::Database::prepare(), crawlservpp::Module::Extractor::Database::prepare(), and crawlservpp::Module::Analyzer::Database::prepare().

◆ saveArchivedContent()

void crawlservpp::Module::Crawler::Database::saveArchivedContent ( std::uint64_t  urlId,
const std::string &  timeStamp,
std::uint32_t  response,
const std::string &  type,
const std::string &  content 
)

Saves archived content to the database.

Parameters
urlIdThe ID of the URL whose archived version has been crawled.
timeStampThe time stamp of the archived content, i.e. when it has been archived by the crawled archive.
responseThe HTTP status code that has been received together with the content, e.g. 200 for OK.
typeConstant reference to a string containing the description of the content type that has been received together with the content, e.g. text/html.
contentConstant reference to a string containing the crawled content to be saved to the database.
Exceptions
Module::Crawler::Database::Exceptionif no URL has been specified, i.e. the given URL ID is zero, or if the prepared SQL statement for saving archived content to the database is missing.
Main::Database::Exceptionif a MySQL error occurred while saving the archived content to the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getLoggingMin(), crawlservpp::Wrapper::Database::getMaxAllowedPacketSize(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Helper::CommaLocale::locale(), crawlservpp::Wrapper::Database::log(), crawlservpp::Module::Crawler::maxContentSize, crawlservpp::Module::Crawler::maxContentSizeString, crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Module::Crawler::sqlArg2, crawlservpp::Module::Crawler::sqlArg3, crawlservpp::Module::Crawler::sqlArg4, crawlservpp::Module::Crawler::sqlArg5, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecute().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ saveContent()

void crawlservpp::Module::Crawler::Database::saveContent ( std::uint64_t  urlId,
std::uint32_t  response,
const std::string &  type,
const std::string &  content 
)

Saves crawled content to the database.

Parameters
urlIdThe ID of the URL that has been crawled.
responseThe HTTP status code that has been received together with the content, e.g. 200 for OK.
typeConstant reference to a string containing the description of the content type that has been received together with the content, e.g. text/html.
contentConstant reference to a string containing the crawled content to be saved to the database.
Exceptions
Module::Crawler::Database::Exception if no URL has been specified, i.e. the given URL ID is zero, or if the prepared SQL statement for saving crawled content to the database is missing.
Main::Database::Exception if a MySQL error occurred while saving the crawled content to the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getLoggingMin(), crawlservpp::Wrapper::Database::getMaxAllowedPacketSize(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Helper::CommaLocale::locale(), crawlservpp::Wrapper::Database::log(), crawlservpp::Module::Crawler::maxContentSize, crawlservpp::Module::Crawler::maxContentSizeString, crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Module::Crawler::sqlArg2, crawlservpp::Module::Crawler::sqlArg3, crawlservpp::Module::Crawler::sqlArg4, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecute().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ setLogging()

void crawlservpp::Wrapper::Database::setLogging ( std::uint8_t  level,
std::uint8_t  min,
std::uint8_t  verbose 
)
inlineinherited

Sets the current, minimal, and verbose logging levels.

Initializes debug logging via logging file if necessary.

Parameters
levelThe current logging level.
minThe minimum logging level.
verboseThe verbose logging level.
Exceptions
Module::Database::Exceptionif the logging file could not be opened for writing.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Module::Database::setLogging().

Referenced by crawlservpp::Module::Analyzer::Thread::cleanUpQueries(), crawlservpp::Module::Parser::Thread::onReset(), crawlservpp::Module::Extractor::Thread::onReset(), and crawlservpp::Module::Crawler::Thread::onReset().

◆ setMaxBatchSize()

void crawlservpp::Module::Crawler::Database::setMaxBatchSize ( std::uint16_t  setMaxBatchSize)

Sets the maximum number of URLs to be processed at once.

Parameters
setMaxBatchSizeThe maximum number of URLs that will be processed in one MySQL query.

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ setRecrawl()

void crawlservpp::Module::Crawler::Database::setRecrawl ( bool  isRecrawl)

Sets whether all URLs will be recrawled.

Note
Needs to be set before preparing the SQL statements for the crawler.
Parameters
isRecrawlSet to true, to force the re-crawling of all URLs.

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ setSleepOnError()

void crawlservpp::Wrapper::Database::setSleepOnError ( std::uint64_t  seconds)
inlineinherited

Sets the number of seconds to sleep before trying to reconnect after connection loss.

Parameters
secondsThe number of seconds to wait before trying to reconnect to the MySQL server after the connection got lost.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::setSleepOnError().

Referenced by crawlservpp::Module::Analyzer::Thread::cleanUpQueries(), crawlservpp::Module::Parser::Thread::onReset(), crawlservpp::Module::Extractor::Thread::onReset(), and crawlservpp::Module::Crawler::Thread::onReset().

◆ setTimeOut()

void crawlservpp::Wrapper::Database::setTimeOut ( std::uint64_t  milliseconds)
inlineinherited

Sets the maximum execution time for MySQL queries, in milliseconds.

Note
The database connection needs to be established before setting the time out.
Parameters
millisecondsThe number of milliseconds for a MySQL query to run before it gets cancelled, or zero to disable the time-out for MySQL queries.
Exceptions
Main::Database::Exceptionif a MySQL error occurs while setting the execution time.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::setTimeOut().

Referenced by crawlservpp::Module::Parser::Thread::onReset().

◆ setUrlCaseSensitive()

void crawlservpp::Module::Crawler::Database::setUrlCaseSensitive ( bool  isUrlCaseSensitive)

Sets whether the current URL list is case-sensitive.

Note
Needs to be set before preparing the SQL statements for the crawler.
Parameters
isUrlCaseSensitiveSpecifies whether the URLs in the current URL list are case-sensitive.
Warning
Changing this property of the URL list will invalidate all hashes previously created!

References crawlservpp::Wrapper::Database::getOptions(), and crawlservpp::Wrapper::Database::setUrlListCaseSensitive().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ setUrlDebug()

void crawlservpp::Module::Crawler::Database::setUrlDebug ( bool  isUrlDebug)

Sets whether to enable URL debugging.

Note
Needs to be set before preparing the SQL statements for the crawler.
Parameters
isUrlDebugSpecifies whether URL debugging is enabled.

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ setUrlFinishedIfOk()

void crawlservpp::Module::Crawler::Database::setUrlFinishedIfOk ( std::uint64_t  urlId,
const std::string &  lockTime 
)

Sets the URL to crawled in the database, if it is still locked by the thread.

Parameters
urlIdThe ID of the URL to set to crawled.
lockTimeConstant reference to a string containing the time at which the current lock by the thread for this URL will end (or has ended).
Exceptions
Module::Crawler::Database::Exception if no URL has been specified, i.e. the given URL ID is zero, if no lock time has been specified, i.e. it references an empty string, or if the prepared SQL statement for setting a URL to crawled is missing.
Main::Database::Exception if a MySQL error occurred while setting the URL to crawled.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Module::Crawler::sqlArg2, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecute().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ setUrlListCaseSensitive()

void crawlservpp::Wrapper::Database::setUrlListCaseSensitive ( std::uint64_t  listId,
bool  isCaseSensitive 
)
inlineprotectedinherited

Sets whether the specified URL list is case-sensitive.

Warning
The case-sensitivity should not be changed once URLs have been retrieved!
Parameters
listIdThe ID of the URL list whose case-sensitivity will be changed.
isCaseSensitiveSpecify whether URLs in the given URL list will be case-sensitive or not.
Exceptions
Main::Database::Exception if no URL list has been specified, i.e. the URL list ID is zero.
Main::Database::Exception if a MySQL error occurred while setting the case-sensitivity of the URL list.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::setUrlListCaseSensitive().

Referenced by setUrlCaseSensitive().

◆ setUrlStartupCheck()

void crawlservpp::Module::Crawler::Database::setUrlStartupCheck ( bool  isUrlStartupCheck)

Sets whether to check URLs on startup.

Note
Needs to be set before preparing the SQL statements for the crawler.
Parameters
isUrlStartupCheckSpecifies whether to perform a check of the URL list on startup.

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ sqlException()

void crawlservpp::Wrapper::Database::sqlException ( const std::string &  function,
const sql::SQLException &  e 
)
inlinestaticprotectedinherited

Catches a SQL exception and re-throws it as a specific or a generic Database::Exception.

Note
Always throws an exception.
Parameters
functionConstant reference to a string containing the name of the function in which the exception has been thrown.
eConstant reference to the SQL exception that has been thrown.
Exceptions
Main::Database::ConnectionException
Main::Database::StorageEngineException
Main::Database::PrivilegesException
Main::Database::WrongArgumentsException
Main::Database::IncorrectPathException
Main::Database::Exceptiondepending on the SQL exception thrown.

References crawlservpp::Main::Database::sqlException().

Referenced by addUrlIfNotExists(), addUrlsIfNotExist(), crawlservpp::Module::Extractor::Database::checkExtractingTable(), crawlservpp::Module::Parser::Database::checkParsingTable(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Parser::Database::fetchUrls(), crawlservpp::Module::Extractor::Database::fetchUrls(), crawlservpp::Module::Parser::Database::getAllContents(), crawlservpp::Module::Extractor::Database::getContent(), crawlservpp::Module::Parser::Database::getContentIdFromParsedId(), crawlservpp::Module::Parser::Database::getLatestContent(), crawlservpp::Module::Extractor::Database::getLatestParsedData(), crawlservpp::Module::Parser::Database::getLockTime(), crawlservpp::Module::Extractor::Database::getLockTime(), getNextUrl(), crawlservpp::Module::Parser::Database::getNumberOfContents(), getNumberOfUrls(), crawlservpp::Module::Parser::Database::getNumberOfUrls(), crawlservpp::Module::Extractor::Database::getNumberOfUrls(), crawlservpp::Module::Analyzer::Database::getTargetTableUpdated(), getUrlId(), getUrlLockTime(), crawlservpp::Module::Parser::Database::getUrlLockTime(), crawlservpp::Module::Extractor::Database::getUrlLockTime(), getUrlPosition(), crawlservpp::Module::Parser::Database::getUrlPosition(), crawlservpp::Module::Extractor::Database::getUrlPosition(), isArchivedContentExists(), isUrlCrawled(), lockUrlIfOk(), crawlservpp::Module::Analyzer::Database::prepare(), crawlservpp::Module::Parser::Database::renewUrlLockIfOk(), crawlservpp::Module::Extractor::Database::renewUrlLockIfOk(), saveArchivedContent(), saveContent(), setUrlFinishedIfOk(), crawlservpp::Module::Parser::Database::setUrlsFinishedIfLockOk(), crawlservpp::Module::Extractor::Database::setUrlsFinishedIfLockOk(), unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlIfOk(), crawlservpp::Module::Extractor::Database::unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), crawlservpp::Module::Extractor::Database::unLockUrlsIfOk(), 
crawlservpp::Module::Analyzer::Database::updateAdditionalTable(), crawlservpp::Module::Parser::Database::updateOrAddEntries(), crawlservpp::Module::Extractor::Database::updateOrAddEntries(), crawlservpp::Module::Extractor::Database::updateOrAddLinked(), crawlservpp::Module::Parser::Database::updateTargetTable(), crawlservpp::Module::Analyzer::Database::updateTargetTable(), crawlservpp::Module::Extractor::Database::updateTargetTable(), urlDuplicationCheck(), urlEmptyCheck(), and urlHashCheck().

◆ sqlExecute()

◆ sqlExecuteQuery()

sql::ResultSet * crawlservpp::Wrapper::Database::sqlExecuteQuery ( sql::PreparedStatement &  sqlPreparedStatement)
inlinestaticprotectedinherited

Executes a prepared SQL statement and returns the resulting set.

Parameters
sqlPreparedStatementReference to the prepared SQL statement to be executed.
Returns
A pointer to the result set retrieved by executing the prepared SQL statement.

References crawlservpp::Main::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Parser::Database::fetchUrls(), crawlservpp::Module::Extractor::Database::fetchUrls(), crawlservpp::Module::Parser::Database::getAllContents(), crawlservpp::Module::Extractor::Database::getContent(), crawlservpp::Module::Parser::Database::getContentIdFromParsedId(), crawlservpp::Module::Parser::Database::getLatestContent(), crawlservpp::Module::Extractor::Database::getLatestParsedData(), crawlservpp::Module::Parser::Database::getLockTime(), crawlservpp::Module::Extractor::Database::getLockTime(), getNextUrl(), crawlservpp::Module::Parser::Database::getNumberOfContents(), getNumberOfUrls(), crawlservpp::Module::Parser::Database::getNumberOfUrls(), crawlservpp::Module::Extractor::Database::getNumberOfUrls(), crawlservpp::Module::Analyzer::Database::getTargetTableUpdated(), getUrlId(), getUrlLockTime(), crawlservpp::Module::Parser::Database::getUrlLockTime(), crawlservpp::Module::Extractor::Database::getUrlLockTime(), getUrlPosition(), crawlservpp::Module::Parser::Database::getUrlPosition(), crawlservpp::Module::Extractor::Database::getUrlPosition(), isArchivedContentExists(), isUrlCrawled(), urlDuplicationCheck(), urlEmptyCheck(), and urlHashCheck().

◆ sqlExecuteUpdate()

◆ tryDatabaseLock()

bool crawlservpp::Wrapper::Database::tryDatabaseLock ( const std::string &  name)
inlinestaticprotectedinherited

Tries to add a lock to the database class, not blocking execution.

If a lock with the same name already exists, the function will not add a lock and return false instead.

Parameters
nameConstant reference to a string containing the name of the lock to be added to the database class if a lock with the same name does not exist already.
Returns
True, if a lock with the same name did not exist already and the lock has been added. False, if a lock with the same name already exists and no lock has been added.

References crawlservpp::Main::Database::tryDatabaseLock().

◆ unLockUrlIfOk()

void crawlservpp::Module::Crawler::Database::unLockUrlIfOk ( std::uint64_t  urlId,
const std::string &  lockTime 
)

Unlocks a URL in the database.

Parameters
urlIdThe ID of the URL to unlock.
lockTimeConstant reference to a string containing the time at which the current lock by the thread for this URL will end (or has ended).
Exceptions
Module::Crawler::Database::Exception if no URL has been specified, i.e. the given URL ID is zero, if no lock time has been specified, or if the prepared SQL statement for unlocking a URL is missing.
Main::Database::Exception if a MySQL error occurred while unlocking the URL.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Crawler::sqlArg1, crawlservpp::Module::Crawler::sqlArg2, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecute().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ updateCustomData() [1/3]

void crawlservpp::Wrapper::Database::updateCustomData ( const Data::UpdateValue data)
inlineinherited

Updates a custom value in a table row.

Parameters
dataConstant reference to a structure containing the data to be updated.
Exceptions
Main::Database::Exception if no table, columns, or column type have been specified in the given data structure, if the given data is too large, or if invalid data has been encountered.
Main::Database::Exception if a MySQL error occurred while updating the data.
See also
Data::UpdateValue

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::updateCustomData().

◆ updateCustomData() [2/3]

void crawlservpp::Wrapper::Database::updateCustomData ( const Data::UpdateFields data)
inlineinherited

Updates custom values in multiple table columns of the same type.

Parameters
dataConstant reference to a structure containing the data to be updated.
Exceptions
Main::Database::Exception if no columns or no column type are specified in the given data structure, if the given data is too large, or if invalid data has been encountered.
Main::Database::Exception if a MySQL error occurred while updating the data.
See also
Data::UpdateFields

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::updateCustomData().

◆ updateCustomData() [3/3]

void crawlservpp::Wrapper::Database::updateCustomData ( const Data::UpdateFieldsMixed data)
inlineinherited

Updates custom values in multiple table columns of different types.

Parameters
dataConstant reference to a structure containing the data to be updated.
Exceptions
Main::Database::Exception if no columns are specified in the given data structure, if the given data is too large, or if invalid data has been encountered.
Main::Database::Exception if a MySQL error occurred while updating the data.
See also
Data::UpdateFieldsMixed

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::updateCustomData().

◆ urlDuplicationCheck()

void crawlservpp::Module::Crawler::Database::urlDuplicationCheck ( )

Checks the current URL list for duplicates.

Always throws an exception, unless no duplicates are found.

Exceptions
Module::Crawler::Database::Exception if the prepared SQL statement for checking the current URL list for duplicates is missing, or if duplicates have been found and removed.
Main::Database::Exception if a MySQL error occurred while checking the URL list for duplicates.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ urlEmptyCheck()

void crawlservpp::Module::Crawler::Database::urlEmptyCheck ( )

Checks for empty URLs in the current URL list.

Always throws an exception, unless no empty URLs are found.

Exceptions
Module::Crawler::Database::Exception if the prepared SQL statement for checking the current URL list for empty URLs is missing, or if empty URLs have been found.
Main::Database::Exception if a MySQL error occurred while checking for empty URLs in the current URL list.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ urlHashCheck()

void crawlservpp::Module::Crawler::Database::urlHashCheck ( )

Checks the hash values in the current URL list.

Always throws an exception, unless all hash values are correct.

Exceptions
Module::Crawler::Database::Exception if the prepared SQL statements for checking the hash values in the current URL list are missing, or if invalid hash values have been found and corrected.
Main::Database::Exception if a MySQL error occurred while checking the hash values in the URL list.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getLoggingMin(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Helper::CommaLocale::locale(), crawlservpp::Wrapper::Database::log(), crawlservpp::Wrapper::Database::sqlException(), crawlservpp::Wrapper::Database::sqlExecuteQuery(), and crawlservpp::Wrapper::Database::sqlExecuteUpdate().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ urlUtf8Check()

void crawlservpp::Module::Crawler::Database::urlUtf8Check ( )

Checks for URLs containing invalid UTF-8 characters in the current URL list.

Always throws an exception, unless all URLs in the current URL list contain only valid UTF-8-encoded characters.

Exceptions
Module::Crawler::Database::Exception if the prepared SQL statement for retrieving all URLs from the current URL list is missing, if a URL in the current URL list contains invalid UTF-8 characters, or if a UTF-8 error occurred while checking the URLs in the current URL list.
Main::Database::Exception if a MySQL error occurred while retrieving all URLs from the current URL list.
See also
getUrls

References crawlservpp::Helper::Utf8::isValidUtf8().

Member Data Documentation

◆ database

Module::Database& crawlservpp::Wrapper::Database::database
protectedinherited

Reference to the database connection for the thread.

Referenced by crawlservpp::Wrapper::Database::addColumn(), crawlservpp::Wrapper::Database::addOrUpdateTargetTable(), crawlservpp::Wrapper::Database::addPreparedStatement(), crawlservpp::Wrapper::Database::beginNoLock(), crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Wrapper::Database::clearPreparedStatement(), crawlservpp::Wrapper::Database::compressTable(), crawlservpp::Wrapper::Database::createTable(), crawlservpp::Wrapper::Database::deleteTargetTable(), crawlservpp::Wrapper::Database::dropTable(), crawlservpp::Wrapper::Database::endNoLock(), crawlservpp::Wrapper::Database::getColumnType(), crawlservpp::Wrapper::Database::getConfiguration(), crawlservpp::Wrapper::Database::getCustomData(), crawlservpp::Wrapper::Database::getLastInsertedId(), crawlservpp::Wrapper::Database::getLoggingMin(), crawlservpp::Wrapper::Database::getLoggingVerbose(), crawlservpp::Wrapper::Database::getMaxAllowedPacketSize(), crawlservpp::Wrapper::Database::getOptions(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Wrapper::Database::getQueryProperties(), crawlservpp::Wrapper::Database::getTargetTableId(), crawlservpp::Wrapper::Database::getTargetTableName(), crawlservpp::Wrapper::Database::getTargetTables(), crawlservpp::Wrapper::Database::getUrlListIdString(), crawlservpp::Wrapper::Database::getWebsiteDomain(), crawlservpp::Wrapper::Database::getWebsiteIdString(), crawlservpp::Wrapper::Database::insertCustomData(), crawlservpp::Wrapper::Database::isColumnExists(), crawlservpp::Wrapper::Database::isTableEmpty(), crawlservpp::Wrapper::Database::isTableExists(), crawlservpp::Wrapper::Database::log(), crawlservpp::Wrapper::Database::reserveForPreparedStatements(), crawlservpp::Wrapper::Database::setLogging(), crawlservpp::Wrapper::Database::setSleepOnError(), crawlservpp::Wrapper::Database::setTimeOut(), crawlservpp::Wrapper::Database::setUrlListCaseSensitive(), and 
crawlservpp::Wrapper::Database::updateCustomData().


The documentation for this class was generated from the following files: