crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
crawlservpp::Module::Extractor::Database Class Reference [final]

Class providing database functionality for extractor threads by implementing Wrapper::Database. More...

#include <Database.hpp>

Inheritance diagram for crawlservpp::Module::Extractor::Database:
Collaboration diagram for crawlservpp::Module::Extractor::Database:

Classes

class  Exception
 Class for extractor database exceptions. More...
 

Construction

 Database (Module::Database &dbThread)
 Constructor setting the database connection for the thread. More...
 

Extractor-specific Setters

void setCacheSize (std::uint64_t setCacheSize)
 Sets the maximum cache size for URLs. More...
 
void setMaxBatchSize (std::uint16_t setMaxBatchSize)
 Sets the maximum number of URLs and results to be processed at once. More...
 
void setReExtract (bool isReExtract)
 Sets whether to re-extract data from already processed URLs. More...
 
void setExtractCustom (bool isExtractCustom)
 Sets whether to extract data from custom URLs. More...
 
void setRawContentIsSource (bool isRawContentIsSource)
 Sets whether raw crawled data is used as source for the data to be extracted. More...
 
void setSources (std::queue< StringString > &tablesAndColumns)
 Sets the tables and columns of the parsed data sources. More...
 
void setTargetTable (const std::string &table)
 Sets the name of the target table. More...
 
void setTargetFields (const std::vector< std::string > &fields)
 Sets the columns of the target table. More...
 
void setLinkedTable (const std::string &table)
 Sets the name of the linked table. More...
 
void setLinkedField (const std::string &field)
 Sets the name of the linked field. More...
 
void setLinkedFields (const std::vector< std::string > &fields)
 Sets the columns of the linked table. More...
 
void setOverwrite (bool isOverwrite)
 Sets whether existing datasets with the same ID will be overwritten. More...
 
void setOverwriteLinked (bool isOverwrite)
 Sets whether existing linked datasets with the same ID will be overwritten. More...
 

Target Table Initialization

void initTargetTables ()
 Creates the target table, if it does not exist, or adds target columns needed by the extractor. More...
 

Prepared SQL Statements

void prepare ()
 Prepares the SQL statements needed by the extractor. More...
 

URLs

std::string fetchUrls (std::uint64_t lastId, std::queue< IdString > &cache, std::uint32_t lockTimeout)
 Fetches, locks, and adds the next URLs to the cache, i.e. to the caching queue to be processed. More...
 
std::uint64_t getUrlPosition (std::uint64_t urlId)
 Gets the position of a URL in the URL list. More...
 
std::uint64_t getNumberOfUrls ()
 Gets the number of URLs in the URL list. More...
 

URL Locking

std::string getLockTime (std::uint32_t lockTimeout)
 Gets the current URL lock expiration time from the database. More...
 
std::string getUrlLockTime (std::uint64_t urlId)
 Gets the current lock expiration time for a URL from the database. More...
 
std::string renewUrlLockIfOk (std::uint64_t urlId, const std::string &lockTime, std::uint32_t lockTimeout)
 Locks a URL in the database, if it is lockable, or extends its locking time, if it is still locked by the extractor. More...
 
bool unLockUrlIfOk (std::uint64_t urlId, const std::string &lockTime)
 Unlocks a URL in the database. More...
 
void unLockUrlsIfOk (std::queue< IdString > &urls, std::string &lockTime)
 Unlocks multiple URLs in the database at once. More...
 

Extracting

std::uint32_t checkExtractingTable ()
 Checks the extracting table. More...
 
bool getContent (std::uint64_t urlId, IdString &contentTo)
 Gets the latest content stored in the database for a specific URL. More...
 
void getLatestParsedData (std::uint64_t urlId, std::size_t sourceIndex, std::string &resultTo)
 Gets parsed data from the given source stored in the database for a specific URL. More...
 
void updateOrAddEntries (std::queue< DataEntry > &entries, StatusSetter &statusSetter)
 Adds extracted data to the database, or updates data that already exists. More...
 
void updateOrAddLinked (std::queue< DataEntry > &entries, StatusSetter &statusSetter)
 Adds linked data to the database, or updates data that already exists. More...
 
void setUrlsFinishedIfLockOk (std::queue< IdString > &finished)
 Sets URLs to finished in the database, except those locked by another thread. More...
 
void updateTargetTable ()
 Updates the target table. More...
 

Setters

void setLogging (std::uint8_t level, std::uint8_t min, std::uint8_t verbose)
 Sets the current, minimal, and verbose logging levels. More...
 
void setSleepOnError (std::uint64_t seconds)
 Sets the number of seconds to sleep before trying to reconnect after connection loss. More...
 
void setTimeOut (std::uint64_t milliseconds)
 Sets the maximum execution time for MySQL queries, in milliseconds. More...
 

Logging

void log (std::uint8_t level, const std::string &logEntry)
 Writes a thread-specific log entry to the database. More...
 
void log (std::uint8_t level, std::queue< std::string > &logEntries)
 Writes multiple thread-specific log entries to the database. More...
 

Websites

std::string getWebsiteDomain (std::uint64_t websiteId)
 Gets the domain of a website from the database. More...
 

Queries

void getQueryProperties (std::uint64_t queryId, QueryProperties &queryPropertiesTo)
 Gets the properties of a query from the database. More...
 

Configurations

std::string getConfiguration (std::uint64_t configId)
 Gets a configuration from the database. More...
 

Target Tables

std::uint64_t addOrUpdateTargetTable (const TargetTableProperties &properties)
 Adds a new target table or updates an existing target table in the database. More...
 
std::queue< IdString > getTargetTables (const std::string &type, std::uint64_t listId)
 Gets the target tables of the specified type for a URL list from the database. More...
 
std::uint64_t getTargetTableId (const std::string &type, std::uint64_t listId, const std::string &tableName)
 Gets the ID of a target table from the database. More...
 
std::string getTargetTableName (const std::string &type, std::uint64_t tableId)
 Gets the name of a target table from the database. More...
 
void addTargetColumn (const std::string &tableName, const TableColumn &column)
 Adds a column to the target table, if it does not exist already. More...
 
void deleteTargetTable (const std::string &type, std::uint64_t tableId)
 Deletes a target table from the database. More...
 

Locking

void beginNoLock ()
 Disables database locking by starting a new SQL transaction. More...
 
void endNoLock ()
 Re-enables database locking by ending the previous SQL transaction. More...
 

Tables

bool isTableEmpty (const std::string &tableName)
 Checks whether a table in the database is empty. More...
 
bool isTableExists (const std::string &tableName)
 Checks whether a table exists in the database. More...
 
bool isColumnExists (const std::string &tableName, const std::string &columnName)
 Checks whether a table in the database contains a specific column. More...
 
std::string getColumnType (const std::string &tableName, const std::string &columnName)
 Gets the type of a specific table column from the database. More...
 

Custom Data

void getCustomData (Data::GetValue &data)
 Gets a custom value from one column from a table row in the database. More...
 
void getCustomData (Data::GetFields &data)
 Gets custom values from multiple columns of the same type from a table row. More...
 
void getCustomData (Data::GetFieldsMixed &data)
 Gets custom values from multiple columns of different types from a table row. More...
 
void getCustomData (Data::GetColumn &data)
 Gets custom values from a table column in the database. More...
 
void getCustomData (Data::GetColumns &data)
 Gets custom values from multiple table columns of the same type. More...
 
void getCustomData (Data::GetColumnsMixed &data)
 Gets custom values from multiple table columns of different types. More...
 
void insertCustomData (const Data::InsertValue &data)
 Inserts a custom value into a table row in the database. More...
 
void insertCustomData (const Data::InsertFields &data)
 Inserts custom values into multiple table columns of the same type. More...
 
void insertCustomData (const Data::InsertFieldsMixed &data)
 Inserts custom values into multiple table columns of different types. More...
 
void updateCustomData (const Data::UpdateValue &data)
 Updates a custom value in a table row. More...
 
void updateCustomData (const Data::UpdateFields &data)
 Updates custom values in multiple table columns of the same type. More...
 
void updateCustomData (const Data::UpdateFieldsMixed &data)
 Updates custom values in multiple table columns of different types. More...
 

Request Counter

static std::uint64_t getRequestCounter ()
 Gets the number of SQL requests performed since the start of the application. More...
 

Database Connection

Module::Database & database
 Reference to the database connection for the thread. More...
 

Getters

const ModuleOptions & getOptions () const
 Gets the options of the module. More...
 
const std::string & getWebsiteIdString () const
 Gets the ID of the website used by the thread as string. More...
 
const std::string & getUrlListIdString () const
 Gets the ID of the URL list used by the thread as string. More...
 
std::uint8_t getLoggingMin () const
 Gets the minimal logging level. More...
 
std::uint8_t getLoggingVerbose () const
 Gets the level for verbose logging. More...
 
std::uint64_t getMaxAllowedPacketSize () const
 Gets the maximum allowed packet size for communicating with the MySQL server. More...
 

Validation

void checkConnection ()
 Checks whether the connection to the database is still valid and tries to reconnect if necessary. More...
 

Helper Functions for Prepared SQL Statements

void reserveForPreparedStatements (std::size_t n)
 Reserves memory for a specific number of additional prepared SQL statements. More...
 
void addPreparedStatement (const std::string &sqlQuery, std::size_t &id)
 Prepares an additional SQL statement and sets its ID. More...
 
void clearPreparedStatement (std::size_t &id)
 Clears a prepared SQL statement. More...
 
sql::PreparedStatement & getPreparedStatement (std::size_t id)
 Gets a reference to a prepared SQL statement. More...
 

Database Helper Functions

std::uint64_t getLastInsertedId ()
 Gets the last inserted ID from the database. More...
 
void createTable (const TableProperties &properties)
 Adds a table to the database. More...
 
void addColumn (const std::string &tableName, const TableColumn &column)
 Adds a column to a table in the database. More...
 
void dropTable (const std::string &tableName)
 Deletes a table from the database. More...
 
void compressTable (const std::string &tableName)
 Compresses a table in the database. More...
 
static void addDatabaseLock (const std::string &name, const IsRunningCallback &isRunningCallback)
 Adds a lock to the database class, blocking execution. More...
 
static bool tryDatabaseLock (const std::string &name)
 Tries to add a lock to the database class, not blocking execution. More...
 
static void removeDatabaseLock (const std::string &name)
 Removes a lock from the database class. More...
 

URL List Helper Function

void setUrlListCaseSensitive (std::uint64_t listId, bool isCaseSensitive)
 Sets whether the specified URL list is case-sensitive. More...
 

Exception Helper Function

static void sqlException (const std::string &function, const sql::SQLException &e)
 Catches a SQL exception and re-throws it as a specific or a generic Database::Exception. More...
 

Helper Functions for Executing SQL Queries

static bool sqlExecute (sql::PreparedStatement &sqlPreparedStatement)
 Executes a prepared SQL statement. More...
 
static sql::ResultSet * sqlExecuteQuery (sql::PreparedStatement &sqlPreparedStatement)
 Executes a prepared SQL statement and returns the resulting set. More...
 
static int sqlExecuteUpdate (sql::PreparedStatement &sqlPreparedStatement)
 Executes a prepared SQL statement and returns the number of affected rows. More...
 

Detailed Description

Class providing database functionality for extractor threads by implementing Wrapper::Database.

Constructor & Destructor Documentation

◆ Database()

crawlservpp::Module::Extractor::Database::Database ( Module::Database & dbThread)
explicit

Constructor setting the database connection for the thread.

Parameters
dbThread – Reference to the database connection used by the extractor thread.

Member Function Documentation

◆ addColumn()

void crawlservpp::Wrapper::Database::addColumn ( const std::string &  tableName,
const TableColumn &  column 
)
inlineprotectedinherited

Adds a column to a table in the database.

Parameters
tableName – Constant reference to a string containing the name of the table to which the column will be added.
column – Constant reference to a structure containing the properties of the column to be added to the table.
Exceptions
Main::Database::Exception – if no table, column, or column type has been specified, i.e. if one of the strings containing the name of the table, the name of the column, and the type of the column is empty, or if a column reference is incomplete.
Main::Database::Exception – if a MySQL error occurred while adding the column to the given table in the database.
See also
Struct::TableColumn

References crawlservpp::Main::Database::addColumn(), and crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Wrapper::Database::addTargetColumn().

◆ addDatabaseLock()

void crawlservpp::Wrapper::Database::addDatabaseLock ( const std::string &  name,
const IsRunningCallback &  isRunningCallback 
)
inlinestaticprotectedinherited

Adds a lock to the database class, blocking execution.

If a lock with the same name already exists, the function will block execution until this lock has been released, or the specified callback function returns false.

Parameters
name – Constant reference to a string containing the name of the lock to be waited for and added to the database class.
isRunningCallback – Constant reference to a function that will be regularly called during a block, to enquire whether the thread (or application) is still running. As soon as this function returns false, execution will no longer be blocked, even if the lock could not be added.

References crawlservpp::Main::Database::addDatabaseLock().

◆ addOrUpdateTargetTable()

std::uint64_t crawlservpp::Wrapper::Database::addOrUpdateTargetTable ( const TargetTableProperties & properties)
inlineinherited

Adds a new target table or updates an existing target table in the database.

Parameters
properties – Constant reference to the properties of the new target table, or the existing target table to be updated.
Returns
If no target table with the specified type and name already exists, a unique ID identifying the new target table in the database. The ID of the new table is, however, only unique among all target tables of the same type. If a target table with the specified type and name already exists, its ID will be returned instead.
Exceptions
Main::Database::Exception – if no type, website, URL list, name, or columns have been specified in the given properties of the new target table, or if a column of the already existing target table cannot be overwritten due to incompatibilities between the respective data types.
Main::Database::Exception – if a MySQL error occurred while adding the new target table, or updating the existing target table in the database.

References crawlservpp::Main::Database::addOrUpdateTargetTable(), and crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Analyzer::Database::addAdditionalTable(), crawlservpp::Module::Parser::Database::initTargetTable(), crawlservpp::Module::Analyzer::Database::initTargetTable(), and initTargetTables().

◆ addPreparedStatement()

void crawlservpp::Wrapper::Database::addPreparedStatement ( const std::string &  sqlQuery,
std::size_t &  id 
)
inlineprotectedinherited

Prepares an additional SQL statement and sets its ID.

If the current ID is not zero, the old prepared statement will be removed.

Parameters
sqlQuery – Constant reference to a string containing the SQL query for the prepared SQL statement.
id – Reference to the current ID or zero, which will be set to the new unique ID identifying the prepared SQL query in-class.
Exceptions
Main::Database::Exception – if a MySQL error occurred while preparing and adding the SQL statement.
std::out_of_range – if id contains neither zero nor a valid ID.

References crawlservpp::Main::Database::addPreparedStatement(), and crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Crawler::Database::prepare(), crawlservpp::Module::Parser::Database::prepare(), prepare(), crawlservpp::Module::Analyzer::Database::prepare(), crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), and unLockUrlsIfOk().

◆ addTargetColumn()

void crawlservpp::Wrapper::Database::addTargetColumn ( const std::string &  tableName,
const TableColumn &  column 
)
inlineinherited

Adds a column to the target table, if it does not exist already.

Does nothing if the column already exists.

Parameters
tableName – Constant reference to a string containing the name of the target table to which to add the specified column.
column – Constant reference to a structure containing the properties of the column to be added to the table.
Warning
The data type of the column will not be validated if the column already exists.

References crawlservpp::Wrapper::Database::addColumn(), crawlservpp::Wrapper::Database::isColumnExists(), and crawlservpp::Struct::TableColumn::name.

Referenced by crawlservpp::Module::Analyzer::Algo::TopicModelling::resetAlgo().

◆ beginNoLock()

void crawlservpp::Wrapper::Database::beginNoLock ( )
inlineinherited

Disables database locking by starting a new SQL transaction.

Exceptions
Main::Database::Exception – if a MySQL error occurred while starting a new SQL transaction in the database.

References crawlservpp::Main::Database::beginNoLock(), and crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Crawler::Database::getNumberOfUrls(), and crawlservpp::Module::Crawler::Database::getUrlPosition().

◆ checkConnection()

void crawlservpp::Wrapper::Database::checkConnection ( )
inlineprotectedinherited

Checks whether the connection to the database is still valid and tries to reconnect if necessary.

Warning
Afterwards, old references to prepared SQL statements might be invalid, because the connection to the database might have been reset.
Exceptions
Main::Database::Exception – if the MySQL driver is not initialized.
Main::Database::Exception – if a MySQL error occurred while attempting to reconnect to the database.

References crawlservpp::Main::Database::checkConnection(), and crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Crawler::Database::addUrlIfNotExists(), crawlservpp::Module::Crawler::Database::addUrlsIfNotExist(), checkExtractingTable(), crawlservpp::Module::Parser::Database::checkParsingTable(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Parser::Database::fetchUrls(), fetchUrls(), crawlservpp::Module::Parser::Database::getAllContents(), getContent(), crawlservpp::Module::Parser::Database::getContentIdFromParsedId(), crawlservpp::Module::Parser::Database::getLatestContent(), getLatestParsedData(), crawlservpp::Module::Parser::Database::getLockTime(), getLockTime(), crawlservpp::Module::Crawler::Database::getNextUrl(), crawlservpp::Module::Parser::Database::getNumberOfContents(), crawlservpp::Module::Crawler::Database::getNumberOfUrls(), crawlservpp::Module::Parser::Database::getNumberOfUrls(), getNumberOfUrls(), crawlservpp::Module::Analyzer::Database::getTargetTableUpdated(), crawlservpp::Module::Crawler::Database::getUrlId(), crawlservpp::Module::Crawler::Database::getUrlLockTime(), crawlservpp::Module::Parser::Database::getUrlLockTime(), getUrlLockTime(), crawlservpp::Module::Crawler::Database::getUrlPosition(), crawlservpp::Module::Parser::Database::getUrlPosition(), getUrlPosition(), crawlservpp::Module::Crawler::Database::isArchivedContentExists(), crawlservpp::Module::Crawler::Database::isUrlCrawled(), crawlservpp::Module::Crawler::Database::lockUrlIfOk(), crawlservpp::Module::Crawler::Database::prepare(), crawlservpp::Module::Parser::Database::prepare(), prepare(), crawlservpp::Module::Analyzer::Database::prepare(), crawlservpp::Module::Parser::Database::renewUrlLockIfOk(), renewUrlLockIfOk(), crawlservpp::Module::Crawler::Database::saveArchivedContent(), crawlservpp::Module::Crawler::Database::saveContent(), crawlservpp::Module::Crawler::Database::setUrlFinishedIfOk(), crawlservpp::Module::Parser::Database::setUrlsFinishedIfLockOk(), setUrlsFinishedIfLockOk(), 
crawlservpp::Module::Crawler::Database::unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlIfOk(), unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), unLockUrlsIfOk(), crawlservpp::Module::Analyzer::Database::updateAdditionalTable(), crawlservpp::Module::Parser::Database::updateOrAddEntries(), updateOrAddEntries(), updateOrAddLinked(), crawlservpp::Module::Parser::Database::updateTargetTable(), crawlservpp::Module::Analyzer::Database::updateTargetTable(), updateTargetTable(), crawlservpp::Module::Crawler::Database::urlDuplicationCheck(), crawlservpp::Module::Crawler::Database::urlEmptyCheck(), and crawlservpp::Module::Crawler::Database::urlHashCheck().

◆ checkExtractingTable()

std::uint32_t crawlservpp::Module::Extractor::Database::checkExtractingTable ( )

Checks the extracting table.

Deletes duplicate URL locks.

Returns
The number of duplicate URL locks that have been deleted. Zero, if no duplicate locks have been found.
Exceptions
Module::Extractor::Database::Exception – if the prepared SQL statement for checking the table is missing.
Main::Database::Exception – if a MySQL error occurred while checking the table.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteUpdate().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ clearPreparedStatement()

void crawlservpp::Wrapper::Database::clearPreparedStatement ( std::size_t &  id)
inlineprotectedinherited

Clears a prepared SQL statement.

Parameters
id – Reference to the current ID, which will be set to zero after the corresponding prepared SQL statement has been cleared.
Exceptions
Main::Database::Exception – if a MySQL error occurred while clearing the prepared SQL statement.
std::out_of_range – if id contains zero or an invalid ID.

References crawlservpp::Main::Database::clearPreparedStatement(), and crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Crawler::Database::prepare(), prepare(), crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), and unLockUrlsIfOk().

◆ compressTable()

void crawlservpp::Wrapper::Database::compressTable ( const std::string &  tableName)
inlineprotectedinherited

Compresses a table in the database.

The function will have no effect on the table, if the table is already compressed.

Parameters
tableName – Constant reference to a string containing the name of the table to be compressed.
Exceptions
Main::Database::Exception – if no table is specified, i.e. if the string containing the name of the table is empty, or if a row format could not be determined.
Main::Database::Exception – if a MySQL error occurred while compressing the table in the database.

References crawlservpp::Main::Database::compressTable(), and crawlservpp::Wrapper::Database::database.

◆ createTable()

void crawlservpp::Wrapper::Database::createTable ( const TableProperties & properties)
inlineprotectedinherited

Adds a table to the database.

Note
A column for the primary key named id will be created automatically.
Parameters
properties – Constant reference to a structure containing the properties of the table to be created.
Exceptions
Main::Database::Exception – if no name or columns are specified in the given properties structure, if one of the columns defined there is missing its name or data type, or if a column reference is incomplete.
Main::Database::Exception – if a MySQL error occurred while adding the table to the database.
See also
Struct::TableProperties

References crawlservpp::Main::Database::createTable(), and crawlservpp::Wrapper::Database::database.

◆ deleteTargetTable()

void crawlservpp::Wrapper::Database::deleteTargetTable ( const std::string &  type,
std::uint64_t  tableId 
)
inlineinherited

Deletes a target table from the database.

Parameters
type – Constant reference to a string containing the type of the target table to be deleted.
tableId – The ID of the target table to be deleted.
Exceptions
Main::Database::Exception – if no target table has been specified, i.e. the string containing the type is empty or the target table ID is zero.
Main::Database::Exception – if a MySQL error occurred while deleting the target table from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::deleteTargetTable().

◆ dropTable()

void crawlservpp::Wrapper::Database::dropTable ( const std::string &  tableName)
inlineprotectedinherited

Deletes a table from the database.

If the table does not exist in the database, the database will not be changed.

Parameters
tableName – Constant reference to a string containing the name of the table to be deleted, if it exists.
Exceptions
Main::Database::Exception – if no table has been specified, i.e. if the string containing the name of the table is empty.
Main::Database::Exception – if a MySQL error occurred while removing the table from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::dropTable().

Referenced by crawlservpp::Module::Analyzer::Database::addAdditionalTable(), and crawlservpp::Module::Analyzer::Database::initTargetTable().

◆ endNoLock()

void crawlservpp::Wrapper::Database::endNoLock ( )
inlineinherited

Re-enables database locking by ending the previous SQL transaction.

Exceptions
Main::Database::Exception – if a MySQL error occurred while ending the previous SQL transaction by committing the changes to the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::endNoLock().

Referenced by crawlservpp::Module::Crawler::Database::getNumberOfUrls(), and crawlservpp::Module::Crawler::Database::getUrlPosition().

◆ fetchUrls()

std::string crawlservpp::Module::Extractor::Database::fetchUrls ( std::uint64_t  lastId,
std::queue< IdString > &  cache,
std::uint32_t  lockTimeout 
)

Fetches, locks, and adds the next URLs to the cache, i.e. to the caching queue to be processed.

Parameters
lastId – The last ID that has been processed, or zero if none has been processed yet.
cache – Reference to the caching queue, i.e. the queue storing the IDs and URIs of the URLs still in the cache.
lockTimeout – The maximum locking time for the URLs that are being processed, in seconds.
Returns
The expiration time of the new lock for the URLs in the cache, as string in the format YYYY-MM-DD HH:MM:SS.
Exceptions
Module::Extractor::Database::Exception – if one of the prepared SQL statements for fetching and locking URLs is missing.
Main::Database::Exception – if a MySQL error occurred while fetching and locking the URLs.

References crawlservpp::Wrapper::Database::checkConnection(), getLockTime(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::nAtOnce10, crawlservpp::Module::Extractor::nAtOnce100, crawlservpp::Module::Extractor::numArgsLockUrl, crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Module::Extractor::sqlArg2, crawlservpp::Module::Extractor::sqlArg3, crawlservpp::Wrapper::Database::sqlException(), crawlservpp::Wrapper::Database::sqlExecute(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ getColumnType()

std::string crawlservpp::Wrapper::Database::getColumnType ( const std::string &  tableName,
const std::string &  columnName 
)
inlineinherited

Gets the type of a specific table column from the database.

Parameters
tableName – Constant reference to a string containing the name of the table in the database from which the type of the column will be retrieved.
columnName – Constant reference to a string containing the name of the column whose type will be retrieved.
Returns
A copy of the name of the given table column's data type, without specifiers like 'UNSIGNED'.
Exceptions
Main::Database::Exception – if no table or column has been specified, i.e. one of the strings containing the name and the column is empty.
Main::Database::Exception – if a MySQL error occurred while retrieving the type of the given column, e.g. if the specified table does not exist.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getColumnType().

Referenced by crawlservpp::Module::Analyzer::Thread::uploadResult().

◆ getConfiguration()

std::string crawlservpp::Wrapper::Database::getConfiguration ( std::uint64_t  configId)
inlineinherited

Gets a configuration from the database.

Parameters
configId – The ID of the configuration to be retrieved from the database.
Returns
A copy of the configuration's JSON string as stored in the database, or an empty string if the given configuration does not exist in the database.
Exceptions
Main::Database::Exception – if no configuration has been specified, i.e. the configuration ID is zero.
Main::Database::Exception – if a MySQL error occurred while retrieving the configuration from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getConfiguration().

Referenced by crawlservpp::Module::Analyzer::Thread::cleanUpQueries(), crawlservpp::Module::Parser::Thread::onReset(), crawlservpp::Module::Extractor::Thread::onReset(), and crawlservpp::Module::Crawler::Thread::onReset().

◆ getContent()

bool crawlservpp::Module::Extractor::Database::getContent ( std::uint64_t  urlId,
IdString &  contentTo 
)

Gets the latest content stored in the database for a specific URL.

Parameters
urlId – ID of the URL whose latest content will be retrieved from the database.
contentTo – Reference to a pair, to which the content and its ID will be written.
Returns
True, if the requested content for the given URL has been retrieved, even when it is empty. False, if no content has been stored for the URL in the database.
Exceptions
Module::Extractor::Database::Exception – if no URL has been specified, i.e. the given URL ID is zero, or if the prepared SQL statement for retrieving the latest content for a URL from the database is missing.
Main::Database::Exception – if a MySQL error occurred while trying to retrieve the content from the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ getCustomData() [1/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetValue & data)
inlineinherited

Gets a custom value from one column from a table row in the database.

Parameters
data – Reference to the data structure that identifies the column, and to which the result will be written.
Exceptions
Main::Database::Exception – if no column name or no column type is specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exception – if a MySQL error occurred while retrieving the data.
See also
Data::GetValue

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

Referenced by crawlservpp::Module::Analyzer::Database::checkSources(), and crawlservpp::Module::Analyzer::Thread::uploadResult().

◆ getCustomData() [2/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetFields data)
inlineinherited

Gets custom values from multiple columns of the same type from a table row.

Parameters
dataReference to the data structure that identifies the columns, and to which the result will be written.
Exceptions
Main::Database::Exceptionif no column names or no column type are specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while retrieving the data.
See also
Data::GetFields

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getCustomData() [3/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetFieldsMixed data)
inlineinherited

Gets custom values from multiple columns of different types from a table row.

Parameters
dataReference to the data structure that identifies the columns and their types, and to which the result will be written.
Exceptions
Main::Database::Exceptionif no columns are specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while retrieving the data.
See also
Data::GetFieldsMixed

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getCustomData() [4/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetColumn data)
inlineinherited

Gets custom values from a table column in the database.

Parameters
dataReference to the data structure that identifies the column, and to which the result will be written.
Exceptions
Main::Database::Exceptionif no column or column type is specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while retrieving the data.
See also
Data::GetColumn

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getCustomData() [5/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetColumns data)
inlineinherited

Gets custom values from multiple table columns of the same type.

Parameters
dataReference to the data structure that identifies the columns, and to which the result will be written.
Exceptions
Main::Database::Exceptionif no column or column type is specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while retrieving the data.
See also
Data::GetColumns

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getCustomData() [6/6]

void crawlservpp::Wrapper::Database::getCustomData ( Data::GetColumnsMixed data)
inlineinherited

Gets custom values from multiple table columns of different types.

Parameters
dataReference to the data structure that identifies the columns and their types, and to which the result will be written.
Exceptions
Main::Database::Exceptionif no columns have been specified in the given data structure, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while retrieving the data.
See also
Data::GetColumnsMixed

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getCustomData().

◆ getLastInsertedId()

std::uint64_t crawlservpp::Wrapper::Database::getLastInsertedId ( )
inlineprotectedinherited

Gets the last inserted ID from the database.

Returns
The last inserted ID from the database.
Exceptions
Main::Database::Exceptionif the prepared SQL statement for retrieving the last inserted ID from the database is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the last inserted ID from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getLastInsertedId().

Referenced by crawlservpp::Module::Analyzer::Database::checkSources().

◆ getLatestParsedData()

void crawlservpp::Module::Extractor::Database::getLatestParsedData ( std::uint64_t  urlId,
std::size_t  sourceIndex,
std::string &  resultTo 
)

Gets parsed data from the given source stored in the database for a specific URL.

Note
The source index is determined by the order in which the sources have been added, starting at zero.
Parameters
urlIdID of the URL whose parsed data will be retrieved from the database.
sourceIndexZero-based index of the source from which to retrieve the data, as specified in the tables and columns passed to setSources().
resultToReference to a string, to which the retrieved data will be written. Will be left unchanged, if the specified data has not been found.
Exceptions
Module::Extractor::Database::Exceptionif no URL has been specified, i.e. the given URL ID is zero, or if the prepared SQL statement for retrieving parsed data for the given URL from the specified source is missing, e.g. if the source index is invalid.
Main::Database::Exceptionif a MySQL error occurred while trying to retrieve the data from the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ getLockTime()

std::string crawlservpp::Module::Extractor::Database::getLockTime ( std::uint32_t  lockTimeout)

Gets the current URL lock expiration time from the database.

The database calculates the lock expiration time based on the given local maximum locking time.

Parameters
lockTimeoutThe maximum URL locking time, in seconds.
Returns
The current URL lock expiration time, as string in the format YYYY-MM-DD HH:MM:SS.
Exceptions
Module::Extractor::Database::Exceptionif the prepared SQL statement for calculating the URL lock expiration time is missing.
Main::Database::Exceptionif a MySQL error occurred while calculating the current URL lock expiration time.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by fetchUrls(), and renewUrlLockIfOk().

◆ getLoggingMin()

◆ getLoggingVerbose()

std::uint8_t crawlservpp::Wrapper::Database::getLoggingVerbose ( ) const
inlineprotectedinherited

Gets the level for verbose logging.

Returns
The logging level, in which verbose logging is activated.

References crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Crawler::Database::prepare(), crawlservpp::Module::Parser::Database::prepare(), prepare(), and crawlservpp::Module::Analyzer::Database::prepare().

◆ getMaxAllowedPacketSize()

std::uint64_t crawlservpp::Wrapper::Database::getMaxAllowedPacketSize ( ) const
inlineprotectedinherited

Gets the maximum allowed packet size for communicating with the MySQL server.

Returns
The maximum allowed packet size for communicating with the MySQL server, in bytes, or zero if not connected to the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getMaxAllowedPacketSize().

Referenced by crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Crawler::Database::saveArchivedContent(), crawlservpp::Module::Crawler::Database::saveContent(), crawlservpp::Module::Parser::Database::updateTargetTable(), and updateTargetTable().

◆ getNumberOfUrls()

std::uint64_t crawlservpp::Module::Extractor::Database::getNumberOfUrls ( )

Gets the number of URLs in the URL list.

Returns
The number of URLs in the current URL list, or zero if the URL list is empty.
Exceptions
Module::Extractor::Database::Exceptionif the prepared SQL statement for retrieving the number of URLs in the URL list is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the number of URLs in the URL list.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ getOptions()

◆ getPreparedStatement()

sql::PreparedStatement & crawlservpp::Wrapper::Database::getPreparedStatement ( std::size_t  id)
inlineprotectedinherited

Gets a reference to a prepared SQL statement.

Warning
Do not run checkConnection while using this reference, because the references will be invalidated when reconnecting to the database!
Parameters
idThe ID of the prepared SQL statement to retrieve.
Returns
A reference to the prepared SQL statement.
Exceptions
Main::Database::Exceptionif a MySQL error occurred while retrieving the prepared SQL statement.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getPreparedStatement().

Referenced by crawlservpp::Module::Crawler::Database::addUrlIfNotExists(), crawlservpp::Module::Crawler::Database::addUrlsIfNotExist(), checkExtractingTable(), crawlservpp::Module::Parser::Database::checkParsingTable(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Parser::Database::fetchUrls(), fetchUrls(), crawlservpp::Module::Parser::Database::getAllContents(), getContent(), crawlservpp::Module::Parser::Database::getContentIdFromParsedId(), crawlservpp::Module::Parser::Database::getLatestContent(), getLatestParsedData(), crawlservpp::Module::Parser::Database::getLockTime(), getLockTime(), crawlservpp::Module::Crawler::Database::getNextUrl(), crawlservpp::Module::Parser::Database::getNumberOfContents(), crawlservpp::Module::Crawler::Database::getNumberOfUrls(), crawlservpp::Module::Parser::Database::getNumberOfUrls(), getNumberOfUrls(), crawlservpp::Module::Analyzer::Database::getTargetTableUpdated(), crawlservpp::Module::Crawler::Database::getUrlId(), crawlservpp::Module::Crawler::Database::getUrlLockTime(), crawlservpp::Module::Parser::Database::getUrlLockTime(), getUrlLockTime(), crawlservpp::Module::Crawler::Database::getUrlPosition(), crawlservpp::Module::Parser::Database::getUrlPosition(), getUrlPosition(), crawlservpp::Module::Crawler::Database::isArchivedContentExists(), crawlservpp::Module::Crawler::Database::isUrlCrawled(), crawlservpp::Module::Crawler::Database::lockUrlIfOk(), crawlservpp::Module::Parser::Database::renewUrlLockIfOk(), renewUrlLockIfOk(), crawlservpp::Module::Crawler::Database::saveArchivedContent(), crawlservpp::Module::Crawler::Database::saveContent(), crawlservpp::Module::Crawler::Database::setUrlFinishedIfOk(), crawlservpp::Module::Parser::Database::setUrlsFinishedIfLockOk(), setUrlsFinishedIfLockOk(), crawlservpp::Module::Crawler::Database::unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlIfOk(), unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), 
unLockUrlsIfOk(), crawlservpp::Module::Analyzer::Database::updateAdditionalTable(), crawlservpp::Module::Parser::Database::updateOrAddEntries(), updateOrAddEntries(), updateOrAddLinked(), crawlservpp::Module::Parser::Database::updateTargetTable(), crawlservpp::Module::Analyzer::Database::updateTargetTable(), updateTargetTable(), crawlservpp::Module::Crawler::Database::urlDuplicationCheck(), crawlservpp::Module::Crawler::Database::urlEmptyCheck(), and crawlservpp::Module::Crawler::Database::urlHashCheck().

◆ getQueryProperties()

void crawlservpp::Wrapper::Database::getQueryProperties ( std::uint64_t  queryId,
QueryProperties queryPropertiesTo 
)
inlineinherited

Gets the properties of a query from the database.

Parameters
queryIdThe ID of the query for which the properties will be retrieved from the database.
queryPropertiesToReference to the structure to which the retrieved properties of the query will be written.
Exceptions
Main::Database::Exceptionif no query ID has been specified, i.e. the query ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the properties of the given query from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getQueryProperties().

Referenced by crawlservpp::Module::Analyzer::Thread::addOptionalQuery(), crawlservpp::Module::Analyzer::Thread::addQueries(), crawlservpp::Module::Parser::Thread::onReset(), crawlservpp::Module::Extractor::Thread::onReset(), and crawlservpp::Module::Crawler::Thread::onReset().

◆ getRequestCounter()

std::uint64_t crawlservpp::Wrapper::Database::getRequestCounter ( )
inlinestaticinherited

Gets the number of SQL requests performed since the start of the application.

Note
By default, the request counter should be deactivated and the function always return zero.
Returns
The number of SQL requests performed since the start of the application, or zero if the request counter was not activated at compile time.

References crawlservpp::Main::Database::getRequestCounter().

Referenced by crawlservpp::Main::Database::sqlExecute(), crawlservpp::Main::Database::sqlExecuteQuery(), and crawlservpp::Main::Database::sqlExecuteUpdate().

◆ getTargetTableId()

std::uint64_t crawlservpp::Wrapper::Database::getTargetTableId ( const std::string &  type,
std::uint64_t  listId,
const std::string &  tableName 
)
inlineinherited

Gets the ID of a target table from the database.

Parameters
typeConstant reference to a string containing the type of the target table for which to retrieve its ID.
listIdThe ID of the URL list associated with the target table for which to retrieve its ID.
tableNameConstant reference to a string containing the name of the target table for which to retrieve its ID.
Returns
The ID of the specified target table as stored in the database.
Exceptions
Main::Database::Exceptionif no target table or URL list has been specified, i.e. if the string containing the type is empty, or the target table or the URL list ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the ID of the target table from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getTargetTableId().

◆ getTargetTableName()

std::string crawlservpp::Wrapper::Database::getTargetTableName ( const std::string &  type,
std::uint64_t  tableId 
)
inlineinherited

Gets the name of a target table from the database.

Parameters
typeConstant reference to a string containing the type of the target table for which to retrieve its name.
tableIdThe ID of the target table for which to retrieve its name.
Returns
A copy of the name of the specified target table as stored in the database.
Exceptions
Main::Database::Exceptionif no target table has been specified, i.e. the string containing the type is empty or the target table ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the name of the target table from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getTargetTableName().

◆ getTargetTables()

std::queue< Database::IdString > crawlservpp::Wrapper::Database::getTargetTables ( const std::string &  type,
std::uint64_t  listId 
)
inlineinherited

Gets the target tables of the specified type for a URL list from the database.

Parameters
typeConstant reference to a string containing the type of the target tables to retrieve.
listIdThe ID of the URL list for which to retrieve the target tables.
Returns
A queue containing the IDs and names of the target tables of the given type for the specified URL list.
Exceptions
Main::Database::Exceptionif no type or URL list has been specified, i.e. the string containing the type is empty or the URL list ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the target tables from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getTargetTables().

◆ getUrlListIdString()

const std::string & crawlservpp::Wrapper::Database::getUrlListIdString ( ) const
inlineprotectedinherited

Gets the ID of the URL list used by the thread as string.

Returns
A reference to the string containing the ID of the URL list used by the thread.

References crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Analyzer::Database::prepare().

◆ getUrlLockTime()

std::string crawlservpp::Module::Extractor::Database::getUrlLockTime ( std::uint64_t  urlId)

Gets the current lock expiration time for a URL from the database.

Parameters
urlIdID of the URL whose current lock expiration time will be retrieved from the database.
Returns
The current lock expiration time of the given URL, as string in the format YYYY-MM-DD HH:MM:SS, or an empty string, if no URL is given, or the URL has not been locked.
Exceptions
Module::Extractor::Database::Exceptionif the prepared SQL statement for retrieving the current lock expiration time of a URL is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the current lock expiration time for the given URL.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

◆ getUrlPosition()

std::uint64_t crawlservpp::Module::Extractor::Database::getUrlPosition ( std::uint64_t  urlId)

Gets the position of a URL in the URL list.

Parameters
urlIdThe ID of the URL whose position will be retrieved from the database.
Returns
The position of the URL in the URL list, starting with zero for the beginning of the list.
Exceptions
Module::Extractor::Database::Exceptionif no URL has been specified, i.e. the given URL ID is zero, or if the prepared SQL statement for retrieving the position of a URL in the URL list is missing.
Main::Database::Exceptionif a MySQL error occurred while retrieving the position of the URL in the URL list.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ getWebsiteDomain()

std::string crawlservpp::Wrapper::Database::getWebsiteDomain ( std::uint64_t  websiteId)
inlineinherited

Gets the domain of a website from the database.

Parameters
websiteIdThe ID of the website for which the domain will be retrieved from the database.
Returns
A copy of the domain name of the given website.
Exceptions
Main::Database::Exceptionif no website has been specified, i.e. the website ID is zero.
Main::Database::Exceptionif a MySQL error occurred while retrieving the domain name of the given website from the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::getWebsiteDomain().

Referenced by crawlservpp::Module::Crawler::Thread::onReset().

◆ getWebsiteIdString()

const std::string & crawlservpp::Wrapper::Database::getWebsiteIdString ( ) const
inlineprotectedinherited

Gets the ID of the website used by the thread as string.

Returns
A reference to the string containing the ID of the website used by the thread.

References crawlservpp::Wrapper::Database::database.

Referenced by crawlservpp::Module::Analyzer::Database::prepare().

◆ initTargetTables()

void crawlservpp::Module::Extractor::Database::initTargetTables ( )

Creates the target table, if it does not exist, or adds target columns needed by the extractor.

If the target table does not exist, it will be created. If the target table exists, those target columns that it does not already contain will be added to the existing table.

If necessary, the linked table will also be created, or updated.

Exceptions
Module::Extractor::Exceptionif the column used to link data to the target table does not exist.
Main::Database::Exceptionif a MySQL error occurred while adding the new target table, updating the existing target table, adding the new linked table, or updating the existing linked table in the database.
See also
setTargetTable, setTargetFields, setLinkedTable, setLinkedFields, Main::Database::addTargetTable

References crawlservpp::Wrapper::Database::addOrUpdateTargetTable(), crawlservpp::Struct::TargetTableProperties::columns, crawlservpp::Wrapper::Database::getOptions(), crawlservpp::Module::Extractor::minLinkedColumns, and crawlservpp::Module::Extractor::minTargetColumns.

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ insertCustomData() [1/3]

void crawlservpp::Wrapper::Database::insertCustomData ( const Data::InsertValue data)
inlineinherited

◆ insertCustomData() [2/3]

void crawlservpp::Wrapper::Database::insertCustomData ( const Data::InsertFields data)
inlineinherited

Inserts custom values into multiple table columns of the same type.

Parameters
dataConstant reference to a structure containing the data to be inserted.
Exceptions
Main::Database::Exceptionif no table, columns, or column type have been specified in the given data structure, if the given data is too large, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while inserting the data.
See also
Data::InsertFields

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::insertCustomData().

◆ insertCustomData() [3/3]

void crawlservpp::Wrapper::Database::insertCustomData ( const Data::InsertFieldsMixed data)
inlineinherited

Inserts custom values into multiple table columns of different types.

Parameters
dataConstant reference to a structure containing the data to be inserted.
Exceptions
Main::Database::Exceptionif no table or columns have been specified in the given data structure, if the given data is too large, or if an invalid data type has been encountered.
Main::Database::Exceptionif a MySQL error occurred while inserting the data.
See also
Data::InsertFieldsMixed

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::insertCustomData().

◆ isColumnExists()

bool crawlservpp::Wrapper::Database::isColumnExists ( const std::string &  tableName,
const std::string &  columnName 
)
inlineinherited

Checks whether a table in the database contains a specific column.

Parameters
tableNameConstant reference to a string containing the name of the table in the database in which the existence of the column will be checked.
columnNameConstant reference to a string containing the name of the column to be checked for in the given table.
Returns
True, if the given column exists in the specified table. False otherwise.
Exceptions
Main::Database::Exceptionif no table or columns have been specified, i.e. one of the strings containing the name and the column is empty.
Main::Database::Exceptionif a MySQL error occurred while checking the existence of the given column, e.g. if the specified table does not exist.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::isColumnExists().

Referenced by crawlservpp::Wrapper::Database::addTargetColumn(), and crawlservpp::Module::Analyzer::Thread::uploadResult().

◆ isTableEmpty()

bool crawlservpp::Wrapper::Database::isTableEmpty ( const std::string &  tableName)
inlineinherited

Checks whether a table in the database is empty.

Parameters
tableNameConstant reference to a string containing the name of the table whose contents will be checked in the database.
Returns
True, if the given table is empty. False if it contains data.
Exceptions
Main::Database::Exceptionif no table has been specified, i.e. the string containing the name is empty.
Main::Database::Exceptionif a MySQL error occurred while checking the content of the given table in the database, e.g. if the table does not exist.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::isTableEmpty().

◆ isTableExists()

bool crawlservpp::Wrapper::Database::isTableExists ( const std::string &  tableName)
inlineinherited

Checks whether a table exists in the database.

Parameters
tableNameConstant reference to a string containing the name of the table whose existence in the database will be checked.
Returns
True, if the given table exists in the database. False otherwise.
Exceptions
Main::Database::Exceptionif no table has been specified, i.e. the string containing the name is empty.
Main::Database::Exceptionif a MySQL error occurred while checking the existence of the given table in the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::isTableExists().

◆ log() [1/2]

void crawlservpp::Wrapper::Database::log ( std::uint8_t  level,
const std::string &  logEntry 
)
inlineinherited

Writes a thread-specific log entry to the database.

Removes invalid UTF-8 characters if necessary.

If debug logging is active, the entry will be written to the logging file as well.

The log entry will not be written to the database, if the current logging level is lower than the specified logging level. The logging level does not affect the writing of log entries to the logging file when debug logging is active.

Note
String views cannot be used, because they are not supported by the API for the MySQL database.
Parameters
levelThe logging level for the entry. The entry will only be written to the database, if the current logging level is at least the logging level for the entry.
logEntryConstant reference to a string containing the log entry.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Module::Database::log().

Referenced by crawlservpp::Module::Analyzer::Database::addAdditionalTable(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Analyzer::Database::getCorpus(), crawlservpp::Module::Analyzer::Database::initTargetTable(), crawlservpp::Module::Crawler::Database::prepare(), crawlservpp::Module::Parser::Database::prepare(), prepare(), crawlservpp::Module::Analyzer::Database::prepare(), crawlservpp::Module::Crawler::Database::saveArchivedContent(), crawlservpp::Module::Crawler::Database::saveContent(), crawlservpp::Module::Parser::Database::setTargetTable(), setTargetTable(), crawlservpp::Module::Analyzer::Database::setTargetTable(), crawlservpp::Module::Analyzer::Database::updateAdditionalTable(), crawlservpp::Module::Parser::Database::updateTargetTable(), crawlservpp::Module::Analyzer::Database::updateTargetTable(), updateTargetTable(), and crawlservpp::Module::Crawler::Database::urlHashCheck().

◆ log() [2/2]

void crawlservpp::Wrapper::Database::log ( std::uint8_t  level,
std::queue< std::string > &  logEntries 
)
inlineinherited

Writes multiple thread-specific log entries to the database.

Removes invalid UTF-8 characters if necessary.

If debug logging is active, the entries will be written to the logging file as well.

The log entries will not be written to the database, if the current logging level is lower than the specified logging level. The logging level does not affect the writing of log entries to the logging file when debug logging is active.

Note
String views cannot be used, because they are not supported by the API for the MySQL database.
Parameters
levelThe logging level for the entries. The entries will only be written to the database, if the current logging level is at least the logging level for the entry.
logEntriesReference to a queue of strings containing the log entries to be written. It will be emptied regardless of whether the log entries will be written to the database.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Module::Database::log().

◆ prepare()

◆ removeDatabaseLock()

void crawlservpp::Wrapper::Database::removeDatabaseLock ( const std::string &  name)
inlinestaticprotectedinherited

Removes a lock from the database class.

Does nothing if a lock with the given name does not exist in the database class.

Parameters
nameConstant reference to a string containing the name of the lock to be removed from the database class.

References crawlservpp::Main::Database::removeDatabaseLock().

◆ renewUrlLockIfOk()

std::string crawlservpp::Module::Extractor::Database::renewUrlLockIfOk ( std::uint64_t  urlId,
const std::string &  lockTime,
std::uint32_t  lockTimeout 
)

Locks a URL in the database, if it is lockable, or extends its locking time, if it is still locked by the extractor.

Parameters
urlIdID of the URL that will be locked, or whose locking time will be extended.
lockTimeThe expiration time of the previous lock held over the given URL by the current thread.
lockTimeoutThe maximum URL locking time, in seconds.
Returns
The new expiration time of the lock, as string in the format YYYY-MM-DD HH:MM:SS, or an empty string, if the URL could not be locked, because it is currently locked by another thread.
Exceptions
Module::Extractor::Database::Exceptionif no URL has been specified, i.e. the given URL ID is zero, or if the prepared SQL statement for locking a URL is missing.
Main::Database::Exceptionif a MySQL error occurred while locking the URL, or renewing its URL lock.

References crawlservpp::Wrapper::Database::checkConnection(), getLockTime(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Module::Extractor::sqlArg2, crawlservpp::Module::Extractor::sqlArg3, crawlservpp::Module::Extractor::sqlArg4, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteUpdate().

Referenced by crawlservpp::Module::Extractor::Thread::onTick().

◆ reserveForPreparedStatements()

void crawlservpp::Wrapper::Database::reserveForPreparedStatements ( std::size_t  n)
inlineprotectedinherited

Reserves memory for a specific number of additional prepared SQL statements.

Parameters
nNumber of prepared SQL statements for which memory should be reserved.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::reserveForPreparedStatements().

Referenced by crawlservpp::Module::Crawler::Database::prepare(), crawlservpp::Module::Parser::Database::prepare(), prepare(), and crawlservpp::Module::Analyzer::Database::prepare().

◆ setCacheSize()

void crawlservpp::Module::Extractor::Database::setCacheSize ( std::uint64_t  setCacheSize)

Sets the maximum cache size for URLs.

Note
Needs to be set before preparing the SQL statements for the extractor.
Parameters
setCacheSizeThe maximum number of URLs that can be cached.
See also
prepare

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setExtractCustom()

void crawlservpp::Module::Extractor::Database::setExtractCustom ( bool  isExtractCustom)

Sets whether to extract data from custom URLs.

Note
Needs to be set before preparing the SQL statements for the extractor.
Parameters
isExtractCustomSet to true, and data will be extracted from custom URLs as well.
See also
prepare

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setLinkedField()

void crawlservpp::Module::Extractor::Database::setLinkedField ( const std::string &  field)

Sets the name of the linked field.

The name of the linked field must exist in the target table.

Note
Needs to be set before initializing the target table.
Parameters
fieldConstant reference to a string containing the name of the extracted field to link to the linked data.
See also
setLinkedTable

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setLinkedFields()

void crawlservpp::Module::Extractor::Database::setLinkedFields ( const std::vector< std::string > &  fields)

Sets the columns of the linked table.

Note
Needs to be set before initializing the target table.
Parameters
fieldsConstant reference to a vector containing the names of the columns to which the linked data will be written.
See also
setLinkedTable, initTargetTables

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setLinkedTable()

void crawlservpp::Module::Extractor::Database::setLinkedTable ( const std::string &  table)

Sets the name of the linked table.

Note
Needs to be set before initializing the target table.
Parameters
tableConstant reference to a string containing the name of the table to which the linked data will be written.
See also
setLinkedFields, initTargetTable

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setLogging()

void crawlservpp::Wrapper::Database::setLogging ( std::uint8_t  level,
std::uint8_t  min,
std::uint8_t  verbose 
)
inlineinherited

Sets the current, minimal, and verbose logging levels.

Initializes debug logging via logging file if necessary.

Parameters
levelThe current logging level.
minThe minimum logging level.
verboseThe verbose logging level.
Exceptions
Module::Database::Exceptionif the logging file could not be opened for writing.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Module::Database::setLogging().

Referenced by crawlservpp::Module::Analyzer::Thread::cleanUpQueries(), crawlservpp::Module::Parser::Thread::onReset(), crawlservpp::Module::Extractor::Thread::onReset(), and crawlservpp::Module::Crawler::Thread::onReset().

◆ setMaxBatchSize()

void crawlservpp::Module::Extractor::Database::setMaxBatchSize ( std::uint16_t  setMaxBatchSize)

Sets the maximum number of URLs and results to be processed at once.

Parameters
setMaxBatchSizeThe maximum number of URLs and results that will be processed in one MySQL query.

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setOverwrite()

void crawlservpp::Module::Extractor::Database::setOverwrite ( bool  isOverwrite)

Sets whether existing datasets with the same ID will be overwritten.

Note
Needs to be set before initializing the target table.
Parameters
isOverwriteSet to true, and datasets with the same ID will be overwritten.
See also
prepare

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setOverwriteLinked()

void crawlservpp::Module::Extractor::Database::setOverwriteLinked ( bool  isOverwrite)

Sets whether existing linked datasets with the same ID will be overwritten.

Note
Needs to be set before initializing the target table.
Parameters
isOverwriteSet to true, and linked datasets with the same ID will be overwritten.
See also
prepare

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setRawContentIsSource()

void crawlservpp::Module::Extractor::Database::setRawContentIsSource ( bool  isRawContentIsSource)

Sets whether raw crawled data is used as source for the data to be extracted.

Note
Needs to be set before preparing the SQL statements for the extractor.
Parameters
isRawContentIsSourceSet to true, if raw crawled data will be the source of the extracted data.
See also
prepare

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setReExtract()

void crawlservpp::Module::Extractor::Database::setReExtract ( bool  isReExtract)

Sets whether to re-extract data from already processed URLs.

Note
Needs to be set before preparing the SQL statements for the extractor.
Parameters
isReExtractSet to true, and data from already processed URLs will be re-extracted.
See also
prepare

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setSleepOnError()

void crawlservpp::Wrapper::Database::setSleepOnError ( std::uint64_t  seconds)
inlineinherited

Sets the number of seconds to sleep before trying to reconnect after connection loss.

Parameters
secondsThe number of seconds to wait before trying to reconnect to the MySQL server after the connection got lost.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::setSleepOnError().

Referenced by crawlservpp::Module::Analyzer::Thread::cleanUpQueries(), crawlservpp::Module::Parser::Thread::onReset(), crawlservpp::Module::Extractor::Thread::onReset(), and crawlservpp::Module::Crawler::Thread::onReset().

◆ setSources()

void crawlservpp::Module::Extractor::Database::setSources ( std::queue< StringString > &  tablesAndColumns)

Sets the tables and columns of the parsed data sources.

Note
Needs to be set before preparing the SQL statements for the extractor.
Warning
Uses std::queue::swap() – do not use the argument after the call!
Parameters
tablesAndColumnsReference to a queue containing the tables and columns to be used as sources for the parsed data. Will be invalidated by the call.
See also
prepare

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setTargetFields()

void crawlservpp::Module::Extractor::Database::setTargetFields ( const std::vector< std::string > &  fields)

Sets the columns of the target table.

Note
Needs to be set before initializing the target table.
Parameters
fieldsConstant reference to a vector containing the names of the columns to which the extracted data will be written.
See also
setTargetTable, initTargetTable

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setTargetTable()

void crawlservpp::Module::Extractor::Database::setTargetTable ( const std::string &  table)

Sets the name of the target table.

Note
Needs to be set before initializing the target table.
Parameters
tableConstant reference to a string containing the name of the table to which the extracted data will be written.
See also
setTargetFields, initTargetTable

References crawlservpp::Wrapper::Database::getLoggingMin(), and crawlservpp::Wrapper::Database::log().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ setTimeOut()

void crawlservpp::Wrapper::Database::setTimeOut ( std::uint64_t  milliseconds)
inlineinherited

Sets the maximum execution time for MySQL queries, in milliseconds.

Note
The database connection needs to be established before setting the time-out.
Parameters
millisecondsThe number of milliseconds for a MySQL query to run before it gets cancelled, or zero to disable the time-out for MySQL queries.
Exceptions
Main::Database::Exceptionif a MySQL error occurs while setting the execution time.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::setTimeOut().

Referenced by crawlservpp::Module::Parser::Thread::onReset().

◆ setUrlListCaseSensitive()

void crawlservpp::Wrapper::Database::setUrlListCaseSensitive ( std::uint64_t  listId,
bool  isCaseSensitive 
)
inlineprotectedinherited

Sets whether the specified URL list is case-sensitive.

Warning
The case-sensitivity should not be changed once URLs have been retrieved!
Parameters
listIdThe ID of the URL list whose case-sensitivity will be changed.
isCaseSensitiveSpecify whether URLs in the given URL list will be case-sensitive or not.
Exceptions
Main::Database::Exception if no URL list has been specified, i.e. the URL list ID is zero.
Main::Database::Exception if a MySQL error occurred while setting the case-sensitivity of the URL list.

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::setUrlListCaseSensitive().

Referenced by crawlservpp::Module::Crawler::Database::setUrlCaseSensitive().

◆ setUrlsFinishedIfLockOk()

void crawlservpp::Module::Extractor::Database::setUrlsFinishedIfLockOk ( std::queue< IdString > &  finished)

Sets URLs to finished in the database, except those locked by another thread.

Skips URLs that have been locked by another thread, and whose lock is still active.

Parameters
finishedReference to a queue of pairs, containing the IDs and URIs of the URLs to be set to finished. If empty, nothing will be done.
Exceptions
Module::Extractor::Database::Exception if any of the prepared SQL statements for setting URLs to finished is missing.
Main::Database::Exception if a MySQL error occurred while setting URLs to finished in the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::nAtOnce10, crawlservpp::Module::Extractor::nAtOnce100, crawlservpp::Module::Extractor::numArgsFinishUrl, crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Module::Extractor::sqlArg2, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecute().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ sqlException()

void crawlservpp::Wrapper::Database::sqlException ( const std::string &  function,
const sql::SQLException &  e 
)
inlinestaticprotectedinherited

Catches a SQL exception and re-throws it as a specific or a generic Database::Exception.

Note
Always throws an exception.
Parameters
functionConstant reference to a string containing the name of the function in which the exception has been thrown.
eConstant reference to the SQL exception that has been thrown.
Exceptions
Main::Database::ConnectionException
Main::Database::StorageEngineException
Main::Database::PrivilegesException
Main::Database::WrongArgumentsException
Main::Database::IncorrectPathException
Main::Database::Exceptiondepending on the SQL exception thrown.

References crawlservpp::Main::Database::sqlException().

Referenced by crawlservpp::Module::Crawler::Database::addUrlIfNotExists(), crawlservpp::Module::Crawler::Database::addUrlsIfNotExist(), checkExtractingTable(), crawlservpp::Module::Parser::Database::checkParsingTable(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Parser::Database::fetchUrls(), fetchUrls(), crawlservpp::Module::Parser::Database::getAllContents(), getContent(), crawlservpp::Module::Parser::Database::getContentIdFromParsedId(), crawlservpp::Module::Parser::Database::getLatestContent(), getLatestParsedData(), crawlservpp::Module::Parser::Database::getLockTime(), getLockTime(), crawlservpp::Module::Crawler::Database::getNextUrl(), crawlservpp::Module::Parser::Database::getNumberOfContents(), crawlservpp::Module::Crawler::Database::getNumberOfUrls(), crawlservpp::Module::Parser::Database::getNumberOfUrls(), getNumberOfUrls(), crawlservpp::Module::Analyzer::Database::getTargetTableUpdated(), crawlservpp::Module::Crawler::Database::getUrlId(), crawlservpp::Module::Crawler::Database::getUrlLockTime(), crawlservpp::Module::Parser::Database::getUrlLockTime(), getUrlLockTime(), crawlservpp::Module::Crawler::Database::getUrlPosition(), crawlservpp::Module::Parser::Database::getUrlPosition(), getUrlPosition(), crawlservpp::Module::Crawler::Database::isArchivedContentExists(), crawlservpp::Module::Crawler::Database::isUrlCrawled(), crawlservpp::Module::Crawler::Database::lockUrlIfOk(), crawlservpp::Module::Analyzer::Database::prepare(), crawlservpp::Module::Parser::Database::renewUrlLockIfOk(), renewUrlLockIfOk(), crawlservpp::Module::Crawler::Database::saveArchivedContent(), crawlservpp::Module::Crawler::Database::saveContent(), crawlservpp::Module::Crawler::Database::setUrlFinishedIfOk(), crawlservpp::Module::Parser::Database::setUrlsFinishedIfLockOk(), setUrlsFinishedIfLockOk(), crawlservpp::Module::Crawler::Database::unLockUrlIfOk(), crawlservpp::Module::Parser::Database::unLockUrlIfOk(), unLockUrlIfOk(), 
crawlservpp::Module::Parser::Database::unLockUrlsIfOk(), unLockUrlsIfOk(), crawlservpp::Module::Analyzer::Database::updateAdditionalTable(), crawlservpp::Module::Parser::Database::updateOrAddEntries(), updateOrAddEntries(), updateOrAddLinked(), crawlservpp::Module::Parser::Database::updateTargetTable(), crawlservpp::Module::Analyzer::Database::updateTargetTable(), updateTargetTable(), crawlservpp::Module::Crawler::Database::urlDuplicationCheck(), crawlservpp::Module::Crawler::Database::urlEmptyCheck(), and crawlservpp::Module::Crawler::Database::urlHashCheck().

◆ sqlExecute()

bool crawlservpp::Wrapper::Database::sqlExecute ( sql::PreparedStatement &  sqlPreparedStatement)
inlinestaticprotectedinherited

◆ sqlExecuteQuery()

sql::ResultSet * crawlservpp::Wrapper::Database::sqlExecuteQuery ( sql::PreparedStatement &  sqlPreparedStatement)
inlinestaticprotectedinherited

Executes a prepared SQL statement and returns the resulting set.

Parameters
sqlPreparedStatementReference to the prepared SQL statement to be executed.
Returns
A pointer to the result set retrieved by executing the prepared SQL statement.

References crawlservpp::Main::Database::sqlExecuteQuery().

Referenced by crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Module::Parser::Database::fetchUrls(), fetchUrls(), crawlservpp::Module::Parser::Database::getAllContents(), getContent(), crawlservpp::Module::Parser::Database::getContentIdFromParsedId(), crawlservpp::Module::Parser::Database::getLatestContent(), getLatestParsedData(), crawlservpp::Module::Parser::Database::getLockTime(), getLockTime(), crawlservpp::Module::Crawler::Database::getNextUrl(), crawlservpp::Module::Parser::Database::getNumberOfContents(), crawlservpp::Module::Crawler::Database::getNumberOfUrls(), crawlservpp::Module::Parser::Database::getNumberOfUrls(), getNumberOfUrls(), crawlservpp::Module::Analyzer::Database::getTargetTableUpdated(), crawlservpp::Module::Crawler::Database::getUrlId(), crawlservpp::Module::Crawler::Database::getUrlLockTime(), crawlservpp::Module::Parser::Database::getUrlLockTime(), getUrlLockTime(), crawlservpp::Module::Crawler::Database::getUrlPosition(), crawlservpp::Module::Parser::Database::getUrlPosition(), getUrlPosition(), crawlservpp::Module::Crawler::Database::isArchivedContentExists(), crawlservpp::Module::Crawler::Database::isUrlCrawled(), crawlservpp::Module::Crawler::Database::urlDuplicationCheck(), crawlservpp::Module::Crawler::Database::urlEmptyCheck(), and crawlservpp::Module::Crawler::Database::urlHashCheck().

◆ sqlExecuteUpdate()

◆ tryDatabaseLock()

bool crawlservpp::Wrapper::Database::tryDatabaseLock ( const std::string &  name)
inlinestaticprotectedinherited

Tries to add a lock to the database class, not blocking execution.

If a lock with the same name already exists, the function will not add a lock and return false instead.

Parameters
nameConstant reference to a string containing the name of the lock to be added to the database class if a lock with the same name does not exist already.
Returns
True, if a lock with the same name did not exist already and the lock has been added. False, if a lock with the same name already exists and no lock has been added.

References crawlservpp::Main::Database::tryDatabaseLock().

◆ unLockUrlIfOk()

bool crawlservpp::Module::Extractor::Database::unLockUrlIfOk ( std::uint64_t  urlId,
const std::string &  lockTime 
)

Unlocks a URL in the database.

Parameters
urlIdID of the URL that will be unlocked, if its lock is still active and held by the current thread.
lockTimeThe expiration time of the lock held over the given URL by the current thread.
Returns
True, if the unlocking was successful, or no URL has been given. False, if the URL could not be unlocked, because its lock has expired and it has already been locked by another thread.
Exceptions
Module::Extractor::Database::Exception if the prepared SQL statement for unlocking a URL is missing.
Main::Database::Exception if a MySQL error occurred while trying to unlock the URL.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Module::Extractor::sqlArg2, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecuteUpdate().

Referenced by crawlservpp::Module::Extractor::Thread::onReset(), and crawlservpp::Module::Extractor::Thread::onTick().

◆ unLockUrlsIfOk()

void crawlservpp::Module::Extractor::Database::unLockUrlsIfOk ( std::queue< IdString > &  urls,
std::string &  lockTime 
)

Unlocks multiple URLs in the database at once.

Note
The SQL statements needed for unlocking the URLs will only be created shortly before query execution, as this function should only be used during shutdown or reset of the extractor. During normal operations, URLs are unlocked as they are processed — one by one.
Parameters
urlsReference to a queue containing IDs and URIs of the URLs to unlock. It will be cleared while trying to unlock the URLs, even if some or all of the URLs could not be unlocked, because their lock has expired and they have already been locked by another thread. If empty, nothing will be done.
lockTimeThe expiration time of the lock held over the given URLs by the current thread.
Exceptions
Main::Database::Exception if a MySQL error occurred while trying to unlock the URLs.

References crawlservpp::Wrapper::Database::addPreparedStatement(), crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Wrapper::Database::clearPreparedStatement(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Wrapper::Database::sqlException(), and crawlservpp::Wrapper::Database::sqlExecute().

Referenced by crawlservpp::Module::Extractor::Thread::onClear(), and crawlservpp::Module::Extractor::Thread::onTick().

◆ updateCustomData() [1/3]

void crawlservpp::Wrapper::Database::updateCustomData ( const Data::UpdateValue data)
inlineinherited

Updates a custom value in a table row.

Parameters
dataConstant reference to a structure containing the data to be updated.
Exceptions
Main::Database::Exception if no table, columns, or column type have been specified in the given data structure, if the given data is too large, or if invalid data has been encountered.
Main::Database::Exception if a MySQL error occurred while updating the data.
See also
Data::UpdateValue

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::updateCustomData().

◆ updateCustomData() [2/3]

void crawlservpp::Wrapper::Database::updateCustomData ( const Data::UpdateFields data)
inlineinherited

Updates custom values in multiple table columns of the same type.

Parameters
dataConstant reference to a structure containing the data to be updated.
Exceptions
Main::Database::Exception if no columns or no column type are specified in the given data structure, if the given data is too large, or if invalid data has been encountered.
Main::Database::Exception if a MySQL error occurred while updating the data.
See also
Data::UpdateFields

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::updateCustomData().

◆ updateCustomData() [3/3]

void crawlservpp::Wrapper::Database::updateCustomData ( const Data::UpdateFieldsMixed data)
inlineinherited

Updates custom values in multiple table columns of different types.

Parameters
dataConstant reference to a structure containing the data to be updated.
Exceptions
Main::Database::Exception if no columns are specified in the given data structure, if the given data is too large, or if invalid data has been encountered.
Main::Database::Exception if a MySQL error occurred while updating the data.
See also
Data::UpdateFieldsMixed

References crawlservpp::Wrapper::Database::database, and crawlservpp::Main::Database::updateCustomData().

◆ updateOrAddEntries()

void crawlservpp::Module::Extractor::Database::updateOrAddEntries ( std::queue< DataEntry > &  entries,
StatusSetter statusSetter 
)

Adds extracted data to the database, or updates data that already exists.

Only updates data, if the extractor is set to overwrite existing data via setOverwrite().

Parameters
entriesReference to a queue containing the data to add. If empty, nothing will be done. The queue will be emptied as the data will be processed, even when some or all of the data has not been added or updated, because it already exists and the extractor has been set not to overwrite data via setOverwrite().
statusSetterData needed to keep the status of the thread updated.
Exceptions
Module::Extractor::Database::Exception if any of the prepared SQL statements for adding and updating extracted data is missing.
Main::Database::Exception if a MySQL error occurred while adding or updating the extracted data in the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Struct::StatusSetter::finish(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::minTargetColumns, crawlservpp::Module::Extractor::nAtOnce10, crawlservpp::Module::Extractor::nAtOnce100, crawlservpp::Module::Extractor::numArgsAddUpdateData, crawlservpp::Module::Extractor::numArgsLinked, crawlservpp::Module::Extractor::numArgsOverwriteData, crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Module::Extractor::sqlArg2, crawlservpp::Module::Extractor::sqlArg3, crawlservpp::Module::Extractor::sqlArg4, crawlservpp::Wrapper::Database::sqlException(), crawlservpp::Wrapper::Database::sqlExecute(), and crawlservpp::Struct::StatusSetter::update().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ updateOrAddLinked()

void crawlservpp::Module::Extractor::Database::updateOrAddLinked ( std::queue< DataEntry > &  entries,
StatusSetter statusSetter 
)

Adds linked data to the database, or updates data that already exists.

Only updates data, if the extractor is set to overwrite existing linked data via setOverwriteLinked().

Parameters
entriesReference to a queue containing the data to add. If empty, nothing will be done. The queue will be emptied as the data will be processed, even when some or all of the data has not been added or updated, because it already exists and the extractor has been set not to overwrite data via setOverwriteLinked().
statusSetterData needed to keep the status of the thread updated.
Exceptions
Module::Extractor::Database::Exception if any of the prepared SQL statements for adding and updating linked data is missing.
Main::Database::Exception if a MySQL error occurred while adding or updating the linked data in the database.

References crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Struct::StatusSetter::finish(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Module::Extractor::minLinkedColumns, crawlservpp::Module::Extractor::nAtOnce10, crawlservpp::Module::Extractor::nAtOnce100, crawlservpp::Module::Extractor::numArgsAddUpdateLinkedData, crawlservpp::Module::Extractor::numArgsOverwriteLinkedData, crawlservpp::Module::Extractor::sqlArg1, crawlservpp::Module::Extractor::sqlArg2, crawlservpp::Wrapper::Database::sqlException(), crawlservpp::Wrapper::Database::sqlExecute(), and crawlservpp::Struct::StatusSetter::update().

Referenced by crawlservpp::Module::Extractor::Thread::onReset().

◆ updateTargetTable()

Member Data Documentation

◆ database

Module::Database& crawlservpp::Wrapper::Database::database
protectedinherited

Reference to the database connection for the thread.

Referenced by crawlservpp::Wrapper::Database::addColumn(), crawlservpp::Wrapper::Database::addOrUpdateTargetTable(), crawlservpp::Wrapper::Database::addPreparedStatement(), crawlservpp::Wrapper::Database::beginNoLock(), crawlservpp::Wrapper::Database::checkConnection(), crawlservpp::Module::Analyzer::Database::checkSources(), crawlservpp::Wrapper::Database::clearPreparedStatement(), crawlservpp::Wrapper::Database::compressTable(), crawlservpp::Wrapper::Database::createTable(), crawlservpp::Wrapper::Database::deleteTargetTable(), crawlservpp::Wrapper::Database::dropTable(), crawlservpp::Wrapper::Database::endNoLock(), crawlservpp::Wrapper::Database::getColumnType(), crawlservpp::Wrapper::Database::getConfiguration(), crawlservpp::Wrapper::Database::getCustomData(), crawlservpp::Wrapper::Database::getLastInsertedId(), crawlservpp::Wrapper::Database::getLoggingMin(), crawlservpp::Wrapper::Database::getLoggingVerbose(), crawlservpp::Wrapper::Database::getMaxAllowedPacketSize(), crawlservpp::Wrapper::Database::getOptions(), crawlservpp::Wrapper::Database::getPreparedStatement(), crawlservpp::Wrapper::Database::getQueryProperties(), crawlservpp::Wrapper::Database::getTargetTableId(), crawlservpp::Wrapper::Database::getTargetTableName(), crawlservpp::Wrapper::Database::getTargetTables(), crawlservpp::Wrapper::Database::getUrlListIdString(), crawlservpp::Wrapper::Database::getWebsiteDomain(), crawlservpp::Wrapper::Database::getWebsiteIdString(), crawlservpp::Wrapper::Database::insertCustomData(), crawlservpp::Wrapper::Database::isColumnExists(), crawlservpp::Wrapper::Database::isTableEmpty(), crawlservpp::Wrapper::Database::isTableExists(), crawlservpp::Wrapper::Database::log(), crawlservpp::Wrapper::Database::reserveForPreparedStatements(), crawlservpp::Wrapper::Database::setLogging(), crawlservpp::Wrapper::Database::setSleepOnError(), crawlservpp::Wrapper::Database::setTimeOut(), crawlservpp::Wrapper::Database::setUrlListCaseSensitive(), and 
crawlservpp::Wrapper::Database::updateCustomData().


The documentation for this class was generated from the following files: