crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
crawlservpp::Module::Parser Namespace Reference

Namespace for parser classes. More...

Classes

class  Config
 Configuration for parsers. More...
 
class  Database
 Class providing database functionality for parser threads by implementing Wrapper::Database. More...
 
class  Thread
 Parser thread. More...
 

Constants

constexpr std::uint8_t crawlerLoggingVerbose {0}
 Logging is disabled. More...
 
constexpr std::uint8_t generalLoggingDefault {1}
 Default logging is enabled. More...
 
constexpr std::uint8_t generalLoggingExtended {2}
 Extended logging is enabled. More...
 
constexpr std::uint8_t generalLoggingVerbose {3}
 Verbose logging is enabled. More...
 
constexpr std::uint8_t parsingSourceUrl {0}
 Parse data from the URL of a crawled web page. More...
 
constexpr std::uint8_t parsingSourceContent {1}
 Parse data from the content of a crawled web page. More...
 
constexpr std::uint64_t defaultCacheSize {2500}
 Default cache size. More...
 
constexpr std::uint32_t defaultLockS {300}
 Default URL locking time, in seconds. More...
 
constexpr std::uint16_t defaultMaxBatchSize {500}
 Default maximum number of URLs to be processed in one MySQL query. More...
 
constexpr std::uint64_t defaultSleepIdleMs {5000}
 Default time to wait before checking for new URLs when all URLs have been parsed, in milliseconds. More...
 
constexpr std::uint64_t defaultSleepMySqlS {60}
 Default time to wait before the last attempt to reconnect to the MySQL server, in seconds. More...
 
constexpr auto maxContentSize {1073741824}
 Maximum size of database content (= 1 GiB). More...
 
constexpr auto maxContentSizeString {"1 GiB"sv}
 Maximum size of database content as string. More...
 
constexpr std::uint8_t updateContentCounterEvery {25}
 The number of processed contents after which the thread status will be updated. More...
 

Constants for MySQL Queries

constexpr auto oneAtOnce {1}
 Process one value at once. More...
 
constexpr auto nAtOnce10 {10}
 Process ten values at once. More...
 
constexpr auto nAtOnce100 {100}
 Process one hundred values at once. More...
 
constexpr auto sqlArg1 {1}
 First argument in a SQL query. More...
 
constexpr auto sqlArg2 {2}
 Second argument in a SQL query. More...
 
constexpr auto sqlArg3 {3}
 Third argument in a SQL query. More...
 
constexpr auto sqlArg4 {4}
 Fourth argument in a SQL query. More...
 
constexpr auto sqlArg5 {5}
 Fifth argument in a SQL query. More...
 
constexpr auto sqlArg6 {6}
 Sixth argument in a SQL query. More...
 
constexpr auto parsingTableAlias {"a"sv}
 Alias, used in SQL queries, for the parsing table. More...
 
constexpr auto targetTableAlias {"b"sv}
 Alias, used in SQL queries, for the target table. More...
 
constexpr auto minTargetColumns {4}
 Minimum number of columns in the target table. More...
 
constexpr auto numArgsLockUrl {3}
 Number of arguments for locking one URL. More...
 
constexpr auto minArsgAddUpdateData {5}
 Minimum number of arguments to add or update a data entry. More...
 
constexpr auto numArgsFinishUrl {2}
 Number of arguments for setting one URL to finished. More...
 
constexpr auto maxDateTimeValue {"9999-12-31 23:59:59"sv}
 The maximum value of a DATETIME in the database. More...
 

Detailed Description

Namespace for parser classes.

Variable Documentation

◆ crawlerLoggingVerbose

constexpr std::uint8_t crawlservpp::Module::Parser::crawlerLoggingVerbose {0}
inline

Logging is disabled.

◆ defaultCacheSize

constexpr std::uint64_t crawlservpp::Module::Parser::defaultCacheSize {2500}
inline

Default cache size.

◆ defaultLockS

constexpr std::uint32_t crawlservpp::Module::Parser::defaultLockS {300}
inline

Default URL locking time, in seconds.

◆ defaultMaxBatchSize

constexpr std::uint16_t crawlservpp::Module::Parser::defaultMaxBatchSize {500}
inline

Default maximum number of URLs to be processed in one MySQL query.

◆ defaultSleepIdleMs

constexpr std::uint64_t crawlservpp::Module::Parser::defaultSleepIdleMs {5000}
inline

Default time to wait before checking for new URLs when all URLs have been parsed, in milliseconds.

◆ defaultSleepMySqlS

constexpr std::uint64_t crawlservpp::Module::Parser::defaultSleepMySqlS {60}
inline

Default time to wait before the last attempt to reconnect to the MySQL server, in seconds.

◆ generalLoggingDefault

constexpr std::uint8_t crawlservpp::Module::Parser::generalLoggingDefault {1}
inline

Default logging is enabled.

◆ generalLoggingExtended

constexpr std::uint8_t crawlservpp::Module::Parser::generalLoggingExtended {2}
inline

Extended logging is enabled.

◆ generalLoggingVerbose

constexpr std::uint8_t crawlservpp::Module::Parser::generalLoggingVerbose {3}
inline

Verbose logging is enabled.

Referenced by crawlservpp::Module::Parser::Thread::onReset().

◆ maxContentSize

constexpr auto crawlservpp::Module::Parser::maxContentSize {1073741824}
inline

Maximum size of database content (= 1 GiB).

Referenced by crawlservpp::Module::Parser::Database::updateTargetTable().

◆ maxContentSizeString

constexpr auto crawlservpp::Module::Parser::maxContentSizeString {"1 GiB"sv}
inline

Maximum size of database content as string.

Referenced by crawlservpp::Module::Parser::Database::updateTargetTable().

◆ maxDateTimeValue

constexpr auto crawlservpp::Module::Parser::maxDateTimeValue {"9999-12-31 23:59:59"sv}
inline

The maximum value of a DATETIME in the database.

Referenced by crawlservpp::Module::Parser::Database::getLatestContent().

◆ minArsgAddUpdateData

constexpr auto crawlservpp::Module::Parser::minArsgAddUpdateData {5}
inline

Minimum number of arguments to add or update a data entry.

Referenced by crawlservpp::Module::Parser::Database::updateOrAddEntries().

◆ minTargetColumns

constexpr auto crawlservpp::Module::Parser::minTargetColumns {4}
inline

Minimum number of columns in the target table.

Referenced by crawlservpp::Module::Parser::Database::initTargetTable().

◆ nAtOnce10

constexpr auto crawlservpp::Module::Parser::nAtOnce10 {10}
inline

Process ten values at once.

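◆ nAtOnce100

constexpr auto crawlservpp::Module::Parser::nAtOnce100 {100}
inline

Process one hundred values at once.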

◆ numArgsFinishUrl

constexpr auto crawlservpp::Module::Parser::numArgsFinishUrl {2}
inline

Number of arguments for setting one URL to finished.

Referenced by crawlservpp::Module::Parser::Database::setUrlsFinishedIfLockOk().

◆ numArgsLockUrl

constexpr auto crawlservpp::Module::Parser::numArgsLockUrl {3}
inline

Number of arguments for locking one URL.

Referenced by crawlservpp::Module::Parser::Database::fetchUrls().

◆ oneAtOnce

constexpr auto crawlservpp::Module::Parser::oneAtOnce {1}
inline

Process one value at once.

Referenced by crawlservpp::Module::Parser::Database::prepare().

◆ parsingSourceContent

constexpr std::uint8_t crawlservpp::Module::Parser::parsingSourceContent {1}
inline

Parse data from the content of a crawled web page.

Referenced by crawlservpp::Module::Parser::Thread::onReset().

◆ parsingSourceUrl

constexpr std::uint8_t crawlservpp::Module::Parser::parsingSourceUrl {0}
inline

Parse data from the URL of a crawled web page.

Referenced by crawlservpp::Module::Parser::Thread::onReset().

◆ parsingTableAlias

constexpr auto crawlservpp::Module::Parser::parsingTableAlias {"a"sv}
inline

Alias, used in SQL queries, for the parsing table.

Referenced by crawlservpp::Module::Parser::Database::updateTargetTable().

◆ sqlArg1

constexpr auto crawlservpp::Module::Parser::sqlArg1 {1}
inline

First argument in a SQL query.

◆ sqlArg2

constexpr auto crawlservpp::Module::Parser::sqlArg2 {2}
inline

Second argument in a SQL query.

◆ sqlArg3

constexpr auto crawlservpp::Module::Parser::sqlArg3 {3}
inline

Third argument in a SQL query.

◆ sqlArg4

constexpr auto crawlservpp::Module::Parser::sqlArg4 {4}
inline

Fourth argument in a SQL query.

◆ sqlArg5

constexpr auto crawlservpp::Module::Parser::sqlArg5 {5}
inline

Fifth argument in a SQL query.

Referenced by crawlservpp::Module::Parser::Database::updateOrAddEntries().

◆ sqlArg6

constexpr auto crawlservpp::Module::Parser::sqlArg6 {6}
inline

Sixth argument in a SQL query.

Referenced by crawlservpp::Module::Parser::Database::updateOrAddEntries().

◆ targetTableAlias

constexpr auto crawlservpp::Module::Parser::targetTableAlias {"b"sv}
inline

Alias, used in SQL queries, for the target table.

Referenced by crawlservpp::Module::Parser::Database::updateTargetTable().

◆ updateContentCounterEvery

constexpr std::uint8_t crawlservpp::Module::Parser::updateContentCounterEvery {25}
inline

The number of processed contents after which the thread status will be updated.

Referenced by crawlservpp::Module::Parser::Thread::onReset().