crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
Config.hpp File Reference
#include "../../Main/Exception.hpp"
#include "../../Network/Config.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
Include dependency graph for Config.hpp:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  crawlservpp::Module::Crawler::Config
 Configuration for crawlers. More...
 
struct  crawlservpp::Module::Crawler::Config::Entries
 Configuration entries for crawler threads. More...
 
class  crawlservpp::Module::Crawler::Config::Exception
 Class for crawler configuration exceptions. More...
 

Namespaces

 crawlservpp::Module::Crawler
 Namespace for crawler classes.
 

Constants

constexpr std::uint8_t crawlservpp::Module::Crawler::crawlerLoggingSilent {0}
 Logging is disabled. More...
 
constexpr std::uint8_t crawlservpp::Module::Crawler::crawlerLoggingDefault {1}
 Default logging is enabled. More...
 
constexpr std::uint8_t crawlservpp::Module::Crawler::crawlerLoggingExtended {2}
 Extended logging is enabled. More...
 
constexpr std::uint8_t crawlservpp::Module::Crawler::crawlerLoggingVerbose {3}
 Verbose logging is enabled. More...
 
constexpr std::uint8_t crawlservpp::Module::Crawler::redirectSourceUrl {0}
 Performing a query on the URL of a crawled web page to determine whether to redirect. More...
 
constexpr std::uint8_t crawlservpp::Module::Crawler::redirectSourceContent {1}
 Performing a query on the content of a crawled web page to determine whether to redirect. More...
 
constexpr std::uint32_t crawlservpp::Module::Crawler::defaultCrawlerLockS {300}
 Default time to lock URLs that are being processed, in seconds. More...
 
constexpr std::uint16_t crawlservpp::Module::Crawler::defaultMaxBatchSize {500}
 Default number of URLs to be processed in one MySQL query. More...
 
constexpr std::int32_t crawlservpp::Module::Crawler::defaultRestartAfter {-1}
 Default time, in seconds, after which to re-crawl custom URLs once crawling has been completed (-1 = deactivated). More...
 
constexpr std::int64_t crawlservpp::Module::Crawler::defaultReTries {720}
 Default number of re-tries on connection errors. More...
 
constexpr std::array crawlservpp::Module::Crawler::defaultRetryHttp {429, 502, 503, 504, 521, 522, 524}
 HTTP errors that will be handled like connection errors by default. More...
 
constexpr std::uint64_t crawlservpp::Module::Crawler::defaultSleepErrorMs {10000}
 Default sleeping time on connection errors, in milliseconds. More...
 
constexpr std::uint64_t crawlservpp::Module::Crawler::defaultSleepHttpMs {0}
 Default time to wait between HTTP requests, in milliseconds. More...
 
constexpr std::uint64_t crawlservpp::Module::Crawler::defaultSleepIdleMs {5000}
 Default time to wait before checking for new URLs once all URLs have been crawled, in milliseconds. More...
 
constexpr std::uint64_t crawlservpp::Module::Crawler::defaultSleepMySqlS {60}
 Default time to wait before the first try to re-connect to the MySQL server, in seconds. More...
 
constexpr std::uint64_t crawlservpp::Module::Crawler::defaultUrlChunks {5000}
 Default number of crawled URLs to be processed at once without possible interruption. More...
 
constexpr std::uint16_t crawlservpp::Module::Crawler::defaultUrlMaxLength {2000}
 Default maximum length of URLs to add. More...