crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
Config.hpp File Reference
#include "../../Network/Config.hpp"
#include <algorithm>
#include <array>
#include <cstdint>
#include <string>
#include <string_view>
#include <vector>
Include dependency graph for Config.hpp:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  crawlservpp::Module::Extractor::Config
 Configuration for extractors. More...
 
struct  crawlservpp::Module::Extractor::Config::Entries
 Configuration entries for extractor threads. More...
 
class  crawlservpp::Module::Extractor::Config::Exception
 Class for extractor configuration exceptions. More...
 

Namespaces

 crawlservpp::Module::Extractor
 Namespace for extractor classes.
 

Constants

constexpr std::uint8_t crawlservpp::Module::Extractor::generalLoggingSilent {0}
 Logging is disabled. More...
 
constexpr std::uint8_t crawlservpp::Module::Extractor::generalLoggingDefault {1}
 Default logging is enabled. More...
 
constexpr std::uint8_t crawlservpp::Module::Extractor::generalLoggingExtended {2}
 Extended logging is enabled. More...
 
constexpr std::uint8_t crawlservpp::Module::Extractor::generalLoggingVerbose {3}
 Verbose logging is enabled. More...
 
constexpr std::uint8_t crawlservpp::Module::Extractor::variablesSourcesParsed {0}
 Extract variable value from parsed data. More...
 
constexpr std::uint8_t crawlservpp::Module::Extractor::variablesSourcesContent {1}
 Extract variable value from the content of a crawled web page. More...
 
constexpr std::uint8_t crawlservpp::Module::Extractor::variablesSourcesUrl {2}
 Extract variable value from the URL of a crawled web page. More...
 
constexpr std::uint8_t crawlservpp::Module::Extractor::expectedSourceExtracting {0}
 Extract data from other extracted data. More...
 
constexpr std::uint8_t crawlservpp::Module::Extractor::expectedSourceParsed {1}
 Extract data from parsed data. More...
 
constexpr std::uint8_t crawlservpp::Module::Extractor::expectedSourceContent {2}
 Extract data from the content of a crawled web page. More...
 
constexpr std::array crawlservpp::Module::Extractor::defaultRetryHttpStatusCodes {429, 502, 503, 504}
 HTTP status codes to retry by default. More...
 
constexpr std::array crawlservpp::Module::Extractor::protocolsToRemove {"http://"sv, "https://"sv}
 Protocols to remove from URLs. More...
 
constexpr std::uint64_t crawlservpp::Module::Extractor::defaultCacheSize {2500}
 Default cache size. More...
 
constexpr std::uint32_t crawlservpp::Module::Extractor::defaultLockS {300}
 Default locking time, in seconds. More...
 
constexpr std::uint16_t crawlservpp::Module::Extractor::defaultMaxBatchSize {500}
 Default number of URLs and results to be processed in one MySQL query. More...
 
constexpr std::int64_t crawlservpp::Module::Extractor::defaultReTries {720}
 Default re-tries on connection error. More...
 
constexpr std::uint64_t crawlservpp::Module::Extractor::defaultSleepErrorMs {10000}
 Default sleeping time on connection errors, in milliseconds. More...
 
constexpr std::uint64_t crawlservpp::Module::Extractor::defaultSleepHttpMs {0}
 Default time to wait between HTTP requests, in milliseconds. More...
 
constexpr std::uint64_t crawlservpp::Module::Extractor::defaultSleepIdleMs {5000}
 Default time to wait before checking for new URLs when all URLs have been processed, in milliseconds. More...
 
constexpr std::uint64_t crawlservpp::Module::Extractor::defaultSleepMySqlS {60}
 Default time to wait before the last attempt to re-connect to the MySQL server, in seconds. More...
 
constexpr auto crawlservpp::Module::Extractor::defaultPagingVariable {"$p"sv}
 Default name of the paging variable. More...
 
constexpr std::uint64_t crawlservpp::Module::Extractor::defaultRecursiveMaxDepth {100}
 Default maximum depth of recursive extracting. More...