crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
Thread.hpp File Reference
#include "Config.hpp"
#include "Database.hpp"
#include "../Thread.hpp"
#include "../../Helper/CommaLocale.hpp"
#include "../../Helper/Container.hpp"
#include "../../Helper/DateTime.hpp"
#include "../../Helper/DotLocale.hpp"
#include "../../Helper/Strings.hpp"
#include "../../Helper/Utf8.hpp"
#include "../../Main/Exception.hpp"
#include "../../Network/Curl.hpp"
#include "../../Network/TorControl.hpp"
#include "../../Parsing/URI.hpp"
#include "../../Query/Container.hpp"
#include "../../Struct/CrawlStatsTick.hpp"
#include "../../Struct/CrawlTimersTick.hpp"
#include "../../Struct/CrawlTimersContent.hpp"
#include "../../Struct/NetworkSettings.hpp"
#include "../../Struct/QueryProperties.hpp"
#include "../../Struct/QueryStruct.hpp"
#include "../../Struct/ThreadOptions.hpp"
#include "../../Struct/ThreadStatus.hpp"
#include "../../Timer/Simple.hpp"
#include "../../Wrapper/DatabaseLock.hpp"
#include "../../Wrapper/DatabaseTryLock.hpp"
#include <curl/curl.h>
#include <algorithm>
#include <cctype>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <ios>
#include <memory>
#include <queue>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
Include dependency graph for Thread.hpp:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  crawlservpp::Module::Crawler::Thread
 Crawler thread. More...
 
class  crawlservpp::Module::Crawler::Thread::Exception
 Class for crawler exceptions. More...
 

Namespaces

 crawlservpp::Module::Crawler
 Namespace for crawler classes.
 

Constants

constexpr auto crawlservpp::Module::Crawler::robotsMinLineLength {9}
 The minimum length of a robots.txt line containing a useful sitemap. More...
 
constexpr auto crawlservpp::Module::Crawler::robotsFirstLetters {7}
 The number of first letters of a robots.txt line containing a sitemap. More...
 
constexpr auto crawlservpp::Module::Crawler::robotsSitemapBegin {"sitemap:"sv}
 The beginning of a robots.txt line containing a sitemap. More...
 
constexpr auto crawlservpp::Module::Crawler::robotsRelativeUrl {"/robots.txt"sv}
 The relative URL of robots.txt. More...
 
constexpr auto crawlservpp::Module::Crawler::updateCustomUrlCountEvery {100}
 The number of custom URLs after which the thread status will be updated. More...
 
constexpr auto crawlservpp::Module::Crawler::httpResponseCodeMin {400}
 Minimum HTTP error code. More...
 
constexpr auto crawlservpp::Module::Crawler::httpResponseCodeMax {599}
 Maximum HTTP error code. More...
 
constexpr auto crawlservpp::Module::Crawler::httpResponseCodeIgnore {200}
 HTTP response code to be ignored when checking for errors. More...
 
constexpr auto crawlservpp::Module::Crawler::wwwString {"www."sv}
 The "www." in the beginning of a domain. More...
 
constexpr auto crawlservpp::Module::Crawler::httpsString {"https://"sv}
 The beginning of a URL containing the HTTPS protocol. More...
 
constexpr auto crawlservpp::Module::Crawler::httpsIgnoreString {"https://www."sv}
 The beginning of an HTTPS URL to be ignored. More...
 
constexpr auto crawlservpp::Module::Crawler::httpString {"http://"sv}
 The beginning of a URL containing the HTTP protocol. More...
 
constexpr auto crawlservpp::Module::Crawler::httpIgnoreString {"http://www."sv}
 The beginning of an HTTP URL to be ignored. More...
 
constexpr auto crawlservpp::Module::Crawler::archiveMementoContentType {"application/link-format"sv}
 The content type of a memento. More...
 
constexpr auto crawlservpp::Module::Crawler::archiveRefString {"found capture at "sv}
 The reference string in a memento referencing another memento. More...
 
constexpr auto crawlservpp::Module::Crawler::archiveRefTimeStampLength {14}
 The length of a memento time stamp. More...
 
constexpr auto crawlservpp::Module::Crawler::archiveRenewUrlLockEveryMs {1000}
 The number of milliseconds before renewing the URL lock while crawling archives. More...