|
crawlserv++
[under development]
Application for crawling and analyzing textual content of websites.
|
#include "Database.hpp"
#include "Exception.hpp"
#include "WebServer.hpp"
#include "../Data/Compression/Gzip.hpp"
#include "../Data/Compression/Zip.hpp"
#include "../Data/Compression/Zlib.hpp"
#include "../Data/Corpus.hpp"
#include "../Data/File.hpp"
#include "../Data/ImportExport/OpenDocument.hpp"
#include "../Data/ImportExport/Text.hpp"
#include "../Helper/CommaLocale.hpp"
#include "../Helper/DateTime.hpp"
#include "../Helper/FileSystem.hpp"
#include "../Helper/Json.hpp"
#include "../Helper/Memory.hpp"
#include "../Helper/Strings.hpp"
#include "../Module/Analyzer/Algo/All.hpp"
#include "../Module/Crawler/Thread.hpp"
#include "../Module/Extractor/Thread.hpp"
#include "../Module/Parser/Thread.hpp"
#include "../Module/Thread.hpp"
#include "../Query/JsonPath.hpp"
#include "../Query/JsonPointer.hpp"
#include "../Query/RegEx.hpp"
#include "../Query/XPath.hpp"
#include "../Struct/AlgoThreadProperties.hpp"
#include "../Struct/ConfigProperties.hpp"
#include "../Struct/NetworkSettings.hpp"
#include "../Struct/QueryProperties.hpp"
#include "../Struct/ServerCommandResponse.hpp"
#include "../Struct/ServerSettings.hpp"
#include "../Struct/ThreadDatabaseEntry.hpp"
#include "../Struct/ThreadOptions.hpp"
#include "../Struct/UrlListProperties.hpp"
#include "../Struct/WebsiteProperties.hpp"
#include "../Timer/SimpleHR.hpp"
#include "../Wrapper/Database.hpp"
#include "../_extern/jsoncons/include/jsoncons/json.hpp"
#include "../_extern/jsoncons/include/jsoncons_ext/jsonpath/json_query.hpp"
#include "../_extern/rapidjson/include/rapidjson/document.h"
#include "../_extern/rapidjson/include/rapidjson/prettywriter.h"
#include <boost/lexical_cast.hpp>
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <iostream>
#include <memory>
#include <mutex>
#include <optional>
#include <queue>
#include <sstream>
#include <string>
#include <string_view>
#include <thread>
#include <set>
#include <utility>
#include <vector>

Go to the source code of this file.
Classes | |
| class | crawlservpp::Main::Server |
| The command-and-control server. More... | |
Namespaces | |
| crawlservpp::Main | |
| Namespace for the main classes of the program. | |
Macros | |
| #define | MAIN_SERVER_CMD(X, Y) |
| #define | MAIN_SERVER_WORKER_BEGIN try { |
| #define | MAIN_SERVER_WORKER_END(X) |
Constants | |
| constexpr auto | crawlservpp::Main::cacheDir {"cache"sv} |
| The name of the (sub-)directory for the file cache. More... | |
| constexpr auto | crawlservpp::Main::cookieDir {"cookies"sv} |
| The name of the (sub-)directory for cookies. More... | |
| constexpr auto | crawlservpp::Main::downloadDir {"dl"sv} |
| The name of the (sub-)directory for downloads. More... | |
| constexpr auto | crawlservpp::Main::debugDir {"debug"sv} |
| The name of the (sub-)directory for debugging. More... | |
| constexpr auto | crawlservpp::Main::dictDir {"dict"sv} |
| The name of the (sub-)directory for dictionaries. More... | |
| constexpr auto | crawlservpp::Main::mdlDir {"mdl"sv} |
| The name of the (sub-)directory for language models. More... | |
| constexpr auto | crawlservpp::Main::webServerPollTimeOutMs {1000} |
| The timeout in milliseconds for the polling of the web server. More... | |
| constexpr auto | crawlservpp::Main::statusHttpCode {200} |
| The HTTP status code for GET replies indicating the status of the server. More... | |
| constexpr auto | crawlservpp::Main::statusHttpContentType {"text/plain"} |
| The HTTP content type for GET replies indicating the status of the server. More... | |
| constexpr auto | crawlservpp::Main::replyHttpCode {200} |
| The HTTP status code for POST replies. More... | |
| constexpr auto | crawlservpp::Main::replyHttpContentType {"application/json"} |
| The HTTP content type for POST replies. More... | |
| constexpr auto | crawlservpp::Main::optionsHttpCode {200} |
| The HTTP status code for OPTIONS replies. More... | |
| constexpr auto | crawlservpp::Main::minNameSpaceLength {3} |
| The minimum length of namespaces. More... | |
| constexpr auto | crawlservpp::Main::minNameSpaceLengthString {"three"sv} |
| The minimum length of namespaces, as string. More... | |
| constexpr auto | crawlservpp::Main::httpString {"http://"sv} |
| The beginning of URLs using the HTTP protocol. More... | |
| constexpr auto | crawlservpp::Main::httpsString {"https://"sv} |
| The beginning of URLs using the HTTPS protocol. More... | |
| constexpr auto | crawlservpp::Main::xmlWarningsDefault {25} |
| The number of XML warnings by default. More... | |
| constexpr auto | crawlservpp::Main::dataTypeColumnNameSeparatorShort {"_"sv} |
| The (short) separator used between data type and column name. More... | |
| constexpr auto | crawlservpp::Main::dataTypeColumnNameSeparatorLong {"__"sv} |
| The (long) separator used between data type and column name. More... | |
| #define MAIN_SERVER_CMD(X, Y) |
Referenced by crawlservpp::Main::Server::tick().
| #define MAIN_SERVER_WORKER_BEGIN try { |
Referenced by crawlservpp::Main::Server::tick().
| #define MAIN_SERVER_WORKER_END(X) |
Referenced by crawlservpp::Main::Server::tick().