crawlserv++
[under development]
Application for crawling and analyzing textual content of websites.
|
#include "Database.hpp"
#include "Exception.hpp"
#include "WebServer.hpp"
#include "../Data/Compression/Gzip.hpp"
#include "../Data/Compression/Zip.hpp"
#include "../Data/Compression/Zlib.hpp"
#include "../Data/Corpus.hpp"
#include "../Data/File.hpp"
#include "../Data/ImportExport/OpenDocument.hpp"
#include "../Data/ImportExport/Text.hpp"
#include "../Helper/CommaLocale.hpp"
#include "../Helper/DateTime.hpp"
#include "../Helper/FileSystem.hpp"
#include "../Helper/Json.hpp"
#include "../Helper/Memory.hpp"
#include "../Helper/Strings.hpp"
#include "../Module/Analyzer/Algo/All.hpp"
#include "../Module/Crawler/Thread.hpp"
#include "../Module/Extractor/Thread.hpp"
#include "../Module/Parser/Thread.hpp"
#include "../Module/Thread.hpp"
#include "../Query/JsonPath.hpp"
#include "../Query/JsonPointer.hpp"
#include "../Query/RegEx.hpp"
#include "../Query/XPath.hpp"
#include "../Struct/AlgoThreadProperties.hpp"
#include "../Struct/ConfigProperties.hpp"
#include "../Struct/NetworkSettings.hpp"
#include "../Struct/QueryProperties.hpp"
#include "../Struct/ServerCommandResponse.hpp"
#include "../Struct/ServerSettings.hpp"
#include "../Struct/ThreadDatabaseEntry.hpp"
#include "../Struct/ThreadOptions.hpp"
#include "../Struct/UrlListProperties.hpp"
#include "../Struct/WebsiteProperties.hpp"
#include "../Timer/SimpleHR.hpp"
#include "../Wrapper/Database.hpp"
#include "../_extern/jsoncons/include/jsoncons/json.hpp"
#include "../_extern/jsoncons/include/jsoncons_ext/jsonpath/json_query.hpp"
#include "../_extern/rapidjson/include/rapidjson/document.h"
#include "../_extern/rapidjson/include/rapidjson/prettywriter.h"
#include <boost/lexical_cast.hpp>
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <iostream>
#include <memory>
#include <mutex>
#include <optional>
#include <queue>
#include <sstream>
#include <string>
#include <string_view>
#include <thread>
#include <set>
#include <utility>
#include <vector>
Go to the source code of this file.
Classes | |
class | crawlservpp::Main::Server |
The command-and-control server. More... | |
Namespaces | |
crawlservpp::Main | |
Namespace for the main classes of the program. | |
Macros | |
#define | MAIN_SERVER_CMD(X, Y) |
#define | MAIN_SERVER_WORKER_BEGIN try { |
#define | MAIN_SERVER_WORKER_END(X) |
Constants | |
constexpr auto | crawlservpp::Main::cacheDir {"cache"sv} |
The name of the (sub-)directory for the file cache. More... | |
constexpr auto | crawlservpp::Main::cookieDir {"cookies"sv} |
The name of the (sub-)directory for cookies. More... | |
constexpr auto | crawlservpp::Main::downloadDir {"dl"sv} |
The name of the (sub-)directory for downloads. More... | |
constexpr auto | crawlservpp::Main::debugDir {"debug"sv} |
The name of the (sub-)directory for debugging. More... | |
constexpr auto | crawlservpp::Main::dictDir {"dict"sv} |
The name of the (sub-)directory for dictionaries. More... | |
constexpr auto | crawlservpp::Main::mdlDir {"mdl"sv} |
The name of the (sub-)directory for language models. More... | |
constexpr auto | crawlservpp::Main::webServerPollTimeOutMs {1000} |
The timeout in milliseconds for the polling of the web server. More... | |
constexpr auto | crawlservpp::Main::statusHttpCode {200} |
The HTTP status code for GET replies indicating the status of the server. More... | |
constexpr auto | crawlservpp::Main::statusHttpContentType {"text/plain"} |
The HTTP content type for GET replies indicating the status of the server. More... | |
constexpr auto | crawlservpp::Main::replyHttpCode {200} |
The HTTP status code for POST replies. More... | |
constexpr auto | crawlservpp::Main::replyHttpContentType {"application/json"} |
The HTTP content type for POST replies. More... | |
constexpr auto | crawlservpp::Main::optionsHttpCode {200} |
The HTTP status code for OPTIONS replies. More... | |
constexpr auto | crawlservpp::Main::minNameSpaceLength {3} |
The minimum length of namespaces. More... | |
constexpr auto | crawlservpp::Main::minNameSpaceLengthString {"three"sv} |
The minimum length of namespaces, as string. More... | |
constexpr auto | crawlservpp::Main::httpString {"http://"sv} |
The beginning of URLs using the HTTP protocol. More... | |
constexpr auto | crawlservpp::Main::httpsString {"https://"sv} |
The beginning of URLs using the HTTPS protocol. More... | |
constexpr auto | crawlservpp::Main::xmlWarningsDefault {25} |
The number of XML warnings by default. More... | |
constexpr auto | crawlservpp::Main::dataTypeColumnNameSeparatorShort {"_"sv} |
The (short) separator used between data type and column name. More... | |
constexpr auto | crawlservpp::Main::dataTypeColumnNameSeparatorLong {"__"sv} |
The (long) separator used between data type and column name. More... | |
#define MAIN_SERVER_CMD(X, Y)
Referenced by crawlservpp::Main::Server::tick().
#define MAIN_SERVER_WORKER_BEGIN try {
Referenced by crawlservpp::Main::Server::tick().
#define MAIN_SERVER_WORKER_END(X)
Referenced by crawlservpp::Main::Server::tick().