|
crawlserv++
[under development]
Application for crawling and analyzing textual content of websites.
|
#include "HTML.hpp"
#include "../Helper/Memory.hpp"
#include "../Main/Exception.hpp"
#include <pugixml.hpp>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <queue>
#include <sstream>
#include <string>
#include <string_view>

Go to the source code of this file.
Classes | |
| class | crawlservpp::Parsing::XML |
| Parses HTML markup into clean XML. More... | |
| class | crawlservpp::Parsing::XML::Exception |
| Class for XML exceptions. More... | |
Namespaces | |
| crawlservpp::Query | |
| Namespace for classes handling queries. | |
| crawlservpp::Parsing | |
| Namespace for classes parsing HTML, URIs, and XML. | |
Macros | |
| #define | PARSING_XML_ |
Constants | |
| constexpr auto | crawlservpp::Parsing::xmlBegin {"<?xml "sv} |
| The beginning of XML markup. More... | |
| constexpr std::array | crawlservpp::Parsing::xmlTags {"<?i>"sv} |
| Array containing additional XML markup tags to be removed. More... | |
| constexpr auto | crawlservpp::Parsing::cDataBegin {"<![CDATA["sv} |
| The beginning of a CDATA element. More... | |
| constexpr auto | crawlservpp::Parsing::cDataEnd {"]]>"sv} |
| The end of a CDATA element. More... | |
| constexpr auto | crawlservpp::Parsing::conditionalBegin {"<![if "sv} |
| The beginning of a conditional comment. More... | |
| constexpr auto | crawlservpp::Parsing::conditionalEnd {"<![endif]>"sv} |
| The end of a conditional comment. More... | |
| constexpr auto | crawlservpp::Parsing::conditionalInsert {"--"sv} |
| Characters to be inserted/replaced to make conditional comments valid. More... | |
| constexpr auto | crawlservpp::Parsing::conditionalInsertOffsetBegin {2} |
| Offset at which to insert at the beginning to make conditional comments valid. More... | |
| constexpr auto | crawlservpp::Parsing::conditionalInsertOffsetEnd {9} |
| Offset at which to insert at the end to make conditional comments valid. More... | |
| constexpr auto | crawlservpp::Parsing::conditionalInsertOffsetStrayEnd {2} |
| Offset at which to insert into stray end tag left from conditional comment. More... | |
| constexpr auto | crawlservpp::Parsing::commentCharsToReplace {"--"sv} |
| Characters to be replaced inside comments. More... | |
| constexpr auto | crawlservpp::Parsing::commentCharsReplaceBy {"=="sv} |
| Characters used as replacement inside comments. More... | |
| constexpr auto | crawlservpp::Parsing::invalidBegin {"<? "sv} |
| The beginning of an invalid comment. More... | |
| constexpr auto | crawlservpp::Parsing::invalidEnd {" ?>"sv} |
| The end of an invalid comment. More... | |
| constexpr auto | crawlservpp::Parsing::invalidInsertBegin {"!--"sv} |
| Characters to be inserted at the beginning to make invalid comments valid. More... | |
| constexpr auto | crawlservpp::Parsing::invalidInsertEnd {"--"sv} |
| Characters to be inserted at the end to make invalid comments valid. More... | |
| constexpr auto | crawlservpp::Parsing::invalidInsertOffsetBegin {1} |
| Offset at which to insert at the beginning to make invalid comments valid. More... | |
| constexpr auto | crawlservpp::Parsing::invalidInsertOffsetEnd {2} |
| Offset at which to insert at the end to make invalid comments valid. More... | |
| constexpr auto | crawlservpp::Parsing::numDebugCharacters {50} |
| The maximum number of characters to be shown in error messages. More... | |
| constexpr auto | crawlservpp::Parsing::xmlInstructionBegin {"<?xml:"sv} |
| The beginning of an XML processing instruction. More... | |
| constexpr auto | crawlservpp::Parsing::xmlInstructionEnd {">"sv} |
| The end of an XML processing instruction. More... | |
| #define PARSING_XML_ |