31 #ifndef PARSING_XML_HPP_ 32 #define PARSING_XML_HPP_ 38 #include "../Helper/Memory.hpp" 39 #include "../Main/Exception.hpp" 41 #include <pugixml.hpp> 49 #include <string_view> 63 using std::string_view_literals::operator
""sv;
72 inline constexpr std::array
xmlTags{
"<?i>"sv};
159 explicit XML(
const pugi::xml_node& node);
166 [[nodiscard]]
bool valid()
const;
167 void getContent(std::string& resultTo)
const;
173 void setOptions(
bool showWarnings, std::uint32_t numOfErrors) noexcept;
180 std::string_view content,
183 bool removeXmlInstructions,
184 std::queue<std::string>& warningsTo
218 XML(
XML&&) noexcept = default;
221 XML& operator=(const
XML&) = delete;
224 XML& operator=(
XML&&) noexcept = default;
230 std::unique_ptr<pugi::xml_document> doc;
233 bool warnings{
false};
234 std::uint32_t errors{};
237 static void cDataRepair(std::string& content);
238 static void replaceInvalidConditionalComments(std::string& content);
239 static void replaceInvalidComments(std::string& content);
240 static void removeXmlProcessingInstructions(std::string& content);
241 static void checkResult(pugi::xml_parse_result result,
const std::string& content);
257 inline XML::XML(
const pugi::xml_node& node) {
259 this->doc = std::make_unique<pugi::xml_document>();
261 this->doc->append_copy(node);
279 inline bool XML::valid()
const {
280 return this->doc.operator bool();
300 inline void XML::getContent(std::string& resultTo)
const {
305 std::ostringstream out;
309 this->doc->print(out);
311 resultTo += out.str();
330 inline void XML::setOptions(
bool showWarnings, std::uint32_t numOfErrors) noexcept {
331 this->warnings = showWarnings;
332 this->errors = numOfErrors;
365 inline void XML::parse(
366 std::string_view content,
369 bool removeXmlInstructions,
370 std::queue<std::string>& warningsTo
375 while(content.length() > begin && std::isspace(content.at(begin)) != 0) {
381 xml.reserve(content.size() - begin);
383 for(std::size_t i{begin}; i < content.length(); ++i) {
384 if(content[i] !=
'\0') {
385 xml.push_back(content[i]);
395 if(removeXmlInstructions) {
396 removeXmlProcessingInstructions(xml);
402 tidy.
tidyAndConvert(xml, this->warnings, this->errors, warningsTo);
407 + std::string(e.
view())
419 replaceInvalidConditionalComments(xml);
420 replaceInvalidComments(xml);
424 this->doc = std::make_unique<pugi::xml_document>();
427 XML::checkResult(this->doc->load_buffer(xml.c_str(), xml.size(), pugi::parse_full), xml);
439 inline void XML::clear() {
450 inline void XML::cDataRepair(std::string& content) {
453 if(pos == std::string::npos) {
459 while(pos < content.size()) {
460 const auto next{content.find(
cDataBegin, pos)};
462 if(next == std::string::npos) {
468 if(last != std::string::npos && last > pos) {
473 content.insert(pos +
cDataEnd.length() - 1, 1,
' ');
488 inline void XML::replaceInvalidConditionalComments(std::string& content) {
491 while(pos < content.length()) {
495 if(pos == std::string::npos) {
507 if(end == std::string::npos) {
522 while(subPos < end) {
541 while(pos < content.length()) {
544 if(pos == std::string::npos) {
559 inline void XML::replaceInvalidComments(std::string& content) {
562 while(pos < content.length()) {
566 if(pos == std::string::npos) {
573 if(end == std::string::npos) {
588 while(subPos < end) {
613 inline void XML::removeXmlProcessingInstructions(std::string& content) {
616 while(pos < content.length()) {
619 if(pos == std::string::npos) {
630 if(end == std::string::npos) {
637 for(
const auto& tag :
xmlTags) {
640 while(pos < content.length()) {
641 pos = content.find(tag, pos);
643 if(pos == std::string::npos) {
647 content.erase(pos, tag.length());
653 inline void XML::checkResult(pugi::xml_parse_result result,
const std::string& content) {
659 std::string errorString{
"XML parsing error: "};
661 errorString += result.description();
662 errorString +=
" at #";
663 errorString += std::to_string(result.offset);
666 if(result.offset > 0) {
667 errorString +=
"'[...]";
670 errorString += content.substr(
676 errorString += content.substr(0, result.offset);
679 errorString +=
"[!!!]";
682 errorString +=
"'[...]";
684 errorString +=
"[...]";
687 errorString +=
"'[...]";
688 errorString += content.substr(result.offset);
691 errorString +=
"').";
constexpr auto commentCharsReplaceBy
Characters used as replacement inside comments.
Definition: XML.hpp:102
constexpr auto commentCharsToReplace
Characters to be replaced inside comments.
Definition: XML.hpp:99
constexpr auto conditionalInsertOffsetBegin
Offset at which to insert at the beginning to make conditional comments valid.
Definition: XML.hpp:90
constexpr auto numDebugCharacters
The maximum number of characters to be shown in error messages.
Definition: XML.hpp:123
constexpr auto conditionalInsertOffsetEnd
Offset at which to insert at the end to make conditional comments valid.
Definition: XML.hpp:93
void tidyAndConvert(std::string &inOut, bool warnings, ulong numOfErrors, std::queue< std::string > &warningsTo)
Parse and tidy the given HTML markup and convert the result to XML.
Definition: HTML.hpp:171
constexpr auto xmlInstructionBegin
The beginning of an XML processing instruction.
Definition: XML.hpp:126
constexpr std::array xmlTags
Array containing additional XML markup tags to be removed.
Definition: XML.hpp:72
constexpr auto cDataBegin
The beginning of a CDATA element.
Definition: XML.hpp:75
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
constexpr auto invalidBegin
The beginning of an invalid comment.
Definition: XML.hpp:105
constexpr auto xmlBegin
The beginning of XML markup.
Definition: XML.hpp:69
constexpr auto invalidInsertOffsetEnd
Offset at which to insert at the end to make invalid comments valid.
Definition: XML.hpp:120
static void freeIf(bool isFree, T &target)
Frees memory early by swapping, if necessary.
Definition: Memory.hpp:52
constexpr auto invalidInsertEnd
Characters to be inserted at the end to make invalid comments valid.
Definition: XML.hpp:114
constexpr auto conditionalEnd
The end of a conditional comment.
Definition: XML.hpp:84
Implements an XPath query using the pugixml library.
Definition: XPath.hpp:74
Class for XML exceptions.
Definition: XML.hpp:207
std::string_view view() const noexcept
Gets the description of the exception as a view to the underlying string.
Definition: Exception.hpp:158
constexpr auto invalidInsertBegin
Characters to be inserted at the beginning to make invalid comments valid.
Definition: XML.hpp:111
constexpr auto conditionalBegin
The beginning of a conditional comment.
Definition: XML.hpp:81
Namespace for classes handling queries.
Definition: XML.hpp:51
Parses and cleans HTML markup.
Definition: HTML.hpp:71
constexpr auto conditionalInsertOffsetStrayEnd
Offset at which to insert into stray end tag left from conditional comment.
Definition: XML.hpp:96
Parses HTML markup into clean XML.
Definition: XML.hpp:149
constexpr auto invalidEnd
The end of an invalid comment.
Definition: XML.hpp:108
Namespace for classes parsing HTML, URIs, and XML.
Definition: HTML.hpp:42
constexpr auto invalidInsertOffsetBegin
Offset at which to insert at the beginning to make invalid comments valid.
Definition: XML.hpp:117
constexpr auto cDataEnd
The end of a CDATA element.
Definition: XML.hpp:78
constexpr auto xmlInstructionEnd
The end of an XML processing instruction.
Definition: XML.hpp:129
Class for HTML exceptions.
Definition: HTML.hpp:107
constexpr auto conditionalInsert
Characters to be inserted/replaced to make conditional comments valid.
Definition: XML.hpp:87