crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
Container.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2021 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * Container.hpp
24  *
25  * Abstract class for management and type-independent usage of queries.
26  *
27  * Created on: Jan 8, 2019
28  * Author: ans
29  */
30 
31 #ifndef QUERY_CONTAINER_HPP_
32 #define QUERY_CONTAINER_HPP_
33 
34 #include "JsonPath.hpp"
35 #include "JsonPointer.hpp"
36 #include "RegEx.hpp"
37 #include "XPath.hpp"
38 
39 #include "../Helper/Container.hpp"
40 #include "../Helper/Json.hpp"
41 #include "../Helper/Memory.hpp"
42 #include "../Main/Exception.hpp"
43 #include "../Parsing/XML.hpp"
44 #include "../Struct/QueryProperties.hpp"
45 #include "../Struct/QueryStruct.hpp"
46 
47 #include "../_extern/jsoncons/include/jsoncons/json.hpp"
48 #include "../_extern/jsoncons/include/jsoncons_ext/jsonpath/json_query.hpp"
49 #include "../_extern/rapidjson/include/rapidjson/document.h"
50 
51 #include <algorithm> // std::find
52 #include <cstddef> // std::size_t
53 #include <cstdint> // std::uint8_t, std::uint32_t, std::uint64_t
54 #include <iterator> // std::make_move_iterator
55 #include <mutex> // std::lock_guard, std::mutex
56 #include <queue> // std::queue
57 #include <string> // std::string
58 #include <utility> // std::pair
59 #include <vector> // std::vector
60 
62 namespace crawlservpp::Query {
63 
64  /*
65  * DECLARATION
66  */
67 
69 
76  class Container {
77  // for convenience
79 
81 
86 
89 
90  using XPathJsonPointer = std::pair<XPath, JsonPointer>;
91  using XPathJsonPath = std::pair<XPath, JsonPath>;
92 
93  public:
96 
98  Container() = default;
99 
101  virtual ~Container() = default;
102 
104 
108 
112  Container(const Container&) = delete;
113 
115  Container& operator=(const Container&) = delete;
116 
118  Container(Container&&) = delete;
119 
121  Container& operator=(Container&&) = delete;
122 
124 
127 
128  bool isQueryUsed(std::uint64_t queryId) const;
129 
131 
132  protected:
134 
149 
152 
153  void setRepairCData(bool isRepairCData);
154  void setRepairComments(bool isRepairComments);
155  void setRemoveXmlInstructions(bool isRemoveXmlInstructions);
156  void setMinimizeMemory(bool isMinimizeMemory);
157  void setTidyErrorsAndWarnings(bool warnings, std::uint32_t numOfErrors);
158  void setQueryTarget(const std::string& content, const std::string& source);
159 
162 
163  [[nodiscard]] std::size_t getNumberOfSubSets() const;
164  bool getTarget(std::string& targetTo);
165  bool getXml(std::string& resultTo, std::queue<std::string>& warningsTo);
166 
170 
172 
178  virtual void initQueries() = 0;
179 
180  //*! Pure virtual function cleaning queries.
187  virtual void deleteQueries() = 0;
188 
192 
194  std::uint64_t id,
195  const QueryProperties& properties
196  );
197  void clearQueries();
198  void clearQueryTarget();
199 
203 
204  bool nextSubSet();
205 
209 
210  bool getBoolFromRegEx(
211  const QueryStruct& query,
212  const std::string& target,
213  bool& resultTo,
214  std::queue<std::string>& warningsTo
215  ) const;
216  bool getSingleFromRegEx(
217  const QueryStruct& query,
218  const std::string& target,
219  std::string& resultTo,
220  std::queue<std::string>& warningsTo
221  ) const;
222  bool getMultiFromRegEx(
223  const QueryStruct& query,
224  const std::string& target,
225  std::vector<std::string>& resultTo,
226  std::queue<std::string>& warningsTo
227  ) const;
228  bool getBoolFromQuery(
229  const QueryStruct& query,
230  bool& resultTo,
231  std::queue<std::string>& warningsTo
232  );
234  const QueryStruct& query,
235  bool& resultTo,
236  std::queue<std::string>& warningsTo
237  );
238  bool getSingleFromQuery(
239  const QueryStruct& query,
240  std::string& resultTo,
241  std::queue<std::string>& warningsTo
242  );
244  const QueryStruct& query,
245  std::string& resultTo,
246  std::queue<std::string>& warningsTo
247  );
248  bool getMultiFromQuery(
249  const QueryStruct& query,
250  std::vector<std::string>& resultTo,
251  std::queue<std::string>& warningsTo
252  );
254  const QueryStruct& query,
255  std::vector<std::string>& resultTo,
256  std::queue<std::string>& warningsTo
257  );
258  bool setSubSetsFromQuery(
259  const QueryStruct& query,
260  std::queue<std::string>& warningsTo
261  );
263  const QueryStruct& query,
264  std::queue<std::string>& warningsTo
265  );
266 
270 
271  void reserveForSubSets(const QueryStruct& query, std::size_t n);
272 
274 
275  private:
276  // queries
277  std::vector<RegEx> queriesRegEx;
278  std::vector<XPath> queriesXPath;
279  std::vector<JsonPointer> queriesJsonPointer;
280  std::vector<JsonPath> queriesJsonPath;
281  std::vector<XPathJsonPointer> queriesXPathJsonPointer;
282  std::vector<XPathJsonPath> queriesXPathJsonPath;
283 
284  // query IDs and their lock
285  std::vector<std::uint64_t> queriesId;
286  mutable std::mutex queriesIdLock;
287 
288  // options
289  bool repairCData{true}; // try to repair CData when parsing HTML/XML
290  bool repairComments{true}; // try to repair broken HTML/XML comments
291  bool removeXmlInstructions{false}; // remove XML processing instructions
292  bool minimizeMemory{false}; // minimize memory usage
293 
294  // content pointers and parsing
295  const std::string * queryTargetPtr{nullptr}; // ptr to content to perform queries on
296  const std::string * queryTargetSourcePtr{nullptr}; // ptr to source of content (for generating warnings)
297  bool xmlParsed{false}; // content has been parsed as HTML/XML
298  bool jsonParsedRapid{false}; // content has been parsed as JSON using RapidJSON
299  bool jsonParsedCons{false}; // content has been parsed as JSON using jsoncons
300  Parsing::XML parsedXML; // content parsed as HTML/XML
301  rapidjson::Document parsedJsonRapid; // content parsed as JSON (using RapidJSON)
302  jsoncons::json parsedJsonCons; // content parsed as JSON (using jsoncons)
303  std::string xmlParsingError; // error while parsing content as HTML/XML
304  std::string jsonParsingError; // error while parsing content as JSON
305 
306  // subset properties and parsing
307  std::uint8_t subSetType{QueryStruct::typeNone}; // type of subsets
308  std::size_t subSetNumber{} ; // number of subsets
309  std::size_t subSetCurrent{}; // current subset (index + 1)
310  bool subSetXmlParsed{false}; // current subset has been parsed as HTML/XML
311  bool subSetJsonParsedRapid{false}; // current subset has been parsed as JSON using RapidJSON
312  bool subSetJsonParsedCons{false}; // current subset has been parsed as JSON using jsoncons
313  Parsing::XML subSetParsedXML; // current subset parsed as HTML/XML
314  rapidjson::Document subSetParsedJsonRapid; // current subset parsed as JSON (using RapidJSON)
315  jsoncons::json subSetParsedJsonCons; // current subset parsed as JSON (using jsoncons)
316  std::string subSetXmlParsingError; // error while parsing current subset as HTML/XML
317  std::string subSetJsonParsingError; // error while parsing current subset as JSON
318 
319  // subset data
320  std::vector<Parsing::XML> xPathSubSets;
321  std::vector<rapidjson::Document> jsonPointerSubSets;
322  std::vector<jsoncons::json> jsonPathSubSets;
323 
324  std::vector<std::string> stringifiedSubSets;
325 
326  // internal helper functions
327  bool parseXml(std::queue<std::string>& warningsTo);
328  bool parseJsonRapid(std::queue<std::string>& warningsTo);
329  bool parseJsonCons(std::queue<std::string>& warningsTo);
330  bool parseSubSetXml(std::queue<std::string>& warningsTo);
331  bool parseSubSetJsonRapid(std::queue<std::string>& warningsTo);
332  bool parseSubSetJsonCons(std::queue<std::string>& warningsTo);
333  void resetSubSetParsingState();
334  void clearSubSets();
335 
336  void stringifySubSets(std::queue<std::string>& warningsTo);
337  void insertSubSets(std::vector<std::string>& subsets);
338  void insertSubSets(std::vector<Parsing::XML>& subsets);
339  void insertSubSets(std::vector<jsoncons::json>& subsets);
340  void insertSubSets(std::vector<rapidjson::Document>& subsets);
341  };
342 
343  /*
344  * IMPLEMENTATION
345  */
346 
347  /*
348  * PUBLIC GETTER
349  */
350 
352 
357  inline bool Container::isQueryUsed(std::uint64_t queryId) const {
358  std::lock_guard<std::mutex> idLock(this->queriesIdLock);
359 
360  return std::find(
361  this->queriesId.cbegin(),
362  this->queriesId.cend(),
363  queryId
364  ) != this->queriesId.cend();
365  }
366 
367  /*
368  * SETTERS (protected)
369  */
370 
372 
376  inline void Container::setRepairCData(bool isRepairCData) {
377  this->repairCData = isRepairCData;
378  }
379 
381 
385  inline void Container::setRepairComments(bool isRepairComments) {
386  this->repairComments = isRepairComments;
387  }
388 
390 
394  inline void Container::setRemoveXmlInstructions(bool isRemoveXmlInstructions) {
395  this->removeXmlInstructions = isRemoveXmlInstructions;
396  }
397 
399 
407  inline void Container::setMinimizeMemory(bool isMinimizeMemory) {
408  this->minimizeMemory = isMinimizeMemory;
409  }
410 
412 
428  inline void Container::setTidyErrorsAndWarnings(bool warnings, std::uint32_t numOfErrors) {
429  this->parsedXML.setOptions(warnings, numOfErrors);
430  this->subSetParsedXML.setOptions(warnings, numOfErrors);
431  }
432 
434 
454  const std::string& content,
455  const std::string& source
456  ) {
457  // clear old target
458  this->clearQueryTarget();
459 
460  // set new target
461  this->queryTargetPtr = &content;
462  this->queryTargetSourcePtr = &source;
463  }
464 
465  /*
466  * GETTERS (protected)
467  */
468 
470 
475  inline std::size_t Container::getNumberOfSubSets() const {
476  return this->subSetNumber;
477  }
478 
480 
494  inline bool Container::getTarget(std::string& targetTo) {
495  if(this->queryTargetPtr != nullptr) {
496  targetTo = *(this->queryTargetPtr);
497 
498  return true;
499  }
500 
501  return true;
502  }
503 
505 
521  inline bool Container::getXml(std::string& resultTo, std::queue<std::string>& warningsTo) {
522  if(this->parseXml(warningsTo)) {
523  this->parsedXML.getContent(resultTo);
524 
525  return true;
526  }
527 
528  if(warningsTo.empty()) {
529  warningsTo.emplace(
530  "WARNING: [XML] "
531  + this->xmlParsingError
532  + " ["
533  + *(this->queryTargetSourcePtr)
534  + "]"
535  );
536  }
537 
538  return false;
539  }
540 
541  /*
542  * QUERIES (protected)
543  */
544 
546 
566  std::uint64_t id,
567  const QueryProperties& properties
568  ) {
569  QueryStruct newQuery;
570 
571  newQuery.resultBool = properties.resultBool;
572  newQuery.resultSingle = properties.resultSingle;
573  newQuery.resultMulti = properties.resultMulti;
574  newQuery.resultSubSets = properties.resultSubSets;
575 
576  if(!properties.text.empty()) {
577  if(properties.type == "regex") {
578  // add RegEx query
579  newQuery.index = this->queriesRegEx.size();
580 
581  try {
582  this->queriesRegEx.emplace_back(
583  properties.text,
584  properties.resultBool
585  || properties.resultSingle,
586  properties.resultMulti
587  );
588  }
589  catch(const RegExException& e) {
590  throw Exception(
591  "[RegEx] "
592  + std::string(e.view())
593  );
594  }
595 
596  newQuery.type = QueryStruct::typeRegEx;
597 
598  }
599  else if(properties.type == "xpath") {
600  // add XPath query
601  newQuery.index = this->queriesXPath.size();
602 
603  try {
604  this->queriesXPath.emplace_back(
605  properties.text,
606  properties.textOnly
607  );
608  }
609  catch(const XPathException& e) {
610  throw Exception(
611  "[XPath] "
612  + std::string(e.view())
613  );
614  }
615 
616  newQuery.type = QueryStruct::typeXPath;
617  }
618  else if(properties.type == "jsonpointer") {
619  // add JSONPointer query
620  newQuery.index = this->queriesJsonPointer.size();
621 
622  try {
623  this->queriesJsonPointer.emplace_back(
624  properties.text,
625  properties.textOnly
626  );
627  }
628  catch(const JsonPointerException &e) {
629  throw Exception(
630  "[JSONPointer] "
631  + std::string(e.view())
632  );
633  }
634 
636  }
637  else if(properties.type == "jsonpath") {
638  // add JSONPath query
639  newQuery.index = this->queriesJsonPath.size();
640 
641  try {
642  this->queriesJsonPath.emplace_back(
643  properties.text,
644  properties.textOnly
645  );
646  }
647  catch(const JsonPathException &e) {
648  throw Exception(
649  "[JSONPath] "
650  + std::string(e.view())
651  );
652  }
653 
654  newQuery.type = QueryStruct::typeJsonPath;
655  }
656  else if(properties.type == "xpathjsonpointer") {
657  // add combined XPath and JSONPointer query
658  newQuery.index = this->queriesXPathJsonPointer.size();
659 
660  // split XPath query (first line) from JSON query
661  const auto splitPos{properties.text.find('\n')};
662  const std::string xPathQuery(
663  properties.text,
664  0,
665  splitPos
666  );
667  std::string jsonQuery;
668 
669  if(
670  splitPos != std::string::npos
671  && properties.text.size() > splitPos + 1
672  ) {
673  jsonQuery = properties.text.substr(splitPos + 1);
674  }
675 
676  try {
677  this->queriesXPathJsonPointer.emplace_back(
678  XPath(xPathQuery, true),
679  JsonPointer(jsonQuery, properties.textOnly)
680  );
681  }
682  catch(const XPathException& e) {
683  throw Exception(
684  "[XPath] "
685  + std::string(e.view())
686  );
687  }
688  catch(const JsonPointerException &e) {
689  throw Exception(
690  "[JSONPointer] "
691  + std::string(e.view())
692  );
693  }
694 
696 
697  }
698  else if(properties.type == "xpathjsonpath") {
699  // add combined XPath and JSONPath query
700  newQuery.index = this->queriesXPathJsonPath.size();
701 
702  // split XPath query (first line) from JSON query
703  const auto splitPos{properties.text.find('\n')};
704  const std::string xPathQuery(
705  properties.text,
706  0,
707  splitPos
708  );
709  std::string jsonQuery;
710 
711  if(
712  splitPos != std::string::npos
713  && properties.text.size() > splitPos + 1
714  ) {
715  jsonQuery = properties.text.substr(splitPos + 1);
716  }
717 
718  try {
719  this->queriesXPathJsonPath.emplace_back(
720  XPath(xPathQuery, true),
721  JsonPath(jsonQuery, properties.textOnly)
722  );
723  }
724  catch(const XPathException& e) {
725  throw Exception(
726  "[XPath] "
727  + std::string(e.view())
728  );
729  }
730  catch(const JsonPathException &e) {
731  throw Exception(
732  "[JSONPath] "
733  + std::string(e.view())
734  );
735  }
736 
738  }
739  else {
740  throw Exception(
741  "Query::Container::addQuery(): Unknown query type '"
742  + properties.type
743  + "'"
744  );
745  }
746  }
747 
748  // thread-safely add ID
749  if(id > 0) {
750  std::lock_guard<std::mutex> lockIds(this->queriesIdLock);
751 
752  this->queriesId.emplace_back(id);
753  }
754 
755  return newQuery;
756  }
757 
759  inline void Container::clearQueries() {
760  Helper::Memory::free(this->queriesXPath);
761  Helper::Memory::free(this->queriesRegEx);
762  Helper::Memory::free(this->queriesJsonPointer);
763  Helper::Memory::free(this->queriesJsonPath);
764  Helper::Memory::free(this->queriesXPathJsonPointer);
765  Helper::Memory::free(this->queriesXPathJsonPath);
766  }
767 
770  // clear old subsets
771  this->clearSubSets();
772 
773  // clear parsed content
774  this->parsedXML.clear();
775 
776  Helper::Memory::free(this->parsedJsonCons);
777  Helper::Json::free(this->parsedJsonRapid);
778 
779  // reset parsing state
780  this->xmlParsed = false;
781  this->jsonParsedRapid = false;
782  this->jsonParsedCons = false;
783 
784  // clear parsing errors
785  Helper::Memory::free(this->xmlParsingError);
786  Helper::Memory::free(this->jsonParsingError);
787 
788  // unset pointers
789  this->queryTargetPtr = nullptr;
790  this->queryTargetSourcePtr = nullptr;
791  }
792 
793  /*
794  * SUBSETS (protected)
795  */
796 
798 
808  inline bool Container::nextSubSet() {
809  // check subsets
810  if(this->subSetNumber < this->subSetCurrent) {
811  throw Exception(
812  "Query::Container::nextSubSet():"
813  " Invalid subset selected"
814  );
815  }
816 
817  if(this->subSetNumber == this->subSetCurrent) {
818  return false;
819  }
820 
821  // clear previous subset
822  if(this->subSetCurrent > 0) {
824  !(this->stringifiedSubSets.empty()),
825  this->stringifiedSubSets.at(this->subSetCurrent - 1)
826  );
827 
828  switch(this->subSetType) {
830  this->xPathSubSets.at(
831  this->subSetCurrent - 1
832  ).clear();
833 
834  break;
835 
838  this->jsonPointerSubSets.at(
839  this->subSetCurrent - 1
840  )
841  );
842 
843  break;
844 
847  this->jsonPathSubSets.at(
848  this->subSetCurrent - 1
849  )
850  );
851 
852  break;
853 
854  default:
855  break;
856  }
857  }
858 
859  // increment index (+ 1) of current subset
860  ++(this->subSetCurrent);
861 
862  return true;
863  }
864 
865  /*
866  * RESULTS (protected)
867  */
868 
870 
890  const QueryStruct& query,
891  const std::string& target,
892  bool& resultTo,
893  std::queue<std::string>& warningsTo
894  ) const {
895  // check query type
896  if(query.type != QueryStruct::typeRegEx) {
897  if(query.type != QueryStruct::typeNone) {
898  warningsTo.emplace(
899  "WARNING: RegEx query is of invalid type - not RegEx."
900  );
901  }
902  }
903  // check result type
904  else if(query.type != QueryStruct::typeNone && !query.resultBool) {
905  warningsTo.emplace(
906  "WARNING: RegEx query has invalid result type - not boolean."
907  );
908  }
909  // check query target
910  else if(target.empty()) {
911  resultTo = false;
912 
913  return true;
914  }
915  else {
916  // get boolean result from a RegEx query
917  try {
918  resultTo = this->queriesRegEx.at(query.index).getBool(
919  target
920  );
921 
922  return true;
923  }
924  catch(const RegExException& e) {
925  warningsTo.emplace(
926  "WARNING: RegEx error - "
927  + std::string(e.view())
928  + " ["
929  + target
930  + "]."
931  );
932  }
933  }
934 
935  return false;
936  }
937 
939 
959  const QueryStruct& query,
960  const std::string& target,
961  std::string& resultTo,
962  std::queue<std::string>& warningsTo
963  ) const {
964  // check query type
965  if(query.type != QueryStruct::typeRegEx) {
966  if(query.type != QueryStruct::typeNone) {
967  warningsTo.emplace(
968  "WARNING: RegEx query is of invalid type - not RegEx."
969  );
970  }
971  }
972  // check result type
973  else if(query.type != QueryStruct::typeNone && !query.resultSingle) {
974  warningsTo.emplace(
975  "WARNING: RegEx query has invalid result type - not single."
976  );
977  }
978  // check query target
979  else if(target.empty()) {
980  resultTo.clear();
981 
982  return true;
983  }
984  else {
985  // get single result from a RegEx query
986  try {
987  this->queriesRegEx.at(query.index).getFirst(
988  target,
989  resultTo
990  );
991 
992  return true;
993  }
994  catch(const RegExException& e) {
995  warningsTo.emplace(
996  "WARNING: RegEx error - "
997  + std::string(e.view())
998  + " ["
999  + target
1000  + "]."
1001  );
1002  }
1003  }
1004 
1005  return false;
1006  }
1007 
1009 
1029  const QueryStruct& query,
1030  const std::string& target,
1031  std::vector<std::string>& resultTo,
1032  std::queue<std::string>& warningsTo
1033  ) const {
1034  // check query type
1035  if(query.type != QueryStruct::typeRegEx) {
1036  if(query.type != QueryStruct::typeNone) {
1037  warningsTo.emplace(
1038  "WARNING: RegEx query is of invalid type - not RegEx."
1039  );
1040  }
1041  }
1042  // check result type
1043  else if(query.type != QueryStruct::typeNone && !query.resultMulti) {
1044  warningsTo.emplace(
1045  "WARNING: RegEx query has invalid result type - not multi."
1046  );
1047  }
1048  // check query target
1049  else if(target.empty()) {
1050  resultTo.clear();
1051 
1052  return true;
1053  }
1054  else {
1055  // get multiple results from a RegEx query
1056  try {
1057  this->queriesRegEx.at(query.index).getAll(
1058  target,
1059  resultTo
1060  );
1061 
1062  return true;
1063  }
1064  catch(const RegExException& e) {
1065  warningsTo.emplace(
1066  "WARNING: RegEx error - "
1067  + std::string(e.view())
1068  + " ["
1069  + target
1070  + "]."
1071  );
1072  }
1073  }
1074 
1075  return false;
1076  }
1077 
1079 
1099  const QueryStruct& query,
1100  bool& resultTo,
1101  std::queue<std::string>& warningsTo
1102  ) {
1103  // check pointers
1104  if(this->queryTargetPtr == nullptr) {
1105  throw Exception(
1106  "Query::Container::getBoolFromQuery():"
1107  " No content specified"
1108  );
1109  }
1110 
1111  if(this->queryTargetSourcePtr == nullptr) {
1112  throw Exception(
1113  "Query::Container::getBoolFromQuery():"
1114  " No content source specified"
1115  );
1116  }
1117 
1118  // check result type
1119  if(query.type != QueryStruct::typeNone && !query.resultBool) {
1120  warningsTo.emplace(
1121  "WARNING: Query has invalid result type - not boolean."
1122  );
1123 
1124  return false;
1125  }
1126 
1127  // check query target
1128  if(this->queryTargetPtr->empty()) {
1129  resultTo = false;
1130 
1131  return true;
1132  }
1133 
1134  switch(query.type) {
1136  // get boolean result from a RegEx query
1137  try {
1138  resultTo = this->queriesRegEx.at(query.index).getBool(
1139  *(this->queryTargetPtr)
1140  );
1141 
1142  return true;
1143  }
1144  catch(const RegExException& e) {
1145  warningsTo.emplace(
1146  "WARNING: RegEx error - "
1147  + std::string(e.view())
1148  + " ["
1149  + *(this->queryTargetSourcePtr)
1150  + "]."
1151  );
1152  }
1153 
1154  break;
1155 
1157  // parse content as HTML/XML if still necessary
1158  if(this->parseXml(warningsTo)) {
1159  // get boolean result from a XPath query
1160  try {
1161  resultTo = this->queriesXPath.at(query.index).getBool(
1162  this->parsedXML
1163  );
1164 
1165  return true;
1166  }
1167  catch(const XPathException& e) {
1168  warningsTo.emplace(
1169  "WARNING: XPath error - "
1170  + std::string(e.view())
1171  + " ["
1172  + *(this->queryTargetSourcePtr)
1173  + "]."
1174  );
1175  }
1176  }
1177 
1178  break;
1179 
1181  // parse content as JSON using RapidJSON if still necessary
1182  if(this->parseJsonRapid(warningsTo)) {
1183  // get boolean result from a JSONPointer query
1184  try {
1185  resultTo = this->queriesJsonPointer.at(query.index).getBool(
1186  this->parsedJsonRapid
1187  );
1188 
1189  return true;
1190  }
1191  catch(const JsonPointerException& e) {
1192  warningsTo.emplace(
1193  "WARNING: JSONPointer error - "
1194  + std::string(e.view())
1195  + " ["
1196  + *(this->queryTargetSourcePtr)
1197  + "]."
1198  );
1199  }
1200  }
1201 
1202  break;
1203 
1205  // parse content as JSON using jsoncons if still necessary
1206  if(this->parseJsonCons(warningsTo)) {
1207  // get boolean result from a JSONPath query
1208  try {
1209  resultTo = this->queriesJsonPath.at(query.index).getBool(
1210  this->parsedJsonCons
1211  );
1212 
1213  return true;
1214  }
1215  catch(const JsonPathException& e) {
1216  warningsTo.emplace(
1217  "WARNING: JSONPath error - "
1218  + std::string(e.view())
1219  + " ["
1220  + *(this->queryTargetSourcePtr)
1221  + "]."
1222  );
1223  }
1224  }
1225 
1226  break;
1227 
1229  // parse content as HTML/XML if still necessary
1230  if(this->parseXml(warningsTo)) {
1231  // get first result from the XPath query
1232  try {
1233  std::string json;
1234 
1235  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
1236  this->parsedXML,
1237  json
1238  );
1239 
1240  if(json.empty()) {
1241  resultTo = false;
1242  }
1243  else {
1244  // temporarily parse JSON using rapidJSON
1245  const auto parsedJson(Helper::Json::parseRapid(json));
1246 
1247  // get boolean result from the JSONPointer query
1248  resultTo = this->queriesXPathJsonPointer.at(query.index).second.getBool(
1249  parsedJson
1250  );
1251  }
1252 
1253  return true;
1254  }
1255  catch(const XPathException& e) {
1256  warningsTo.emplace(
1257  "WARNING: XPath error - "
1258  + std::string(e.view())
1259  + " ["
1260  + *(this->queryTargetSourcePtr)
1261  + "]."
1262  );
1263  }
1264  catch(const JsonPointerException& e) {
1265  warningsTo.emplace(
1266  "WARNING: JSONPointer error - "
1267  + std::string(e.view())
1268  + " ["
1269  + *(this->queryTargetSourcePtr)
1270  + "]."
1271  );
1272  }
1273  catch(const JsonException& e) {
1274  warningsTo.emplace(
1275  "WARNING: JSONPath error - "
1276  + std::string(e.view())
1277  + " ["
1278  + *(this->queryTargetSourcePtr)
1279  + "]."
1280  );
1281  }
1282  }
1283 
1284  break;
1285 
1287  // parse content as HTML/XML if still necessary
1288  if(this->parseXml(warningsTo)) {
1289  // get first result from the XPath query
1290  try {
1291  std::string json;
1292 
1293  this->queriesXPathJsonPath.at(query.index).first.getFirst(
1294  this->parsedXML,
1295  json
1296  );
1297 
1298  if(json.empty()) {
1299  resultTo = false;
1300  }
1301  else {
1302  // temporarily parse JSON using jsoncons
1303  const auto parsedJson(Helper::Json::parseCons(json));
1304 
1305  // get boolean result from the JSONPath query
1306  resultTo = this->queriesXPathJsonPath.at(query.index).second.getBool(
1307  parsedJson
1308  );
1309  }
1310 
1311  return true;
1312  }
1313  catch(const XPathException& e) {
1314  warningsTo.emplace(
1315  "WARNING: XPath error - "
1316  + std::string(e.view())
1317  + " ["
1318  + *(this->queryTargetSourcePtr)
1319  + "]."
1320  );
1321  }
1322  catch(const JsonPathException& e) {
1323  warningsTo.emplace(
1324  "WARNING: JSONPath error - "
1325  + std::string(e.view())
1326  + " ["
1327  + *(this->queryTargetSourcePtr)
1328  + "]."
1329  );
1330  }
1331  catch(const JsonException& e) {
1332  warningsTo.emplace(
1333  "WARNING: JSONPath error - "
1334  + std::string(e.view())
1335  + " ["
1336  + *(this->queryTargetSourcePtr)
1337  + "]."
1338  );
1339  }
1340  }
1341 
1342  break;
1343 
1344  case QueryStruct::typeNone:
1345  break;
1346 
1347  default:
1348  throw Exception(
1349  "Query::Container::getBoolFromQuery():"
1350  " Unknown query type"
1351  );
1352  }
1353 
1354  return false;
1355  }
1356 
1358 
1379  const QueryStruct& query,
1380  bool& resultTo,
1381  std::queue<std::string>& warningsTo
1382  ) {
1383  // check pointer
1384  if(this->queryTargetSourcePtr == nullptr) {
1385  throw Exception(
1386  "Query::Container::getBoolFromQueryOnSubSet():"
1387  " No content source has been specified"
1388  );
1389  }
1390 
1391  // check current subset
1392  if(this->subSetCurrent == 0) {
1393  throw Exception(
1394  "Query::Container::getBoolFromQueryOnSubSet():"
1395  " No subset has been specified"
1396  );
1397  }
1398 
1399  if(this->subSetCurrent > this->subSetNumber) {
1400  throw Exception(
1401  "Query::Container::getBoolFromQueryOnSubSet():"
1402  " Invalid subset has been specified"
1403  );
1404  }
1405 
1406  // check result type
1407  if(query.type != QueryStruct::typeNone && !query.resultBool) {
1408  warningsTo.emplace(
1409  "WARNING: Query has invalid result type - not boolean."
1410  );
1411 
1412  return false;
1413  }
1414 
1415  switch(query.type) {
1417  // get boolean result from a RegEx query on the current subset
1418  try {
1419  if(this->subSetType != QueryStruct::typeRegEx) {
1420  this->stringifySubSets(warningsTo);
1421  }
1422 
1423  resultTo = this->queriesRegEx.at(query.index).getBool(
1424  this->stringifiedSubSets.at(this->subSetCurrent - 1)
1425  );
1426 
1427  return true;
1428  }
1429  catch(const RegExException& e) {
1430  warningsTo.emplace(
1431  "WARNING: RegEx error - "
1432  + std::string(e.view())
1433  + " ["
1434  + *(this->queryTargetSourcePtr)
1435  + "]."
1436  );
1437  }
1438 
1439  break;
1440 
1442  // parse current subset as HTML/XML if still necessary
1443  if(this->parseSubSetXml(warningsTo)) {
1444  // get boolean result from a XPath query on the current subset
1445  try {
1446  if(this->subSetType == QueryStruct::typeXPath) {
1447  resultTo = this->queriesXPath.at(query.index).getBool(
1448  this->xPathSubSets.at(this->subSetCurrent - 1)
1449  );
1450  }
1451  else {
1452  resultTo = this->queriesXPath.at(query.index).getBool(
1453  this->subSetParsedXML
1454  );
1455  }
1456 
1457  return true;
1458  }
1459  catch(const XPathException& e) {
1460  warningsTo.emplace(
1461  "WARNING: XPath error - "
1462  + std::string(e.view())
1463  + " ["
1464  + *(this->queryTargetSourcePtr)
1465  + "]."
1466  );
1467  }
1468  }
1469 
1470  break;
1471 
1473  // parse current subset as JSON using RapidJSON if still necessary
1474  if(this->parseSubSetJsonRapid(warningsTo)) {
1475  // get boolean result from a JSONPointer query on the current subset
1476  try {
1477  if(this->subSetType == QueryStruct::typeJsonPointer) {
1478  resultTo = this->queriesJsonPointer.at(query.index).getBool(
1479  this->jsonPointerSubSets.at(this->subSetCurrent - 1)
1480  );
1481  }
1482  else {
1483  resultTo = this->queriesJsonPointer.at(query.index).getBool(
1484  this->subSetParsedJsonRapid
1485  );
1486  }
1487 
1488  return true;
1489  }
1490  catch(const JsonPointerException& e) {
1491  warningsTo.emplace(
1492  "WARNING: JSONPointer error - "
1493  + std::string(e.view())
1494  + " ["
1495  + *(this->queryTargetSourcePtr)
1496  + "]."
1497  );
1498  }
1499  }
1500 
1501  break;
1502 
1504  // parse current subset as JSON using jsoncons if still necessary
1505  if(this->parseSubSetJsonRapid(warningsTo)) {
1506  // get boolean result from a JSONPath query on the current subset
1507  try {
1508  if(this->subSetType == QueryStruct::typeJsonPath) {
1509  resultTo = this->queriesJsonPath.at(query.index).getBool(
1510  this->jsonPathSubSets.at(this->subSetCurrent - 1)
1511  );
1512  }
1513  else {
1514  resultTo = this->queriesJsonPath.at(query.index).getBool(
1515  this->subSetParsedJsonCons
1516  );
1517  }
1518 
1519  return true;
1520  }
1521  catch(const JsonPathException& e) {
1522  warningsTo.emplace(
1523  "WARNING: JSONPath error - "
1524  + std::string(e.view())
1525  + " ["
1526  + *(this->queryTargetSourcePtr)
1527  + "]."
1528  );
1529  }
1530  }
1531 
1532  break;
1533 
1535  // parse current subset as HTML/XML if still necessary
1536  if(this->parseSubSetXml(warningsTo)) {
1537  // get first result from the XPath query on the current subset
1538  try {
1539  std::string json;
1540 
1541  if(this->subSetType == QueryStruct::typeXPath) {
1542  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
1543  this->xPathSubSets.at(this->subSetCurrent - 1),
1544  json
1545  );
1546  }
1547  else {
1548  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
1549  this->subSetParsedXML,
1550  json
1551  );
1552  }
1553 
1554  if(json.empty()) {
1555  resultTo = false;
1556  }
1557  else {
1558  // temporarily parse JSON using rapidJSON
1559  const auto parsedJson(Helper::Json::parseRapid(json));
1560 
1561  // get boolean result from the JSONPointer query
1562  resultTo = this->queriesXPathJsonPointer.at(query.index).second.getBool(
1563  parsedJson
1564  );
1565  }
1566 
1567  return true;
1568  }
1569  catch(const XPathException& e) {
1570  warningsTo.emplace(
1571  "WARNING: XPath error - "
1572  + std::string(e.view())
1573  + " ["
1574  + *(this->queryTargetSourcePtr)
1575  + "]."
1576  );
1577  }
1578  catch(const JsonPointerException& e) {
1579  warningsTo.emplace(
1580  "WARNING: JSONPointer error - "
1581  + std::string(e.view())
1582  + " ["
1583  + *(this->queryTargetSourcePtr)
1584  + "]."
1585  );
1586  }
1587  catch(const JsonException& e) {
1588  warningsTo.emplace(
1589  "WARNING: JSONPath error - "
1590  + std::string(e.view())
1591  + " ["
1592  + *(this->queryTargetSourcePtr)
1593  + "]."
1594  );
1595  }
1596  }
1597 
1598  break;
1599 
1601  // parse current subset as HTML/XML if still necessary
1602  if(this->parseSubSetXml(warningsTo)) {
1603  // get first result from the XPath query on the current subset
1604  try {
1605  std::string json;
1606 
1607  if(this->subSetType == QueryStruct::typeXPath) {
1608  this->queriesXPathJsonPath.at(query.index).first.getFirst(
1609  this->xPathSubSets.at(this->subSetCurrent - 1),
1610  json
1611  );
1612  }
1613  else {
1614  this->queriesXPathJsonPath.at(query.index).first.getFirst(
1615  this->subSetParsedXML,
1616  json
1617  );
1618  }
1619 
1620  if(json.empty()) {
1621  resultTo = false;
1622  }
1623  else {
1624  // temporarily parse JSON using jsoncons
1625  const auto parsedJson(Helper::Json::parseCons(json));
1626 
1627  // get boolean result from the JSONPath query
1628  resultTo = this->queriesXPathJsonPath.at(query.index).second.getBool(
1629  parsedJson
1630  );
1631  }
1632 
1633  return true;
1634  }
1635  catch(const XPathException& e) {
1636  warningsTo.emplace(
1637  "WARNING: XPath error - "
1638  + std::string(e.view())
1639  + " ["
1640  + *(this->queryTargetSourcePtr)
1641  + "]."
1642  );
1643  }
1644  catch(const JsonPathException& e) {
1645  warningsTo.emplace(
1646  "WARNING: JSONPath error - "
1647  + std::string(e.view())
1648  + " ["
1649  + *(this->queryTargetSourcePtr)
1650  + "]."
1651  );
1652  }
1653  catch(const JsonException& e) {
1654  warningsTo.emplace(
1655  "WARNING: JSONPath error - "
1656  + std::string(e.view())
1657  + " ["
1658  + *(this->queryTargetSourcePtr)
1659  + "]."
1660  );
1661  }
1662  }
1663 
1664  break;
1665 
1666  case QueryStruct::typeNone:
1667  break;
1668 
1669  default:
1670  throw Exception(
1671  "Query::Container::getBoolFromQueryOnSubSet():"
1672  " Unknown query type"
1673  );
1674  }
1675 
1676  return false;
1677  }
1678 
1680 
1700  const QueryStruct& query,
1701  std::string& resultTo,
1702  std::queue<std::string>& warningsTo
1703  ) {
1704  // check pointers
1705  if(this->queryTargetPtr == nullptr) {
1706  throw Exception(
1707  "Query::Container::getSingleFromQuery():"
1708  " No content has been specified"
1709  );
1710  }
1711 
1712  if(this->queryTargetSourcePtr == nullptr) {
1713  throw Exception(
1714  "Query::Container::getSingleFromQuery():"
1715  " No content source has been specified"
1716  );
1717  }
1718 
1719  // check result type
1720  if(query.type != QueryStruct::typeNone && !query.resultSingle) {
1721  warningsTo.emplace(
1722  "WARNING: Query has invalid result type - not single."
1723  );
1724 
1725  return false;
1726  }
1727 
1728  // check query target
1729  if(this->queryTargetPtr->empty()) {
1730  resultTo = "";
1731 
1732  return true;
1733  }
1734 
1735  switch(query.type) {
1737  // get single result from a RegEx query
1738  try {
1739  this->queriesRegEx.at(query.index).getFirst(
1740  *(this->queryTargetPtr),
1741  resultTo
1742  );
1743 
1744  return true;
1745  }
1746  catch(const RegExException& e) {
1747  warningsTo.emplace(
1748  "WARNING: RegEx error - "
1749  + std::string(e.view())
1750  + " ["
1751  + *(this->queryTargetSourcePtr)
1752  + "]."
1753  );
1754  }
1755 
1756  break;
1757 
1759  // parse content as HTML/XML if still necessary
1760  if(this->parseXml(warningsTo)) {
1761  // get single result from a XPath query
1762  try {
1763  this->queriesXPath.at(query.index).getFirst(
1764  this->parsedXML,
1765  resultTo
1766  );
1767 
1768  return true;
1769  }
1770  catch(const XPathException& e) {
1771  warningsTo.emplace(
1772  "WARNING: XPath error - "
1773  + std::string(e.view())
1774  + " ["
1775  + *(this->queryTargetSourcePtr)
1776  + "]."
1777  );
1778  }
1779  }
1780 
1781  break;
1782 
1784  // parse content as JSON using RapidJSON if still necessary
1785  if(this->parseJsonRapid(warningsTo)) {
1786  // get single result from a JSONPointer query
1787  try {
1788  this->queriesJsonPointer.at(query.index).getFirst(
1789  this->parsedJsonRapid,
1790  resultTo
1791  );
1792 
1793  return true;
1794  }
1795  catch(const JsonPointerException& e) {
1796  warningsTo.emplace(
1797  "WARNING: JSONPointer error - "
1798  + std::string(e.view())
1799  + " ["
1800  + *(this->queryTargetSourcePtr)
1801  + "]."
1802  );
1803  }
1804  }
1805 
1806  break;
1807 
1809  // parse content as JSON using jsoncons if still necessary
1810  if(this->parseJsonCons(warningsTo)) {
1811  // get single result from a JSONPath query
1812  try {
1813  this->queriesJsonPath.at(query.index).getFirst(
1814  this->parsedJsonCons,
1815  resultTo
1816  );
1817 
1818  return true;
1819  }
1820  catch(const JsonPathException& e) {
1821  warningsTo.emplace(
1822  "WARNING: JSONPath error - "
1823  + std::string(e.view())
1824  + " ["
1825  + *(this->queryTargetSourcePtr)
1826  + "]."
1827  );
1828  }
1829  }
1830 
1831  break;
1832 
1834  // parse content as HTML/XML if still necessary
1835  if(this->parseXml(warningsTo)) {
1836  // get first result from the XPath query
1837  try {
1838  std::string json;
1839 
1840  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
1841  this->parsedXML,
1842  json
1843  );
1844 
1845  if(json.empty()) {
1846  resultTo = "";
1847  }
1848  else {
1849  // temporarily parse JSON using rapidJSON
1850  const auto parsedJson(Helper::Json::parseRapid(json));
1851 
1852  // get single result from the JSONPointer query
1853  this->queriesXPathJsonPointer.at(query.index).second.getFirst(
1854  parsedJson,
1855  resultTo
1856  );
1857  }
1858  }
1859  catch(const XPathException& e) {
1860  warningsTo.emplace(
1861  "WARNING: XPath error - "
1862  + std::string(e.view())
1863  + " ["
1864  + *(this->queryTargetSourcePtr)
1865  + "]."
1866  );
1867  }
1868  catch(const JsonPointerException& e) {
1869  warningsTo.emplace(
1870  "WARNING: JSONPointer error - "
1871  + std::string(e.view())
1872  + " ["
1873  + *(this->queryTargetSourcePtr)
1874  + "]."
1875  );
1876  }
1877  catch(const JsonException& e) {
1878  warningsTo.emplace(
1879  "WARNING: JSONPath error - "
1880  + std::string(e.view())
1881  + " ["
1882  + *(this->queryTargetSourcePtr)
1883  + "]."
1884  );
1885  }
1886  }
1887 
1888  break;
1889 
1891  // parse content as HTML/XML if still necessary
1892  if(this->parseXml(warningsTo)) {
1893  // get first result from the XPath query
1894  try {
1895  std::string json;
1896 
1897  this->queriesXPathJsonPath.at(query.index).first.getFirst(
1898  this->parsedXML,
1899  json
1900  );
1901 
1902  if(json.empty()) {
1903  resultTo = "";
1904 
1905  return true;
1906  }
1907 
1908  try {
1909  // temporarily parse JSON using jsoncons
1910  const auto parsedJson(Helper::Json::parseCons(json));
1911 
1912  // get single result from the JSONPath query
1913  this->queriesXPathJsonPath.at(query.index).second.getFirst(
1914  parsedJson,
1915  resultTo
1916  );
1917 
1918  return true;
1919  }
1920  catch(const JsonException& e) {
1921  warningsTo.emplace(
1922  "WARNING: JSONPath error - "
1923  + std::string(e.view())
1924  + " ["
1925  + *(this->queryTargetSourcePtr)
1926  + "]."
1927  );
1928  }
1929  }
1930  catch(const XPathException& e) {
1931  warningsTo.emplace(
1932  "WARNING: XPath error - "
1933  + std::string(e.view())
1934  + " ["
1935  + *(this->queryTargetSourcePtr)
1936  + "]."
1937  );
1938  }
1939  catch(const JsonPathException& e) {
1940  warningsTo.emplace(
1941  "WARNING: JSONPath error - "
1942  + std::string(e.view())
1943  + " ["
1944  + *(this->queryTargetSourcePtr)
1945  + "]."
1946  );
1947  }
1948  catch(const JsonException& e) {
1949  warningsTo.emplace(
1950  "WARNING: JSONPath error - "
1951  + std::string(e.view())
1952  + " ["
1953  + *(this->queryTargetSourcePtr)
1954  + "]."
1955  );
1956  }
1957  }
1958 
1959  break;
1960 
1961  case QueryStruct::typeNone:
1962  break;
1963 
1964  default:
1965  throw Exception("Query::Container::getSingleFromQuery(): Unknown query type");
1966  }
1967 
1968  return false;
1969  }
1970 
1972 
1993  const QueryStruct& query,
1994  std::string& resultTo,
1995  std::queue<std::string>& warningsTo
1996  ) {
1997  // check pointer
1998  if(this->queryTargetSourcePtr == nullptr) {
1999  throw Exception(
2000  "Query::Container::getSingleFromQueryOnSubSet():"
2001  " No content source has been specified"
2002  );
2003  }
2004 
2005  // check current subset
2006  if(this->subSetCurrent == 0) {
2007  throw Exception(
2008  "Query::Container::getSingleFromQueryOnSubSet():"
2009  " No subset has been specified"
2010  );
2011  }
2012 
2013  if(this->subSetCurrent > this->subSetNumber) {
2014  throw Exception(
2015  "Query::Container::getSingleFromQueryOnSubSet():"
2016  " Invalid subset has been specified"
2017  );
2018  }
2019 
2020  // check result type
2021  if(
2022  query.type != QueryStruct::typeNone
2023  && !query.resultSingle
2024  ) {
2025  warningsTo.emplace(
2026  "WARNING: Query has invalid result type - not single."
2027  );
2028 
2029  return false;
2030  }
2031 
2032  switch(query.type) {
2034  // get single result from a RegEx query on the current subset
2035  try {
2036  if(this->subSetType != QueryStruct::typeRegEx) {
2037  this->stringifySubSets(warningsTo);
2038  }
2039 
2040  this->queriesRegEx.at(query.index).getFirst(
2041  this->stringifiedSubSets.at(this->subSetCurrent - 1),
2042  resultTo
2043  );
2044 
2045  return true;
2046  }
2047  catch(const RegExException& e) {
2048  warningsTo.emplace(
2049  "WARNING: RegEx error - "
2050  + std::string(e.view())
2051  + " ["
2052  + *(this->queryTargetSourcePtr)
2053  + "]."
2054  );
2055  }
2056 
2057  break;
2058 
2060  // parse current subset as HTML/XML if still necessary
2061  if(this->parseSubSetXml(warningsTo)) {
2062  // get single result from a XPath query on the current subset
2063  try {
2064  if(this->subSetType == QueryStruct::typeXPath) {
2065  this->queriesXPath.at(query.index).getFirst(
2066  this->xPathSubSets.at(this->subSetCurrent - 1),
2067  resultTo
2068  );
2069  }
2070  else {
2071  this->queriesXPath.at(query.index).getFirst(
2072  this->subSetParsedXML,
2073  resultTo
2074  );
2075  }
2076 
2077  return true;
2078  }
2079  catch(const XPathException& e) {
2080  warningsTo.emplace(
2081  "WARNING: XPath error - "
2082  + std::string(e.view())
2083  + " ["
2084  + *(this->queryTargetSourcePtr)
2085  + "]."
2086  );
2087  }
2088  }
2089 
2090  break;
2091 
2093  // parse current subset as JSON using RapidJSON if still necessary
2094  if(this->parseSubSetJsonRapid(warningsTo)) {
2095  // get single result from a JSONPointer query on the current subset
2096  try {
2097  if(this->subSetType == QueryStruct::typeJsonPointer) {
2098  this->queriesJsonPointer.at(query.index).getFirst(
2099  this->jsonPointerSubSets.at(this->subSetCurrent - 1),
2100  resultTo
2101  );
2102  }
2103  else {
2104  this->queriesJsonPointer.at(query.index).getFirst(
2105  this->subSetParsedJsonRapid,
2106  resultTo
2107  );
2108  }
2109 
2110  return true;
2111  }
2112  catch(const JsonPointerException& e) {
2113  warningsTo.emplace(
2114  "WARNING: JSONPointer error - "
2115  + std::string(e.view())
2116  + " ["
2117  + *(this->queryTargetSourcePtr)
2118  + "]."
2119  );
2120  }
2121  }
2122 
2123  break;
2124 
2126  // parse current subset as JSON using jsoncons if still necessary
2127  if(this->parseSubSetJsonRapid(warningsTo)) {
2128  // get single result from a JSONPath query on the current subset
2129  try {
2130  if(this->subSetType == QueryStruct::typeJsonPath) {
2131  this->queriesJsonPath.at(query.index).getFirst(
2132  this->jsonPathSubSets.at(this->subSetCurrent - 1),
2133  resultTo
2134  );
2135  }
2136  else {
2137  this->queriesJsonPath.at(query.index).getFirst(
2138  this->subSetParsedJsonCons,
2139  resultTo
2140  );
2141  }
2142 
2143  return true;
2144  }
2145  catch(const JsonPathException& e) {
2146  warningsTo.emplace(
2147  "WARNING: JSONPath error - "
2148  + std::string(e.view())
2149  + " ["
2150  + *(this->queryTargetSourcePtr)
2151  + "]."
2152  );
2153  }
2154  }
2155 
2156  break;
2157 
2159  // parse current subset as HTML/XML if still necessary
2160  if(this->parseSubSetXml(warningsTo)) {
2161  // get first result from the XPath query on the current subset
2162  try {
2163  std::string json;
2164 
2165  if(this->subSetType == QueryStruct::typeXPath) {
2166  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
2167  this->xPathSubSets.at(this->subSetCurrent - 1),
2168  json
2169  );
2170  }
2171  else {
2172  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
2173  this->subSetParsedXML,
2174  json
2175  );
2176  }
2177 
2178  if(json.empty()) {
2179  resultTo = "";
2180  }
2181  else {
2182  // temporarily parse JSON using rapidJSON
2183  const auto parsedJson(Helper::Json::parseRapid(json));
2184 
2185  // get single result from the JSONPointer query
2186  this->queriesXPathJsonPointer.at(query.index).second.getFirst(
2187  parsedJson,
2188  resultTo
2189  );
2190  }
2191 
2192  return true;
2193  }
2194  catch(const XPathException& e) {
2195  warningsTo.emplace(
2196  "WARNING: XPath error - "
2197  + std::string(e.view())
2198  + " ["
2199  + *(this->queryTargetSourcePtr)
2200  + "]."
2201  );
2202  }
2203  catch(const JsonPointerException& e) {
2204  warningsTo.emplace(
2205  "WARNING: JSONPointer error - "
2206  + std::string(e.view())
2207  + " ["
2208  + *(this->queryTargetSourcePtr)
2209  + "]."
2210  );
2211  }
2212  catch(const JsonException& e) {
2213  warningsTo.emplace(
2214  "WARNING: JSONPath error - "
2215  + std::string(e.view())
2216  + " ["
2217  + *(this->queryTargetSourcePtr)
2218  + "]."
2219  );
2220  }
2221  }
2222 
2223  break;
2224 
2226  // parse current subset as HTML/XML if still necessary
2227  if(this->parseSubSetXml(warningsTo)) {
2228  // get first result from the XPath query on the current subset
2229  try {
2230  std::string json;
2231 
2232  if(this->subSetType == QueryStruct::typeXPath) {
2233  this->queriesXPathJsonPath.at(query.index).first.getFirst(
2234  this->xPathSubSets.at(this->subSetCurrent - 1),
2235  json
2236  );
2237  }
2238  else {
2239  this->queriesXPathJsonPath.at(query.index).first.getFirst(
2240  this->subSetParsedXML,
2241  json
2242  );
2243  }
2244 
2245  if(json.empty()) {
2246  resultTo = "";
2247  }
2248  else {
2249  // temporarily parse JSON using jsoncons
2250  const auto parsedJson(Helper::Json::parseCons(json));
2251 
2252  // get single result from the JSONPath query
2253  this->queriesXPathJsonPath.at(query.index).second.getFirst(
2254  parsedJson,
2255  resultTo
2256  );
2257  }
2258 
2259  return true;
2260  }
2261  catch(const XPathException& e) {
2262  warningsTo.emplace(
2263  "WARNING: XPath error - "
2264  + std::string(e.view())
2265  + " ["
2266  + *(this->queryTargetSourcePtr)
2267  + "]."
2268  );
2269  }
2270  catch(const JsonPathException& e) {
2271  warningsTo.emplace(
2272  "WARNING: JSONPath error - "
2273  + std::string(e.view())
2274  + " ["
2275  + *(this->queryTargetSourcePtr)
2276  + "]."
2277  );
2278  }
2279  catch(const JsonException& e) {
2280  warningsTo.emplace(
2281  "WARNING: JSONPath error - "
2282  + std::string(e.view())
2283  + " ["
2284  + *(this->queryTargetSourcePtr)
2285  + "]."
2286  );
2287  }
2288  }
2289 
2290  break;
2291 
2292  case QueryStruct::typeNone:
2293  break;
2294 
2295  default:
2296  throw Exception(
2297  "Query::Container::getSingleFromQueryOnSubSet():"
2298  " Unknown query type"
2299  );
2300  }
2301 
2302  return false;
2303  }
2304 
2306 
2326  const QueryStruct& query,
2327  std::vector<std::string>& resultTo,
2328  std::queue<std::string>& warningsTo
2329  ) {
2330  // check pointers
2331  if(this->queryTargetPtr == nullptr) {
2332  throw Exception(
2333  "Query::Container::getMultiFromQuery():"
2334  " No content has been specified"
2335  );
2336  }
2337 
2338  if(this->queryTargetSourcePtr == nullptr) {
2339  throw Exception(
2340  "Query::Container::getMultiFromQuery():"
2341  " No content source has been specified"
2342  );
2343  }
2344 
2345  // check result type
2346  if(query.type != QueryStruct::typeNone && !query.resultMulti) {
2347  warningsTo.emplace(
2348  "WARNING: Query has invalid result type - not multi."
2349  );
2350 
2351  return false;
2352  }
2353 
2354  // check query target
2355  if(this->queryTargetPtr->empty()) {
2356  resultTo.clear();
2357 
2358  return true;
2359  }
2360 
2361  switch(query.type) {
2363  // get multiple results from a RegEx query
2364  try {
2365  this->queriesRegEx.at(query.index).getAll(
2366  *(this->queryTargetPtr),
2367  resultTo
2368  );
2369 
2370  return true;
2371  }
2372  catch(const RegExException& e) {
2373  warningsTo.emplace(
2374  "WARNING: RegEx error - "
2375  + std::string(e.view())
2376  + " ["
2377  + *(this->queryTargetSourcePtr)
2378  + "]."
2379  );
2380  }
2381 
2382  break;
2383 
2385  // parse content as HTML/XML if still necessary
2386  if(this->parseXml(warningsTo)) {
2387  // get multiple results from a XPath query
2388  try {
2389  this->queriesXPath.at(query.index).getAll(this->parsedXML, resultTo);
2390 
2391  return true;
2392  }
2393  catch(const XPathException& e) {
2394  warningsTo.emplace(
2395  "WARNING: XPath error - "
2396  + std::string(e.view())
2397  + " ["
2398  + *(this->queryTargetSourcePtr)
2399  + "]."
2400  );
2401  }
2402  }
2403 
2404  break;
2405 
2407  // parse content as JSON using RapidJSON if still necessary
2408  if(this->parseJsonRapid(warningsTo)) {
2409  // get multiple results from a JSONPointer query
2410  try {
2411  this->queriesJsonPointer.at(query.index).getAll(this->parsedJsonRapid, resultTo);
2412 
2413  return true;
2414  }
2415  catch(const JsonPointerException& e) {
2416  warningsTo.emplace(
2417  "WARNING: JSONPointer error - "
2418  + std::string(e.view())
2419  + " ["
2420  + *(this->queryTargetSourcePtr)
2421  + "]."
2422  );
2423  }
2424  }
2425 
2426  break;
2427 
2429  // parse content as JSON using jsoncons if still necessary
2430  if(this->parseJsonCons(warningsTo)) {
2431  // get multiple results from a JSONPath query
2432  try {
2433  this->queriesJsonPath.at(query.index).getAll(
2434  this->parsedJsonCons,
2435  resultTo
2436  );
2437 
2438  return true;
2439  }
2440  catch(const JsonPathException& e) {
2441  warningsTo.emplace(
2442  "WARNING: JSONPath error - "
2443  + std::string(e.view())
2444  + " ["
2445  + *(this->queryTargetSourcePtr)
2446  + "]."
2447  );
2448  }
2449  }
2450 
2451  break;
2452 
2454  // parse content as HTML/XML if still necessary
2455  if(this->parseXml(warningsTo)) {
2456  // get first result from the XPath query
2457  try {
2458  std::string json;
2459 
2460  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
2461  this->parsedXML,
2462  json
2463  );
2464 
2465  if(json.empty()) {
2466  resultTo.clear();
2467  }
2468  else {
2469  // temporarily parse JSON using rapidJSON
2470  const auto parsedJson(Helper::Json::parseRapid(json));
2471 
2472  // get multiple results from the JSONPointer query
2473  this->queriesXPathJsonPointer.at(query.index).second.getAll(
2474  parsedJson,
2475  resultTo
2476  );
2477  }
2478 
2479  return true;
2480  }
2481  catch(const XPathException& e) {
2482  warningsTo.emplace(
2483  "WARNING: XPath error - "
2484  + std::string(e.view())
2485  + " ["
2486  + *(this->queryTargetSourcePtr)
2487  + "]."
2488  );
2489  }
2490  catch(const JsonPointerException& e) {
2491  warningsTo.emplace(
2492  "WARNING: JSONPointer error - "
2493  + std::string(e.view())
2494  + " ["
2495  + *(this->queryTargetSourcePtr)
2496  + "]."
2497  );
2498  }
2499  catch(const JsonException& e) {
2500  warningsTo.emplace(
2501  "WARNING: JSONPath error - "
2502  + std::string(e.view())
2503  + " ["
2504  + *(this->queryTargetSourcePtr)
2505  + "]."
2506  );
2507  }
2508  }
2509 
2510  break;
2511 
2513  // parse content as HTML/XML if still necessary
2514  if(this->parseXml(warningsTo)) {
2515  // get first result from the XPath query
2516  try {
2517  std::string json;
2518 
2519  this->queriesXPathJsonPath.at(query.index).first.getFirst(
2520  this->parsedXML,
2521  json
2522  );
2523 
2524  if(json.empty()) {
2525  resultTo.clear();
2526  }
2527  else {
2528  // temporarily parse JSON using jsoncons
2529  const auto parsedJson(Helper::Json::parseCons(json));
2530 
2531  // get multiple results from the JSONPath query
2532  this->queriesXPathJsonPath.at(query.index).second.getAll(
2533  parsedJson,
2534  resultTo
2535  );
2536  }
2537 
2538  return true;
2539  }
2540  catch(const XPathException& e) {
2541  warningsTo.emplace(
2542  "WARNING: XPath error - "
2543  + std::string(e.view())
2544  + " ["
2545  + *(this->queryTargetSourcePtr)
2546  + "]."
2547  );
2548  }
2549  catch(const JsonPathException& e) {
2550  warningsTo.emplace(
2551  "WARNING: JSONPath error - "
2552  + std::string(e.view())
2553  + " ["
2554  + *(this->queryTargetSourcePtr)
2555  + "]."
2556  );
2557  }
2558  catch(const JsonException& e) {
2559  warningsTo.emplace(
2560  "WARNING: JSONPath error - "
2561  + std::string(e.view())
2562  + " ["
2563  + *(this->queryTargetSourcePtr)
2564  + "]."
2565  );
2566  }
2567  }
2568 
2569  break;
2570 
2571  case QueryStruct::typeNone:
2572  break;
2573 
2574  default:
2575  throw Exception(
2576  "Query::Container::getMultiFromQuery():"
2577  " Unknown query type"
2578  );
2579  }
2580 
2581  return false;
2582  }
2583 
2585 
2606  const QueryStruct& query,
2607  std::vector<std::string>& resultTo,
2608  std::queue<std::string>& warningsTo
2609  ) {
2610  // check pointer
2611  if(this->queryTargetSourcePtr == nullptr) {
2612  throw Exception(
2613  "Query::Container::getMultiFromQueryOnSubSet():"
2614  " No content source has been specified"
2615  );
2616  }
2617 
2618  // check current subset
2619  if(this->subSetCurrent == 0) {
2620  throw Exception(
2621  "Query::Container::getMultiFromQueryOnSubSet():"
2622  " No subset has been specified"
2623  );
2624  }
2625 
2626  if(this->subSetCurrent > this->subSetNumber) {
2627  throw Exception(
2628  "Query::Container::getMultiFromQueryOnSubSet():"
2629  " Invalid subset has been specified"
2630  );
2631  }
2632 
2633  // check result type
2634  if(
2635  query.type != QueryStruct::typeNone
2636  && !query.resultMulti
2637  ) {
2638  warningsTo.emplace(
2639  "WARNING: Query has invalid result type - not multi."
2640  );
2641 
2642  return false;
2643  }
2644 
2645  switch(query.type) {
2647  // get multiple result from a RegEx query on the current subset
2648  try {
2649  if(this->subSetType != QueryStruct::typeRegEx) {
2650  this->stringifySubSets(warningsTo);
2651  }
2652 
2653  this->queriesRegEx.at(query.index).getAll(
2654  this->stringifiedSubSets.at(this->subSetCurrent - 1),
2655  resultTo
2656  );
2657 
2658  return true;
2659  }
2660  catch(const RegExException& e) {
2661  warningsTo.emplace(
2662  "WARNING: RegEx error - "
2663  + std::string(e.view())
2664  + " ["
2665  + *(this->queryTargetSourcePtr)
2666  + "]."
2667  );
2668  }
2669 
2670  break;
2671 
2673  // parse current subset as HTML/XML if still necessary
2674  if(this->parseSubSetXml(warningsTo)) {
2675  // get multiple results from a XPath query on the current subset
2676  try {
2677  if(this->subSetType == QueryStruct::typeXPath) {
2678  this->queriesXPath.at(query.index).getAll(
2679  this->xPathSubSets.at(this->subSetCurrent - 1),
2680  resultTo
2681  );
2682  }
2683  else {
2684  this->queriesXPath.at(query.index).getAll(
2685  this->subSetParsedXML,
2686  resultTo
2687  );
2688  }
2689 
2690  return true;
2691  }
2692  catch(const XPathException& e) {
2693  warningsTo.emplace(
2694  "WARNING: XPath error - "
2695  + std::string(e.view())
2696  + " ["
2697  + *(this->queryTargetSourcePtr)
2698  + "]."
2699  );
2700  }
2701  }
2702 
2703  break;
2704 
2706  // parse current subset as JSON using RapidJSON if still necessary
2707  if(this->parseSubSetJsonRapid(warningsTo)) {
2708  // get multiple results from a JSONPointer query on the current subset
2709  try {
2710  if(this->subSetType == QueryStruct::typeJsonPointer) {
2711  this->queriesJsonPointer.at(query.index).getAll(
2712  this->jsonPointerSubSets.at(this->subSetCurrent - 1),
2713  resultTo
2714  );
2715  }
2716  else {
2717  this->queriesJsonPointer.at(query.index).getAll(
2718  this->subSetParsedJsonRapid,
2719  resultTo
2720  );
2721  }
2722 
2723  return true;
2724  }
2725  catch(const JsonPointerException& e) {
2726  warningsTo.emplace(
2727  "WARNING: JSONPointer error - "
2728  + std::string(e.view())
2729  + " ["
2730  + *(this->queryTargetSourcePtr)
2731  + "]."
2732  );
2733  }
2734  }
2735 
2736  break;
2737 
2739  // parse current subset as JSON using jsoncons if still necessary
2740  if(this->parseSubSetJsonRapid(warningsTo)) {
2741  // get multiple results from a JSONPath query on the current subset
2742  try {
2743  if(this->subSetType == QueryStruct::typeJsonPath) {
2744  this->queriesJsonPath.at(query.index).getAll(
2745  this->jsonPathSubSets.at(this->subSetCurrent - 1),
2746  resultTo
2747  );
2748  }
2749  else {
2750  this->queriesJsonPath.at(query.index).getAll(
2751  this->subSetParsedJsonCons,
2752  resultTo
2753  );
2754  }
2755 
2756  return true;
2757  }
2758  catch(const JsonPathException& e) {
2759  warningsTo.emplace(
2760  "WARNING: JSONPath error - "
2761  + std::string(e.view())
2762  + " ["
2763  + *(this->queryTargetSourcePtr)
2764  + "]."
2765  );
2766  }
2767  }
2768 
2769  break;
2770 
2772  // parse current subset as HTML/XML if still necessary
2773  if(this->parseSubSetXml(warningsTo)) {
2774  // get first result from the XPath query on the current subset
2775  try {
2776  std::string json;
2777 
2778  if(this->subSetType == QueryStruct::typeXPath) {
2779  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
2780  this->xPathSubSets.at(this->subSetCurrent - 1),
2781  json
2782  );
2783  }
2784  else {
2785  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
2786  this->subSetParsedXML,
2787  json
2788  );
2789  }
2790 
2791  if(json.empty()) {
2792  resultTo.clear();
2793  }
2794  else {
2795  // temporarily parse JSON using rapidJSON
2796  const auto parsedJson(Helper::Json::parseRapid(json));
2797 
2798  // get multiple results from the JSONPointer query
2799  this->queriesXPathJsonPointer.at(query.index).second.getAll(
2800  parsedJson,
2801  resultTo
2802  );
2803  }
2804 
2805  return true;
2806  }
2807  catch(const XPathException& e) {
2808  warningsTo.emplace(
2809  "WARNING: XPath error - "
2810  + std::string(e.view())
2811  + " ["
2812  + *(this->queryTargetSourcePtr)
2813  + "]."
2814  );
2815  }
2816  catch(const JsonPointerException& e) {
2817  warningsTo.emplace(
2818  "WARNING: JSONPointer error - "
2819  + std::string(e.view())
2820  + " ["
2821  + *(this->queryTargetSourcePtr)
2822  + "]."
2823  );
2824  }
2825  catch(const JsonException& e) {
2826  warningsTo.emplace(
2827  "WARNING: JSONPath error - "
2828  + std::string(e.view())
2829  + " ["
2830  + *(this->queryTargetSourcePtr)
2831  + "]."
2832  );
2833  }
2834  }
2835 
2836  break;
2837 
2839  // parse current subset as HTML/XML if still necessary
2840  if(this->parseSubSetXml(warningsTo)) {
2841  // get first result from the XPath query on the current subset
2842  try {
2843  std::string json;
2844 
2845  if(this->subSetType == QueryStruct::typeXPath) {
2846  this->queriesXPathJsonPath.at(query.index).first.getFirst(
2847  this->xPathSubSets.at(this->subSetCurrent - 1),
2848  json
2849  );
2850  }
2851  else {
2852  this->queriesXPathJsonPath.at(query.index).first.getFirst(
2853  this->subSetParsedXML,
2854  json
2855  );
2856  }
2857 
2858  if(json.empty()) {
2859  resultTo.clear();
2860  }
2861  else {
2862  // temporarily parse JSON using jsoncons
2863  const auto parsedJson(Helper::Json::parseCons(json));
2864 
2865  // get multiple results from the JSONPath query
2866  this->queriesXPathJsonPath.at(query.index).second.getAll(
2867  parsedJson,
2868  resultTo
2869  );
2870  }
2871 
2872  return true;
2873  }
2874  catch(const XPathException& e) {
2875  warningsTo.emplace(
2876  "WARNING: XPath error - "
2877  + std::string(e.view())
2878  + " ["
2879  + *(this->queryTargetSourcePtr)
2880  + "]."
2881  );
2882  }
2883  catch(const JsonPathException& e) {
2884  warningsTo.emplace(
2885  "WARNING: JSONPath error - "
2886  + std::string(e.view())
2887  + " ["
2888  + *(this->queryTargetSourcePtr)
2889  + "]."
2890  );
2891  }
2892  catch(const JsonException& e) {
2893  warningsTo.emplace(
2894  "WARNING: JSONPath error - "
2895  + std::string(e.view())
2896  + " ["
2897  + *(this->queryTargetSourcePtr)
2898  + "]."
2899  );
2900  }
2901  }
2902 
2903  break;
2904 
2905  case QueryStruct::typeNone:
2906  break;
2907 
2908  default:
2909  throw Exception(
2910  "Query::Container::getMultiFromQueryOnSubSet():"
2911  " Unknown query type"
2912  );
2913  }
2914 
2915  return false;
2916  }
2917 
2919 
2941  const QueryStruct& query,
2942  std::queue<std::string>& warningsTo
2943  ) {
2944  // clear old subsets
2945  this->clearSubSets();
2946 
2947  // set new subset type
2948  switch(query.type) {
2950  this->subSetType = QueryStruct::typeXPath;
2951 
2952  break;
2953 
2956  this->subSetType = QueryStruct::typeJsonPointer;
2957 
2958  break;
2959 
2962  this->subSetType = QueryStruct::typeJsonPath;
2963 
2964  break;
2965 
2966  default:
2967  break;
2968  }
2969 
2970  // check pointers
2971  if(this->queryTargetPtr == nullptr) {
2972  throw Exception(
2973  "Query::Container::setSubSetsFromQuery():"
2974  " No content has been specified"
2975  );
2976  }
2977 
2978  if(this->queryTargetSourcePtr == nullptr) {
2979  throw Exception(
2980  "Query::Container::setSubSetsFromQuery():"
2981  " No content source has been specified"
2982  );
2983  }
2984 
2985  // check result type
2986  if(
2987  query.type != QueryStruct::typeNone
2988  && !query.resultSubSets
2989  ) {
2990  warningsTo.emplace(
2991  "WARNING: Query has invalid result type - not subsets."
2992  );
2993 
2994  return false;
2995  }
2996 
2997  // check query target
2998  if(this->queryTargetPtr->empty()) {
2999  return true;
3000  }
3001 
3002  switch(query.type) {
3004  // get subsets (i. e. all matches) from a RegEx query
3005  try {
3006  this->queriesRegEx.at(query.index).getAll(
3007  *(this->queryTargetPtr),
3008  this->stringifiedSubSets
3009  );
3010 
3011  this->subSetNumber = this->stringifiedSubSets.size();
3012 
3013  return true;
3014  }
3015  catch(const RegExException& e) {
3016  warningsTo.emplace(
3017  "WARNING: RegEx error - "
3018  + std::string(e.view())
3019  + " ["
3020  + *(this->queryTargetSourcePtr)
3021  + "]."
3022  );
3023  }
3024 
3025  break;
3026 
3028  // parse content as HTML/XML if still necessary
3029  if(this->parseXml(warningsTo)) {
3030  // get subsets from a XPath query
3031  try {
3032  this->queriesXPath.at(query.index).getSubSets(
3033  this->parsedXML,
3034  this->xPathSubSets
3035  );
3036 
3037  this->subSetNumber = this->xPathSubSets.size();
3038 
3039  return true;
3040  }
3041  catch(const XPathException& e) {
3042  warningsTo.emplace(
3043  "WARNING: XPath error - "
3044  + std::string(e.view())
3045  + " ["
3046  + *(this->queryTargetSourcePtr)
3047  + "]."
3048  );
3049  }
3050  }
3051 
3052  break;
3053 
3055  // parse content as JSON using RapidJSON if still necessary
3056  if(this->parseJsonRapid(warningsTo)) {
3057  // get subsets from a JSONPointer query
3058  try {
3059  this->queriesJsonPointer.at(query.index).getSubSets(
3060  this->parsedJsonRapid,
3061  this->jsonPointerSubSets
3062  );
3063 
3064  this->subSetNumber = this->jsonPointerSubSets.size();
3065 
3066  return true;
3067  }
3068  catch(const JsonPointerException& e) {
3069  warningsTo.emplace(
3070  "WARNING: JSONPointer error - "
3071  + std::string(e.view())
3072  + " ["
3073  + *(this->queryTargetSourcePtr)
3074  + "]."
3075  );
3076  }
3077  }
3078 
3079  break;
3080 
3082  // parse content as JSON using jsoncons if still necessary
3083  if(this->parseJsonCons(warningsTo)) {
3084  // get subsets from a JSONPath query
3085  try {
3086  this->queriesJsonPath.at(query.index).getSubSets(
3087  this->parsedJsonCons,
3088  this->jsonPathSubSets
3089  );
3090 
3091  this->subSetNumber = this->jsonPathSubSets.size();
3092 
3093  return true;
3094  }
3095  catch(const JsonPathException& e) {
3096  warningsTo.emplace(
3097  "WARNING: JSONPath error - "
3098  + std::string(e.view())
3099  + " ["
3100  + *(this->queryTargetSourcePtr)
3101  + "]."
3102  );
3103  }
3104  }
3105 
3106  break;
3107 
3109  // parse content as HTML/XML if still necessary
3110  if(this->parseXml(warningsTo)) {
3111  // get first result from the XPath query
3112  try {
3113  std::string json;
3114 
3115  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
3116  this->parsedXML,
3117  json
3118  );
3119 
3120  if(!json.empty()) {
3121  // temporarily parse JSON using rapidJSON
3122  const auto parsedJson(Helper::Json::parseRapid(json));
3123 
3124  // get subsets from the JSONPointer query
3125  this->queriesXPathJsonPointer.at(query.index).second.getSubSets(
3126  parsedJson,
3127  this->jsonPointerSubSets
3128  );
3129  }
3130 
3131  return true;
3132  }
3133  catch(const XPathException& e) {
3134  warningsTo.emplace(
3135  "WARNING: XPath error - "
3136  + std::string(e.view())
3137  + " ["
3138  + *(this->queryTargetSourcePtr)
3139  + "]."
3140  );
3141  }
3142  catch(const JsonPointerException& e) {
3143  warningsTo.emplace(
3144  "WARNING: JSONPointer error - "
3145  + std::string(e.view())
3146  + " ["
3147  + *(this->queryTargetSourcePtr)
3148  + "]."
3149  );
3150  }
3151  catch(const JsonException& e) {
3152  warningsTo.emplace(
3153  "WARNING: JSONPath error - "
3154  + std::string(e.view())
3155  + " ["
3156  + *(this->queryTargetSourcePtr)
3157  + "]."
3158  );
3159  }
3160  }
3161 
3162  break;
3163 
3165  // parse content as HTML/XML if still necessary
3166  if(this->parseXml(warningsTo)) {
3167  // get first result from the XPath query
3168  try {
3169  std::string json;
3170 
3171  this->queriesXPathJsonPath.at(query.index).first.getFirst(
3172  this->parsedXML,
3173  json
3174  );
3175 
3176  if(!json.empty()) {
3177  // temporarily parse JSON using jsoncons
3178  const auto parsedJson(Helper::Json::parseCons(json));
3179 
3180  // get subsets from the JSONPointer query
3181  this->queriesXPathJsonPath.at(query.index).second.getSubSets(
3182  parsedJson,
3183  this->jsonPathSubSets
3184  );
3185  }
3186 
3187  return true;
3188  }
3189  catch(const XPathException& e) {
3190  warningsTo.emplace(
3191  "WARNING: XPath error - "
3192  + std::string(e.view())
3193  + " ["
3194  + *(this->queryTargetSourcePtr)
3195  + "]."
3196  );
3197  }
3198  catch(const JsonPathException& e) {
3199  warningsTo.emplace(
3200  "WARNING: JSONPath error - "
3201  + std::string(e.view())
3202  + " ["
3203  + *(this->queryTargetSourcePtr)
3204  + "]."
3205  );
3206  }
3207  catch(const JsonException& e) {
3208  warningsTo.emplace(
3209  "WARNING: JSONPath error - "
3210  + std::string(e.view())
3211  + " ["
3212  + *(this->queryTargetSourcePtr)
3213  + "]."
3214  );
3215  }
3216  }
3217 
3218  break;
3219 
3220  case QueryStruct::typeNone:
3221  break;
3222 
3223  default:
3224  throw Exception(
3225  "Query::Container::setSubSetsFromQuery():"
3226  " Unknown query type"
3227  );
3228  }
3229 
3230  return false;
3231  }
3232 
3234 
3256  const QueryStruct& query,
3257  std::queue<std::string>& warningsTo
3258  ) {
3259  // check pointer
3260  if(this->queryTargetSourcePtr == nullptr) {
3261  throw Exception(
3262  "Query::Container::addSubSetsFromQueryOnSubSet():"
3263  " No content source has been specified"
3264  );
3265  }
3266 
3267  // check current subset
3268  if(this->subSetCurrent == 0) {
3269  throw Exception(
3270  "Query::Container::addSubSetsFromQueryOnSubSet():"
3271  " No subset has been specified"
3272  );
3273  }
3274 
3275  if(this->subSetCurrent > this->subSetNumber) {
3276  throw Exception(
3277  "Query::Container::addSubSetsFromQueryOnSubSet():"
3278  " Invalid subset has been specified"
3279  );
3280  }
3281 
3282  // check result type
3283  if(
3284  query.type != QueryStruct::typeNone
3285  && !query.resultSubSets
3286  ) {
3287  warningsTo.emplace(
3288  "WARNING: Query has invalid result type - not subset."
3289  );
3290 
3291  return false;
3292  }
3293 
3294  switch(query.type) {
3296  // get more subsets from a RegEx query on the current subset
3297  try {
3298  // stringify old subsets if necessary
3299  if(this->subSetType != QueryStruct::typeRegEx) {
3300  this->stringifySubSets(warningsTo);
3301  }
3302 
3303  // get new subsets from RegEx query
3304  std::vector<std::string> subsets;
3305 
3306  this->queriesRegEx.at(query.index).getAll(
3307  this->stringifiedSubSets.at(this->subSetCurrent - 1),
3308  subsets
3309  );
3310 
3311  // check number of results
3312  if(subsets.empty()) {
3313  return false;
3314  }
3315 
3316  // insert new RegEx subsets
3317  this->insertSubSets(subsets);
3318 
3319  return true;
3320  }
3321  catch(const RegExException& e) {
3322  warningsTo.emplace(
3323  "WARNING: RegEx error - "
3324  + std::string(e.view())
3325  + " ["
3326  + *(this->queryTargetSourcePtr)
3327  + "]."
3328  );
3329  }
3330 
3331  break;
3332 
3334  // parse current subset as HTML/XML (and stringify old subsets) if still necessary
3335  if(this->parseSubSetXml(warningsTo)) {
3336  // get more subsets from a XPath query on the current subset
3337  try {
3338  std::vector<Parsing::XML> subsets;
3339 
3340  if(this->subSetType == QueryStruct::typeXPath) {
3341  this->queriesXPath.at(query.index).getSubSets(
3342  this->xPathSubSets.at(this->subSetCurrent - 1),
3343  subsets
3344  );
3345  }
3346  else {
3347  this->queriesXPath.at(query.index).getSubSets(
3348  this->subSetParsedXML,
3349  subsets
3350  );
3351  }
3352 
3353  // check number of results
3354  if(subsets.empty()) {
3355  return false;
3356  }
3357 
3358  // insert new XPath subsets
3359  this->insertSubSets(subsets);
3360 
3361  return true;
3362  }
3363  catch(const XPathException& e) {
3364  warningsTo.emplace(
3365  "WARNING: XPath error - "
3366  + std::string(e.view())
3367  + " ["
3368  + *(this->queryTargetSourcePtr)
3369  + "]."
3370  );
3371  }
3372  }
3373 
3374  break;
3375 
3377  // parse current subset as JSON using RapidJSON (and stringify old subsets) if still necessary
3378  if(this->parseSubSetJsonRapid(warningsTo)) {
3379  // get more subsets from a JSONPointer query on the current subset
3380  try {
3381  std::vector<rapidjson::Document> subsets;
3382 
3383  if(this->subSetType == QueryStruct::typeJsonPointer) {
3384  this->queriesJsonPointer.at(query.index).getSubSets(
3385  this->jsonPointerSubSets.at(this->subSetCurrent - 1),
3386  subsets
3387  );
3388  }
3389  else {
3390  this->queriesJsonPointer.at(query.index).getSubSets(
3391  this->subSetParsedJsonRapid,
3392  subsets
3393  );
3394  }
3395 
3396  // check number of results
3397  if(subsets.empty()) {
3398  return false;
3399  }
3400 
3401  // insert new JSONPointer subsets
3402  this->insertSubSets(subsets);
3403 
3404  return true;
3405  }
3406  catch(const JsonPointerException& e) {
3407  warningsTo.emplace(
3408  "WARNING: JSONPointer error - "
3409  + std::string(e.view())
3410  + " ["
3411  + *(this->queryTargetSourcePtr)
3412  + "]."
3413  );
3414  }
3415  }
3416 
3417  break;
3418 
3420  // parse current subset as JSON using jsoncons (and stringify old subsets) if still necessary
3421  if(this->parseSubSetJsonRapid(warningsTo)) {
3422  // get more subsets from a JSONPath query on the current subset
3423  try {
3424  std::vector<jsoncons::json> subsets;
3425 
3426  if(this->subSetType == QueryStruct::typeJsonPath) {
3427  this->queriesJsonPath.at(query.index).getSubSets(
3428  this->jsonPathSubSets.at(this->subSetCurrent - 1),
3429  subsets
3430  );
3431  }
3432  else {
3433  this->queriesJsonPath.at(query.index).getSubSets(
3434  this->subSetParsedJsonCons,
3435  subsets
3436  );
3437  }
3438 
3439  // check number of results
3440  if(subsets.empty()) {
3441  return false;
3442  }
3443 
3444  // insert new JSONPath subsets
3445  this->insertSubSets(subsets);
3446 
3447  return true;
3448  }
3449  catch(const JsonPathException& e) {
3450  warningsTo.emplace(
3451  "WARNING: JSONPath error - "
3452  + std::string(e.view())
3453  + " ["
3454  + *(this->queryTargetSourcePtr)
3455  + "]."
3456  );
3457  }
3458  }
3459 
3460  break;
3461 
3463  // parse current subset as HTML/XML (and stringify old subsets) if still necessary
3464  if(this->parseSubSetXml(warningsTo)) {
3465  // get first result from the XPath query on the current subset
3466  try {
3467  std::string json;
3468 
3469  if(this->subSetType == QueryStruct::typeXPath) {
3470  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
3471  this->xPathSubSets.at(this->subSetCurrent - 1),
3472  json
3473  );
3474  }
3475  else {
3476  this->queriesXPathJsonPointer.at(query.index).first.getFirst(
3477  this->subSetParsedXML,
3478  json
3479  );
3480  }
3481 
3482  if(json.empty()) {
3483  return false;
3484  }
3485 
3486  // temporarily parse JSON using rapidJSON
3487  const auto parsedJson(Helper::Json::parseRapid(json));
3488 
3489  // get more subsets from the JSONPointer query
3490  std::vector<rapidjson::Document> subsets;
3491 
3492  this->queriesXPathJsonPointer.at(query.index).second.getSubSets(parsedJson, subsets);
3493 
3494  // stringify old subsets if necessary (and if it was not already done for HTML/XML parsing)
3495  if(this->subSetType == QueryStruct::typeXPath) {
3496  this->stringifySubSets(warningsTo);
3497  }
3498 
3499  // check number of results
3500  if(subsets.empty()) {
3501  return false;
3502  }
3503 
3504  // insert new JSONPointer subsets
3505  this->insertSubSets(subsets);
3506 
3507  return true;
3508  }
3509  catch(const XPathException& e) {
3510  warningsTo.emplace(
3511  "WARNING: XPath error - "
3512  + std::string(e.view())
3513  + " ["
3514  + *(this->queryTargetSourcePtr)
3515  + "]."
3516  );
3517  }
3518  catch(const JsonPointerException& e) {
3519  warningsTo.emplace(
3520  "WARNING: JSONPointer error - "
3521  + std::string(e.view())
3522  + " ["
3523  + *(this->queryTargetSourcePtr)
3524  + "]."
3525  );
3526  }
3527  catch(const JsonException& e) {
3528  warningsTo.emplace(
3529  "WARNING: JSONPath error - "
3530  + std::string(e.view())
3531  + " ["
3532  + *(this->queryTargetSourcePtr)
3533  + "]."
3534  );
3535  }
3536  }
3537 
3538  break;
3539 
3541  // parse current subset as HTML/XML if still necessary
3542  if(this->parseSubSetXml(warningsTo)) {
3543  // get first result from the XPath query on the current subset
3544  try {
3545  std::string json;
3546 
3547  if(this->subSetType == QueryStruct::typeXPath) {
3548  this->queriesXPathJsonPath.at(query.index).first.getFirst(
3549  this->xPathSubSets.at(this->subSetCurrent - 1),
3550  json
3551  );
3552  }
3553  else {
3554  this->queriesXPathJsonPath.at(query.index).first.getFirst(
3555  this->subSetParsedXML,
3556  json
3557  );
3558  }
3559 
3560  if(json.empty()) {
3561  return true;
3562  }
3563 
3564  // temporarily parse JSON using jsoncons
3565  const auto parsedJson(Helper::Json::parseCons(json));
3566 
3567  // get more subsets from JSONPath query
3568  std::vector<jsoncons::json> subsets;
3569 
3570  // get multiple results from the JSONPath query
3571  this->queriesXPathJsonPath.at(query.index).second.getSubSets(parsedJson, subsets);
3572 
3573  // stringify old subsets if necessary (and if it was not already done for HTML/XML parsing)
3574  if(this->subSetType == QueryStruct::typeXPath) {
3575  this->stringifySubSets(warningsTo);
3576  }
3577 
3578  // check number of results
3579  if(subsets.empty()) {
3580  return false;
3581  }
3582 
3583  // insert new JSONPath subsets
3584  this->insertSubSets(subsets);
3585 
3586  return true;
3587  }
3588  catch(const XPathException& e) {
3589  warningsTo.emplace(
3590  "WARNING: XPath error - "
3591  + std::string(e.view())
3592  + " ["
3593  + *(this->queryTargetSourcePtr)
3594  + "]."
3595  );
3596  }
3597  catch(const JsonPathException& e) {
3598  warningsTo.emplace(
3599  "WARNING: JSONPath error - "
3600  + std::string(e.view())
3601  + " ["
3602  + *(this->queryTargetSourcePtr)
3603  + "]."
3604  );
3605  }
3606  catch(const JsonException& e) {
3607  warningsTo.emplace(
3608  "WARNING: JSONPath error - "
3609  + std::string(e.view())
3610  + " ["
3611  + *(this->queryTargetSourcePtr)
3612  + "]."
3613  );
3614  }
3615  }
3616 
3617  break;
3618 
3619  case QueryStruct::typeNone:
3620  break;
3621 
3622  default:
3623  throw Exception(
3624  "Query::Container::addSubSetsFromQueryOnSubSet():"
3625  " Unknown query type"
3626  );
3627  }
3628 
3629  return false;
3630  }
3631 
3632  /*
3633  * MEMORY (protected)
3634  */
3635 
3637 
// Reserves memory for a given number of subsets.
//
// Capacity for n entries is always requested for the stringified subsets.
// The switch on query.type additionally requests capacity for the XPath,
// JSONPointer or JSONPath subset container.
//
// NOTE(review): no `case` labels precede the three reserve() calls inside the
//  switch in this listing (one line is absent before the first call, two lines
//  before each of the other two). Presumably they select the XPath, JSONPointer
//  and JSONPath based query types -- confirm against the original header.
3645  inline void Container::reserveForSubSets(const QueryStruct& query, std::size_t n) {
3646  this->stringifiedSubSets.reserve(n);
3647 
3648  switch(query.type) {
3650  this->xPathSubSets.reserve(n);
3651 
3652  break;
3653 
3656  this->jsonPointerSubSets.reserve(n);
3657 
3658  break;
3659 
3662  this->jsonPathSubSets.reserve(n);
3663 
3664  break;
3665 
3666  default:
3667  break;
3668  }
3669  }
3670 
3671  /*
3672  * INTERNAL HELPER FUNCTIONS (private)
3673  */
3674 
3675  // parse content as HTML/XML if still necessary,
3676  // return false if parsing failed, throws Container::Exception
3677  inline bool Container::parseXml(std::queue<std::string>& warningsTo) {
3678  // check pointers
3679  if(this->queryTargetPtr == nullptr) {
3680  throw Exception(
3681  "Query::Container::parseXml():"
3682  " No content has been specified"
3683  );
3684  }
3685 
3686  if(this->queryTargetSourcePtr == nullptr) {
3687  throw Exception(
3688  "Query::Container::parseXml():"
3689  " No content source has been specified"
3690  );
3691  }
3692 
3693  if(
3694  !(this->xmlParsed)
3695  && this->xmlParsingError.empty()
3696  ) {
3697  try {
3698  this->parsedXML.parse(
3699  *(this->queryTargetPtr),
3700  this->repairCData,
3701  this->repairComments,
3702  this->removeXmlInstructions,
3703  warningsTo
3704  );
3705 
3706  this->xmlParsed = true;
3707  }
3708  catch(const XMLException& e) {
3709  this->xmlParsingError = e.view();
3710 
3711  warningsTo.emplace(
3712  "WARNING: [XML] "
3713  + this->xmlParsingError
3714  + " ["
3715  + *(this->queryTargetSourcePtr)
3716  + "]"
3717  );
3718  }
3719  }
3720 
3721  return this->xmlParsed;
3722  }
3723 
3724  // parse content as JSON using RapidJSON if still necessary,
3725  // return false if parsing failed, throws Container::Exception
3726  inline bool Container::parseJsonRapid(std::queue<std::string>& warningsTo) {
3727  // check pointers
3728  if(this->queryTargetPtr == nullptr) {
3729  throw Exception(
3730  "Query::Container::parseJsonRapid():"
3731  " No content has been specified"
3732  );
3733  }
3734 
3735  if(this->queryTargetSourcePtr == nullptr) {
3736  throw Exception(
3737  "Query::Container::parseJsonRapid():"
3738  " No content source has been specified"
3739  );
3740  }
3741 
3742  if(
3743  !(this->jsonParsedRapid)
3744  && this->jsonParsingError.empty()
3745  ) {
3746  try {
3747  this->parsedJsonRapid = Helper::Json::parseRapid(
3748  *(this->queryTargetPtr)
3749  );
3750 
3751  this->jsonParsedRapid = true;
3752  }
3753  catch(const JsonException& e) {
3754  this->jsonParsingError = e.view();
3755 
3756  warningsTo.emplace(
3757  "WARNING: [JSON] "
3758  + this->jsonParsingError
3759  + " ["
3760  + *(this->queryTargetSourcePtr)
3761  + "]"
3762  );
3763  }
3764  }
3765 
3766  return this->jsonParsedRapid;
3767  }
3768 
3769  // parse content as JSON using jsoncons if still necessary,
3770  // return false if parsing failed, throws Container::Exception
3771  inline bool Container::parseJsonCons(std::queue<std::string>& warningsTo) {
3772  // check pointers
3773  if(this->queryTargetPtr == nullptr) {
3774  throw Exception(
3775  "Query::Container::parseJsonCons():"
3776  " No content has been specified"
3777  );
3778  }
3779 
3780  if(this->queryTargetSourcePtr == nullptr) {
3781  throw Exception(
3782  "Query::Container::parseJsonCons():"
3783  " No content source has been specified"
3784  );
3785  }
3786 
3787  if(
3788  !(this->jsonParsedCons)
3789  && this->jsonParsingError.empty()
3790  ) {
3791  try {
3792  this->parsedJsonCons = Helper::Json::parseCons(
3793  *(this->queryTargetPtr)
3794  );
3795 
3796  this->jsonParsedCons = true;
3797  }
3798  catch(const JsonException& e) {
3799  this->jsonParsingError = e.view();
3800 
3801  warningsTo.emplace("WARNING: [JSON] " + this->jsonParsingError);
3802  }
3803  }
3804 
3805  return this->jsonParsedCons;
3806  }
3807 
3808  // parse subset as HTML/XML if still necessary,
3809  // return false if parsing failed, throws Container::Exception
3810  inline bool Container::parseSubSetXml(std::queue<std::string>& warningsTo) {
3811  // check current subset
3812  if(this->subSetCurrent == 0) {
3813  throw Exception(
3814  "Query::Container::parseSubSetXml():"
3815  " No subset has been specified"
3816  );
3817  }
3818 
3819  if(this->subSetCurrent > this->subSetNumber) {
3820  throw Exception(
3821  "Query::Container::parseSubSetXml():"
3822  " Invalid subset has been specified"
3823  );
3824  }
3825 
3826  // if the subset is of type XPath, no further parsing is required
3827  if(this->subSetType == QueryStruct::typeXPath) {
3828  return this->xPathSubSets.at(this->subSetCurrent - 1).valid();
3829  }
3830 
3831  if(
3832  !(this->subSetXmlParsed)
3833  && this->subSetXmlParsingError.empty()
3834  ) {
3835  // stringify the subsets if still necessary
3836  this->stringifySubSets(warningsTo);
3837 
3838  try {
3839  this->subSetParsedXML.parse(
3840  this->stringifiedSubSets.at(this->subSetCurrent - 1),
3841  this->repairCData,
3842  this->repairComments,
3843  this->removeXmlInstructions,
3844  warningsTo
3845  );
3846 
3847  this->subSetXmlParsed = true;
3848  }
3849  catch(const XMLException& e) {
3850  this->subSetXmlParsingError = e.view();
3851 
3852  warningsTo.emplace("WARNING: [XML] " + this->subSetXmlParsingError);
3853  }
3854  }
3855 
3856  return this->subSetXmlParsed;
3857  }
3858 
3859  // parse subset as JSON using RapidJSON if still necessary,
3860  // return false if parsing failed, throws Container::Exception
3861  inline bool Container::parseSubSetJsonRapid(std::queue<std::string>& warningsTo) {
3862  // check current subset
3863  if(this->subSetCurrent == 0) {
3864  throw Exception(
3865  "Query::Container::parseSubSetJsonRapid():"
3866  " No subset has been specified"
3867  );
3868  }
3869 
3870  if(this->subSetCurrent > this->subSetNumber) {
3871  throw Exception(
3872  "Query::Container::parseSubSetJsonRapid():"
3873  " Invalid subset has been specified"
3874  );
3875  }
3876 
3877  // if the subset is of type JSONPointer, no further parsing is required
3878  if(this->subSetType == QueryStruct::typeJsonPointer) {
3879  return true;
3880  }
3881 
3882  if(
3883  !(this->subSetJsonParsedRapid)
3884  && this->subSetJsonParsingError.empty()
3885  ) {
3886  // stringify the subsets if still necessary
3887  this->stringifySubSets(warningsTo);
3888 
3889  try {
3890  this->subSetParsedJsonRapid = Helper::Json::parseRapid(
3891  this->stringifiedSubSets.at(this->subSetCurrent - 1)
3892  );
3893 
3894  this->subSetJsonParsedRapid = true;
3895  }
3896  catch(const JsonException& e) {
3897  this->subSetJsonParsingError = e.view();
3898 
3899  warningsTo.emplace("WARNING: [JSON] " + this->subSetJsonParsingError);
3900  }
3901  }
3902 
3903  return this->subSetJsonParsedRapid;
3904  }
3905 
3906  // parse subset as JSON using jsoncons if still necessary,
3907  // return false if parsing failed, throws Container::Exception
3908  inline bool Container::parseSubSetJsonCons(std::queue<std::string>& warningsTo) {
3909  // check current subset
3910  if(this->subSetCurrent == 0) {
3911  throw Exception(
3912  "Query::Container::parseSubSetJsonCons():"
3913  " No subset has been specified"
3914  );
3915  }
3916 
3917  if(this->subSetCurrent > this->subSetNumber) {
3918  throw Exception(
3919  "Query::Container::parseSubSetJsonCons():"
3920  " Invalid subset has been specified"
3921  );
3922  }
3923 
3924  // if the subset is of type JSONPath, no further parsing is required
3925  if(this->subSetType == QueryStruct::typeJsonPath) {
3926  return true;
3927  }
3928 
3929  if(
3930  !(this->subSetJsonParsedCons)
3931  && this->subSetJsonParsingError.empty()
3932  ) {
3933  // stringify the subsets if still necessary
3934  this->stringifySubSets(warningsTo);
3935 
3936  try {
3937  this->subSetParsedJsonCons = Helper::Json::parseCons(
3938  this->stringifiedSubSets.at(this->subSetCurrent - 1)
3939  );
3940 
3941  this->subSetJsonParsedCons = true;
3942  }
3943  catch(const JsonException& e) {
3944  this->subSetJsonParsingError = e.view();
3945 
3946  warningsTo.emplace("WARNING: [JSON] " + this->subSetJsonParsingError);
3947  }
3948  }
3949 
3950  return this->subSetJsonParsedCons;
3951  }
3952 
3953  // reset parsing state for subset
3954  inline void Container::resetSubSetParsingState() {
3955  // unset parsing state
3956  this->subSetXmlParsed = false;
3957  this->subSetJsonParsedRapid = false;
3958  this->subSetJsonParsedCons = false;
3959 
3960  // clear parsing errors
3961  Helper::Memory::free(this->subSetXmlParsingError);
3962  Helper::Memory::free(this->subSetJsonParsingError);
3963 
3964  // clear parsed content
3965  this->subSetParsedXML.clear();
3966 
3967  Helper::Memory::free(this->subSetParsedJsonCons);
3968  Helper::Json::free(this->subSetParsedJsonRapid);
3969  }
3970 
3971  // clear subsets
3972  inline void Container::clearSubSets() {
3973  if(this->minimizeMemory) {
3974  switch(this->subSetType) {
3976  Helper::Memory::free(this->xPathSubSets);
3977 
3978  break;
3979 
3981  Helper::Memory::free(this->jsonPointerSubSets);
3982 
3983  break;
3984 
3986  Helper::Memory::free(this->jsonPathSubSets);
3987 
3988  break;
3989 
3990  default:
3991  break;
3992  }
3993 
3994  Helper::Memory::free(this->stringifiedSubSets);
3995  }
3996  else {
3997  switch(this->subSetType) {
3999  this->xPathSubSets.clear();
4000 
4001  break;
4002 
4004  this->jsonPointerSubSets.clear();
4005 
4006  break;
4007 
4009  this->jsonPathSubSets.clear();
4010 
4011  break;
4012 
4013  default:
4014  break;
4015  }
4016 
4017  this->stringifiedSubSets.clear();
4018  }
4019 
4020  this->subSetType = QueryStruct::typeNone;
4021  this->subSetNumber = 0;
4022  this->subSetCurrent = 0;
4023 
4024  // reset parsing state for subset
4025  this->resetSubSetParsingState();
4026  }
4027 
4028  // stringify subsets if still necessary
4029  inline void Container::stringifySubSets(std::queue<std::string>& warningsTo) {
4030  if(!(this->stringifiedSubSets.empty())) {
4031  return;
4032  }
4033 
4034  switch(this->subSetType) {
4036  for(const auto& subset : this->xPathSubSets) {
4037  std::string subsetString;
4038 
4039  subset.getContent(subsetString);
4040 
4041  this->stringifiedSubSets.emplace_back(subsetString);
4042  }
4043 
4044  break;
4045 
4047  for(const auto& subset : this->jsonPathSubSets) {
4048  this->stringifiedSubSets.emplace_back(
4049  Helper::Json::stringify(subset)
4050  );
4051  }
4052 
4053  break;
4054 
4056  for(const auto& subset : this->jsonPointerSubSets) {
4057  this->stringifiedSubSets.emplace_back(
4058  Helper::Json::stringify(subset)
4059  );
4060  }
4061 
4062  break;
4063 
4065  warningsTo.emplace(
4066  "WARNING: RegEx subsets cannot be stringified."
4067  );
4068 
4069  break;
4070 
4071  case QueryStruct::typeNone:
4072  break;
4073 
4074  default:
4075  warningsTo.emplace(
4076  "WARNING: Unknown subset type"
4077  " in Query::Container::stringifySubSets(...)."
4078  );
4079  }
4080  }
4081 
4082  // insert RegEx subsets after the current subset
4083  // NOTE: the new subsets will be moved away from the vector;
4084  // if the subset type is different, the old subsets need to be already stringified
4085  inline void Container::insertSubSets(std::vector<std::string>& subsets) {
4086  // update number of subsets
4087  this->subSetNumber += subsets.size();
4088 
4089  // insert new subsets
4090  Helper::Container::moveInto(this->stringifiedSubSets, subsets, this->subSetCurrent);
4091 
4092  // clear non-stringified subsets if necessary
4093  switch(this->subSetType) {
4095  if(this->minimizeMemory) {
4096  Helper::Memory::free(this->xPathSubSets);
4097  }
4098  else {
4099  this->xPathSubSets.clear();
4100  }
4101 
4102  break;
4103 
4105  if(this->minimizeMemory) {
4106  Helper::Memory::free(this->jsonPointerSubSets);
4107  }
4108  else {
4109  this->jsonPointerSubSets.clear();
4110  }
4111 
4112  break;
4113 
4115  if(this->minimizeMemory) {
4116  Helper::Memory::free(this->jsonPathSubSets);
4117  }
4118  else {
4119  this->jsonPathSubSets.clear();
4120  }
4121 
4122  break;
4123 
4124  default:
4125  break;
4126  }
4127 
4128  // set the subset type to RegEx (i. e. strings only)
4129  this->subSetType = QueryStruct::typeRegEx;
4130  }
4131 
4132  // insert XPath subsets after the current subset, stringify new subsets if needed
4133  // NOTE: the new subsets will be moved away from the vector;
4134  // if the subset type is different, the old subsets need to be already stringified
4135  inline void Container::insertSubSets(std::vector<Parsing::XML>& subsets) {
4136  // update number of subsets
4137  this->subSetNumber += subsets.size();
4138 
4139  // check subset type
4140  if(this->subSetType == QueryStruct::typeXPath) {
4141  // insert new XPath subsets
4142  Helper::Container::moveInto(this->xPathSubSets, subsets, this->subSetCurrent);
4143 
4144  // stringify new subsets if the others are also stringified
4145  if(!(this->stringifiedSubSets.empty())) {
4146  std::vector<std::string> stringified;
4147 
4148  stringified.reserve(subsets.size());
4149 
4150  for(const auto& subset : subsets) {
4151  std::string subsetString;
4152 
4153  subset.getContent(subsetString);
4154 
4155  stringified.emplace_back(subsetString);
4156  }
4157 
4159  this->stringifiedSubSets,
4160  stringified,
4161  this->subSetCurrent
4162  );
4163  }
4164  }
4165  else {
4166  // stringify new subsets (old ones should already be stringified)
4167  std::vector<std::string> stringified;
4168 
4169  stringified.reserve(subsets.size());
4170 
4171  for(const auto& subset : subsets) {
4172  stringified.emplace_back();
4173 
4174  subset.getContent(stringified.back());
4175  }
4176 
4177  // insert new (stringified) XPath subsets
4179  this->stringifiedSubSets,
4180  stringified,
4181  this->subSetCurrent
4182  );
4183 
4184  // clear non-stringified subsets if neccesary
4185  switch(this->subSetType) {
4187  if(this->minimizeMemory) {
4188  Helper::Memory::free(this->jsonPointerSubSets);
4189  }
4190  else {
4191  this->jsonPointerSubSets.clear();
4192  }
4193 
4194  break;
4195 
4197  if(this->minimizeMemory) {
4198  Helper::Memory::free(this->jsonPathSubSets);
4199  }
4200  else {
4201  this->jsonPathSubSets.clear();
4202  }
4203 
4204  break;
4205 
4206  default:
4207  break;
4208  }
4209 
4210  // set the subset type to RegEx (i. e. strings only)
4211  this->subSetType = QueryStruct::typeRegEx;
4212  }
4213  }
4214 
4215  // insert JSONPath subsets after the current subset, stringify subsets if needed
4216  // NOTE: the new subsets will be moved away from the vector;
4217  // if the subset type is different, the old subsets need to be already stringified
4218  inline void Container::insertSubSets(std::vector<jsoncons::json>& subsets) {
4219  // update number of subsets
4220  this->subSetNumber += subsets.size();
4221 
4222  // check subset type
4223  if(this->subSetType == QueryStruct::typeJsonPath) {
4224  // insert new JSONPath subsets
4225  Helper::Container::moveInto(this->jsonPathSubSets, subsets, this->subSetCurrent);
4226 
4227  // stringify new subsets if the others are also stringified
4228  if(!(this->stringifiedSubSets.empty())) {
4229  std::vector<std::string> stringified;
4230 
4231  stringified.reserve(subsets.size());
4232 
4233  for(const auto& subset : subsets) {
4234  stringified.emplace_back(
4235  Helper::Json::stringify(subset)
4236  );
4237  }
4238 
4240  this->stringifiedSubSets,
4241  stringified,
4242  this->subSetCurrent
4243  );
4244  }
4245  }
4246  else {
4247  // stringify new subsets
4248  std::vector<std::string> stringified;
4249 
4250  stringified.reserve(subsets.size());
4251 
4252  for(const auto& subset : subsets) {
4253  stringified.emplace_back(
4254  Helper::Json::stringify(subset)
4255  );
4256  }
4257 
4258  // insert new (stringified) JSONPath subsets
4260  this->stringifiedSubSets,
4261  stringified,
4262  this->subSetCurrent
4263  );
4264 
4265  // clear non-stringified subsets if neccesary
4266  switch(this->subSetType) {
4268  if(this->minimizeMemory) {
4269  Helper::Memory::free(this->xPathSubSets);
4270  }
4271  else {
4272  this->xPathSubSets.clear();
4273  }
4274 
4275  break;
4276 
4278  if(this->minimizeMemory) {
4279  Helper::Memory::free(this->jsonPointerSubSets);
4280  }
4281  else {
4282  this->jsonPointerSubSets.clear();
4283  }
4284 
4285  break;
4286 
4287  default:
4288  break;
4289  }
4290 
4291  // set the subset type to RegEx (i. e. strings only)
4292  this->subSetType = QueryStruct::typeRegEx;
4293  }
4294  }
4295 
4296  // insert JSONPointer subsets after the current subset, stringify new subsets if needed
4297  // NOTE: the new subsets will be moved away from the vector;
4298  // if the subset type is different, the old subsets need to be already stringified
4299  inline void Container::insertSubSets(std::vector<rapidjson::Document>& subsets) {
4300  // update number of subsets
4301  this->subSetNumber += subsets.size();
4302 
4303  // check subset type
4304  if(this->subSetType == QueryStruct::typeJsonPointer) {
4305  // insert new JSONPointer subsets
4306  Helper::Container::moveInto(this->jsonPointerSubSets, subsets, this->subSetCurrent);
4307 
4308  // stringify new subsets if the others are also stringified
4309  if(!(this->stringifiedSubSets.empty())) {
4310  std::vector<std::string> stringified;
4311 
4312  stringified.reserve(subsets.size());
4313 
4314  for(const auto& subset : subsets) {
4315  stringified.emplace_back(
4316  Helper::Json::stringify(subset)
4317  );
4318  }
4319 
4321  this->stringifiedSubSets,
4322  stringified,
4323  this->subSetCurrent
4324  );
4325  }
4326  }
4327  else {
4328  // stringify new subsets
4329  std::vector<std::string> stringified;
4330 
4331  stringified.reserve(subsets.size());
4332 
4333  for(const auto& subset : subsets) {
4334  stringified.emplace_back(
4335  Helper::Json::stringify(subset)
4336  );
4337  }
4338 
4339  // insert new (stringified) JSONPointer subsets
4340  Helper::Container::moveInto(this->stringifiedSubSets, stringified, this->subSetCurrent);
4341 
4342  // clear non-stringified subsets if neccesary
4343  switch(this->subSetType) {
4345  if(this->minimizeMemory) {
4346  Helper::Memory::free(this->xPathSubSets);
4347  }
4348  else {
4349  this->xPathSubSets.clear();
4350  }
4351 
4352  break;
4353 
4355  if(this->minimizeMemory) {
4356  Helper::Memory::free(this->jsonPathSubSets);
4357  }
4358  else {
4359  this->jsonPathSubSets.clear();
4360  }
4361 
4362  break;
4363 
4364  default:
4365  break;
4366  }
4367 
4368  // set the subset type to RegEx (i. e. strings only)
4369  this->subSetType = QueryStruct::typeRegEx;
4370  }
4371  }
4372 
4373 } /* namespace crawlservpp::Query */
4374 
4375 #endif /* QUERY_CONTAINER_HPP_ */
bool resultSingle
Indicates whether the query generates a single result.
Definition: QueryProperties.hpp:56
static constexpr std::uint8_t typeJsonPointer
Query type identifying a JSONPointer query.
Definition: QueryStruct.hpp:58
Class for XPath exceptions.
Definition: XPath.hpp:104
bool resultBool
Indicates whether the query generates a boolean result.
Definition: QueryStruct.hpp:80
Query properties containing its name, text, type, and result type(s).
Definition: QueryProperties.hpp:39
Class for JSONPointer exceptions.
Definition: JsonPointer.hpp:97
void getContent(std::string &resultTo) const
Gets the stringified content inside the underlying document.
Definition: XML.hpp:300
virtual void initQueries()=0
Pure virtual function initializing queries.
std::string text
The query.
Definition: QueryProperties.hpp:47
Class for query container exceptions.
Definition: Container.hpp:148
bool getBoolFromQueryOnSubSet(const QueryStruct &query, bool &resultTo, std::queue< std::string > &warningsTo)
Gets a boolean result from a query of any type on the current subset.
Definition: Container.hpp:1378
void clearQueries()
Clears all queries currently managed by the container and frees the associated memory.
Definition: Container.hpp:759
bool resultSingle
Indicates whether the query generates a single result.
Definition: QueryStruct.hpp:83
bool getBoolFromRegEx(const QueryStruct &query, const std::string &target, bool &resultTo, std::queue< std::string > &warningsTo) const
Gets a boolean result from a RegEx query on a separate string.
Definition: Container.hpp:889
void setOptions(bool showWarnings, std::uint32_t numOfErrors) noexcept
Sets logging options.
Definition: XML.hpp:330
bool resultMulti
Indicates whether the query generates multiple results.
Definition: QueryStruct.hpp:86
static constexpr std::uint8_t typeJsonPath
Query type identifying a JSONPath query.
Definition: QueryStruct.hpp:61
bool resultSubSets
Indicates whether the query generates subsets as results.
Definition: QueryProperties.hpp:66
Query container.
Definition: Container.hpp:76
static void moveInto(T &to, T &from)
Moves the elements of an iterable container into another iterable container.
Definition: Container.hpp:99
std::uint8_t type
The type of the query (see above).
Definition: QueryStruct.hpp:74
bool getSingleFromQuery(const QueryStruct &query, std::string &resultTo, std::queue< std::string > &warningsTo)
Gets a single result from a query of any type on the current query target.
Definition: Container.hpp:1699
rapidjson::Document parseRapid(std::string_view json)
Parses JSON code using RapidJSON.
Definition: Json.hpp:575
bool getSingleFromQueryOnSubSet(const QueryStruct &query, std::string &resultTo, std::queue< std::string > &warningsTo)
Gets a single result from a query of any type on the current subset.
Definition: Container.hpp:1992
bool getSingleFromRegEx(const QueryStruct &query, const std::string &target, std::string &resultTo, std::queue< std::string > &warningsTo) const
Gets a single result from a RegEx query on a separate string.
Definition: Container.hpp:958
Class for RegEx exceptions.
Definition: RegEx.hpp:108
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
Implements a JSONPath query using the jsoncons library.
Definition: JsonPath.hpp:56
bool getBoolFromQuery(const QueryStruct &query, bool &resultTo, std::queue< std::string > &warningsTo)
Gets a boolean result from a query of any type on the current query target.
Definition: Container.hpp:1098
Class for JSON exceptions.
Definition: Json.hpp:136
Container & operator=(const Container &)=delete
Deleted copy assignment operator.
Container()=default
Default constructor.
void clear()
Clears the content of the underlying XML document.
Definition: XML.hpp:439
void setQueryTarget(const std::string &content, const std::string &source)
Sets the content to use the managed queries on.
Definition: Container.hpp:453
bool resultSubSets
Indicates whether the query generates subsets as results.
Definition: QueryStruct.hpp:93
bool getXml(std::string &resultTo, std::queue< std::string > &warningsTo)
Parses the current query target as tidied XML and writes it to the given string.
Definition: Container.hpp:521
bool resultMulti
Indicates whether the query generates multiple results.
Definition: QueryProperties.hpp:59
void clearQueryTarget()
Clears the current query target and frees the associated memory.
Definition: Container.hpp:769
static void freeIf(bool isFree, T &target)
Frees memory early by swapping, if necessary.
Definition: Memory.hpp:52
bool textOnly
Indicates whether the query should be considered text-only.
Definition: QueryProperties.hpp:69
bool getTarget(std::string &targetTo)
Gets the current query target, if available, and writes it to the given string.
Definition: Container.hpp:494
static constexpr std::uint8_t typeNone
Unspecified query type.
Definition: QueryStruct.hpp:49
static constexpr std::uint8_t typeRegEx
Query type identifying a RegEx query.
Definition: QueryStruct.hpp:52
static constexpr std::uint8_t typeXPathJsonPointer
Query type identifying a combined XPath and JSONPointer query.
Definition: QueryStruct.hpp:64
void setTidyErrorsAndWarnings(bool warnings, std::uint32_t numOfErrors)
Sets how tidy-html5 reports errors and warnings.
Definition: Container.hpp:428
std::size_t index
The index of the query inside its container.
Definition: QueryStruct.hpp:77
Implements an XPath query using the pugixml library.
Definition: XPath.hpp:74
bool resultBool
Indicates whether the query generates a boolean result.
Definition: QueryProperties.hpp:53
void parse(std::string_view content, bool repairCData, bool repairComments, bool removeXmlInstructions, std::queue< std::string > &warningsTo)
Parses the given HTML markup into the underlying XML document.
Definition: XML.hpp:365
static constexpr std::uint8_t typeXPathJsonPath
Query type identifying a combined XPath and JSONPath query.
Definition: QueryStruct.hpp:67
void reserveForSubSets(const QueryStruct &query, std::size_t n)
Reserves memory for a specific number of subsets.
Definition: Container.hpp:3645
void setRepairComments(bool isRepairComments)
Sets whether to try to repair broken HTML/XML comments.
Definition: Container.hpp:385
void setMinimizeMemory(bool isMinimizeMemory)
Sets whether to minimize memory usage.
Definition: Container.hpp:407
void setRemoveXmlInstructions(bool isRemoveXmlInstructions)
Sets whether to remove XML processing instructions (<?xml:...>) before parsing HTML/XML content...
Definition: Container.hpp:394
Class for XML exceptions.
Definition: XML.hpp:207
std::string_view view() const noexcept
Gets the description of the exception as a view to the underlying string.
Definition: Exception.hpp:158
bool isQueryUsed(std::uint64_t queryId) const
Checks whether the specified query is used by the container.
Definition: Container.hpp:357
bool nextSubSet()
Requests the next subset for all subsequent queries.
Definition: Container.hpp:808
Namespace for classes handling queries.
Definition: XML.hpp:51
std::string type
The type of the query.
Definition: QueryProperties.hpp:50
Structure to identify a query including its type and result type(s).
Definition: QueryStruct.hpp:40
Parses HTML markup into clean XML.
Definition: XML.hpp:149
bool setSubSetsFromQuery(const QueryStruct &query, std::queue< std::string > &warningsTo)
Sets subsets for subsequent queries using a query of any type.
Definition: Container.hpp:2940
Class for JSONPath exceptions.
Definition: JsonPath.hpp:85
bool getMultiFromQueryOnSubSet(const QueryStruct &query, std::vector< std::string > &resultTo, std::queue< std::string > &warningsTo)
Gets multiple results from a query of any type on the current subset.
Definition: Container.hpp:2605
virtual void deleteQueries()=0
bool addSubSetsFromQueryOnSubSet(const QueryStruct &query, std::queue< std::string > &warningsTo)
Inserts more subsets after the current one based on a query on the current subset.
Definition: Container.hpp:3255
Implements an extended JSONPointer query using the rapidJSON library.
Definition: JsonPointer.hpp:68
bool getMultiFromRegEx(const QueryStruct &query, const std::string &target, std::vector< std::string > &resultTo, std::queue< std::string > &warningsTo) const
Gets multiple results from a RegEx query on a separate string.
Definition: Container.hpp:1028
QueryStruct addQuery(std::uint64_t id, const QueryProperties &properties)
Adds a query with the given query properties to the container.
Definition: Container.hpp:565
std::size_t getNumberOfSubSets() const
Gets the number of subsets currently acquired.
Definition: Container.hpp:475
virtual ~Container()=default
Default destructor.
bool getMultiFromQuery(const QueryStruct &query, std::vector< std::string > &resultTo, std::queue< std::string > &warningsTo)
Gets multiple results from a query of any type on the current query target.
Definition: Container.hpp:2325
static void free(T &target)
Frees memory by swapping.
Definition: Memory.hpp:42
jsoncons::json parseCons(std::string_view json)
Parses JSON code using jsoncons.
Definition: Json.hpp:645
static constexpr std::uint8_t typeXPath
Query type identifying an XPath query.
Definition: QueryStruct.hpp:55
static void free(rapidjson::Document &target)
Frees memory by swapping.
Definition: Json.hpp:862
std::string stringify(const std::vector< std::string > &vectorToStringify)
Stringifies a vector of strings into one string containing a JSON array.
Definition: Json.hpp:158
void setRepairCData(bool isRepairCData)
Sets whether to try to repair CData when parsing XML.
Definition: Container.hpp:376