crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
JsonPointer.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2020 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * JsonPointer.hpp
24  *
25  * Using the rapidJSON library to implement a JSONPointer query
26  * with boolean, single and/or multiple results.
27  *
28  * NOTE: Different from the standard, multiple results are
29  * supported when using $$ as a placeholder for 0..n,
30  * where n is the number of matches minus 1.
31  *
32  * Created on: Apr 19, 2019
33  * Author: ans
34  */
35 
36 #ifndef QUERY_JSONPOINTER_HPP_
37 #define QUERY_JSONPOINTER_HPP_
38 
39 #include "../Helper/Json.hpp"
40 #include "../Helper/Strings.hpp"
41 #include "../Main/Exception.hpp"
42 
43 #include "../_extern/rapidjson/include/rapidjson/document.h"
44 #include "../_extern/rapidjson/include/rapidjson/pointer.h"
45 
46 #include <cstddef> // std::size_t
47 #include <limits> // std::numeric_limits
48 #include <string> // std::string, std::to_string
49 #include <vector> // std::vector
50 
51 namespace crawlservpp::Query {
52 
53  /*
54  * DECLARATION
55  */
56 
58 
68  class JsonPointer {
69  public:
72 
73  JsonPointer(const std::string& pointerString, bool textOnlyQuery);
74 
78 
79  [[nodiscard]] bool getBool(const rapidjson::Document& doc) const;
80  void getFirst(const rapidjson::Document& doc, std::string& resultTo) const;
81  void getAll(const rapidjson::Document& doc, std::vector<std::string>& resultTo) const;
82  void getSubSets(const rapidjson::Document& doc, std::vector<rapidjson::Document>& resultTo) const;
83 
85 
87 
98 
99  private:
100  rapidjson::Pointer pointerFirst;
101  std::string pointerStringMulti;
102  bool textOnly;
103  };
104 
105  /*
106  * IMPLEMENTATION
107  */
108 
110 
127  inline JsonPointer::JsonPointer(const std::string& pointerString, bool textOnlyQuery) : textOnly(textOnlyQuery) {
128  // copy and trim pointer string
129  std::string string{pointerString};
130 
131  Helper::Strings::trim(string);
132 
133  if(string.empty()) {
134  throw Exception("No JSONPointer string given");
135  }
136 
137  // check whether multiple JSONPointers need to be constructed
138  if(string.find("$$") != std::string::npos) {
139  this->pointerStringMulti = string;
140  }
141 
142  if(this->pointerStringMulti.empty()) {
143  this->pointerFirst = rapidjson::Pointer(string);
144 
145  if(!(this->pointerFirst.IsValid())) {
146  throw JsonPointer::Exception("Invalid JSONPointer '" + string + "'");
147  }
148  }
149  else {
150  Helper::Strings::replaceAll(string, "$$", "0");
151 
152  this->pointerFirst = rapidjson::Pointer(string);
153 
154  if(!(this->pointerFirst.IsValid())) {
155  throw JsonPointer::Exception("Invalid JSONPointer '" + string + "'");
156  }
157  }
158  }
159 
161 
174  inline bool JsonPointer::getBool(const rapidjson::Document& doc) const {
175  // check document and pointer
176  if(doc.HasParseError()) {
177  throw JsonPointer::Exception("JSON parsing error");
178  }
179 
180  if(!(this->pointerFirst.IsValid())) {
181  throw JsonPointer::Exception("Invalid JSONPointer");
182  }
183 
184  // evaluate query with boolean result
185  return this->pointerFirst.Get(doc) != nullptr;
186  }
187 
189 
210  // NOTE: if the match is an array, only the first element will be returned, unless the query is text-only
211  inline void JsonPointer::getFirst(const rapidjson::Document& doc, std::string& resultTo) const {
212  // empty result
213  resultTo.clear();
214 
215  // check document and pointer
216  if(doc.HasParseError()) {
217  throw JsonPointer::Exception("Invalid JSON");
218  }
219 
220  if(!(this->pointerFirst.IsValid())) {
221  throw JsonPointer::Exception("Invalid JSONPointer");
222  }
223 
224  // get result
225  const auto * match{this->pointerFirst.Get(doc)};
226 
227  // check whether match exists
228  if(match != nullptr) {
229  // check type of result
230  if(match->IsString()) {
231  resultTo = std::string(match->GetString(), match->GetStringLength());
232  }
233  else if(match->IsArray() && !(this->textOnly)) {
234  const auto& iterator{match->GetArray().Begin()};
235 
236  if(iterator != nullptr) {
237  if(iterator->IsString()) {
238  resultTo = std::string(iterator->GetString(), iterator->GetStringLength());
239  }
240  else {
241  resultTo = Helper::Json::stringify(*iterator);
242  }
243  }
244  }
245  else {
246  // stringify result
247  resultTo = Helper::Json::stringify(*match);
248  }
249  }
250  }
251 
253 
275  inline void JsonPointer::getAll(const rapidjson::Document& doc, std::vector<std::string>& resultTo) const {
276  // empty result
277  resultTo.clear();
278 
279  // check document and pointer
280  if(doc.HasParseError()) {
281  throw JsonPointer::Exception("Invalid JSON");
282  }
283 
284  if(!(this->pointerFirst.IsValid())) {
285  throw JsonPointer::Exception("Invalid JSONPointer");
286  }
287 
288  // check whether multiple matches are possible
289  if(this->pointerStringMulti.empty()) {
290  // get first match only, because multiple matches are not possible
291  const auto * match{this->pointerFirst.Get(doc)};
292 
293  // check whether match exists
294  if(match != nullptr) {
295  // check for array
296  if(match->IsArray() && !(this->textOnly)) {
297  // reserve memory for array members
298  resultTo.reserve(match->GetArray().Size());
299 
300  // go through all array members
301  for(const auto& member : match->GetArray()) {
302  // check for string
303  if(member.IsString()) {
304  resultTo.emplace_back(member.GetString(), member.GetStringLength());
305  }
306  else {
307  // stringify array member
308  resultTo.emplace_back(Helper::Json::stringify(member));
309  }
310  }
311  }
312  // check for string
313  else if(match->IsString()) {
314  resultTo.emplace_back(match->GetString(), match->GetStringLength());
315  }
316  else {
317  // stringify match
318  resultTo.emplace_back(Helper::Json::stringify(*match));
319  }
320  }
321  }
322  else {
323  // get all matches
324  std::size_t counter{};
325 
326  while(true) {
327  std::string pointerString(this->pointerStringMulti);
328 
329  Helper::Strings::replaceAll(pointerString, "$$", std::to_string(counter));
330 
331  const rapidjson::Pointer pointer{pointerString};
332 
333  if(!(pointer.IsValid())) {
334  throw JsonPointer::Exception("Invalid JSONPointer '" + pointerString + "'");
335  }
336 
337  const auto * match{pointer.Get(doc)};
338 
339  if(match == nullptr) {
340  break;
341  }
342 
343  // check type of result
344  if(match->IsString()) {
345  resultTo.emplace_back(match->GetString(), match->GetStringLength());
346  }
347  else {
348  // stringify result
349  resultTo.emplace_back(Helper::Json::stringify(*match));
350  }
351 
352  ++counter;
353  }
354  }
355  }
356 
358 
382  inline void JsonPointer::getSubSets(const rapidjson::Document& doc, std::vector<rapidjson::Document>& resultTo) const {
383  // empty result
384  resultTo.clear();
385 
386  // check document and pointer
387  if(doc.HasParseError()) {
388  throw JsonPointer::Exception("Invalid JSON");
389  }
390 
391  if(!(this->pointerFirst.IsValid())) {
392  throw JsonPointer::Exception("Invalid JSONPointer");
393  }
394 
395  // check whether multiple matches are possible
396  if(this->pointerStringMulti.empty()) { // get first match only, because multiple matches are not possible
397  // get single match
398  const auto * match{this->pointerFirst.Get(doc)};
399 
400  // check whether match exists
401  if(match != nullptr) {
402  // check whether match is an array (and query is not text-only)
403  if(match->IsArray() && !(this->textOnly)) {
404  // reserve memory for results
405  resultTo.reserve(match->GetArray().Size());
406 
407  // go through all array members
408  for(const auto& member : match->GetArray()) {
409  // create new document for the array member at the end of the results
410  resultTo.emplace_back();
411 
412  // copy the array member to the new document
413  resultTo.back().CopyFrom(member, resultTo.back().GetAllocator());
414  }
415  }
416  else {
417  // create a new document for the match at the end of the results
418  resultTo.emplace_back();
419 
420  // copy the match to the new document
421  resultTo.back().CopyFrom(*match, resultTo.back().GetAllocator());
422  }
423  }
424  }
425  else {
426  // get all matches
427  std::size_t counter{};
428 
429  // loop through all possible matches
430  while(true) {
431  // copy JSONPointer string for placeholder replacement
432  std::string pointerString{this->pointerStringMulti};
433 
434  // replace placeholders with counter value
435  Helper::Strings::replaceAll(pointerString, "$$", std::to_string(counter));
436 
437  // create (and check) JSONPointer
438  const rapidjson::Pointer pointer{pointerString};
439 
440  if(!(pointer.IsValid())) {
442  "Invalid JSONPointer '"
443  + pointerString + "'"
444  );
445  }
446 
447  // get (and check) match
448  const auto * match{pointer.Get(doc)};
449 
450  if(match == nullptr || match->IsNull()) {
451  break;
452  }
453 
454  // create a new document for the match at the end of the results
455  resultTo.emplace_back();
456 
457  // copy the match to the new document
458  resultTo.back().CopyFrom(*match, resultTo.back().GetAllocator());
459 
460  // increment counter
461  ++counter;
462  }
463  }
464  }
465 
466 } /* namespace crawlservpp::Query */
467 
468 #endif /* QUERY_JSONPOINTER_HPP_ */
Class for JSONPointer exceptions.
Definition: JsonPointer.hpp:97
JsonPointer(const std::string &pointerString, bool textOnlyQuery)
Constructor setting a JSONPointer string and whether the result should be text-only.
Definition: JsonPointer.hpp:127
void getAll(const rapidjson::Document &doc, std::vector< std::string > &resultTo) const
Gets all matches from performing the query on a parsed JSON document.
Definition: JsonPointer.hpp:275
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
void trim(std::string &stringToTrim)
Removes whitespaces around a string.
Definition: Strings.hpp:360
void getFirst(const rapidjson::Document &doc, std::string &resultTo) const
Gets the first match from performing the query on a parsed JSON document.
Definition: JsonPointer.hpp:211
void replaceAll(std::string &strInOut, std::string_view needle, std::string_view replacement)
Replaces all occurrences within a string with another string.
Definition: Strings.hpp:246
void getSubSets(const rapidjson::Document &doc, std::vector< rapidjson::Document > &resultTo) const
Gets all matching subsets from performing the query on a parsed JSON document.
Definition: JsonPointer.hpp:382
Namespace for classes handling queries.
Definition: XML.hpp:51
Implements an extended JSONPointer query using the rapidJSON library.
Definition: JsonPointer.hpp:68
bool getBool(const rapidjson::Document &doc) const
Gets a boolean result from performing the query on a parsed JSON document.
Definition: JsonPointer.hpp:174
std::string stringify(const std::vector< std::string > &vectorToStringify)
Stringifies a vector of strings into one string containing a JSON array.
Definition: Json.hpp:158