crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
JsonPath.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2020 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * JsonPath.hpp
24  *
25  * Using the jsoncons library to implement a JSONPath query
26  * with boolean, single and/or multiple results.
27  *
28  * Created on: Apr 26, 2019
29  * Author: ans
30  */
31 
32 #ifndef QUERY_JSONPATH_HPP_
33 #define QUERY_JSONPATH_HPP_
34 
35 #include "../Helper/Strings.hpp"
36 #include "../Main/Exception.hpp"
37 
38 #include "../_extern/jsoncons/include/jsoncons/json.hpp"
39 #include "../_extern/jsoncons/include/jsoncons_ext/jsonpath/json_query.hpp"
40 
41 #include <string> // std::string
42 #include <vector> // std::vector
43 
44 namespace crawlservpp::Query {
45 
46  /*
47  * DECLARATION
48  */
49 
51 
// Implements a JSONPath query using the jsoncons library.
//
// An instance stores a JSONPath string and a text-only flag, both set once by
// the constructor; every query member function is const and takes the parsed
// JSON document to run the stored path against.
//
// NOTE(review): this listing skips some original lines inside the class
// (e.g. 76-85). The cross-reference at the end of the page places the
// "Class for JSONPath exceptions" at JsonPath.hpp:85, which is presumably the
// Exception type thrown by the member functions - confirm against the header.
56  class JsonPath {
57  public:
60 
 // Sets the JSONPath string and whether the result should be text-only.
61  JsonPath(const std::string& pathString, bool textOnlyQuery);
62 
66 
 // Gets a boolean result from performing the query on a parsed JSON document.
67  [[nodiscard]] bool getBool(const jsoncons::json& json) const;
 // Gets the first match from performing the query; written to resultTo.
68  void getFirst(const jsoncons::json& json, std::string& resultTo) const;
 // Gets all matches from performing the query; written to resultTo.
69  void getAll(const jsoncons::json& json, std::vector<std::string>& resultTo) const;
 // Gets all matching subsets (as JSON values) from performing the query; written to resultTo.
70  void getSubSets(const jsoncons::json& json, std::vector<jsoncons::json>& resultTo) const;
71 
73 
75 
86 
87  private:
 // The JSONPath string; trimmed by the constructor, which rejects an empty result.
88  std::string jsonPath;
 // When true, a match that is itself an array is not expanded into its members.
89  const bool textOnly;
90  };
91 
92  /*
93  * IMPLEMENTATION
94  */
95 
97 
112  inline JsonPath::JsonPath(const std::string& pathString, bool textOnlyQuery)
113  : jsonPath(pathString),
114  textOnly(textOnlyQuery) {
115  Helper::Strings::trim(this->jsonPath);
116 
117  if(this->jsonPath.empty()) {
118  throw Exception("No JSONPath string given");
119  }
120  }
121 
123 
135  inline bool JsonPath::getBool(const jsoncons::json& json) const {
136  // check JSONPath query
137  if(this->jsonPath.empty()) {
138  throw Exception("No JSONPath query defined");
139  }
140 
141  try {
142  // evaluate query with boolean result
143  const auto result{jsoncons::jsonpath::json_query(json, this->jsonPath)};
144 
145  return !(result.is_array() && result.empty());
146  }
147  catch(const jsoncons::json_exception& e) {
148  throw Exception(
149  std::string(e.what())
150  + " (JSONPath: '"
151  + this->jsonPath
152  + "')"
153  );
154  }
155  }
156 
158 
179  inline void JsonPath::getFirst(const jsoncons::json& json, std::string& resultTo) const {
180  // empty target
181  resultTo.clear();
182 
183  // check JSONPath
184  if(this->jsonPath.empty()) {
185  throw Exception("No JSONPath defined");
186  }
187 
188  try {
189  // get result
190  const auto result{jsoncons::jsonpath::json_query(json, this->jsonPath)};
191 
192  // check validity of result
193  if(!result.is_array()) {
194  throw Exception("jsoncons::jsonpath::json_query() did not return an array");
195  }
196 
197  // check whether there are matches
198  if(!result.array_value().empty()) {
199  if(result[0].is_array() && !(this->textOnly)) {
200  // return first array member of first match
201  resultTo = result[0][0].as<std::string>();
202  }
203  else {
204  // return first match only
205  resultTo = result[0].as<std::string>();
206  }
207  }
208  }
209  catch(const jsoncons::json_exception& e) {
210  throw Exception(
211  std::string(e.what())
212  + " (JSONPath: '"
213  + this->jsonPath + "')"
214  );
215  }
216  }
217 
219 
241  inline void JsonPath::getAll(const jsoncons::json& json, std::vector<std::string>& resultTo) const {
242  // empty target
243  resultTo.clear();
244 
245  // check JSONPath
246  if(this->jsonPath.empty()) {
247  throw Exception("No JSONPath defined");
248  }
249 
250  try {
251  // get result
252  const auto result{jsoncons::jsonpath::json_query(json, this->jsonPath)};
253 
254  // check validity of result
255  if(!result.is_array()) {
256  throw Exception("jsoncons::jsonpath::json_query() did not return an array");
257  }
258 
259  // check number of matches
260  switch(result.array_value().size()) {
261  case 0:
262  break;
263 
264  case 1:
265  if(result[0].is_array() && !(this->textOnly)) {
266  // return all array members of first match
267  resultTo.reserve(result[0].array_value().size());
268 
269  for(const auto& element : result[0].array_range()) {
270  resultTo.emplace_back(element.as<std::string>());
271  }
272  }
273  else {
274  resultTo.emplace_back(result[0].as<std::string>());
275  }
276 
277  break;
278 
279  default:
280  // return all matches
281  resultTo.reserve(result.array_value().size());
282 
283  for(const auto& element : result.array_range()) {
284  resultTo.emplace_back(element.as<std::string>());
285  }
286  }
287  }
288  catch(const jsoncons::json_exception& e) {
289  throw Exception(
290  std::string(e.what())
291  + " (JSONPath: '"
292  + this->jsonPath + "')"
293  );
294  }
295  }
296 
298 
323  inline void JsonPath::getSubSets(const jsoncons::json& json, std::vector<jsoncons::json>& resultTo) const {
324  // empty target
325  resultTo.clear();
326 
327  // check JSONPath
328  if(this->jsonPath.empty()) {
329  throw Exception("No JSONPath defined");
330  }
331 
332  try {
333  // get result
334  const auto result{jsoncons::jsonpath::json_query(json, this->jsonPath)};
335 
336  // check validity of result
337  if(!result.is_array()) {
338  throw Exception("jsoncons::jsonpath::json_query() did not return an array");
339  }
340 
341  // check number of matches
342  switch(result.array_value().size()) {
343  case 0:
344  break;
345 
346  case 1:
347  if(result[0].is_array() && !(this->textOnly)) {
348  // return all array members of first match
349  resultTo.reserve(result[0].array_value().size());
350 
351  for(const auto& element : result[0].array_range()) {
352  resultTo.emplace_back(element);
353  }
354  }
355  else {
356  resultTo.emplace_back(result[0]);
357  }
358 
359  break;
360 
361  default:
362  // return all matches
363  resultTo.reserve(result.array_value().size());
364 
365  for(const auto& element : result.array_range()) {
366  resultTo.emplace_back(element);
367  }
368  }
369  }
370  catch(const jsoncons::json_exception& e) {
371  throw Exception(
372  std::string(e.what())
373  + " (JSONPath: '"
374  + this->jsonPath + "')"
375  );
376  }
377  }
378 
379 } /* namespace crawlservpp::Query */
380 
381 #endif /* QUERY_JSONPATH_HPP_ */
void getAll(const jsoncons::json &json, std::vector< std::string > &resultTo) const
Gets all matches from performing the query on a parsed JSON document.
Definition: JsonPath.hpp:241
bool getBool(const jsoncons::json &json) const
Gets a boolean result from performing the query on a parsed JSON document.
Definition: JsonPath.hpp:135
void getFirst(const jsoncons::json &json, std::string &resultTo) const
Gets the first match from performing the query on a parsed JSON document.
Definition: JsonPath.hpp:179
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
Implements a JSONPath query using the jsoncons library.
Definition: JsonPath.hpp:56
void trim(std::string &stringToTrim)
Removes whitespaces around a string.
Definition: Strings.hpp:360
JsonPath(const std::string &pathString, bool textOnlyQuery)
Constructor setting a JSONPath string and whether the result should be text-only. ...
Definition: JsonPath.hpp:112
void getSubSets(const jsoncons::json &json, std::vector< jsoncons::json > &resultTo) const
Gets all matching subsets from performing the query on a parsed JSON document.
Definition: JsonPath.hpp:323
Namespace for classes handling queries.
Definition: XML.hpp:51
Class for JSONPath exceptions.
Definition: JsonPath.hpp:85