crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
Curl.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2021 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * Curl.hpp
24  *
25  * Using the libcurl library to provide networking functionality.
26  * This class is used by both the crawler and the extractor.
27  * NOT THREAD-SAFE! Use multiple instances for multiple threads.
28  *
29  * Created on: Oct 8, 2018
30  * Author: ans
31  */
32 
33 #ifndef NETWORK_CURL_HPP_
34 #define NETWORK_CURL_HPP_
35 
36 #include "Config.hpp"
37 
38 #include "../Data/Compression/Gzip.hpp"
39 #include "../Helper/FileSystem.hpp"
40 #include "../Helper/Utf8.hpp"
41 #include "../Main/Exception.hpp"
42 #include "../Struct/NetworkSettings.hpp"
43 #include "../Wrapper/Curl.hpp"
44 #include "../Wrapper/CurlList.hpp"
45 
46 #ifndef CRAWLSERVPP_TESTING
47 
48 #include "../Helper/Portability/curl.h"
49 
50 #else
51 
52 #include "FakeCurl/FakeCurl.hpp"
53 
54 #endif
55 
56 #include <algorithm> // std::find, std::remove_if, std::transform
57 #include <array> // std::array
58 #include <cctype> // std::isspace, std::tolower
59 #include <chrono> // std::chrono
60 #include <cstddef> // std::size_t
61 #include <cstdint> // std::uint32_t, std::uint64_t
62 #include <exception> // std::exception
63 #include <functional> // std::function
64 #include <limits> // std::numeric_limits
65 #include <queue> // std::queue
66 #include <string> // std::string, std::to_string
67 #include <string_view> // std::string_view
68 #include <thread> // std::this_thread
69 #include <vector> // std::vector
70 
71 namespace crawlservpp::Network {
72 
73  /*
74  * CONSTANTS
75  */
76 
79 
//! The URL-encoded representation of a space, as searched for when escaping with plus signs.
inline constexpr auto encodedSpace = "%20";

//! The number of characters in the URL-encoded representation of a space.
inline constexpr auto encodedSpaceLength = 3;

//! The interval, in milliseconds, in which the running state is re-checked while sleeping during a connection reset.
inline constexpr auto checkEveryMilliseconds = 100;

//! The characters used as delimiters when escaping a URL; they separate the parts that are passed to libcurl for escaping.
inline constexpr auto reservedCharacters = ";/?:@=&#";

//! Minimum libcurl version (0xXXYYZZ) required for DNS-over-HTTPS, i.e. 7.62.0.
inline constexpr auto versionDoH = 0x073E00;

//! Minimum libcurl version required for DNS shuffling, i.e. 7.60.0.
inline constexpr auto versionDnsShuffle = 0x073C00;

//! Minimum libcurl version required for brotli encoding, i.e. 7.57.0.
inline constexpr auto versionBrotli = 0x073900;

//! Minimum libcurl version required for Zstandard encoding, i.e. 7.72.0.
inline constexpr auto versionZstd = 0x074800;

//! Minimum libcurl version required for HTTP/2, i.e. 7.33.0.
inline constexpr auto versionHttp2 = 0x072100;

//! Minimum libcurl version required for HTTP/2 ONLY, i.e. 7.49.0.
inline constexpr auto versionHttp2Only = 0x073100;

//! Minimum libcurl version required for HTTP/2 OVER TLS ONLY, i.e. 7.47.0.
inline constexpr auto versionHttp2Tls = 0x072F00;

//! Minimum libcurl version required for HTTP/3 ONLY, i.e. 7.66.0.
inline constexpr auto versionHttp3Only = 0x074200;

//! Minimum libcurl version required for using a pre-proxy, i.e. 7.52.0.
inline constexpr auto versionPreProxy = 0x073400;

//! Minimum libcurl version required for proxy TLS authentication, i.e. 7.52.0.
inline constexpr auto versionProxyTlsAuth = 0x073400;

//! The authentication type passed to libcurl for proxy TLS authentication.
inline constexpr auto authTypeTlsSrp = "SRP";

//! Minimum libcurl version required for SSL verification of proxy host and peer, i.e. 7.52.0.
inline constexpr auto versionProxySslVerify = 0x073400;

//! Minimum libcurl version required for TCP Fast Open, i.e. 7.49.0.
inline constexpr auto versionTcpFastOpen = 0x073100;

//! Minimum libcurl version required for configuring the Happy Eyeballs timeout, i.e. 7.59.0.
inline constexpr auto versionHappyEyeballs = 0x073B00;

//! The URL from which the public IP address of the server is requested.
inline constexpr auto getPublicIpFrom = "https://myexternalip.com/raw";

//! The HTTP response codes that are handed over as errors when requesting the public IP address.
inline constexpr std::array getPublicIpErrors = {403, 429, 502, 503, 504, 521, 522, 524};

//! The beginning of a "X-ts" header line (usage not visible in this part of the file).
inline constexpr auto xTsHeaderName = "X-ts: ";

//! The number of characters in xTsHeaderName.
inline constexpr auto xTsHeaderNameLen = 6;

//! The two leading bytes of gzip-compressed content (presumably used for detecting compression - usage not visible here).
inline constexpr std::array gzipMagicNumber = {0x1f, 0x8b};
148 
150 
151  /*
152  * DECLARATION
153  */
154 
156 
// Provides networking functionality on top of the libcurl library.
//
// According to the file header, this class is used by both the crawler and
// the extractor and is NOT thread-safe: use one instance per thread.
//
// An instance owns one libcurl handle (Wrapper::Curl) and the libcurl lists
// referenced by it. It registers its static header() and writer() functions
// as libcurl callbacks and passes itself as their user data.
168  class Curl {
169  // for convenience: shorthand for the libcurl list wrapper and for the
     //  callback that is polled while sleeping in resetConnection()
171 
172  using CurlList = Wrapper::CurlList;
173 
174  using IsRunningCallback = std::function<bool()>;
175 
176  public:
179 
     // Constructor. NOTE: cookieDirectory is stored as a std::string_view
     //  (see cookieDir below), i.e. the underlying string is NOT copied and
     //  needs to stay valid for as long as the instance is used.
180  Curl(
181  std::string_view cookieDirectory,
182  const NetworkSettings& setNetworkSettings
183  );
184 
     // Virtual default destructor.
186  virtual ~Curl() = default;
187 
191 
     // SETTERS
     //
     // setConfigGlobal() applies a whole network configuration to the libcurl
     //  handle; warnings about unsupported options are added to warningsTo.
     //  A pointer to globalConfig is kept and used again by resetConnection(),
     //  i.e. the configuration object needs to outlive its use by this class.
     // setConfigCurrent() overwrites cookies according to the given configuration.
     // setCookies()/unsetCookies() and setHeaders()/unsetHeaders() set and
     //  reset custom cookies and HTTP headers.
192  void setConfigGlobal(
193  const Config& globalConfig,
194  bool limited,
195  std::queue<std::string>& warningsTo
196  );
197  void setConfigCurrent(const Config& currentConfig);
198  void setCookies(const std::string& cookies);
199  void setHeaders(const std::vector<std::string>& customHeaders);
200  void setVerbose(bool isVerbose);
201  void unsetCookies();
202  void unsetHeaders();
203 
207 
     // GETTERS
     //
     // getContent() performs the request and moves the received content into
     //  contentTo; the other getters return values stored by this instance.
     // getPublicIp() performs a request itself and returns "N/A" (optionally
     //  followed by an error description) if no IP address could be retrieved.
208  void getContent(
209  std::string_view url,
210  bool usePost,
211  std::string& contentTo,
212  const std::vector<std::uint32_t>& errors
213  );
214  [[nodiscard]] std::uint32_t getResponseCode() const noexcept;
215  [[nodiscard]] std::string getContentType() const noexcept;
216  [[nodiscard]] CURLcode getCurlCode() const noexcept;
217  [[nodiscard]] std::string getPublicIp();
218 
222 
     // RESET
     //
     // Clears and re-initializes the libcurl handle, sleeping in between for
     //  (at most) the given number of milliseconds while the callback returns true.
223  void resetConnection(
224  std::uint64_t sleepForMilliseconds,
225  const IsRunningCallback& isRunningCallback
226  );
227 
231 
     // URL ENCODING
     //
     // These functions are not static, because they need the libcurl handle.
232  [[nodiscard]] std::string escape(
233  const std::string& stringToEscape,
234  bool usePlusForSpace
235  );
236  [[nodiscard]] std::string unescape(
237  const std::string& escapedString,
238  bool usePlusForSpace
239  );
240  [[nodiscard]] std::string escapeUrl(std::string_view urlToEscape);
241 
243 
245 
     // NOTE(review): Curl::Exception is used by the implementation, but its
     //  declaration is not visible in this listing - it is presumably located
     //  in the gap around here; confirm against the original header.
261 
265 
     // COPY AND MOVE: instances can neither be copied nor moved.
268  Curl(Curl&) = delete;
269 
271  Curl& operator=(Curl&) = delete;
272 
274  Curl(Curl&&) = delete;
275 
277  Curl& operator=(Curl&&) = delete;
278 
280 
281  protected:
284 
     // Converts a string returned by libcurl into a std::string (implementation
     //  not visible here - presumably it also frees the libcurl string; TODO confirm).
285  [[nodiscard]] static std::string curlStringToString(char * curlString);
286 
290 
     // Header callback registered via CURLOPT_HEADERFUNCTION; thisPtr is the
     //  instance set via CURLOPT_HEADERDATA.
291  static int header(char * data, std::size_t size, std::size_t nitems, void * thisPtr);
292  int headerInClass(char * data, std::size_t size);
293 
297 
     // Write callback registered via CURLOPT_WRITEFUNCTION; thisPtr is the
     //  instance set via CURLOPT_WRITEDATA.
298  static int writer(char * data, std::size_t size, std::size_t nmemb, void * thisPtr);
299  int writerInClass(char * data, std::size_t size);
300 
302 
303  private:
     // view of the cookie directory (not owning, see the constructor)
304  const std::string_view cookieDir;
     // result code of the most recent libcurl call made by this class
305  CURLcode curlCode{CURLE_OK};
     // received content (swapped into the caller's string by getContent())
306  std::string content;
307  std::string contentType;
308  std::uint32_t responseCode{};
     // NOTE(review): presumably parsed from the "X-ts" header - not visible here
309  std::uint32_t xTsHeaderValue{};
     // the 'limited' flag of the last call to setConfigGlobal()
310  bool limitedSettings{false};
311  bool post{false};
     // custom cookies currently set, and those set before (see setCookies())
312  std::string tmpCookies;
313  std::string oldCookies;
     // copy of the network settings passed to the constructor
314  const NetworkSettings networkSettings;
     // features and version number as reported by curl_version_info()
315  int features{};
316  unsigned int version{};
317 
318  // const pointer to network configuration (set by setConfigGlobal(), not owning)
319  const Network::Config * config{nullptr};
320 
321  // libcurl object
322  Wrapper::Curl curl;
323 
324  // libcurl lists
325  CurlList dnsResolves;
326  CurlList headers;
327  CurlList tmpHeaders;
328  CurlList http200Aliases;
329  CurlList proxyHeaders;
330 
331  // internal helper functions
     // any integral value (including bool) is forwarded to the overload taking a long
332  template<typename T> std::enable_if_t<std::is_integral_v<T>>
333  setOption(CURLoption option, T numericValue) {
334  //NOLINTNEXTLINE(google-runtime-int)
335  this->setOption(option, static_cast<long>(numericValue));
336  }
337 
338  void setOption(CURLoption option, const std::string& stringValue);
339  //NOLINTNEXTLINE(google-runtime-int)
340  void setOption(CURLoption option, long longValue);
341  void setOption(CURLoption option, CurlList& list);
342  void setOption(CURLoption option, void * pointer);
343 
344  [[nodiscard]] bool hasFeature(int feature) const noexcept;
345 
     // checks the result code stored in curlCode (called after curl_easy_setopt())
346  void checkCode();
347 
     // steps of getContent(), called in the order of their declaration
348  void clearContent();
349  [[nodiscard]] std::string preparePost(std::string_view url, std::string& postFieldsTo);
350  [[nodiscard]] std::string prepareGet(std::string_view url);
351  void checkResult(const std::array<char, CURL_ERROR_SIZE>& errorBuffer);
352  void checkResponseCode(const std::vector<std::uint32_t>& errors);
353  void retrieveContentType();
354  void processContentType();
355  void checkCompression();
356  void repairEncoding();
357  };
358 
359  /*
360  * IMPLEMENTATION
361  */
362 
363  /*
364  * CONSTRUCTION AND DESTRUCTION
365  */
366 
368 
381  inline Curl::Curl(
382  std::string_view cookieDirectory,
383  const NetworkSettings& setNetworkSettings
384  ) : cookieDir(cookieDirectory),
385  networkSettings(setNetworkSettings) {
386  // check pointer to libcurl instance
387  if(!(this->curl.valid())) {
388  throw Curl::Exception("Could not initialize the libcurl library");
389  }
390 
391  // get libcurl version information
392  const auto * versionInfo = curl_version_info(CURLVERSION_NOW);
393 
394  this->features = versionInfo->features;
395  this->version = versionInfo->version_num;
396 
397  // check for SSL support
398  if(!(this->hasFeature(CURL_VERSION_SSL))) { //NOLINT(hicpp-signed-bitwise)
399  throw Curl::Exception("The libcurl library does not support SSL");
400  }
401 
402  // configure libcurl
403  this->setOption(CURLOPT_NOSIGNAL, 1L);
404 
405  // set header function
406  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
407  this->curlCode = curl_easy_setopt(
408  this->curl.get(),
409  CURLOPT_HEADERFUNCTION,
411  );
412 
413  this->checkCode();
414 
415  // set write function
416  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
417  this->curlCode = curl_easy_setopt(
418  this->curl.get(),
419  CURLOPT_WRITEFUNCTION,
421  );
422 
423  this->checkCode();
424 
425  // set pointers to instance
426  this->setOption(CURLOPT_WRITEDATA, this);
427  this->setOption(CURLOPT_HEADERDATA, this);
428  }
429 
430  /*
431  * SETTERS
432  */
433 
435 
458  const Config& globalConfig,
459  bool limited,
460  std::queue<std::string>& warningsTo
461  ) {
462  // check libcurl handle
463  if(!(this->curl.valid())) {
464  throw Curl::Exception("libcurl has not been initialized");
465  }
466 
467  // set libcurl options
468  this->setOption(CURLOPT_MAXCONNECTS, globalConfig.networkConfig.connectionsMax);
469  this->setOption(
470  CURLOPT_IGNORE_CONTENT_LENGTH,
472  );
473 
474  if(globalConfig.networkConfig.cookies && !limited) {
475  // add cookie directory to cookie files
476  std::string loadCookiesFrom;
477  std::string saveCookiesTo;
478 
479  if(!globalConfig.networkConfig.cookiesLoad.empty()) {
480  loadCookiesFrom.reserve(
481  this->cookieDir.length()
482  + globalConfig.networkConfig.cookiesLoad.length()
483  + 1 // path separator
484  );
485 
486  loadCookiesFrom = this->cookieDir;
487 
488  loadCookiesFrom += Helper::FileSystem::getPathSeparator();
489  loadCookiesFrom += globalConfig.networkConfig.cookiesLoad;
490 
491  // check whether cookie file really is located in cookie directory
492  if(!Helper::FileSystem::contains(this->cookieDir, loadCookiesFrom)) {
493  std::string exceptionString{"Cookie file '"};
494 
495  exceptionString += loadCookiesFrom;
496  exceptionString += "is not in directory '";
497  exceptionString += this->cookieDir;
498  exceptionString += "'";
499 
500  throw Curl::Exception(exceptionString);
501  }
502  }
503 
504  if(!globalConfig.networkConfig.cookiesSave.empty()) {
505  saveCookiesTo.reserve(
506  this->cookieDir.length()
507  + globalConfig.networkConfig.cookiesSave.length()
508  + 1 // path separator
509  );
510 
511  saveCookiesTo = this->cookieDir;
512 
513  saveCookiesTo += Helper::FileSystem::getPathSeparator();
514  saveCookiesTo += globalConfig.networkConfig.cookiesSave;
515 
516  // check whether cookie file really is located in cookie directory
517  if(!Helper::FileSystem::contains(this->cookieDir, saveCookiesTo)) {
518  std::string exceptionString{"Cookie file '"};
519 
520  exceptionString += saveCookiesTo;
521  exceptionString += "is not in directory '";
522  exceptionString += this->cookieDir;
523  exceptionString += "'";
524 
525  throw Curl::Exception(exceptionString);
526  }
527  }
528 
529  this->setOption(CURLOPT_COOKIEFILE, loadCookiesFrom);
530 
531  if(!saveCookiesTo.empty()) {
532  this->setOption(CURLOPT_COOKIEJAR, saveCookiesTo);
533  }
534  }
535 
536  if(!globalConfig.networkConfig.cookiesSession && !limited) {
537  this->setOption(CURLOPT_COOKIESESSION, true);
538  }
539 
540  if(!globalConfig.networkConfig.cookiesSet.empty() && !limited) {
541  this->setCookies(globalConfig.networkConfig.cookiesSet);
542  }
543 
544  this->setOption(CURLOPT_DNS_CACHE_TIMEOUT, globalConfig.networkConfig.dnsCacheTimeOut);
545 
546  if(!globalConfig.networkConfig.dnsDoH.empty()) {
547  if(this->version >= versionDoH) {
548  this->setOption(CURLOPT_DOH_URL, globalConfig.networkConfig.dnsDoH);
549  }
550  else {
551  warningsTo.emplace(
552  "DNS-over-HTTPS currently not supported,"
553  " 'network.dns.doh' ignored."
554  );
555  }
556  }
557 
558  if(!globalConfig.networkConfig.dnsInterface.empty()) {
559  this->setOption(CURLOPT_DNS_INTERFACE, globalConfig.networkConfig.dnsInterface);
560  }
561 
562  if(!globalConfig.networkConfig.dnsResolves.empty()) {
563  this->dnsResolves.append(globalConfig.networkConfig.dnsResolves);
564 
565  this->setOption(CURLOPT_RESOLVE, this->dnsResolves);
566  }
567 
568  if(!globalConfig.networkConfig.dnsServers.empty()) {
569  std::string serverList;
570 
571  for(const auto& dnsServer : globalConfig.networkConfig.dnsServers) {
572  serverList += dnsServer + ",";
573  }
574 
575  serverList.pop_back();
576 
577  this->setOption(CURLOPT_DNS_SERVERS, serverList);
578  }
579 
580  if(this->version >= versionDnsShuffle) {
581  this->setOption(CURLOPT_DNS_SHUFFLE_ADDRESSES, globalConfig.networkConfig.dnsShuffle);
582  }
583  else {
584  if(globalConfig.networkConfig.dnsShuffle) {
585  warningsTo.emplace(
586  "DNS shuffling currently not supported,"
587  " 'network.dns.shuffle' ignored."
588  );
589  }
590  }
591 
592  if(
593  globalConfig.networkConfig.encodingBr
594  || globalConfig.networkConfig.encodingDeflate
595  || globalConfig.networkConfig.encodingGZip
596  || globalConfig.networkConfig.encodingIdentity
597  || globalConfig.networkConfig.encodingZstd
598  ) {
599  std::string encodingList;
600 
601  if(globalConfig.networkConfig.encodingBr) {
602  if(
603  this->version >= versionBrotli
604  && this->hasFeature(CURL_VERSION_BROTLI) //NOLINT(hicpp-signed-bitwise)
605  ) {
606  encodingList += "br,";
607  }
608  else {
609  warningsTo.emplace(
610  "brotli encoding currently not supported,"
611  " 'network.encoding.br' ignored."
612  );
613  }
614  }
615 
616  if(globalConfig.networkConfig.encodingDeflate) {
617  if(this->hasFeature(CURL_VERSION_LIBZ)) { //NOLINT(hicpp-signed-bitwise)
618  encodingList += "deflate,";
619  }
620  else {
621  warningsTo.emplace(
622  "deflate encoding currently not supported"
623  " (the libcurl library misses libz support),"
624  " 'network.encoding.deflate' ignored."
625  );
626  }
627  }
628 
629  if(globalConfig.networkConfig.encodingGZip) {
630  if(this->hasFeature(CURL_VERSION_LIBZ)) { //NOLINT(hicpp-signed-bitwise)
631  encodingList += "gzip,";
632  }
633  else {
634  warningsTo.emplace(
635  "deflate encoding currently not supported"
636  " (the libcurl library misses libz support),"
637  " 'network.encoding.gzip' ignored."
638  );
639  }
640  }
641 
642  if(globalConfig.networkConfig.encodingIdentity) {
643  encodingList += "identity,";
644  }
645 
646  if(globalConfig.networkConfig.encodingZstd) {
647  if(
648  this->version >= versionZstd
649  && this->hasFeature(CURL_VERSION_ZSTD) //NOLINT(hicpp-signed-bitwise)
650  ) {
651  encodingList += "zstd,";
652  }
653  else {
654  warningsTo.emplace(
655  "Zstandard encoding currently not supported,"
656  " 'network.encoding.zstd' ignored."
657  );
658  }
659  }
660 
661  encodingList.pop_back();
662 
663  this->setOption(CURLOPT_ACCEPT_ENCODING, encodingList);
664  }
665  else {
666  this->setOption(CURLOPT_ACCEPT_ENCODING, nullptr);
667  }
668 
669  if(globalConfig.networkConfig.encodingTransfer) {
670  this->setOption(CURLOPT_TRANSFER_ENCODING, globalConfig.networkConfig.encodingTransfer);
671  }
672 
673  if(!globalConfig.networkConfig.headers.empty() && !limited) {
674  this->headers.append(globalConfig.networkConfig.headers);
675 
676  this->setOption(CURLOPT_HTTPHEADER, this->headers);
677  }
678 
679  if(!globalConfig.networkConfig.http200Aliases.empty() && !limited) {
680  this->http200Aliases.append(globalConfig.networkConfig.http200Aliases);
681 
682  this->setOption(CURLOPT_HTTP200ALIASES, this->http200Aliases);
683  }
684 
685  if(!limited) {
686  switch(globalConfig.networkConfig.httpVersion) {
687  case httpVersionAny:
688  this->setOption(CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_NONE);
689 
690  break;
691 
692  case httpVersion1:
693  this->setOption(CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0);
694 
695  break;
696 
697  case httpVersion11:
698  this->setOption(CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
699 
700  break;
701 
702  case httpVersion2:
703  if(
704  this->version >= versionHttp2
705  && this->hasFeature(CURL_VERSION_HTTP2) //NOLINT(hicpp-signed-bitwise)
706  ) {
707  this->setOption(CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
708  }
709  else {
710  warningsTo.emplace(
711  "HTTP/2 currently not supported,"
712  " 'network.http.version' ignored."
713  );
714  }
715 
716  break;
717 
718  case httpVersion2Only:
719  if(
720  this->version >= versionHttp2Only
721  && this->hasFeature(CURL_VERSION_HTTP2) //NOLINT(hicpp-signed-bitwise)
722  ) {
723  this->setOption(
724  CURLOPT_HTTP_VERSION,
726  );
727  }
728  else {
729  warningsTo.emplace(
730  "HTTP/2 ONLY currently not supported,"
731  " 'network.http.version' ignored."
732  );
733  }
734 
735  break;
736 
737  case httpVersion2Tls:
738  if(
739  this->version >= versionHttp2Tls
740  && this->hasFeature(CURL_VERSION_HTTP2) //NOLINT(hicpp-signed-bitwise)
741  ) {
742  this->setOption(CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2TLS);
743  }
744  else {
745  warningsTo.emplace(
746  "HTTP/2 OVER TLS ONLY currently not supported,"
747  " 'network.http.version' ignored."
748  );
749  }
750 
751  break;
752 
753  case httpVersion3Only:
754  if(
755  this->version >= versionHttp3Only
756  && this->hasFeature(CURL_VERSION_HTTP3) //NOLINT(hicpp-signed-bitwise)
757  ) {
758  this->setOption(CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_3);
759  }
760  else {
761  warningsTo.emplace(
762  "HTTP/3 ONLY currently not supported,"
763  " 'network.http.version' ignored."
764  );
765  }
766 
767  break;
768 
769  default:
770  warningsTo.emplace(
771  "Enum value for HTTP version not recognized,"
772  " 'network.http.version' ignored."
773  );
774  }
775  }
776 
777  if(!globalConfig.networkConfig.localInterface.empty()) {
778  this->setOption(CURLOPT_INTERFACE, globalConfig.networkConfig.localInterface);
779  }
780 
781  this->setOption(CURLOPT_LOCALPORT, globalConfig.networkConfig.localPort);
782  this->setOption(CURLOPT_LOCALPORTRANGE, globalConfig.networkConfig.localPortRange);
783  this->setOption(CURLOPT_FORBID_REUSE, globalConfig.networkConfig.noReUse);
784 
785  if(globalConfig.networkConfig.proxy.empty()) {
786  if(!(this->networkSettings.defaultProxy.empty())) {
787  // no proxy is given, but default proxy is set: use default proxy
788  this->setOption(CURLOPT_PROXY, this->networkSettings.defaultProxy);
789  }
790  }
791  else {
792  this->setOption(CURLOPT_PROXY, globalConfig.networkConfig.proxy);
793  }
794 
795  if(!globalConfig.networkConfig.proxyAuth.empty()) {
796  this->setOption(CURLOPT_PROXYUSERPWD, globalConfig.networkConfig.proxyAuth);
797  }
798 
799  if(!globalConfig.networkConfig.proxyHeaders.empty()) {
800  this->proxyHeaders.append(globalConfig.networkConfig.proxyHeaders);
801 
802  this->setOption(CURLOPT_PROXYHEADER, this->proxyHeaders);
803  }
804 
805  if(!globalConfig.networkConfig.proxyPre.empty()) {
806  if(this->version >= versionPreProxy) {
807  this->setOption(CURLOPT_PRE_PROXY, globalConfig.networkConfig.proxyPre);
808  }
809  else {
810  warningsTo.emplace(
811  "Pre-Proxy currently not supported,"
812  " ' proxy.pre' ignored."
813  );
814  }
815  }
816 
817  if(
818  !globalConfig.networkConfig.proxyTlsSrpPassword.empty()
819  || !globalConfig.networkConfig.proxyTlsSrpUser.empty()
820  ) {
821  if(
822  this->version >= versionProxyTlsAuth
823  && this->hasFeature(CURL_VERSION_TLSAUTH_SRP) //NOLINT(hicpp-signed-bitwise)
824  ) {
825  this->setOption(CURLOPT_PROXY_TLSAUTH_TYPE, authTypeTlsSrp);
826  this->setOption(
828  globalConfig.networkConfig.proxyTlsSrpUser
829  );
830  this->setOption(
833  );
834  }
835  else {
836  warningsTo.emplace(
837  "Proxy TLS authentication currently not supported,"
838  " 'proxy.tls.srp.user' and 'proxy.tls.srp.password' ignored."
839  );
840  }
841  }
842 
843  this->setOption(CURLOPT_HTTPPROXYTUNNEL, globalConfig.networkConfig.proxyTunnelling);
844  this->setOption(CURLOPT_FOLLOWLOCATION, globalConfig.networkConfig.redirect);
845  this->setOption(CURLOPT_MAXREDIRS, globalConfig.networkConfig.redirectMax);
846 
847  if(
848  globalConfig.networkConfig.redirectPost301
849  && globalConfig.networkConfig.redirectPost302
850  && globalConfig.networkConfig.redirectPost303
851  ) {
852  //NOLINTNEXTLINE(hicpp-signed-bitwise)
853  this->setOption(CURLOPT_POSTREDIR, CURL_REDIR_POST_ALL);
854  }
855  else {
856  //NOLINTNEXTLINE(google-runtime-int)
857  long redirectPost{};
858 
859  if(globalConfig.networkConfig.redirectPost301) {
860  //NOLINTNEXTLINE(hicpp-signed-bitwise)
861  redirectPost |= CURL_REDIR_POST_301;
862  }
863 
864  if(globalConfig.networkConfig.redirectPost302) {
865  //NOLINTNEXTLINE(hicpp-signed-bitwise)
866  redirectPost |= CURL_REDIR_POST_302;
867  }
868 
869  if(globalConfig.networkConfig.redirectPost303) {
870  //NOLINTNEXTLINE(hicpp-signed-bitwise)
871  redirectPost |= CURL_REDIR_POST_303;
872  }
873 
874  this->setOption(CURLOPT_POSTREDIR, redirectPost);
875  }
876 
877  if(!globalConfig.networkConfig.referer.empty() && !limited) {
878  this->setOption(CURLOPT_REFERER, globalConfig.networkConfig.referer);
879  }
880 
881  this->setOption(CURLOPT_AUTOREFERER, globalConfig.networkConfig.refererAutomatic);
882  this->setOption(CURLOPT_MAX_RECV_SPEED_LARGE, globalConfig.networkConfig.speedDownLimit);
883  this->setOption(CURLOPT_LOW_SPEED_LIMIT, globalConfig.networkConfig.speedLowLimit);
884  this->setOption(CURLOPT_LOW_SPEED_TIME, globalConfig.networkConfig.speedLowTime);
885  this->setOption(CURLOPT_MAX_SEND_SPEED_LARGE, globalConfig.networkConfig.speedUpLimit);
886 
887  if(globalConfig.networkConfig.protocol != "http://") {
888  // secure connection: set SSL options
889  this->setOption(CURLOPT_SSL_VERIFYHOST, globalConfig.networkConfig.sslVerifyHost);
890  this->setOption(CURLOPT_SSL_VERIFYPEER, globalConfig.networkConfig.sslVerifyPeer);
891 
892  if(this->version >= versionProxySslVerify) {
893  this->setOption(CURLOPT_PROXY_SSL_VERIFYHOST, globalConfig.networkConfig.sslVerifyProxyHost);
894  this->setOption(CURLOPT_PROXY_SSL_VERIFYPEER, globalConfig.networkConfig.sslVerifyProxyPeer);
895  }
896  else {
897  if(
898  globalConfig.networkConfig.sslVerifyProxyHost
899  || globalConfig.networkConfig.sslVerifyProxyPeer
900  ) {
901  warningsTo.emplace(
902  "SSL verification of proxy host and peer currently not supported,"
903  " 'ssl.verify.proxy.host' and 'ssl.verify.proxy.peer' ignored."
904  );
905  }
906  }
907 
908  this->setOption(CURLOPT_SSL_VERIFYSTATUS, globalConfig.networkConfig.sslVerifyStatus);
909  }
910  else {
911  // INSECURE connection: ignore SSL options
912  this->setOption(CURLOPT_SSL_VERIFYHOST, false);
913  this->setOption(CURLOPT_SSL_VERIFYPEER, false);
914  this->setOption(CURLOPT_SSL_VERIFYSTATUS, false);
915  }
916 
917  if(this->version >= versionTcpFastOpen) {
918  this->setOption(CURLOPT_TCP_FASTOPEN, globalConfig.networkConfig.tcpFastOpen);
919  }
920  else {
921  if(globalConfig.networkConfig.tcpFastOpen) {
922  warningsTo.emplace(
923  "TCP Fast Open currently not supported,"
924  " 'tcp.fast.open' ignored."
925  );
926  }
927  }
928 
929  this->setOption(CURLOPT_TCP_KEEPALIVE, globalConfig.networkConfig.tcpKeepAlive);
930  this->setOption(CURLOPT_TCP_KEEPIDLE, globalConfig.networkConfig.tcpKeepAliveIdle);
931  this->setOption(CURLOPT_TCP_KEEPINTVL, globalConfig.networkConfig.tcpKeepAliveInterval);
932  this->setOption(CURLOPT_TCP_NODELAY, globalConfig.networkConfig.tcpNagle);
933  this->setOption(CURLOPT_CONNECTTIMEOUT, globalConfig.networkConfig.timeOut);
934 
935  if(this->version >= versionHappyEyeballs) {
936  if(globalConfig.networkConfig.timeOutHappyEyeballs > 0) {
937  this->setOption(
940  );
941  }
942  else {
944  }
945  }
946  else if(globalConfig.networkConfig.timeOutHappyEyeballs > 0) {
947  warningsTo.emplace(
948  "Happy Eyeballs Configuration currently not supported,"
949  " 'network.timeout.happyeyeballs' ignored."
950  );
951  }
952 
953  this->setOption(CURLOPT_TIMEOUT, globalConfig.networkConfig.timeOutRequest);
954 
955  if(
956  !globalConfig.networkConfig.tlsSrpPassword.empty()
957  || !globalConfig.networkConfig.tlsSrpUser.empty()
958  ) {
959  this->setOption(CURLOPT_TLSAUTH_TYPE, "SRP");
960  this->setOption(CURLOPT_TLSAUTH_USERNAME, globalConfig.networkConfig.tlsSrpUser);
961  this->setOption(CURLOPT_TLSAUTH_PASSWORD, globalConfig.networkConfig.tlsSrpPassword);
962  }
963 
964  if(!globalConfig.networkConfig.userAgent.empty()) {
965  this->setOption(CURLOPT_USERAGENT, globalConfig.networkConfig.userAgent);
966  }
967 
968  this->setOption(CURLOPT_VERBOSE, globalConfig.networkConfig.verbose);
969 
970  // save configuration
971  this->config = &globalConfig;
972  this->limitedSettings = limited;
973  }
974 
976 
986  inline void Curl::setConfigCurrent(const Config& currentConfig) {
987  // overwrite cookies
988  for(const auto& cookie : currentConfig.networkConfig.cookiesOverwrite) {
989  this->setOption(CURLOPT_COOKIELIST, "Set-Cookie:" + cookie);
990  }
991  }
992 
994 
1021  inline void Curl::setCookies(const std::string& cookies) {
1022  if(cookies.empty()) {
1023  // reset cookies if string is empty
1024  this->setOption(CURLOPT_COOKIE, nullptr);
1025  }
1026  else {
1027  this->setOption(CURLOPT_COOKIE, cookies);
1028  }
1029 
1030  this->oldCookies = this->tmpCookies;
1031  this->tmpCookies = cookies;
1032  }
1033 
1035 
1050  inline void Curl::setHeaders(const std::vector<std::string>& customHeaders) {
1051  // clear old temporary headers if necessary
1052  this->tmpHeaders.clear();
1053 
1054  if(customHeaders.empty()) {
1055  // reset headers if vector is empty
1056  this->setOption(CURLOPT_HTTPHEADER, this->headers);
1057  }
1058  else {
1059  // temporarily combine global and current headers
1060  this->tmpHeaders.append(this->headers);
1061  this->tmpHeaders.append(customHeaders);
1062 
1063  this->setOption(CURLOPT_HTTPHEADER, this->tmpHeaders);
1064  }
1065  }
1066 
1068 
1082  inline void Curl::setVerbose(bool isVerbose) {
1083  this->setOption(CURLOPT_VERBOSE, isVerbose);
1084  }
1085 
1087 
1097  inline void Curl::unsetCookies() {
1098  if(this->oldCookies.empty()) {
1099  this->setOption(CURLOPT_COOKIE, nullptr);
1100 
1101  this->tmpCookies.clear();
1102  }
1103  else {
1104  this->setOption(CURLOPT_COOKIE, this->oldCookies);
1105 
1106  this->tmpCookies = this->oldCookies;
1107 
1108  this->oldCookies.clear();
1109  }
1110  }
1111 
1113 
1120  inline void Curl::unsetHeaders() {
1121  // clear temporary headers if necessary
1122  this->tmpHeaders.clear();
1123 
1124  // reset headers
1125  this->setOption(CURLOPT_HTTPHEADER, this->headers);
1126  }
1127 
1128  /*
1129  * GETTERS
1130  */
1131 
1133 
1171  inline void Curl::getContent(
1172  std::string_view url,
1173  bool usePost,
1174  std::string& contentTo,
1175  const std::vector<std::uint32_t>& errors
1176  ) {
1177  this->clearContent();
1178 
1179  /*
1180  * NOTE: The following string needs to be available until after the request,
1181  * because libcurl does not create its own copy of it!
1182  */
1183  std::string postFields;
1184 
1185  const auto escapedUrl{
1186  usePost ? this->preparePost(url, postFields) : this->prepareGet(url)
1187  };
1188 
1189  // set URL
1190  this->setOption(CURLOPT_URL, escapedUrl);
1191 
1192  // set error buffer
1193  std::array<char, CURL_ERROR_SIZE> errorBuffer{};
1194 
1195  errorBuffer.at(0) = 0;
1196 
1197  this->setOption(CURLOPT_ERRORBUFFER, errorBuffer.data());
1198 
1199  // perform request
1200  try {
1201  this->curlCode = curl_easy_perform(this->curl.get());
1202  }
1203  catch(const std::exception& e) { /* handle exception */
1204  // reset error buffer
1205  this->setOption(CURLOPT_ERRORBUFFER, nullptr);
1206 
1207  throw Curl::Exception(e.what());
1208  }
1209 
1210  // process reply
1211  this->checkResult(errorBuffer);
1212  this->checkResponseCode(errors);
1213  this->retrieveContentType();
1214  this->processContentType();
1215  this->checkCompression();
1216  this->repairEncoding();
1217 
1218  contentTo.swap(this->content);
1219  }
1220 
1222 
1225  inline std::uint32_t Curl::getResponseCode() const noexcept {
1226  return this->responseCode;
1227  }
1228 
1230 
1234  inline std::string Curl::getContentType() const noexcept {
1235  return this->contentType;
1236  }
1237 
1239 
1247  inline CURLcode Curl::getCurlCode() const noexcept {
1248  return this->curlCode;
1249  }
1250 
1252 
1260  inline std::string Curl::getPublicIp() {
1261  std::string ip;
1262 
1263  try {
1264  this->getContent(
1266  false,
1267  ip,
1268  std::vector<std::uint32_t>(
1269  getPublicIpErrors.cbegin(),
1270  getPublicIpErrors.cend()
1271  )
1272  );
1273  }
1274  catch(const Curl::Exception& e) {
1275  return "N/A (" + std::string(e.view()) + ")";
1276  }
1277 
1278  if(ip.empty()) {
1279  return "N/A";
1280  }
1281 
1282  return ip;
1283  }
1284 
1285  /*
1286  * RESET
1287  */
1288 
1290 
1313  std::uint64_t sleepForMilliseconds,
1314  const IsRunningCallback& isRunningCallback
1315  ) {
1316  // cleanup lists
1317  this->dnsResolves.clear();
1318  this->headers.clear();
1319  this->tmpHeaders.clear();
1320  this->http200Aliases.clear();
1321  this->proxyHeaders.clear();
1322 
1323  // cleanup libcurl
1324  this->curl.clear();
1325 
1326  // sleep
1327  const auto sleepTill{
1329  + std::chrono::milliseconds(sleepForMilliseconds)
1330  };
1331 
1332  while(
1333  isRunningCallback()
1334  && std::chrono::steady_clock::now() < sleepTill
1335  ) {
1336  std::this_thread::sleep_for(std::chrono::milliseconds(checkEveryMilliseconds));
1337  }
1338 
1339  // re-initialize libcurl
1340  this->curl.init();
1341 
1342  // configure libcurl (global defaults)
1343  this->setOption(CURLOPT_NOSIGNAL, true);
1344 
1345  // set header function
1346  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
1347  this->curlCode = curl_easy_setopt(
1348  this->curl.get(),
1349  CURLOPT_HEADERFUNCTION,
1350  Curl::header
1351  );
1352 
1353  this->checkCode();
1354 
1355  // set write function
1356  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
1357  this->curlCode = curl_easy_setopt(
1358  this->curl.get(),
1359  CURLOPT_WRITEFUNCTION,
1360  Curl::writer
1361  );
1362 
1363  this->checkCode();
1364 
1365  // set pointers to instance
1366  this->setOption(CURLOPT_WRITEDATA, this);
1367  this->setOption(CURLOPT_HEADERDATA, this);
1368 
1369  // set configuration
1370  if(this->config != nullptr) {
1371  std::queue<std::string> logDump; // do not log warnings again
1372 
1373  this->setConfigGlobal(
1374  *(this->config),
1375  this->limitedSettings,
1376  logDump
1377  );
1378  }
1379  }
1380 
1381  /*
1382  * URL ENCODING
1383  */
1384 
1386 
1406  inline std::string Curl::escape(const std::string& stringToEscape, bool usePlusForSpace) {
1407  if(stringToEscape.empty()) {
1408  return "";
1409  }
1410 
1411  if(!(this->curl.valid())) {
1412  throw Curl::Exception(
1413  "Curl::escape():"
1414  " libcurl library has not been initialized"
1415  );
1416  }
1417 
1418  std::string result(
1420  curl_easy_escape(
1421  this->curl.get(),
1422  stringToEscape.c_str(),
1423  stringToEscape.length()
1424  )
1425  )
1426  );
1427 
1428  if(usePlusForSpace) {
1429  std::size_t pos{};
1430 
1431  while(true) {
1432  pos = result.find(encodedSpace, pos);
1433 
1434  if(pos < result.length()) {
1435  result = result.substr(0, pos)
1436  + '+' + result.substr(pos + encodedSpaceLength);
1437 
1438  ++pos;
1439  }
1440  else {
1441  break;
1442  }
1443  }
1444  }
1445  return result;
1446  }
1447 
1449 
1467  inline std::string Curl::unescape(const std::string& escapedString, bool usePlusForSpace) {
1468  if(escapedString.empty()) {
1469  return "";
1470  }
1471 
1472  if(!(this->curl.valid())) {
1473  throw Curl::Exception(
1474  "Curl::unescape():"
1475  " libcurl library had not been initialized"
1476  );
1477  }
1478 
1479  std::string result(
1481  curl_easy_unescape(
1482  this->curl.get(),
1483  escapedString.c_str(),
1484  escapedString.length(),
1485  nullptr
1486  )
1487  )
1488  );
1489 
1490  if(usePlusForSpace) {
1491  std::size_t pos{};
1492 
1493  while(true) {
1494  pos = result.find('+', pos);
1495 
1496  if(pos < result.length()) {
1497  result.replace(pos, 1, " ");
1498 
1499  ++pos;
1500  }
1501  else {
1502  break;
1503  }
1504  }
1505  }
1506 
1507  return result;
1508  }
1509 
1511 
// URL encodes the given URL piece by piece: the runs of characters between
//  any of the reservedCharacters are passed to curl_easy_escape(), while the
//  separating characters themselves are copied to the result unchanged.
// Returns an empty string if the find_first_not_of() check at the top finds
//  no position; throws Curl::Exception if the libcurl handle is not valid.
// NOTE(review): the argument of find_first_not_of() (original line 1536) is
//  missing from this listing — restore it from the repository before use.
1533  inline std::string Curl::escapeUrl(std::string_view urlToEscape) {
1534  if(
1535  urlToEscape.find_first_not_of(
1537  ) == std::string::npos
1538  ) {
1539  return "";
1540  }
1541 
1542  if(!(this->curl.valid())) {
// NOTE(review): the message names Curl::unescape(), although it is thrown
//  by escapeUrl() — looks like a copy-and-paste leftover, confirm and fix.
1543  throw Curl::Exception(
1544  "Curl::unescape():"
1545  " libcurl library has not been initialized"
1546  );
1547  }
1548 
1549  std::size_t pos{};
1550  std::string result;
1551 
// each iteration handles one part up to (and including) the next reserved character
1552  while(pos < urlToEscape.length()) {
1553  auto end{urlToEscape.find_first_of(reservedCharacters, pos)};
1554 
// no further reserved character: the last part runs to the end of the URL
1555  if(end == std::string::npos) {
1556  end = urlToEscape.length();
1557  }
// only non-empty parts are copied and escaped
1558  if(end - pos > 0) {
1559  const std::string part(urlToEscape, pos, end - pos);
1560 
// curlStringToString() copies the libcurl string and releases it via curl_free()
1561  result += Curl::curlStringToString(
1562  curl_easy_escape(
1563  this->curl.get(),
1564  part.c_str(),
1565  part.length()
1566  )
1567  );
1568  }
1569 
// append the reserved character that ended the part without escaping it
1570  if(end < urlToEscape.length()) {
1571  result += urlToEscape.at(end);
1572  }
1573 
// continue behind the reserved character
1574  pos = end + 1;
1575  }
1576 
1577  return result;
1578  }
1579 
1580  /*
1581  * HELPER (protected)
1582  */
1583 
1585 
1603  inline std::string Curl::curlStringToString(char * curlString) {
1604  if(curlString != nullptr) {
1605  std::string newString{curlString};
1606 
1607  curl_free(curlString);
1608 
1609  return newString;
1610  }
1611 
1612  return std::string();
1613  }
1614 
1615  /*
1616  * HEADER HANDLING (protected)
1617  */
1618 
1620 
1631  inline int Curl::header(char * data, std::size_t size, std::size_t nitems, void * thisPtr) {
1632  if(thisPtr == nullptr) {
1633  return 0;
1634  }
1635 
1636  return static_cast<Curl *>(thisPtr)->headerInClass(data, size * nitems);
1637  }
1638 
1640 
1648  inline int Curl::headerInClass(char * data, std::size_t size) {
1649  if(size > xTsHeaderNameLen) {
1650  bool found{true};
1651 
1652  for(std::size_t n{}; n < xTsHeaderNameLen; ++n) {
1653  if(data[n] != xTsHeaderName[n]) { //NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1654  found = false;
1655 
1656  break;
1657  }
1658  }
1659 
1660  if(found) {
1661  std::stringstream stringStream;
1662 
1663  for(std::size_t n{xTsHeaderNameLen}; n < size; ++n) {
1664  stringStream << data[n]; //NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1665  }
1666 
1667  stringStream >> this->xTsHeaderValue;
1668  }
1669  }
1670 
1671  return static_cast<int>(size);
1672  }
1673 
1674  /*
1675  * WRITERS (protected)
1676  */
1677 
1679 
1690  inline int Curl::writer(char * data, std::size_t size, std::size_t nmemb, void * thisPtr) {
1691  if(thisPtr == nullptr) {
1692  return 0;
1693  }
1694 
1695  return static_cast<Curl *>(thisPtr)->writerInClass(data, size * nmemb);
1696  }
1697 
1699 
1707  inline int Curl::writerInClass(char * data, std::size_t size) {
1708  this->content.append(data, size);
1709 
1710  return static_cast<int>(size);
1711  }
1712 
1713  /*
1714  * INTERNAL HELPER FUNCTIONS (private)
1715  */
1716 
1717  // set a libcurl option to a string value
1718  inline void Curl::setOption(CURLoption option, const std::string& stringValue) {
1719  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
1720  this->curlCode = curl_easy_setopt(
1721  this->curl.get(),
1722  option,
1723  stringValue.c_str()
1724  );
1725 
1726  this->checkCode();
1727  }
1728 
1729  // set a libcurl option to a numeric value
1730  //NOLINTNEXTLINE(google-runtime-int)
1731  inline void Curl::setOption(CURLoption option, long longValue) {
1732  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
1733  this->curlCode = curl_easy_setopt(
1734  this->curl.get(),
1735  option,
1736  longValue
1737  );
1738 
1739  this->checkCode();
1740  }
1741 
1742  // set a libcurl option to a libcurl list
1743  inline void Curl::setOption(CURLoption option, CurlList& list) {
1744  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
1745  this->curlCode = curl_easy_setopt(
1746  this->curl.get(),
1747  option,
1748  list.get()
1749  );
1750 
1751  this->checkCode();
1752  }
1753 
1754  // set a libcurl option to a pointer
1755  inline void Curl::setOption(CURLoption option, void * pointer) {
1756  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
1757  this->curlCode = curl_easy_setopt(
1758  this->curl.get(),
1759  option,
1760  pointer
1761  );
1762 
1763  this->checkCode();
1764  }
1765 
1766  // check whether a libcurl feature is supported
1767  inline bool Curl::hasFeature(int feature) const noexcept {
1768  return (this->features & feature) == feature; //NOLINT(hicpp-signed-bitwise)
1769  }
1770 
1771  // check the return code of ANY libcurl function call
1772  inline void Curl::checkCode() {
1773  if(this->curlCode != CURLE_OK) {
1774  throw Curl::Exception(curl_easy_strerror(this->curlCode));
1775  }
1776  }
1777 
1778  // clear previously received content, including type, response code, and 'X-ts' header value
1779  inline void Curl::clearContent() {
1780  this->content.clear();
1781  this->contentType.clear();
1782 
1783  this->responseCode = 0;
1784  this->xTsHeaderValue = 0;
1785  }
1786 
1787  // prepare POST request and fill POST fields, if necessary, returns escaped URL
1788  inline std::string Curl::preparePost(std::string_view url, std::string& postFieldsTo) {
1789  const auto delim{url.find('?')};
1790  const bool noFields{delim == std::string::npos};
1791 
1792  if(noFields) {
1793  // set POST method if necessary
1794  if(!(this->post)) {
1795  this->setOption(CURLOPT_POST, true);
1796 
1797  this->post = true;
1798  }
1799 
1800  // set POST field size to zero
1801  this->setOption(CURLOPT_POSTFIELDSIZE, 0);
1802 
1803  // escape whole URL
1804  return this->escapeUrl(url);
1805  }
1806 
1807  // split POST data from URL (and escape the latter)
1808  postFieldsTo = url.substr(delim + 1);
1809 
1810  // set POST data and its size
1811  this->setOption(CURLOPT_POSTFIELDSIZE, postFieldsTo.size());
1812  this->setOption(CURLOPT_POSTFIELDS, postFieldsTo);
1813 
1814  this->post = true;
1815 
1816  return this->escapeUrl(url.substr(0, delim));
1817  }
1818 
1819  // prepare GET request, returns escaped URL
1820  inline std::string Curl::prepareGet(std::string_view url) {
1821  if(this->post) {
1822  // unset POST method
1823  this->setOption(CURLOPT_POST, false);
1824 
1825  this->post = false;
1826  }
1827 
1828  // escape whole URL
1829  return this->escapeUrl(url);
1830  }
1831 
1832  // check the result of performing a HTTP request, throws Curl::Exception
1833  inline void Curl::checkResult(const std::array<char, CURL_ERROR_SIZE>& errorBuffer) {
1834  // store result
1835  const auto result{this->curlCode};
1836 
1837  if(result == CURLE_OK) {
1838  // reset error buffer
1839  this->setOption(CURLOPT_ERRORBUFFER, nullptr);
1840 
1841  return;
1842  }
1843 
1844  std::string curlError{curl_easy_strerror(result)};
1845 
1846  if(errorBuffer.at(0) > 0) {
1847  curlError += " [";
1848  curlError += errorBuffer.data();
1849  curlError += "]";
1850  }
1851 
1852  // reset error buffer
1853  this->setOption(CURLOPT_ERRORBUFFER, nullptr);
1854 
1855  // restore result (might have been changed by call to setOption())
1856  this->curlCode = result;
1857 
1858  throw Curl::Exception(curlError);
1859  }
1860 
1861  // check the response code after performing a HTTP request
1862  inline void Curl::checkResponseCode(const std::vector<std::uint32_t>& errors) {
1863  // get response code
1864  //NOLINTNEXTLINE(google-runtime-int)
1865  long responseCodeL{};
1866 
1867  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
1868  this->curlCode = curl_easy_getinfo(
1869  this->curl.get(),
1870  CURLINFO_RESPONSE_CODE,
1871  &responseCodeL
1872  );
1873 
1874  this->checkCode();
1875 
1876  if(
1877  responseCodeL < 0
1878  || responseCodeL > std::numeric_limits<std::uint32_t>::max()
1879  ) {
1880  throw Curl::Exception("Invalid HTTP response code");
1881  }
1882 
1883  this->responseCode = static_cast<std::uint32_t>(responseCodeL);
1884 
1885  // check response code for errors that should throw an exception
1886  if(
1887  std::find(
1888  errors.cbegin(),
1889  errors.cend(),
1890  this->responseCode
1891  ) != errors.cend()
1892  && (
1893  this->xTsHeaderValue == 0
1894  || this->responseCode != this->xTsHeaderValue
1895  )
1896  ) {
1897  std::string exceptionString{"HTTP error "};
1898 
1899  exceptionString += std::to_string(this->responseCode);
1900 
1901  throw Curl::Exception(exceptionString);
1902  }
1903  }
1904 
1905  // get the content type after performing a HTTP request
1906  inline void Curl::retrieveContentType() {
1907  // get content type
1908  char * cString{nullptr};
1909 
1910  //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg, hicpp-vararg)
1911  this->curlCode = curl_easy_getinfo(
1912  this->curl.get(),
1913  CURLINFO_CONTENT_TYPE,
1914  &cString
1915  );
1916 
1917  this->checkCode();
1918 
1919  if(cString == nullptr) {
1920  this->contentType = std::string{};
1921 
1922  return;
1923  }
1924  this->contentType = cString;
1925  }
1926 
1927  // transform content type to lower case and remove spaces
1928  inline void Curl::processContentType() {
1929  std::transform(
1930  this->contentType.begin(),
1931  this->contentType.end(),
1932  this->contentType.begin(),
1933  [](const auto c) {
1934  return std::tolower(c);
1935  }
1936  );
1937 
1938  this->contentType.erase(
1939  std::remove_if(
1940  this->contentType.begin(),
1941  this->contentType.end(),
1942  [](const auto c) {
1943  return std::isspace(c);
1944  }
1945  ),
1946  this->contentType.end()
1947  );
1948  }
1949 
1950  // check for gzipped content that has not been detected by curl
1951  inline void Curl::checkCompression() {
1952  // check for gzipped content that curl could not decompress
1953  if(
1954  this->content.size() >= gzipMagicNumber.size()
1955  && this->contentType.find("gzip") != std::string::npos
1956  ) {
1957  for(std::size_t byte{}; byte < gzipMagicNumber.size(); ++byte) {
1958  if(static_cast<unsigned char>(this->content[byte]) != gzipMagicNumber[byte]) {
1959  return;
1960  }
1961  }
1962 
1963  this->content = Data::Compression::Gzip::decompress(this->content);
1964  }
1965  }
1966 
1967  // perform character encoding operations
1968  // (convert ISO-8859-1 to UTF-8, remove invalid UTF-8 characters)
1969  inline void Curl::repairEncoding() {
1970  std::string repairedContent;
1971 
1972  if(this->contentType.find("charset=iso-8859-1") != std::string::npos) {
1973  this->content = Helper::Utf8::iso88591ToUtf8(this->content);
1974  }
1975 
1976  if(Helper::Utf8::repairUtf8(this->content, repairedContent)) {
1977  this->content.swap(repairedContent);
1978  }
1979  }
1980 
1981 } /* namespace crawlservpp::Network */
1982 
1983 #endif /* NETWORK_CURL_HPP_ */
constexpr auto getPublicIpFrom
URL to retrieve the IP of the server from.
Definition: Curl.hpp:135
curl_slist * get() noexcept
Gets a pointer to the underlying list.
Definition: CurlList.hpp:146
#define CURL_VERSION_BROTLI
Definition: curl.h:113
bool encodingIdentity
Specifies whether to (also) request non-compressed encoding for requested content.
Definition: Config.hpp:283
void clear() noexcept
Clears the underlying libcurl handle.
Definition: Curl.hpp:255
std::vector< std::string > headers
Custom HTTP headers to be sent with every request.
Definition: Config.hpp:301
std::vector< std::string > http200Aliases
Aliases that will be treated like HTTP/1.0 200 OK.
Definition: Config.hpp:304
bool cookiesSession
Specifies whether to ignore obsolete session cookies.
Definition: Config.hpp:187
constexpr std::uint16_t httpVersion2
Attempt to use HTTP/2, fall back to HTTP/1.1.
Definition: Config.hpp:66
constexpr auto versionDnsShuffle
libcurl version needed for DNS shuffling, i.e. 7.60.0.
Definition: Curl.hpp:96
constexpr auto versionZstd
libcurl version needed for zstd encoding, i.e. 7.72.0.
Definition: Curl.hpp:102
std::uint64_t speedUpLimit
Maximum upload speed in bytes per second.
Definition: Config.hpp:499
#define CURLOPT_PRE_PROXY
Definition: curl.h:72
#define CURLOPT_DOH_URL
Definition: curl.h:85
std::string proxyAuth
Authentication for the proxy server used.
Definition: Config.hpp:367
bool sslVerifyProxyHost
Specifies whether to verify that the SSL certificate is for the proxy server it is known as...
Definition: Config.hpp:520
std::string tlsSrpPassword
Password used for TLS-SRP authentication.
Definition: Config.hpp:615
std::uint16_t httpVersion
HTTP version(s) to be used.
Definition: Config.hpp:313
std::string protocol
The protocol to be used for HTTP requests.
Definition: Config.hpp:637
#define CURL_VERSION_SSL
Definition: curl.h:97
#define CURLOPT_PROXY_TLSAUTH_PASSWORD
Definition: curl.h:70
constexpr auto xTsHeaderNameLen
Length of the X-ts header name, in bytes.
Definition: Curl.hpp:144
std::string proxy
Proxy server used.
Definition: Config.hpp:356
std::uint64_t speedDownLimit
Maximum download speed in bytes per second.
Definition: Config.hpp:476
constexpr auto versionHttp3Only
libcurl version needed for HTTP/3 ONLY support, i.e. 7.66.0.
Definition: Curl.hpp:114
std::int64_t dnsCacheTimeOut
The lifetime of DNS cache entries.
Definition: Config.hpp:203
constexpr std::uint16_t httpVersion3Only
Use HTTP/3 only.
Definition: Config.hpp:78
void setConfigGlobal(const Config &globalConfig, bool limited, std::queue< std::string > &warningsTo)
Sets the network options for the connection according to the given configuration. ...
Definition: Curl.hpp:457
#define CURL_VERSION_LIBZ
Definition: curl.h:101
constexpr std::uint16_t httpVersion2Tls
Attempt to use HTTP/2 over TLS, fall back to HTTP/1.1.
Definition: Config.hpp:72
CURLcode getCurlCode() const noexcept
Gets the libcurl return code received from the last API call.
Definition: Curl.hpp:1247
std::string localInterface
Interface to be used for outgoing traffic.
Definition: Config.hpp:319
bool dnsShuffle
Specifies whether to shuffle addresses when a host name returns more than one.
Definition: Config.hpp:235
RAII wrapper for lists used by the libcurl API.
Definition: CurlList.hpp:75
bool contains(std::string_view pathToDir, std::string_view pathToCheck)
Checks whether the given path is located inside the given directory.
Definition: FileSystem.hpp:270
std::string dnsDoH
The URL of a custom DNS-over-HTTPS (DoH) server.
Definition: Config.hpp:209
#define CURL_HTTP_VERSION_3
Definition: curl.h:89
static int writer(char *data, std::size_t size, std::size_t nmemb, void *thisPtr)
Static writer function to handle incoming network data.
Definition: Curl.hpp:1690
#define CURL_VERSION_ZSTD
Definition: curl.h:121
std::uint16_t localPort
Port to be used for outgoing traffic.
Definition: Config.hpp:327
Network settings containing the default proxy as well as host, port, and password of the TOR control ...
Definition: NetworkSettings.hpp:49
struct crawlservpp::Network::Config::Entries networkConfig
Configuration for networking.
void setVerbose(bool isVerbose)
Forces libcurl into or out of verbose mode.
Definition: Curl.hpp:1082
std::string tlsSrpUser
User name used for TLS-SRP authentication.
Definition: Config.hpp:606
constexpr auto checkEveryMilliseconds
The number of milliseconds to sleep before re-checking the status of the application.
Definition: Curl.hpp:87
std::string referer
The HTTP Referer header to be set.
Definition: Config.hpp:440
void unsetHeaders()
Unsets custom HTTP headers previously set.
Definition: Curl.hpp:1120
constexpr auto encodedSpaceLength
Length of a URL encoded space.
Definition: Curl.hpp:84
#define CURLOPT_PROXY_SSL_VERIFYPEER
Definition: curl.h:67
void append(const CurlList &other)
Appends another list to the list.
Definition: CurlList.hpp:230
#define CURL_HET_DEFAULT
Definition: curl.h:77
#define CURL_VERSION_TLSAUTH_SRP
Definition: curl.h:105
Abstract class containing the network-specific configuration for threads.
Definition: Config.hpp:121
bool contentLengthIgnore
Specifies whether the Content-Length header in HTTP responses will be ignored.
Definition: Config.hpp:140
void init()
Initializes the underlying libcurl handle.
Definition: Curl.hpp:230
bool encodingZstd
Specifies whether to request Zstandard encoding for requested content.
Definition: Config.hpp:298
constexpr auto xTsHeaderName
Name of the X-ts header.
Definition: Curl.hpp:141
bool tcpNagle
Specifies whether TCP's Nagle algorithm is enabled on this connection.
Definition: Config.hpp:568
constexpr auto authTypeTlsSrp
libcurl authentication type for the proxy TLS-SRP authentication.
Definition: Curl.hpp:123
bool sslVerifyHost
Specifies whether to verify that the SSL certificate is for the server it is known as...
Definition: Config.hpp:506
bool sslVerifyPeer
Specifies whether to verify the authenticity of the server's SSL certificate.
Definition: Config.hpp:513
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
constexpr auto versionProxySslVerify
libcurl version needed for SSL verification of proxy host and peer, i.e. 7.52.0.
Definition: Curl.hpp:126
bool redirect
Specifies whether to follow HTTP Location headers for automatic redirects.
Definition: Config.hpp:405
std::string iso88591ToUtf8(std::string_view strIn)
Converts a string from ISO-8859-1 to UTF-8.
Definition: Utf8.hpp:139
constexpr std::uint16_t httpVersion2Only
Use non-TLS HTTP/2, even if HTTPS is not available.
Definition: Config.hpp:69
std::uint64_t speedLowLimit
Low speed limit in bytes per second.
Definition: Config.hpp:484
std::string proxyPre
Pre-proxy server to be used.
Definition: Config.hpp:379
bool valid() const noexcept
Checks whether the underlying libcurl handle is valid.
Definition: Curl.hpp:214
bool tcpFastOpen
Specifies whether TCP Fast Open will be enabled.
Definition: Config.hpp:545
std::uint16_t localPortRange
Number of ports to be tried for outgoing traffic.
Definition: Config.hpp:340
std::uint64_t timeOutRequest
The maximum amount of time a request is allowed to take, in seconds.
Definition: Config.hpp:597
#define CURL_HTTP_VERSION_2TLS
Definition: curl.h:58
int writerInClass(char *data, std::size_t size)
In-class writer function to handle incoming network data.
Definition: Curl.hpp:1707
int headerInClass(char *data, std::size_t size)
In-class header function to handle incoming header data.
Definition: Curl.hpp:1648
constexpr auto versionHttp2
libcurl version needed for HTTP/2 support, i.e. 7.33.0.
Definition: Curl.hpp:105
constexpr std::uint16_t httpVersion11
Use HTTP/1.1 only.
Definition: Config.hpp:63
std::string escapeUrl(std::string_view urlToEscape)
URL encodes the given string while leaving reserved characters (; / ? : @ = & #) intact.
Definition: Curl.hpp:1533
#define CURLOPT_HAPPY_EYEBALLS_TIMEOUT_MS
Definition: curl.h:76
std::string getContentType() const noexcept
Gets the content type of the HTTP reply received last.
Definition: Curl.hpp:1234
bool cookies
Specifies whether the internal cookie engine will be enabled.
Definition: Config.hpp:155
constexpr auto versionPreProxy
libcurl version needed for pre-proxy support, i.e. 7.52.0.
Definition: Curl.hpp:117
std::string now()
Formats the current date/time as string in the format YYYY-MM-DD HH:MM:SS.
Definition: DateTime.hpp:1045
void setCookies(const std::string &cookies)
Sets custom cookies.
Definition: Curl.hpp:1021
bool encodingGZip
Specifies whether to request gzip encoding for requested content.
Definition: Config.hpp:271
constexpr std::array getPublicIpErrors
Errors when retrieving the IP of the server.
Definition: Curl.hpp:138
constexpr std::uint16_t httpVersion1
Use HTTP/1 only.
Definition: Config.hpp:60
constexpr auto reservedCharacters
Reserved characters to be ignored when escaping a URL.
Definition: Curl.hpp:90
RAII wrapper for handles of the libcurl API.
Definition: Curl.hpp:70
constexpr auto versionTcpFastOpen
libcurl version needed for TCP Fast Open support, i.e. 7.49.0.
Definition: Curl.hpp:129
std::uint64_t speedLowTime
Number of seconds before a timeout occurs while the transfer speed is below the low speed limit...
Definition: Config.hpp:493
#define CURLOPT_TCP_FASTOPEN
Definition: curl.h:62
CURL * get() noexcept
Gets a pointer to the underlying libcurl handle.
Definition: Curl.hpp:193
bool repairUtf8(std::string_view strIn, std::string &strOut)
Replaces invalid UTF-8 characters in the given string and returns whether invalid characters occurred...
Definition: Utf8.hpp:294
constexpr auto versionHttp2Tls
libcurl version needed for HTTP/2 OVER TLS ONLY support, i.e. 7.47.0.
Definition: Curl.hpp:111
std::string cookiesLoad
The file from which cookies will be read.
Definition: Config.hpp:164
constexpr auto versionDoH
libcurl version needed for DNS-over-HTTPS support, i.e. 7.62.0.
Definition: Curl.hpp:93
void unsetCookies()
Unsets custom cookies previously set.
Definition: Curl.hpp:1097
bool sslVerifyProxyPeer
Specifies whether to verify the authenticity of the proxy's SSL certificate.
Definition: Config.hpp:527
constexpr auto versionBrotli
libcurl version needed for Brotli encoding, i.e. 7.57.0.
Definition: Curl.hpp:99
bool verbose
Specifies whether libcurl should produce verbose output.
Definition: Config.hpp:630
bool redirectPost303
Specifies whether to NOT convert POST to GET requests when following 303 redirects.
Definition: Config.hpp:431
bool refererAutomatic
Specifies whether to send an updated HTTP Referer header when automatically redirected.
Definition: Config.hpp:446
bool redirectPost301
Specifies whether to NOT convert POST to GET requests when following 301 redirects.
Definition: Config.hpp:419
std::string decompress(const std::string &compressedContent)
Decompresses gzip-compressed content.
Definition: Gzip.hpp:111
void resetConnection(std::uint64_t sleepForMilliseconds, const IsRunningCallback &isRunningCallback)
Resets the connection.
Definition: Curl.hpp:1312
bool sslVerifyStatus
Specifies whether to verify the status of the server's SSL certificate.
Definition: Config.hpp:542
constexpr auto versionHttp2Only
libcurl version needed for HTTP/2 ONLY support, i.e. 7.49.0.
Definition: Curl.hpp:108
Class for libcurl exceptions.
Definition: Curl.hpp:260
#define CURLOPT_PROXY_TLSAUTH_USERNAME
Definition: curl.h:69
void setConfigCurrent(const Config &currentConfig)
Sets temporary network options for the connection according to the given configuration.
Definition: Curl.hpp:986
constexpr std::array gzipMagicNumber
GZIP magic number.
Definition: Curl.hpp:147
std::string getPublicIp()
Uses the connection to determine its public IP address.
Definition: Curl.hpp:1260
#define CURL_VERSION_HTTP3
Definition: curl.h:117
std::string escape(const std::string &stringToEscape, bool usePlusForSpace)
URL encodes the given string.
Definition: Curl.hpp:1406
bool encodingDeflate
Specifies whether to request DEFLATE encoding for requested content.
Definition: Config.hpp:259
char getPathSeparator()
Gets the preferred separator for file paths in the current operating system.
Definition: FileSystem.hpp:187
std::string proxyTlsSrpPassword
TLS-SRP password for the proxy server used.
Definition: Config.hpp:387
#define CURL_HTTP_VERSION_2_PRIOR_KNOWLEDGE
Definition: curl.h:63
constexpr auto versionProxyTlsAuth
libcurl version needed for proxy TLS authentication, i.e. 7.52.0.
Definition: Curl.hpp:120
bool proxyTunnelling
Specifies whether to enable proxy tunnelling.
Definition: Config.hpp:399
std::string unescape(const std::string &escapedString, bool usePlusForSpace)
URL decodes the given string.
Definition: Curl.hpp:1467
std::string_view view() const noexcept
Gets the description of the exception as a view to the underlying string.
Definition: Exception.hpp:158
bool redirectPost302
Specifies whether to NOT convert POST to GET requests when following 302 redirects.
Definition: Config.hpp:425
Provides an interface to the libcurl library for sending and receiving data over the network...
Definition: Curl.hpp:168
std::vector< std::string > proxyHeaders
Custom HTTP headers to be sent to the proxy server.
Definition: Config.hpp:373
#define CURLOPT_DNS_SHUFFLE_ADDRESSES
Definition: curl.h:81
static int header(char *data, std::size_t size, std::size_t nitems, void *thisPtr)
Static header function to handle incoming header data.
Definition: Curl.hpp:1631
std::uint64_t tcpKeepAliveIdle
The delay that will be waited before sending keep-alive probes, in seconds.
Definition: Config.hpp:554
void setHeaders(const std::vector< std::string > &customHeaders)
Sets custom HTTP headers.
Definition: Curl.hpp:1050
bool encodingTransfer
Specifies whether to request HTTP Transfer Encoding.
Definition: Config.hpp:286
std::string userAgent
Custom HTTP User-Agent header to be sent with all HTTP requests.
Definition: Config.hpp:621
static std::string curlStringToString(char *curlString)
Copies the given libcurl string into a std::string and releases its memory.
Definition: Curl.hpp:1603
Curl & operator=(Curl &)=delete
Deleted copy assignment operator.
std::vector< std::string > dnsResolves
DNS name resolves to be overwritten.
Definition: Config.hpp:229
Curl(std::string_view cookieDirectory, const NetworkSettings &setNetworkSettings)
Constructor setting the cookie directory and the network options.
Definition: Curl.hpp:381
std::string defaultProxy
The host name and the port of the default proxy to be used.
Definition: NetworkSettings.hpp:54
void getContent(std::string_view url, bool usePost, std::string &contentTo, const std::vector< std::uint32_t > &errors)
Uses the connection to get content by sending a HTTP request to the specified URL.
Definition: Curl.hpp:1171
virtual ~Curl()=default
Default destructor.
constexpr auto versionHappyEyeballs
libcurl version needed for the Happy Eyeballs algorithm, i.e. 7.59.0.
Definition: Curl.hpp:132
std::uint64_t tcpKeepAliveInterval
The interval time between keep-alive probes to be sent, in seconds.
Definition: Config.hpp:560
#define CURL_HTTP_VERSION_2_0
Definition: curl.h:54
std::uint16_t timeOutHappyEyeballs
Number of milliseconds to try to connect only via IPv6 using the Happy Eyeballs algorithm.
Definition: Config.hpp:588
constexpr std::uint16_t httpVersionAny
Use any available HTTP version.
Definition: Config.hpp:57
std::string cookiesSet
Custom HTTP Cookie header independent from the internal cookie engine.
Definition: Config.hpp:197
std::uint16_t connectionsMax
The maximum number of parallel connections.
Definition: Config.hpp:137
std::string proxyTlsSrpUser
TLS-SRP user for the proxy server used.
Definition: Config.hpp:396
bool tcpKeepAlive
Specifies whether TCP keep-alive probing will be enabled.
Definition: Config.hpp:548
std::string dnsInterface
The interface that DNS name resolves should be bound to.
Definition: Config.hpp:215
bool noReUse
Specifies whether to prevent connections from re-using previous ones.
Definition: Config.hpp:348
std::uint64_t timeOut
The maximum amount of time a connection is allowed to take, in seconds.
Definition: Config.hpp:577
std::vector< std::string > dnsServers
DNS servers to be preferred.
Definition: Config.hpp:232
std::uint32_t getResponseCode() const noexcept
Gets the response code of the HTTP reply received last.
Definition: Curl.hpp:1225
#define CURLOPT_PROXY_TLSAUTH_TYPE
Definition: curl.h:71
std::uint64_t redirectMax
The maximum number of automatic redirects.
Definition: Config.hpp:413
bool encodingBr
Specifies whether to request brotli encoding for requested content.
Definition: Config.hpp:247
std::vector< std::string > cookiesOverwrite
Cookies to be overwritten.
Definition: Config.hpp:171
#define CURLOPT_PROXY_SSL_VERIFYHOST
Definition: curl.h:68
Namespace for networking classes.
Definition: Config.hpp:45
constexpr auto encodedSpace
URL encoding of a space.
Definition: Curl.hpp:81
std::string cookiesSave
The file to which cookies will be saved.
Definition: Config.hpp:180
void clear() noexcept
Resets the list and frees its memory.
Definition: CurlList.hpp:297
#define CURL_VERSION_HTTP2
Definition: curl.h:109