crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
Downloader.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2022 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * Downloader.hpp
24  *
25  * Using the libcurl library to provide simple download functionality.
26  * NOT THREAD-SAFE! Use multiple instances for multiple threads.
27  *
28  * Created on: Feb 15, 2021
29  * Author: ans
30  */
31 
32 #ifndef NETWORK_DOWNLOADER_HPP_
33 #define NETWORK_DOWNLOADER_HPP_
34 
35 #include "../Wrapper/Curl.hpp"
36 
37 #ifndef CRAWLSERVPP_TESTING
38 
39 #include "../Helper/Portability/curl.h"
40 
41 #else
42 
43 #include "FakeCurl/FakeCurl.hpp"
44 
45 #endif
46 
47 #include <atomic> // std::atomic
48 #include <cstddef> // std::size_t
49 #include <memory> // std::unique_ptr
50 #include <stdexcept> // std::runtime_error
51 #include <string> // std::string
52 #include <thread> // std::thread
53 
54 namespace crawlservpp::Network {
55 
56  /*
57  * DECLARATION
58  */
59 
61  class Downloader {
62  public:
65 
66  Downloader(const std::string& url, const std::string& proxy = std::string{});
67  ~Downloader();
68 
72 
73  [[nodiscard]] bool isRunning() const noexcept;
74  [[nodiscard]] std::string getContent() const;
75  [[nodiscard]] std::string getError() const;
76 
78 
79  private:
80  std::thread thread;
81  std::atomic<bool> running{true};
82  std::string content;
83  std::string error;
84 
85  // thread function
86  void threadFunction(const std::string& url, const std::string& proxy);
87 
88  // internal helper functions
89  void configure(Wrapper::Curl& curl, const std::string& url, const std::string& proxy);
90  void download(Wrapper::Curl& curl);
91  void check(CURLcode result);
92 
93  // static internal helper function
94  static std::size_t writer(void * data, std::size_t size, std::size_t nmemb, void * ptr);
95  };
96 
97  /*
98  * IMPLEMENTATION
99  */
100 
101  /*
102  * CONSTRUCTION AND DESTRUCTION
103  */
104 
106 
112  inline Downloader::Downloader(const std::string& url, const std::string& proxy)
113  : thread(&Downloader::threadFunction, this, url, proxy) {}
114 
116 
120  if(this->thread.joinable()) {
121  this->thread.join();
122  }
123  }
124 
125  /*
126  * GETTERS
127  */
128 
130 
136  inline bool Downloader::isRunning() const noexcept {
137  return this->running;
138  }
139 
141 
150  inline std::string Downloader::getContent() const {
151  if(this->running) {
152  return std::string{};
153  }
154 
155  return this->content;
156  }
157 
159 
168  inline std::string Downloader::getError() const {
169  if(this->running) {
170  return std::string{};
171  }
172 
173  return this->error;
174  }
175 
176  /*
177  * THREAD FUNCTION (private)
178  */
179 
180  // downloads the specified URL using the specified proxy, blocks until download was complete or failed
181  inline void Downloader::threadFunction(const std::string& url, const std::string& proxy) {
182  Wrapper::Curl curl;
183 
184  if(curl.valid()) {
185  try {
186  this->configure(curl, url, proxy);
187  this->download(curl);
188  }
189  catch(const std::runtime_error& e) {
190  this->error = e.what();
191  }
192  }
193 
194  this->running = false;
195  }
196 
197  /*
198  * INTERNAL HELPER FUNCTIONS (private)
199  */
200 
201  // set download options
202  inline void Downloader::configure(Wrapper::Curl& curl, const std::string& url, const std::string& proxy) {
203  this->check(curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()));
204  this->check(curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, Downloader::writer));
205  this->check(curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, static_cast<void *>(&(this->content))));
206 
207  if(!proxy.empty()) {
208  this->check(curl_easy_setopt(curl.get(), CURLOPT_PROXY, proxy.c_str()));
209  }
210  }
211 
212  // download file
213  inline void Downloader::download(Wrapper::Curl& curl) {
214  this->check(curl_easy_perform(curl.get()));
215  }
216 
217  // check result and throw exception if an error occured
218  inline void Downloader::check(CURLcode code) {
219  if(code != CURLE_OK) {
220  throw std::runtime_error(curl_easy_strerror(code));
221  }
222  }
223 
224  /*
225  * STATIC INTERNAL HELPER FUNCTION (private)
226  */
227 
228  // downloads the given URL in an extra thread
229  inline std::size_t Downloader::writer(
230  void * data,
231  std::size_t size,
232  std::size_t nmemb,
233  void * ptr
234  ) {
235  // check arguments
236  if(
237  content == nullptr
238  || data == nullptr
239  || size == 0
240  || nmemb == 0
241  ) {
242  return 0;
243  }
244 
245  const auto bytes{size * nmemb};
246 
247  // append data to buffer
248  static_cast<std::string *>(content)->append(static_cast<const char *>(data), size * nmemb);
249 
250  // return written (i.e. all received) bytes
251  return bytes;
252  }
253 
254 } /* namespace crawlservpp::Network */
255 
256 #endif /* NETWORK_DOWNLOADER_HPP_ */
bool valid() const noexcept
Checks whether the underlying libcurl handle is valid.
Definition: Curl.hpp:214
bool isRunning() const noexcept
Returns whether the download is still in progress.
Definition: Downloader.hpp:136
RAII wrapper for handles of the libcurl API.
Definition: Curl.hpp:70
static T::size_type bytes(const T &container)
Returns the number of bytes in an iterable container.
Definition: Container.hpp:144
CURL * get() noexcept
Gets a pointer to the underlying libcurl handle.
Definition: Curl.hpp:193
static void append(T &to, const T &from, typename T::size_type startAt, typename T::size_type endAt)
Appends (part of) an iterable container to another container.
Definition: Container.hpp:51
std::string getError() const
Returns the download error, if one occurred.
Definition: Downloader.hpp:168
Downloader(const std::string &url, const std::string &proxy=std::string{})
Constructor starting to download a URL using a specific proxy server.
Definition: Downloader.hpp:112
~Downloader()
Destructor.
Definition: Downloader.hpp:119
Downloader using the libcurl library to download a URL in an extra thread.
Definition: Downloader.hpp:61
Namespace for networking classes.
Definition: Config.hpp:45
std::string getContent() const
Returns the downloaded content, if successfully downloaded.
Definition: Downloader.hpp:150