crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
WebServer.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2022 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * WebServer.hpp
24  *
25  * Embedded web server using mongoose.
26  *
27  * Created on: Feb 1, 2019
28  * Author: ans
29  */
30 
31 #ifndef MAIN_WEBSERVER_HPP_
32 #define MAIN_WEBSERVER_HPP_
33 
34 #include "../Data/Compression/Gzip.hpp"
35 #include "../Helper/FileSystem.hpp"
36 #include "../Helper/Memory.hpp"
37 #include "../Helper/Strings.hpp"
38 #include "../Main/Exception.hpp"
39 
40 extern "C" {
41  #include "../_extern/mongoose/mongoose.h"
42 }
43 
44 #include <algorithm> // std::copy, std::swap, std::transform
45 #include <array> // std::array
46 #include <cctype> // std::tolower
47 #include <cstddef> // std::size_t
48 #include <cstdint> // std::uint16_t, std::uint64_t
49 #include <fstream> // std::ofstream
50 #include <functional> // std::function
51 #include <ios> // std::ios_base
52 #include <stdexcept> // std::logic_error
53 #include <string> // std::stoull, std::string
54 #include <string_view> // std::string_view, std::string_view_literals
55 #include <utility> // std::pair
56 #include <vector> // std::vector
57 
58 namespace crawlservpp::Main {
59 
60  using std::string_view_literals::operator""sv;
61 
62  /*
63  * CONSTANTS
64  */
65 
68 
70 
// --- network ---

//! The address at which to listen for incoming connections (the port is appended).
inline constexpr std::string_view listenToAddress{"tcp://0.0.0.0:"};

// --- HTTP request headers (names are compared in lower case) ---

//! The name of a (lower-case) content type header.
inline constexpr std::string_view headerContentType{"content-type"};

//! The name of a (lower-case) content size header.
inline constexpr std::string_view headerContentSize{"content-length"};

//! The name of a (lower-case) content encoding header.
inline constexpr std::string_view headerContentEncoding{"content-encoding"};

//! The expected content type for HTTP multipart requests.
inline constexpr std::string_view headerContentTypeValue{"multipart/form-data"};

//! The beginning of the header part that contains the boundary.
inline constexpr std::string_view headerBoundaryBegin{"boundary="};

//! HTTP OK response code.
inline constexpr int httpOk{200};

// --- multipart file upload ---

//! Required beginning of a (lower-case) file part header.
inline constexpr std::string_view filePartHeaderBegin{"content-"};

//! Required beginning of an HTTP multipart boundary.
inline constexpr std::string_view filePartBoundaryBegin{"--"};

//! The end of the final HTTP multipart boundary.
inline constexpr std::string_view filePartBoundaryFinalEnd{"--"};

//! The name of the upload header containing content information.
inline constexpr std::string_view filePartUploadHeader{"content-disposition"};

//! The beginning of the field in the content information containing the name of the content.
inline constexpr std::string_view filePartUploadName{"name="};

//! The name of the content containing the original name of the file to upload.
inline constexpr std::string_view filePartUploadFileName{"filename="};

//! The (lower-case) name of the content containing file content to upload.
inline constexpr std::string_view filePartUploadField{"filetoupload"};

// --- sizes ---

//! The length of randomly generated file names.
inline constexpr int randFileNameLength{64};

//! The length of two encapsulating quotes, in bytes.
inline constexpr int quotesLength{2};

//! The minimum number of bytes for gzip to be used.
inline constexpr int gzipMinBytes{2000};
122 
124 
125  /*
126  * DECLARATION
127  */
128 
130 
142  class WebServer final {
143  // for convenience
144  using ConnectionPtr = mg_connection *;
145  using ConstConnectionPtr = const mg_connection *;
146  using StringString = std::pair<std::string, std::string>;
147 
148  using AcceptCallback =
149  std::function<void(
150  ConnectionPtr
151  )>;
152  using LogCallback = std::function<void(const std::string&)>;
153  using RequestCallback =
154  std::function<void(
155  ConnectionPtr,
156  const std::string&,
157  const std::string&,
158  void *
159  )>;
160 
161  public:
164 
165  explicit WebServer(std::string_view fileCacheDirectory);
166  virtual ~WebServer();
167 
171 
176  void initHTTP(const std::string& port);
177 
181 
182  void setAcceptCallback(AcceptCallback callback);
183  void setLogCallback(LogCallback callback);
184  void setRequestCallback(RequestCallback callback);
185 
189 
190  void poll(int timeOut);
191  void send(
192  ConnectionPtr connection,
193  uint16_t code,
194  const std::string& type,
195  const std::string& content
196  );
197  void sendFile(
198  ConnectionPtr connection,
199  const std::string& fileName,
200  bool isGzipped,
201  void * data
202  );
203  static void sendError(
204  ConnectionPtr connection,
205  const std::string& error
206  );
207  static void close(
208  ConnectionPtr connection,
209  bool immediately
210  );
211 
215 
216  static std::string getIP(ConstConnectionPtr connection);
217 
219 
222 
226 
229  WebServer(WebServer&) = delete;
230 
232  WebServer& operator=(WebServer&) = delete;
233 
235  WebServer(WebServer&&) = delete;
236 
238  WebServer& operator=(WebServer&&) = delete;
239 
241 
242  private:
243  const std::string fileCache;
244  mg_mgr eventManager{};
245  bool isShutdown{false};
246 
247  // callback functions
248  AcceptCallback onAccept;
249  LogCallback onLog;
250  RequestCallback onRequest;
251 
252  // event handlers
253  static void eventHandler(
254  ConnectionPtr connection,
255  int event,
256  void * data,
257  void * arg
258  );
259  void eventHandlerInClass(
260  ConnectionPtr connection,
261  int event,
262  void * data
263  );
264  void uploadHandler(ConnectionPtr connection, mg_http_message * msg);
265  void requestHandler(ConnectionPtr connection, mg_http_message * msg, void * data);
266 
267  // internal helper functions
268  void fileReceived(ConnectionPtr from, const std::string& name, const std::string& content);
269 
270  // static internal helper functions
271  static void parseHttpHeaders(
272  const std::array<mg_http_header, MG_MAX_HTTP_HEADERS>& headers,
273  std::string& contentEncodingTo
274  );
275  [[nodiscard]] static bool parseHttpHeaders(
276  const std::array<mg_http_header, MG_MAX_HTTP_HEADERS>& headers,
277  std::string& boundaryTo,
278  std::uint64_t& sizeTo,
279  std::string& contentEncodingTo
280  );
281  [[nodiscard]] static bool getLine(struct mg_str& str, std::size_t& pos, std::string& to);
282  [[nodiscard]] static bool isBoundary(const std::string& line, const std::string& boundary);
283  [[nodiscard]] static bool isFinalBoundary(const std::string& line, const std::string& boundary);
284  [[nodiscard]] static bool getUploadHeaders(struct mg_str& str, std::size_t& pos, std::vector<StringString>& to);
285  [[nodiscard]] static bool getUploadHeader(const std::string& from, StringString& to);
286 
287  [[nodiscard]] static bool parseContentType(
288  const std::string& headerName,
289  const struct mg_str& headerValue,
290  std::string& boundaryTo,
291  bool& isBoundaryFoundTo
292  );
293  [[nodiscard]] static bool parseContentSize(
294  const std::string& headerName,
295  const struct mg_str& headerValue,
296  std::uint64_t& sizeTo,
297  bool& isFoundSizeTo
298  );
299  static void parseContentEncoding(
300  const std::string& headerName,
301  const struct mg_str& headerValue,
302  std::string& contentEncodingTo
303  );
304 
305  [[nodiscard]] static bool parseContentTypeHeader(const std::string& value, std::string& boundaryTo);
306  [[nodiscard]] static bool parseUploadHeaders(const std::vector<StringString>& uploadHeaders, std::string& fileNameTo);
307  [[nodiscard]] static bool parseNextHeaderPart(const std::string& value, std::size_t& pos, std::string& to);
308 
309  [[nodiscard]] static bool checkFileName(bool inFile, const std::string& currentFile, std::string& fileName);
310 
311  [[nodiscard]] static std::string getDefaultHeaders();
312  [[nodiscard]] static std::string getCorsHeaders();
313  [[nodiscard]] static std::string toString(const struct mg_str& str);
314  static void removeQuotes(std::string& str);
315 
316  [[nodiscard]] static const char * statusCodeToString(int status_code);
317  };
318 
319 } /* namespace crawlservpp::Main */
320 
321 #endif /* MAIN_WEBSERVER_HPP_ */
void setRequestCallback(RequestCallback callback)
Sets callback function for HTTP requests.
Definition: WebServer.cpp:135
constexpr auto headerContentType
The name of a (lower-case) content type header.
Definition: WebServer.hpp:76
constexpr auto filePartBoundaryBegin
Required beginning of an HTTP multipart boundary.
Definition: WebServer.hpp:97
void initHTTP(const std::string &port)
Initializes the web server for usage over HTTP.
Definition: WebServer.cpp:83
void send(ConnectionPtr connection, uint16_t code, const std::string &type, const std::string &content)
Sends an HTTP reply to a previously established connection.
Definition: WebServer.cpp:170
constexpr auto filePartUploadField
The (lower-case) name of the content containing file content to upload.
Definition: WebServer.hpp:112
constexpr auto headerBoundaryBegin
The beginning of the header part that contains the boundary.
Definition: WebServer.hpp:88
constexpr auto quotesLength
The length of two encapsulating quotes, in bytes.
Definition: WebServer.hpp:118
static void close(ConnectionPtr connection, bool immediately)
Closes a connection immediately.
Definition: WebServer.cpp:370
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
constexpr auto filePartUploadHeader
The name of the upload header containing content information.
Definition: WebServer.hpp:103
static void sendError(ConnectionPtr connection, const std::string &error)
Sends an internal server error (HTTP code 500) with a custom message and closes the connection...
Definition: WebServer.cpp:332
constexpr auto filePartUploadFileName
The name of the content containing the original name of the file to upload.
Definition: WebServer.hpp:109
constexpr auto filePartHeaderBegin
Required beginning of (lower-case) file part header.
Definition: WebServer.hpp:94
void poll(int timeOut)
Polls the web server.
Definition: WebServer.cpp:149
constexpr auto filePartBoundaryFinalEnd
The end of the final HTTP multipart boundary.
Definition: WebServer.hpp:100
constexpr auto headerContentEncoding
The name of a (lower-case) content encoding header.
Definition: WebServer.hpp:82
virtual ~WebServer()
Destructor freeing the web server.
Definition: WebServer.cpp:61
constexpr auto listenToAddress
The address at which to listen for incoming connections.
Definition: WebServer.hpp:73
Embedded web server class using the mongoose library.
Definition: WebServer.hpp:142
void setAcceptCallback(AcceptCallback callback)
Sets callback function for accepted connections.
Definition: WebServer.cpp:115
WebServer(std::string_view fileCacheDirectory)
Constructor setting the file cache and initializing the web server.
Definition: WebServer.cpp:48
constexpr auto filePartUploadName
The beginning of the field in the content information containing the name of the content.
Definition: WebServer.hpp:106
void setLogCallback(LogCallback callback)
Sets callback function for logging.
Definition: WebServer.cpp:125
WebServer & operator=(WebServer &)=delete
Deleted copy assignment operator.
void sendFile(ConnectionPtr connection, const std::string &fileName, bool isGzipped, void *data)
Sends a file located in the file cache.
Definition: WebServer.cpp:251
static std::string getIP(ConstConnectionPtr connection)
Static helper function retrieving the client IP from a connection.
Definition: WebServer.cpp:404
Namespace for the main classes of the program.
Definition: App.cpp:34
constexpr auto httpOk
HTTP OK response code.
Definition: WebServer.hpp:91
constexpr auto gzipMinBytes
The minimum number of bytes for gzip to be used.
Definition: WebServer.hpp:121
constexpr auto headerContentSize
The name of a (lower-case) content size header.
Definition: WebServer.hpp:79
constexpr auto headerContentTypeValue
The expected content type for HTTP multipart requests.
Definition: WebServer.hpp:85
constexpr auto randFileNameLength
The length of randomly generated file names.
Definition: WebServer.hpp:115