crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
FileSystem.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2020 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * FileSystem.hpp
24  *
25  * Namespace for global file system helper functions.
26  *
27  * Created on: Dec 10, 2018
28  * Author: ans
29  */
30 
31 #ifndef HELPER_FILESYSTEM_HPP_
32 #define HELPER_FILESYSTEM_HPP_
33 
34 #include "../Main/Exception.hpp"
35 
36 #include "Portability/filesystem.h"
37 
38 #include <algorithm> // std::equal
39 #include <cstdint> // std::uintmax_t
40 #include <iterator> // std::distance
41 #include <string> // std::string
42 #include <string_view> // std::string_view
43 #include <vector> // std::vector
44 
47 
48  /*
49  * DECLARATION
50  */
51 
54 
55  bool exists(std::string_view path); // checks whether the specified path exists
56  bool isValidDirectory(std::string_view path); // checks whether the given path points to a valid directory
57  bool isValidFile(std::string_view path); // checks whether the given path points to a valid file
58 
62 
63  char getPathSeparator(); // gets the preferred separator for file paths in the current operating system
64  std::vector<std::string> listFilesInPath(
65  std::string_view pathToDir,
66  std::string_view fileExtension
67  ); // lists all files with the given extension in the given directory and its subdirectories
68  bool contains(std::string_view pathToDir, std::string_view pathToCheck); // checks whether the given path is located inside the given directory
69  void createDirectory(std::string_view pathToDir); // creates a directory at the given path
70  void createDirectoryIfNotExists(std::string_view pathToDir); // creates a directory at the given path, if it does not exist already
71  void clearDirectory(std::string_view pathToDir); // deletes all files and folders in the given directory
72 
76 
77  std::uintmax_t getFreeSpace(std::string_view path); // gets the available disk space at the given location in bytes
78 
80 
81  /*
82  * CLASS FOR FILE SYSTEM EXCEPTIONS
83  */
84 
86 
98 
99  /*
100  * IMPLEMENTATION
101  */
102 
104 
113  inline bool exists(std::string_view path) {
114  try {
115  return stdfs::exists(path);
116  }
117  catch(const stdfs::filesystem_error& e) {
118  std::string exceptionString{"Could not check the existence of the path '"};
119 
120  exceptionString += path;
121  exceptionString += "': ";
122  exceptionString += e.what();
123 
124  throw Exception(exceptionString);
125  }
126  }
127 
129 
138  inline bool isValidDirectory(std::string_view path) {
139  const stdfs::path dir(path);
140 
141  try {
142  return stdfs::exists(dir) && stdfs::is_directory(dir);
143  }
144  catch(const stdfs::filesystem_error& e) {
145  std::string exceptionString{"Could not check the existence of the directory '"};
146 
147  exceptionString += path;
148  exceptionString += "': ";
149  exceptionString += e.what();
150 
151  throw Exception(exceptionString);
152  }
153  }
154 
156 
165  inline bool isValidFile(std::string_view path) {
166  const stdfs::path file(path);
167 
168  try {
169  return stdfs::exists(file) && stdfs::is_regular_file(file);
170  }
171  catch(const stdfs::filesystem_error& e) {
172  std::string exceptionString{"Could not check the validity of the file '"};
173 
174  exceptionString += path;
175  exceptionString += "': ";
176  exceptionString += e.what();
177 
178  throw Exception(exceptionString);
179  }
180  }
181 
183 
187  inline char getPathSeparator() {
188  return stdfs::path::preferred_separator;
189  }
190 
192 
209  inline std::vector<std::string> listFilesInPath(
210  std::string_view pathToDir,
211  std::string_view fileExtension
212  ) {
213  std::vector<std::string> result;
214 
215  // open path
216  const stdfs::path path(pathToDir);
217 
218  if(!stdfs::exists(path)) {
219  std::string exceptionString{"'"};
220 
221  exceptionString += pathToDir;
222  exceptionString += "' does not exist";
223 
224  throw Exception(exceptionString);
225  }
226 
227  if(!stdfs::is_directory(path)) {
228  std::string exceptionString{"'"};
229 
230  exceptionString += pathToDir;
231  exceptionString += "' is not a directory";
232 
233  throw Exception(exceptionString);
234  }
235 
236  // iterate through items
237  try {
238  for(const auto& it: stdfs::recursive_directory_iterator(path)) {
239  if(fileExtension.empty() || it.path().extension().string() == fileExtension) {
240  result.emplace_back(it.path().string());
241  }
242  }
243  }
244  catch(const stdfs::filesystem_error& e) {
245  std::string exceptionString{"Could not iterate over the files in '"};
246 
247  exceptionString += pathToDir;
248  exceptionString += "': ";
249  exceptionString += e.what();
250 
251  throw Exception(exceptionString);
252  }
253 
254  return result;
255  }
256 
258 
270  inline bool contains(std::string_view pathToDir, std::string_view pathToCheck) {
271  if(!FileSystem::isValidDirectory(pathToDir)) {
272  std::string exceptionString{"'"};
273 
274  exceptionString += pathToDir;
275  exceptionString += "' is not a valid directory";
276 
277  throw Exception(exceptionString);
278  }
279 
280  // make paths absolute
281  try {
282  const auto absPathToDir{
283  stdfs::canonical(pathToDir)
284  };
285 
286 #ifndef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL
287  // create weakly canonical path
288  auto absPathToCheck{
289  stdfs::weakly_canonical(pathToCheck)
290  };
291 #else
292  // create approximate canonical path – no symlinks supported!
293  auto completePathToCheck{
294  stdfs::absolute(pathToCheck)
295  };
296 
297  stdfs::path absPathToCheck;
298 
299  for(const auto& segment : completePathToCheck) {
300  if(segment == "..") {
301  absPathToCheck = absPathToCheck.parent_path();
302  }
303  else if(segment == ".") {
304  continue;
305  }
306  else {
307  absPathToCheck /= segment;
308  }
309 
310  if(stdfs::is_symlink(absPathToCheck)) {
311  std::string exceptionString{
312  "Path contains unsupported symlink: '"
313  };
314 
315  exceptionString += absPathToCheck;
316  exceptionString += "'";
317 
318  throw Exception(exceptionString);
319  }
320  }
321 #endif
322 
323  // remove filename if necessary
324  if(absPathToCheck.has_filename()) {
325  absPathToCheck.remove_filename();
326  }
327 
328  // compare number of path components
329  if(
330  std::distance(absPathToDir.begin(), absPathToDir.end())
331  > std::distance(absPathToCheck.begin(), absPathToCheck.end())
332  ) {
333  // pathToCheck cannot be contained in a path with more components
334  return false;
335  }
336 
337  // compare as many components as there are in pathToDir
338  return std::equal(absPathToDir.begin(), absPathToDir.end(), absPathToCheck.begin());
339  }
340  catch(const stdfs::filesystem_error& e) {
341  std::string exceptionString{
342  "Could not make paths absolute: '"
343  };
344 
345  exceptionString += pathToDir;
346  exceptionString += "' and '";
347  exceptionString += pathToCheck;
348  exceptionString += "': ";
349  exceptionString += e.what();
350 
351  throw Exception(exceptionString);
352  }
353  }
354 
356 
365  inline void createDirectory(std::string_view pathToDir) {
366  try {
367  stdfs::create_directory(pathToDir);
368  }
369  catch(const stdfs::filesystem_error& e) {
370  std::string exceptionString{"Could not create directory '"};
371 
372  exceptionString += pathToDir;
373  exceptionString += "': ";
374  exceptionString += e.what();
375 
376  throw Exception(exceptionString);
377  }
378  }
379 
381 
390  inline void createDirectoryIfNotExists(std::string_view pathToDir) {
391  if(!FileSystem::isValidDirectory(pathToDir)) {
392  FileSystem::createDirectory(pathToDir);
393  }
394  }
395 
397 
405  inline void clearDirectory(std::string_view pathToDir) {
406  if(!FileSystem::isValidDirectory(pathToDir)) {
407  std::string exceptionString{"'"};
408 
409  exceptionString += pathToDir;
410  exceptionString += "' is not a valid directory";
411 
412  throw Exception(exceptionString);
413  }
414 
415  const stdfs::path dir(pathToDir);
416 
417  for(stdfs::directory_iterator it(pathToDir), endIt; it != endIt; ++it) {
418  try {
419  stdfs::remove_all(it->path());
420  }
421  catch(const stdfs::filesystem_error& e) {
422  std::string exceptionString{"Could not remove '"};
423 
424  exceptionString += it->path();
425  exceptionString += "' with all its subdirectories: ";
426  exceptionString += e.what();
427 
428  throw Exception(exceptionString);
429  }
430  }
431  }
432 
434 
443  inline std::uintmax_t getFreeSpace(std::string_view path) {
444  try {
445  return stdfs::space(path).available;
446  }
447  catch(const stdfs::filesystem_error& e) {
448  std::string exceptionString{"Could not get the available disk space at '"};
449 
450  exceptionString += path;
451  exceptionString += "': ";
452  exceptionString += e.what();
453 
454  throw Exception(exceptionString);
455  }
456  }
457 
458 } /* namespace crawlservpp::Helper::FileSystem */
459 
460 #endif /* HELPER_FILESYSTEM_HPP_ */
bool isValidFile(std::string_view path)
Checks whether the given path points to a valid file.
Definition: FileSystem.hpp:165
void createDirectory(std::string_view pathToDir)
Creates a directory at the given path.
Definition: FileSystem.hpp:365
bool contains(std::string_view pathToDir, std::string_view pathToCheck)
Checks whether the given path is located inside the given directory.
Definition: FileSystem.hpp:270
Class for file system exceptions.
Definition: FileSystem.hpp:97
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
Namespace for global file system helper functions.
Definition: FileSystem.hpp:46
bool isValidDirectory(std::string_view path)
Checks whether the given path points to a valid directory.
Definition: FileSystem.hpp:138
bool exists(std::string_view path)
Checks whether the specified path exists.
Definition: FileSystem.hpp:113
void clearDirectory(std::string_view pathToDir)
Deletes all files and folders in the given directory.
Definition: FileSystem.hpp:405
char getPathSeparator()
Gets the preferred separator for file paths in the current operating system.
Definition: FileSystem.hpp:187
std::uintmax_t getFreeSpace(std::string_view path)
Gets the available disk space at the given location in bytes.
Definition: FileSystem.hpp:443
std::vector< std::string > listFilesInPath(std::string_view pathToDir, std::string_view fileExtension)
Lists all files with the given extension in the given directory and its subdirectories.
Definition: FileSystem.hpp:209
void createDirectoryIfNotExists(std::string_view pathToDir)
Creates a directory at the given path, if it does not exist already.
Definition: FileSystem.hpp:390