crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
Bytes.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2021 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * Bytes.hpp
24  *
25  * Helper functions for byte operations.
26  *
27  * Created on: Feb 3, 2021
28  * Author: ans
29  */
30 
31 #ifndef HELPER_BYTES_HPP_
32 #define HELPER_BYTES_HPP_
33 
34 #include <algorithm> // std::copy, std::reverse
35 #include <array> // std::array
36 #include <cctype> // std::isprint
37 #include <cstddef> // std::size_t
38 #include <cstdint> // std::[u]int[8,16,32,64]_t
39 #include <stdexcept> // std::invalid_argument
40 #include <string> // std::to_string
41 #include <vector> // std::vector
42 
45 
46  // for convenience: a sequence of raw bytes, stored as a vector of unsigned 8-bit integers
47  using Bytes = std::vector<std::uint8_t>;
48 
49  /*
50  * CONSTANTS
51  */
52 
55 
// NOTE: The constants are declared 'inline' so that every translation unit
//  including this header shares one definition of each of them.

//! Index of the first byte.
inline constexpr auto first{0};

//! Index of the second byte.
inline constexpr auto second{1};

//! Index of the third byte.
inline constexpr auto third{2};

//! Index of the fourth byte.
inline constexpr auto fourth{3};

//! Index of the fifth byte.
inline constexpr auto fifth{4};

//! Index of the sixth byte.
inline constexpr auto sixth{5};

//! Index of the seventh byte.
inline constexpr auto seventh{6};

//! Index of the eighth byte.
inline constexpr auto eighth{7};

//! One byte in bits.
inline constexpr auto oneByteBits{8};

//! Two bytes in bits.
inline constexpr auto twoBytesBits{16};

//! Three bytes in bits.
inline constexpr auto threeBytesBits{24};

//! Four bytes in bits.
inline constexpr auto fourBytesBits{32};

//! Five bytes in bits.
inline constexpr auto fiveBytesBits{40};

//! Six bytes in bits.
inline constexpr auto sixBytesBits{48};

//! Seven bytes in bits.
inline constexpr auto sevenBytesBits{56};

//! Size of eight bytes.
inline constexpr auto sizeEight{8};

//! Size of four bytes.
inline constexpr auto sizeFour{4};

//! Size of two bytes.
inline constexpr auto sizeTwo{2};
109 
111 
112  /*
113  * DECLARATION
114  */
115 
118 
// Endianness detection (neither function throws).
119  bool isBigEndian() noexcept;
120  bool isFloatBigEndian() noexcept;
121 
125 
// Bytes-to-number conversion: each function reads from 'bytes' starting at 'pos'
//  and advances 'pos' past the bytes it has read. The overloads taking 'len'
//  read a caller-specified number of bytes (at most eight).
126  std::uint64_t bytesToUInt64(const Bytes& bytes, std::size_t& pos);
127  std::uint64_t bytesToUInt64(const Bytes& bytes, std::size_t& pos, std::size_t len);
128  std::int64_t bytesToInt64(const Bytes& bytes, std::size_t& pos);
129  std::int64_t bytesToInt64(const Bytes& bytes, std::size_t& pos, std::size_t len);
130  std::uint32_t bytesToUInt32(const Bytes& bytes, std::size_t& pos);
131  std::int32_t bytesToInt32(const Bytes& bytes, std::size_t& pos);
132  std::uint16_t bytesToUInt16(const Bytes& bytes, std::size_t& pos);
133  std::int16_t bytesToInt16(const Bytes& bytes, std::size_t& pos);
134  double bytesToDouble(const Bytes& bytes, std::size_t& pos);
135 
139 
// Number-to-bytes conversion: each function returns a fixed-size array
//  holding the bytes of the given number.
140  std::array<std::uint8_t, sizeEight> uInt64ToBytes(std::uint64_t number);
141  std::array<std::uint8_t, sizeEight> int64ToBytes(std::int64_t number);
142  std::array<std::uint8_t, sizeFour> uInt32ToBytes(std::uint32_t number);
143  std::array<std::uint8_t, sizeFour> int32ToBytes(std::int32_t number);
144  std::array<std::uint8_t, sizeTwo> uInt16ToBytes(std::uint16_t number);
145  std::array<std::uint8_t, sizeTwo> int16ToBytes(std::int16_t number);
146  std::array<std::uint8_t, sizeEight> doubleToBytes(double number);
147 
151 
// String representation of single bytes and characters.
152  std::string byteToHexString(std::uint8_t byte);
153  std::string charToString(char c);
154 
156 
157  /*
158  * IMPLEMENTATION
159  */
160 
161  /*
162  * ENDIANNESS
163  */
164 
166 
//! Returns whether the machine running this code uses big endianness.
/*!
 * The check stores the value 1 in a two-byte integer and inspects the byte
 *  at its lowest address. A multi-byte probe is required: the first (and
 *  only) byte of a one-byte integer holding 1 is always 1, regardless of
 *  the byte order of the machine.
 *
 * \returns True, if the byte at the lowest address of the probe is not its
 *   least significant byte, i.e. the machine is not little-endian.
 *   False otherwise.
 */
inline bool isBigEndian() noexcept {
	const std::uint16_t probe{1};

	// on little-endian machines, the least significant byte (1) is stored first
	return *(reinterpret_cast<const unsigned char *>(&probe)) != 1;
}
175 
177 
//! Returns whether the machine running this code uses big endianness for floating-point numbers.
/*!
 * Inspects the byte at the lowest address of a single-precision -1.
 *
 * \returns True, if that byte is non-zero. False, if it is zero.
 */
inline bool isFloatBigEndian() noexcept {
	const float probe{-1.F};
	const auto * firstByte{reinterpret_cast<const std::uint8_t *>(&probe)};

	return *firstByte != 0;
}
187 
188  /*
189  * BYTES-TO-NUMBER CONVERSION
190  */
191 
193 
// Retrieves an unsigned 64-bit number from a vector of bytes.
//
// Copies the eight bytes bytes[pos] ... bytes[pos + 7], advances pos by
//  eight and combines the copied bytes into one number.
//
// bytes: the vector to read from; it is accessed via operator[], i.e.
//  without bounds checking.
// pos: index of the first byte to read; incremented by eight.
203  inline std::uint64_t bytesToUInt64(const Bytes& bytes, std::size_t& pos) {
204  std::array<std::uint8_t, sizeEight> numberBytes{
205  bytes[pos + first], bytes[pos + second], bytes[pos + third], bytes[pos + fourth],
206  bytes[pos + fifth], bytes[pos + sixth], bytes[pos + seventh], bytes[pos + eighth]
207  };
208 
209  pos += sizeEight;
210 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
212  std::reverse(numberBytes.begin(), numberBytes.end());
213  }
214 
// the first array element becomes the least, the eighth the most significant byte
215  return static_cast<std::uint64_t>(numberBytes[eighth]) << sevenBytesBits //NOLINT(hicpp-signed-bitwise)
216  | static_cast<std::uint64_t>(numberBytes[seventh]) << sixBytesBits //NOLINT(hicpp-signed-bitwise)
217  | static_cast<std::uint64_t>(numberBytes[sixth]) << fiveBytesBits //NOLINT(hicpp-signed-bitwise)
218  | static_cast<std::uint64_t>(numberBytes[fifth]) << fourBytesBits //NOLINT(hicpp-signed-bitwise)
219  | static_cast<std::uint64_t>(numberBytes[fourth]) << threeBytesBits //NOLINT(hicpp-signed-bitwise)
220  | static_cast<std::uint64_t>(numberBytes[third]) << twoBytesBits //NOLINT(hicpp-signed-bitwise)
221  | static_cast<std::uint64_t>(numberBytes[second]) << oneByteBits //NOLINT(hicpp-signed-bitwise)
222  | static_cast<std::uint64_t>(numberBytes[first]);
223  }
224 
226 
// Retrieves an unsigned number of up to eight bytes from a vector of bytes.
//
// Copies the len bytes bytes[pos] ... bytes[pos + len - 1] into the front
//  of a zero-initialized eight-byte array, advances pos by len and combines
//  the array into one 64-bit number.
//
// bytes: the vector to read from; it is accessed via operator[], i.e.
//  without bounds checking.
// pos: index of the first byte to read; incremented by len.
// len: number of bytes to read.
//
// Throws std::invalid_argument if len is larger than eight.
238  inline std::uint64_t bytesToUInt64(const Bytes& bytes, std::size_t& pos, std::size_t len) {
239  if(len > sizeEight) {
240  throw std::invalid_argument(
241  "Bytes::bytesToUInt64():"
242  " Only numbers up to a length of eight bytes are supported (len="
243  + std::to_string(len)
244  + ")"
245  );
246  }
247 
// value-initialization sets all eight bytes to zero
248  std::array<std::uint8_t, sizeEight> numberBytes{};
249 
250  for(std::size_t n{}; n < len; ++n) {
251  numberBytes.at(n) = bytes[pos + n];
// (the semicolon after the closing brace is an empty statement)
252  };
253 
254  pos += len;
255 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
257  std::reverse(numberBytes.begin(), numberBytes.end());
258  }
259 
// the first array element becomes the least, the eighth the most significant byte
260  return static_cast<std::uint64_t>(numberBytes[eighth]) << sevenBytesBits //NOLINT(hicpp-signed-bitwise)
261  | static_cast<std::uint64_t>(numberBytes[seventh]) << sixBytesBits //NOLINT(hicpp-signed-bitwise)
262  | static_cast<std::uint64_t>(numberBytes[sixth]) << fiveBytesBits //NOLINT(hicpp-signed-bitwise)
263  | static_cast<std::uint64_t>(numberBytes[fifth]) << fourBytesBits //NOLINT(hicpp-signed-bitwise)
264  | static_cast<std::uint64_t>(numberBytes[fourth]) << threeBytesBits //NOLINT(hicpp-signed-bitwise)
265  | static_cast<std::uint64_t>(numberBytes[third]) << twoBytesBits //NOLINT(hicpp-signed-bitwise)
266  | static_cast<std::uint64_t>(numberBytes[second]) << oneByteBits //NOLINT(hicpp-signed-bitwise)
267  | static_cast<std::uint64_t>(numberBytes[first]);
268  }
269 
271 
283  inline std::int64_t bytesToInt64(const Bytes& bytes, std::size_t& pos, std::size_t len) {
284  const std::uint64_t withoutSign{bytesToUInt64(bytes, pos, len)};
285 
286  return *reinterpret_cast<const std::int64_t *>(&withoutSign);
287  }
288 
290 
300  inline std::int64_t bytesToInt64(const Bytes& bytes, std::size_t& pos) {
301  const std::uint64_t withoutSign{bytesToUInt64(bytes, pos)};
302 
303  return *reinterpret_cast<const std::int64_t *>(&withoutSign);
304  }
305 
307 
// Retrieves an unsigned 32-bit number from a vector of bytes.
//
// Copies the four bytes bytes[pos] ... bytes[pos + 3], advances pos by
//  four and combines the copied bytes into one number.
//
// bytes: the vector to read from; it is accessed via operator[], i.e.
//  without bounds checking.
// pos: index of the first byte to read; incremented by four.
317  inline std::uint32_t bytesToUInt32(const Bytes& bytes, std::size_t& pos) {
318  std::array<std::uint8_t, sizeFour> numberBytes{
319  bytes[pos + first], bytes[pos + second], bytes[pos + third], bytes[pos + fourth]
320  };
321 
322  pos += sizeFour;
323 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
325  std::reverse(numberBytes.begin(), numberBytes.end());
326  }
327 
// the first array element becomes the least, the fourth the most significant byte
328  return static_cast<std::uint32_t>(numberBytes[fourth]) << threeBytesBits //NOLINT(hicpp-signed-bitwise)
329  | static_cast<std::uint32_t>(numberBytes[third]) << twoBytesBits //NOLINT(hicpp-signed-bitwise)
330  | static_cast<std::uint32_t>(numberBytes[second]) << oneByteBits //NOLINT(hicpp-signed-bitwise)
331  | static_cast<std::uint32_t>(numberBytes[first]);
332  }
333 
335 
345  inline std::int32_t bytesToInt32(const Bytes& bytes, std::size_t& pos) {
346  const std::uint32_t withoutSign{bytesToUInt32(bytes, pos)};
347 
348  return *reinterpret_cast<const std::int32_t *>(&withoutSign);
349  }
350 
352 
// Retrieves an unsigned 16-bit number from a vector of bytes.
//
// Copies the two bytes bytes[pos] and bytes[pos + 1], advances pos by two
//  and combines the copied bytes into one number.
//
// bytes: the vector to read from; it is accessed via operator[], i.e.
//  without bounds checking.
// pos: index of the first byte to read; incremented by two.
362  inline std::uint16_t bytesToUInt16(const Bytes& bytes, std::size_t& pos) {
363  std::array<std::uint8_t, sizeTwo> numberBytes{
364  bytes[pos + first], bytes[pos + second]
365  };
366 
367  pos += sizeTwo;
368 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
370  std::reverse(numberBytes.begin(), numberBytes.end());
371  }
372 
// the first array element becomes the low byte, the second the high byte
373  return static_cast<std::uint16_t>(numberBytes[second]) << oneByteBits //NOLINT(hicpp-signed-bitwise)
374  | static_cast<std::uint16_t>(numberBytes[first]);
375  }
376 
378 
388  inline std::int16_t bytesToInt16(const Bytes& bytes, std::size_t& pos) {
389  const std::uint16_t withoutSign{bytesToUInt16(bytes, pos)};
390 
391  return *reinterpret_cast<const std::int16_t *>(&withoutSign);
392  }
393 
395 
// Retrieves a double-precision floating-point number from a vector of bytes.
//
// Copies the eight bytes bytes[pos] ... bytes[pos + 7] in REVERSED order
//  (the last byte read becomes the first array element), advances pos by
//  eight and reads the storage of the array as a double.
//
// bytes: the vector to read from; it is accessed via operator[], i.e.
//  without bounds checking.
// pos: index of the first byte to read; incremented by eight.
406  inline double bytesToDouble(const Bytes& bytes, std::size_t& pos) {
407  std::array<std::uint8_t, sizeEight> numberBytes{
408  bytes[pos + eighth], bytes[pos + seventh], bytes[pos + sixth], bytes[pos + fifth],
409  bytes[pos + fourth], bytes[pos + third], bytes[pos + second], bytes[pos + first]
410  };
411 
412  pos += sizeEight;
413 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's floating-point
//  endianness that makes the reversal conditional; confirm against the
//  original header.
415  std::reverse(numberBytes.begin(), numberBytes.end());
416  }
417 
// NOTE(review): reading a byte array through a reference to double is type
//  punning that the C++ standard does not sanction; consider std::memcpy.
418  return reinterpret_cast<const double&>(*(numberBytes.data()));
419  }
420 
421  /*
422  * NUMBER-TO-BYTES CONVERSION
423  */
424 
426 
// Converts an unsigned 64-bit number to an array of eight bytes.
//
// number: the number whose bytes will be written to the returned array.
432  inline std::array<std::uint8_t, sizeEight> uInt64ToBytes(std::uint64_t number) {
433  std::array<std::uint8_t, sizeEight> result{};
434 
// writes the number's object representation directly into the array's storage
// NOTE(review): writing through a reference of another type into a byte
//  array is type punning that the C++ standard does not sanction; consider
//  std::memcpy.
435  reinterpret_cast<std::uint64_t&>(*result.data()) = number;
436 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
438  std::reverse(result.begin(), result.end());
439  }
440 
441  return result;
442  }
443 
445 
// Converts a signed 64-bit number to an array of eight bytes.
//
// number: the number whose bytes will be written to the returned array.
451  inline std::array<std::uint8_t, sizeEight> int64ToBytes(std::int64_t number) {
452  std::array<std::uint8_t, sizeEight> result{};
453 
// writes the number's object representation directly into the array's storage
// NOTE(review): writing through a reference of another type into a byte
//  array is type punning that the C++ standard does not sanction; consider
//  std::memcpy.
454  reinterpret_cast<std::int64_t&>(*result.data()) = number;
455 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
457  std::reverse(result.begin(), result.end());
458  }
459 
460  return result;
461  }
462 
464 
// Converts an unsigned 32-bit number to an array of four bytes.
//
// number: the number whose bytes will be written to the returned array.
470  inline std::array<std::uint8_t, sizeFour> uInt32ToBytes(std::uint32_t number) {
471  std::array<std::uint8_t, sizeFour> result{};
472 
// writes the number's object representation directly into the array's storage
// NOTE(review): writing through a reference of another type into a byte
//  array is type punning that the C++ standard does not sanction; consider
//  std::memcpy.
473  reinterpret_cast<std::uint32_t&>(*result.data()) = number;
474 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
476  std::reverse(result.begin(), result.end());
477  }
478 
479  return result;
480  }
481 
483 
// Converts a signed 32-bit number to an array of four bytes.
//
// number: the number whose bytes will be written to the returned array.
489  inline std::array<std::uint8_t, sizeFour> int32ToBytes(std::int32_t number) {
490  std::array<std::uint8_t, sizeFour> result{};
491 
// writes the number's object representation directly into the array's storage
// NOTE(review): writing through a reference of another type into a byte
//  array is type punning that the C++ standard does not sanction; consider
//  std::memcpy.
492  reinterpret_cast<std::int32_t&>(*result.data()) = number;
493 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
495  std::reverse(result.begin(), result.end());
496  }
497 
498  return result;
499  }
500 
502 
// Converts an unsigned 16-bit number to an array of two bytes.
//
// number: the number whose bytes will be written to the returned array.
508  inline std::array<std::uint8_t, sizeTwo> uInt16ToBytes(std::uint16_t number) {
509  std::array<std::uint8_t, sizeTwo> result{};
510 
// writes the number's object representation directly into the array's storage
// NOTE(review): writing through a reference of another type into a byte
//  array is type punning that the C++ standard does not sanction; consider
//  std::memcpy.
511  reinterpret_cast<std::uint16_t&>(*result.data()) = number;
512 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
514  std::reverse(result.begin(), result.end());
515  }
516 
517  return result;
518  }
519 
521 
// Converts a signed 16-bit number to an array of two bytes.
//
// number: the number whose bytes will be written to the returned array.
527  inline std::array<std::uint8_t, sizeTwo> int16ToBytes(std::int16_t number) {
528  std::array<std::uint8_t, sizeTwo> result{};
529 
// writes the number's object representation directly into the array's storage
// NOTE(review): writing through a reference of another type into a byte
//  array is type punning that the C++ standard does not sanction; consider
//  std::memcpy.
530  reinterpret_cast<std::int16_t&>(*result.data()) = number;
531 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's endianness that makes
//  the reversal conditional; confirm against the original header.
533  std::reverse(result.begin(), result.end());
534  }
535 
536  return result;
537  }
538 
540 
// Converts a floating-point number with double precision to an array of
//  eight bytes.
//
// number: the number whose bytes will be written to the returned array.
547  inline std::array<std::uint8_t, sizeEight> doubleToBytes(double number) {
548  std::array<std::uint8_t, sizeEight> result{};
549 
// writes the number's object representation directly into the array's storage
// NOTE(review): writing through a reference of another type into a byte
//  array is type punning that the C++ standard does not sanction; consider
//  std::memcpy.
550  reinterpret_cast<double&>(*result.data()) = number;
551 
// NOTE(review): the line opening the block closed below is not visible in
//  this listing - presumably a check of the machine's (floating-point)
//  endianness that makes the reversal conditional; confirm against the
//  original header.
553  std::reverse(result.begin(), result.end());
554  }
555 
556  return result;
557  }
558 
559  /*
560  * STRING REPRESENTATION
561  */
562 
564 
//! Converts a byte to a string containing the byte in hexadecimal format.
/*!
 * \param byte The byte to convert.
 *
 * \returns A string of four characters: the prefix "0x", followed by the
 *   upper and the lower four bits of the byte as lower-case hexadecimal
 *   digits, e.g. "0x0a" for the value 10.
 */
inline std::string byteToHexString(std::uint8_t byte) {
	constexpr std::array<char, 16> digits{
			'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
	};
	constexpr std::size_t prefixedLength{4};
	constexpr int nibbleBits{4};
	constexpr int lowNibbleMask{0x0F};

	// split the byte into its two hexadecimal digits (each in the range 0-15)
	const auto highNibble{static_cast<std::size_t>(byte >> nibbleBits)}; //NOLINT(hicpp-signed-bitwise)
	const auto lowNibble{static_cast<std::size_t>(byte & lowNibbleMask)}; //NOLINT(hicpp-signed-bitwise)

	std::string hex;

	hex.reserve(prefixedLength);

	hex += "0x";
	hex += digits[highNibble];
	hex += digits[lowNibble];

	return hex;
}
591 
593 
609  inline std::string charToString(char c) {
610  switch(c) {
611  case '\0':
612  return "\\0";
613 
614  case '\a':
615  return "\\a";
616 
617  case '\f':
618  return "\\f";
619 
620  case '\n':
621  return "\\n";
622 
623  case '\r':
624  return "\\r";
625 
626  case '\v':
627  return "\\v";
628 
629  case '\t':
630  return "\\t";
631 
632  default:
633  if(std::isprint(c) != 0) {
634  return std::string(c, 1);
635  }
636 
637  break;
638  }
639 
640  return byteToHexString(static_cast<std::uint8_t>(c));
641  }
642 
643 } /* namespace crawlservpp::Helper::Bytes */
644 
645 #endif /* HELPER_BYTES_HPP_ */
std::array< std::uint8_t, sizeFour > uInt32ToBytes(std::uint32_t number)
Converts an unsigned 32-bit number to an array of four bytes.
Definition: Bytes.hpp:470
constexpr auto sizeTwo
Size of two bytes.
Definition: Bytes.hpp:108
constexpr auto fifth
Index of the fifth byte.
Definition: Bytes.hpp:69
constexpr auto threeBytesBits
Three bytes in bits.
Definition: Bytes.hpp:87
constexpr auto fourBytesBits
Four bytes in bits.
Definition: Bytes.hpp:90
constexpr auto third
Index of the third byte.
Definition: Bytes.hpp:63
std::int32_t bytesToInt32(const Bytes &bytes, std::size_t &pos)
Retrieve a signed 32-bit number from a vector of bytes.
Definition: Bytes.hpp:345
Namespace for global byte operation helper functions.
Definition: Bytes.hpp:44
bool isBigEndian() noexcept
Returns whether the machine running this code uses big endianness.
Definition: Bytes.hpp:170
std::array< std::uint8_t, sizeFour > int32ToBytes(std::int32_t number)
Converts a signed 32-bit number to an array of four bytes.
Definition: Bytes.hpp:489
constexpr auto fourth
Index of the fourth byte.
Definition: Bytes.hpp:66
double bytesToDouble(const Bytes &bytes, std::size_t &pos)
Retrieves an IEEE 754 double-precision binary floating-point number from a vector of bytes...
Definition: Bytes.hpp:406
std::vector< std::uint8_t > Bytes
Definition: Bytes.hpp:47
constexpr auto seventh
Index of the seventh byte.
Definition: Bytes.hpp:75
std::uint16_t bytesToUInt16(const Bytes &bytes, std::size_t &pos)
Retrieve an unsigned 16-bit number from a vector of bytes.
Definition: Bytes.hpp:362
constexpr auto fiveBytesBits
Five bytes in bits.
Definition: Bytes.hpp:93
constexpr auto sixBytesBits
Six bytes in bits.
Definition: Bytes.hpp:96
bool isFloatBigEndian() noexcept
Returns whether the machine running this code uses big endianness for floating-point numbers...
Definition: Bytes.hpp:182
constexpr auto first
Index of the first byte.
Definition: Bytes.hpp:57
std::string byteToHexString(std::uint8_t byte)
Converts a byte to a string containing the byte in hexadecimal format.
Definition: Bytes.hpp:572
std::array< std::uint8_t, sizeEight > int64ToBytes(std::int64_t number)
Converts a signed 64-bit number to an array of eight bytes.
Definition: Bytes.hpp:451
static T::size_type bytes(const T &container)
Returns the number of bytes in an iterable container.
Definition: Container.hpp:144
std::string charToString(char c)
Converts a character to a string.
Definition: Bytes.hpp:609
constexpr auto second
Index of the second byte.
Definition: Bytes.hpp:60
std::int64_t bytesToInt64(const Bytes &bytes, std::size_t &pos)
Retrieve a signed 64-bit number from a vector of bytes.
Definition: Bytes.hpp:300
std::uint64_t bytesToUInt64(const Bytes &bytes, std::size_t &pos)
Retrieve an unsigned 64-bit number from a vector of bytes.
Definition: Bytes.hpp:203
std::array< std::uint8_t, sizeTwo > uInt16ToBytes(std::uint16_t number)
Converts an unsigned 16-bit number to an array of two bytes.
Definition: Bytes.hpp:508
constexpr auto sevenBytesBits
Seven bytes in bits.
Definition: Bytes.hpp:99
std::array< std::uint8_t, sizeEight > doubleToBytes(double number)
Converts a floating-point number with double precision to an array of eight bytes. ...
Definition: Bytes.hpp:547
constexpr auto sizeEight
Size of eight bytes.
Definition: Bytes.hpp:102
std::int16_t bytesToInt16(const Bytes &bytes, std::size_t &pos)
Retrieve a signed 16-bit number from a vector of bytes.
Definition: Bytes.hpp:388
constexpr auto eighth
Index of the eighth byte.
Definition: Bytes.hpp:78
constexpr auto sixth
Index of the sixth byte.
Definition: Bytes.hpp:72
void reverse(std::queue< T > &queue)
Reverses the given queue.
Definition: Queue.hpp:53
std::array< std::uint8_t, sizeEight > uInt64ToBytes(std::uint64_t number)
Converts an unsigned 64-bit number to an array of eight bytes.
Definition: Bytes.hpp:432
std::array< std::uint8_t, sizeTwo > int16ToBytes(std::int16_t number)
Converts a signed 16-bit number to an array of two bytes.
Definition: Bytes.hpp:527
constexpr auto twoBytesBits
Two bytes in bits.
Definition: Bytes.hpp:84
std::uint32_t bytesToUInt32(const Bytes &bytes, std::size_t &pos)
Retrieve an unsigned 32-bit number from a vector of bytes.
Definition: Bytes.hpp:317
constexpr auto sizeFour
Size of four bytes.
Definition: Bytes.hpp:105
constexpr auto oneByteBits
One byte in bits.
Definition: Bytes.hpp:81