39 #ifndef HELPER_PICKLEDICT_HPP_ 40 #define HELPER_PICKLEDICT_HPP_ 42 #include "../Helper/Bytes.hpp" 43 #include "../Helper/Container.hpp" 44 #include "../Main/Exception.hpp" 54 #include <unordered_map> 62 using Bytes = std::vector<std::uint8_t>;
/// Gets a number from the dictionary, if available.
///
/// The definition looks @p key up in the map of integer entries.
///
/// @param key Name of the entry to look up.
/// @return The number stored under @p key, or an empty optional
///   if no number is stored under that key.
150 [[nodiscard]] std::optional<std::int64_t>
getNumber(
const std::string& key)
const;
/// Gets a floating-point number from the dictionary, if available.
///
/// The definition looks @p key up in the map of floating-point entries.
///
/// @param key Name of the entry to look up.
/// @return The value stored under @p key, or an empty optional
///   if no floating-point number is stored under that key.
151 [[nodiscard]] std::optional<double>
getFloat(
const std::string& key)
const;
/// Gets a string from the dictionary, if available.
///
/// The definition looks @p key up in the map of string entries.
///
/// @param key Name of the entry to look up.
/// @return The string stored under @p key, or an empty optional
///   if no string is stored under that key.
152 [[nodiscard]] std::optional<std::string>
getString(
const std::string& key)
const;
159 const std::string& key,
163 const std::string& key,
167 const std::string& key,
168 const std::string& value
// The dictionary keeps one map per value type, so the same key may
// exist independently as a string, a number and a float.

// String entries by key (assigned in setString(), searched in getString()).
185 std::unordered_map<std::string, std::string> strings;
// Integer entries by key (assigned in setNumber(), searched in getNumber()).
186 std::unordered_map<std::string, std::int64_t> numbers;
// Floating-point entries by key (assigned in setFloat(), searched in getFloat()).
187 std::unordered_map<std::string, double> floats;
213 SHORT_BINSTRING =
'U',
254 SHORT_BINBYTES =
'C',
257 SHORT_BINUNICODE =
'\x8c',
258 BINUNICODE8 =
'\x8d',
264 STACK_GLOBAL =
'\x93',
270 NEXT_BUFFER =
'\x97',
271 READONLY_BUFFER =
'\x98',
279 std::uint8_t opCode{};
292 const std::string& key
302 static bool extractNextFrame(
/// Turns an extracted frame into a flat sequence of bytes.
///
/// The definition branches on whether the frame's op-code is FRAME.
/// On one path it rebuilds a buffer that starts with the frame's op-code
/// byte (the buffer is reserved for the frame data plus one byte).
/// NOTE(review): presumably that path handles data that is not wrapped
/// in a FRAME, so the op-code is kept for readValue() -- confirm.
///
/// @param frame The frame (op-code and data) to unpack.
/// @return The unpacked bytes.
307 static Bytes unpackFrame(
const Frame& frame);
309 static bool skipMemoize(
314 static void checkLength(
315 std::size_t dataLength,
316 std::size_t currentEnd
318 static std::size_t readValueLength(
323 static std::size_t getLengthByTermination(
326 char terminatingCharacter
/// Appends the Python pickle header to @p to.
///
/// The header starts with the PROTO op-code.
/// NOTE(review): presumably followed by the protocol version byte -- confirm.
///
/// @param to Buffer the header is appended to.
334 static void writeHead(
Bytes& to);
/// Appends the given bytes to @p to as a pickle frame.
///
/// The definition takes the frame size from @p frameBytes, emits the
/// FRAME op-code and can emit a STOP op-code.
/// NOTE(review): STOP is presumably written only if @p isLast is set --
/// confirm. writeTo() passes @c true for its single frame.
///
/// @param frameBytes Content of the frame.
/// @param to Buffer the frame is appended to.
/// @param isLast Whether this is the last frame of the pickle.
335 static void writeFrame(
const Bytes& frameBytes,
Bytes& to,
bool isLast);
/// Appends the op-codes that open a dictionary to @p to.
///
/// The definition pushes EMPTY_DICT, MEMOIZE and MARK, in that order.
///
/// @param to Buffer the op-codes are appended to.
336 static void writeDictHead(
Bytes& to);
/// Appends the op-codes that close a dictionary to @p to.
///
/// The definition pushes MEMOIZE followed by SETITEMS.
///
/// @param to Buffer the op-codes are appended to.
337 static void writeDictTail(
Bytes& to);
339 static void writeNumberEntry(
340 const std::pair<std::string, std::int64_t>& entry,
343 static void writeFloatEntry(
344 const std::pair<std::string, double>& entry,
347 static void writeStringEntry(
348 const std::pair<std::string, std::string>& entry,
/// Appends a dictionary key to @p to.
///
/// The key is written via writeShortBinUnicode() and is followed by a
/// MEMOIZE op-code.
/// NOTE(review): the short form stores its length in a single byte --
/// confirm how keys longer than 255 bytes are handled.
///
/// @param key The key to write.
/// @param to Buffer the key is appended to.
352 static void writeKey(
const std::string& key,
Bytes& to);
/// Appends the BININT1 op-code followed by the one-byte @p value to @p to.
///
/// @param value The unsigned one-byte number to write.
/// @param to Buffer the entry is appended to.
354 static void writeBinInt1(std::uint8_t value,
Bytes& to);
/// Appends the BININT2 op-code and the two-byte @p value to @p to.
///
/// NOTE(review): the byte order of the value is not visible here --
/// presumably produced by uInt16ToBytes(); confirm.
///
/// @param value The unsigned two-byte number to write.
/// @param to Buffer the entry is appended to.
355 static void writeBinInt2(std::uint16_t value,
Bytes& to);
/// Appends the LONG1 op-code and @p value to @p to.
///
/// The definition uses inRange() to test, in turn, whether the value fits
/// into a signed 8-, 16- or 32-bit integer, and writes it with the first
/// width that fits.
/// NOTE(review): wider values are presumably written as 64-bit, and a
/// length byte presumably precedes the value bytes -- confirm.
///
/// @param value The signed number to write.
/// @param to Buffer the entry is appended to.
356 static void writeLong1(std::int64_t value,
Bytes& to);
/// Appends the BINFLOAT op-code and @p value to @p to.
///
/// NOTE(review): the value is presumably written as eight bytes via
/// doubleToBytes() -- confirm.
///
/// @param value The floating-point number to write.
/// @param to Buffer the entry is appended to.
357 static void writeBinFloat(
double value,
Bytes& to);
/// Appends a string with a one-byte length prefix to @p to.
///
/// The definition writes the SHORT_BINUNICODE op-code, a length held in a
/// std::uint8_t, and then that many bytes of @p value.
/// NOTE(review): confirm what happens if @p value is longer than the
/// length byte can express.
///
/// @param value The string to write.
/// @param to Buffer the string is appended to.
358 static void writeShortBinUnicode(
const std::string& value,
Bytes& to);
/// Appends a string with a four-byte length to @p to.
///
/// The definition writes the BINUNICODE op-code, keeps the length in a
/// std::uint32_t, and copies that many bytes of @p value.
/// NOTE(review): confirm what happens if @p value is longer than a
/// 32-bit length can express.
///
/// @param value The string to write.
/// @param to Buffer the string is appended to.
359 static void writeBinUnicode(
const std::string& value,
Bytes& to);
/// Appends a string in the eight-byte-length form to @p to.
///
/// The definition writes the BINUNICODE8 op-code and copies every byte of
/// @p value.
/// NOTE(review): the length is presumably written as eight bytes between
/// the op-code and the content -- confirm.
///
/// @param value The string to write.
/// @param to Buffer the string is appended to.
360 static void writeBinUnicode8(
const std::string& value,
Bytes& to);
363 template<
typename T>
static void writeBytes(
const T&
bytes,
Bytes& to) {
364 to.reserve(to.size() + bytes.size());
366 for(
const auto byte : bytes) {
367 to.emplace_back(byte);
372 template<
typename T> [[nodiscard]]
static bool inRange(std::int64_t number) {
373 return number >= std::numeric_limits<T>::min()
374 && number <= std::numeric_limits<T>::max();
407 std::optional<std::int64_t> result;
409 const auto it{this->numbers.find(key)};
411 if(it != this->numbers.end()) {
427 std::optional<double> result;
429 const auto it{this->floats.find(key)};
431 if(it != this->floats.end()) {
447 std::optional<std::string> result;
449 const auto it{this->strings.find(key)};
451 if(it != this->strings.end()) {
472 this->numbers[key] = value;
485 this->floats[key] = value;
498 this->strings[key] = value;
528 const auto unpackedData{
529 PickleDict::unpack(data)
534 while(pos < unpackedData.size()) {
539 PickleDict::readKey(unpackedData, pos, key)
540 && PickleDict::skipMemoize(unpackedData, pos)
542 this->readValue(unpackedData, pos, key);
550 this->readValue(unpackedData, pos,
"");
569 std::vector<std::uint8_t> frame;
571 PickleDict::writeDictHead(frame);
573 for(
const auto& entry : this->numbers) {
574 PickleDict::writeNumberEntry(entry, frame);
577 for(
const auto& entry : this->floats) {
578 PickleDict::writeFloatEntry(entry, frame);
581 for(
const auto& entry : this->strings) {
582 PickleDict::writeStringEntry(entry, frame);
585 PickleDict::writeDictTail(frame);
588 PickleDict::writeHead(dataTo);
589 PickleDict::writeFrame(frame, dataTo,
true);
597 inline void PickleDict::readValue(
600 const std::string& key
602 std::size_t valueLength{};
607 if(pos >= data.size()) {
609 "SimpleDict::readValue():" 610 " Unexpected end of data" 611 " (invalid position)" 614 if(pos == data.size() - 1) {
615 if(data[pos] == static_cast<std::uint8_t>(OpCode::STOP)) {
623 "SimpleDict::readValue():" 624 " Unexpected end of data" 625 " (no STOP at the end)" 632 switch(static_cast<int8_t>(data[pos - 1])) {
636 case OpCode::ADDITEMS:
638 case OpCode::APPENDS:
639 case OpCode::BINPERSID:
643 case OpCode::EMPTY_DICT:
644 case OpCode::EMPTY_LIST:
645 case OpCode::EMPTY_SET:
646 case OpCode::EMPTY_TUPLE:
647 case OpCode::FROZENSET:
650 case OpCode::MEMOIZE:
652 case OpCode::NEWOBJ_EX:
653 case OpCode::NEXT_BUFFER:
656 case OpCode::POP_MARK:
657 case OpCode::READONLY_BUFFER:
659 case OpCode::SETITEM:
660 case OpCode::SETITEMS:
661 case OpCode::STACK_GLOBAL:
690 case OpCode::NEWFALSE:
699 case OpCode::NEWTRUE:
708 case OpCode::BININT1:
711 this->
setNumber(key, static_cast<std::uint8_t>(data[pos]));
717 case OpCode::BININT2:
725 case OpCode::LONG_BINGET:
726 case OpCode::LONG_BINPUT:
736 valueLength = PickleDict::readValueLength(data, pos,
pickleOneByte);
738 switch(valueLength) {
740 this->
setNumber(key, static_cast<std::int8_t>(data[pos]));
764 "Pickle::readValue(): Value lengths consisting of " 765 + std::to_string(valueLength)
766 +
" bytes are not supported" 787 valueLength = PickleDict::getLengthByTermination(data, pos,
'\n');
800 case OpCode::BINFLOAT:
810 valueLength = PickleDict::getLengthByTermination(data, pos,
'\n');
816 this->
setFloat(key, std::strtod(s1.c_str(),
nullptr));
823 case OpCode::SHORT_BINBYTES:
824 case OpCode::SHORT_BINSTRING:
825 case OpCode::SHORT_BINUNICODE:
827 valueLength = PickleDict::readValueLength(data, pos,
pickleOneByte);
833 case OpCode::BINBYTES:
834 case OpCode::BINSTRING:
835 case OpCode::BINUNICODE:
843 case OpCode::BINBYTES8:
844 case OpCode::BINUNICODE8:
845 case OpCode::BYTEARRAY8:
857 case OpCode::UNICODE:
859 valueLength = PickleDict::getLengthByTermination(data, pos,
'\n');
870 valueLength = PickleDict::getLengthByTermination(data, pos,
'\n');
876 valueLength = PickleDict::getLengthByTermination(data, pos,
'\n');
892 "SimpleDict::ReadValue():" 893 " Unexpected frame still found after unpacking" 898 "SimpleDict::ReadValue():" 899 " Unexpected 'STOP' before the end of the data" 904 "SimpleDict::ReadValue():" 905 " Unexpected 'PROTO' after the beginning of the data" 910 "SimpleDict::ReadValue():" 911 " Unknown Python pickle op-code encountered" 921 inline bool PickleDict::readKey(
928 data[pos] == static_cast<std::uint8_t>(OpCode::SHORT_BINSTRING)
929 || data[pos] == static_cast<std::uint8_t>(OpCode::SHORT_BINUNICODE)
935 const auto keyLength{data[pos]};
941 const auto keyEnd{pos + keyLength};
943 if(keyEnd > data.size()) {
945 "SimpleDict::readKey():" 946 " Unexpected end of data (expected >" 947 + std::to_string(keyEnd - data.size())
954 keyTo.reserve(keyLength);
957 for(; pos < keyEnd; ++pos) {
958 keyTo.push_back(static_cast<char>(data[pos]));
968 inline Bytes PickleDict::unpack(
const Bytes& data) {
973 while(PickleDict::extractNextFrame(data, pos, frame)) {
974 auto frameData{PickleDict::unpackFrame(frame)};
983 inline bool PickleDict::extractNextFrame(
992 "Pickle::extractFirstFrame():" 993 " No Python pickle found (only " 994 + std::to_string(bytes.size())
999 if(bytes[
pickleProtoByte] != static_cast<std::uint8_t>(OpCode::PROTO)) {
1001 "Pickle::extractFirstFrame():" 1002 " No Python pickle found (invalid first byte: " 1012 "Pickle::extractFirstFrame():" 1013 " Python pickle of unsupported version (" 1024 if(pos == bytes.size()) {
1029 const auto remaining{bytes.size() - pos};
1033 "Pickle::extractFirstFrame():" 1034 " No frame found in Python pickle (only " 1035 + std::to_string(remaining)
1041 frameTo.opCode = bytes[pos];
1046 const auto it{bytes.cbegin() + pos};
1050 frameTo.data =
Bytes(it, it + size);
1056 inline Bytes PickleDict::unpackFrame(
const Frame& frame) {
1057 if(frame.opCode == static_cast<std::uint8_t>(OpCode::FRAME)) {
1063 complete.reserve(frame.data.size() + 1);
1064 complete.push_back(frame.opCode);
1073 inline bool PickleDict::skipMemoize(
1079 && data[pos] ==
static_cast<std::uint8_t
>(OpCode::MEMOIZE)
1090 inline void PickleDict::checkLength(
1091 std::size_t dataLength,
1092 std::size_t currentEnd
1094 if(currentEnd > dataLength) {
1096 "Pickle::readValue(): Unexpected end of data (expected >" 1097 + std::to_string(currentEnd - dataLength)
1104 inline std::size_t PickleDict::readValueLength(
1107 std::size_t numBytes
1109 PickleDict::checkLength(data.size(), pos + numBytes);
1111 std::size_t result{};
1137 "Pickle::readValue(): Value lengths consisting of " 1138 + std::to_string(numBytes)
1139 +
" bytes are not supported" 1150 PickleDict::checkLength(data.size(), pos + result);
1156 inline std::size_t PickleDict::getLengthByTermination(
1159 char terminatingCharacter
1161 for(std::size_t end{pos}; end < data.size(); ++end) {
1162 if(data[end] == static_cast<std::uint8_t>(terminatingCharacter)) {
1169 "SimpleDict::getLengthByTermination():" 1170 " Could not find terminating character '" 1172 +
"' after position #" 1173 + std::to_string(pos)
1184 const auto end{pos + length};
1186 result.reserve(length);
1188 for(; pos < end; ++pos) {
1189 result.push_back(static_cast<char>(data[pos]));
1196 inline void PickleDict::writeHead(
Bytes& to) {
1197 to.push_back(static_cast<std::uint8_t>(OpCode::PROTO));
1202 inline void PickleDict::writeFrame(
const Bytes& frameBytes,
Bytes& to,
bool isLast) {
1204 std::uint64_t frameSize{frameBytes.size()};
1214 to.push_back(static_cast<std::uint8_t>(OpCode::FRAME));
1223 to.push_back(static_cast<std::uint8_t>(OpCode::STOP));
1228 inline void PickleDict::writeDictHead(
Bytes& to) {
1229 to.push_back(static_cast<std::uint8_t>(OpCode::EMPTY_DICT));
1230 to.push_back(static_cast<std::uint8_t>(OpCode::MEMOIZE));
1231 to.push_back(static_cast<std::uint8_t>(OpCode::MARK));
1235 inline void PickleDict::writeDictTail(
Bytes& to) {
1236 to.push_back(static_cast<std::uint8_t>(OpCode::MEMOIZE));
1237 to.push_back(static_cast<std::uint8_t>(OpCode::SETITEMS));
1241 inline void PickleDict::writeNumberEntry(
1242 const std::pair<std::string, std::int64_t>& entry,
1245 PickleDict::writeKey(entry.first, to);
1247 if(entry.second >= 0) {
1249 PickleDict::writeBinInt1(static_cast<std::uint8_t>(entry.second), to);
1255 PickleDict::writeBinInt2(static_cast<std::uint16_t>(entry.second), to);
1261 PickleDict::writeLong1(entry.second, to);
1265 inline void PickleDict::writeFloatEntry(
1266 const std::pair<std::string, double>& entry,
1269 PickleDict::writeKey(entry.first, to);
1270 PickleDict::writeBinFloat(entry.second, to);
1274 inline void PickleDict::writeStringEntry(
1275 const std::pair<std::string, std::string>& entry,
1278 PickleDict::writeKey(entry.first, to);
1281 PickleDict::writeShortBinUnicode(entry.second, to);
1284 PickleDict::writeBinUnicode(entry.second, to);
1287 PickleDict::writeBinUnicode8(entry.second, to);
1292 inline void PickleDict::writeKey(
const std::string& key,
Bytes& to) {
1293 PickleDict::writeShortBinUnicode(key, to);
1295 to.push_back(static_cast<std::uint8_t>(OpCode::MEMOIZE));
1299 inline void PickleDict::writeBinInt1(std::uint8_t value,
Bytes& to) {
1300 to.push_back(static_cast<std::uint8_t>(OpCode::BININT1));
1301 to.push_back(value);
1305 inline void PickleDict::writeBinInt2(std::uint16_t value,
Bytes& to) {
1306 to.push_back(static_cast<std::uint8_t>(OpCode::BININT2));
1312 inline void PickleDict::writeLong1(std::int64_t value,
Bytes& to) {
1313 to.push_back(static_cast<std::uint8_t>(OpCode::LONG1));
1315 if(PickleDict::inRange<std::int8_t>(value)) {
1317 to.push_back(static_cast<std::int8_t>(value));
1319 else if(PickleDict::inRange<std::int16_t>(value)) {
1322 PickleDict::writeBytes(
1324 static_cast<std::int16_t>(value)
1329 else if(PickleDict::inRange<std::int32_t>(value)) {
1332 PickleDict::writeBytes(
1334 static_cast<std::int32_t>(value)
1347 inline void PickleDict::writeBinFloat(
double value,
Bytes& to) {
1348 to.push_back(static_cast<std::uint8_t>(OpCode::BINFLOAT));
1354 inline void PickleDict::writeShortBinUnicode(
const std::string& value,
Bytes& to) {
1355 to.push_back(static_cast<std::uint8_t>(OpCode::SHORT_BINUNICODE));
1358 std::uint8_t length{};
1364 length =
static_cast<std::uint8_t
>(value.size());
1371 to.push_back(length);
1374 for(std::size_t index{}; index <
length; ++index) {
1375 to.push_back(static_cast<std::uint8_t>(value[index]));
1380 inline void PickleDict::writeBinUnicode(
const std::string& value,
Bytes& to) {
1381 to.push_back(static_cast<std::uint8_t>(OpCode::BINUNICODE));
1384 std::uint32_t length{};
1390 length =
static_cast<std::uint32_t
>(value.size());
1400 for(std::size_t index{}; index <
length; ++index) {
1401 to.push_back(static_cast<std::uint8_t>(value[index]));
1406 inline void PickleDict::writeBinUnicode8(
const std::string& value,
Bytes& to) {
1407 to.push_back(static_cast<std::uint8_t>(OpCode::BINUNICODE8));
1416 for(std::size_t index{}; index < value.size(); ++index) {
1417 to.push_back(static_cast<std::uint8_t>(value[index]));
constexpr auto pickleMinFrameSize
The minimum size of a Python pickle frame.
Definition: PickleDict.hpp:102
std::array< std::uint8_t, sizeFour > uInt32ToBytes(std::uint32_t number)
Converts an unsigned 32-bit number to an array of four bytes.
Definition: Bytes.hpp:470
std::vector< std::uint8_t > Bytes
Definition: PickleDict.hpp:62
constexpr auto pickleNineBytes
Nine bytes (eight bytes and an op-code).
Definition: PickleDict.hpp:84
PickleDict()=default
Default constructor.
std::int32_t bytesToInt32(const Bytes &bytes, std::size_t &pos)
Retrieve a signed 32-bit number from a vector of bytes.
Definition: Bytes.hpp:345
Class for Python pickle exceptions.
Definition: PickleDict.hpp:181
std::array< std::uint8_t, sizeFour > int32ToBytes(std::int32_t number)
Converts a signed 32-bit number to an array of four bytes.
Definition: Bytes.hpp:489
std::optional< double > getFloat(const std::string &key) const
Gets a floating-point number from the dictionary, if available.
Definition: PickleDict.hpp:426
static void moveInto(T &to, T &from)
Moves the elements of an iterable container into another iterable container.
Definition: Container.hpp:99
constexpr auto pickleOneByte
One byte.
Definition: PickleDict.hpp:72
double bytesToDouble(const Bytes &bytes, std::size_t &pos)
Retrieves an IEEE 754 double-precision binary floating-point number from a vector of bytes...
Definition: Bytes.hpp:406
Simple Python pickle dictionary.
Definition: PickleDict.hpp:136
#define MAIN_EXCEPTION_CLASS()
Macro used to easily define classes for general exceptions.
Definition: Exception.hpp:50
std::uint16_t bytesToUInt16(const Bytes &bytes, std::size_t &pos)
Retrieve an unsigned 16-bit number from a vector of bytes.
Definition: Bytes.hpp:362
constexpr std::uint32_t pickleMaxUFourByteNumber
Maximum number in unsigned four-byte number.
Definition: PickleDict.hpp:111
void setNumber(const std::string &key, std::int64_t value)
Adds or overwrites a number in the dictionary.
Definition: PickleDict.hpp:467
constexpr auto pickleEightBytes
Eight bytes.
Definition: PickleDict.hpp:81
void setFloat(const std::string &key, double value)
Adds or overwrites a floating-point number in the dictionary.
Definition: PickleDict.hpp:480
std::string byteToHexString(std::uint8_t byte)
Converts a byte to a string containing the byte in hexadecimal format.
Definition: Bytes.hpp:572
std::array< std::uint8_t, sizeEight > int64ToBytes(std::int64_t number)
Converts a signed 64-bit number to an array of eight bytes.
Definition: Bytes.hpp:451
static T::size_type bytes(const T &container)
Returns the number of bytes in an iterable container.
Definition: Container.hpp:144
constexpr auto pickleHeadSize
The size of the Python pickle header, in bytes.
Definition: PickleDict.hpp:99
std::string charToString(char c)
Converts a character to a string.
Definition: Bytes.hpp:609
void writeTo(Bytes &dataTo) const
Writes dictionary to Python pickle data.
Definition: PickleDict.hpp:565
constexpr auto pickleVersionByte
The position of the version byte in a Python pickle.
Definition: PickleDict.hpp:96
std::optional< std::int64_t > getNumber(const std::string &key) const
Gets a number from the dictionary, if available.
Definition: PickleDict.hpp:406
constexpr auto pickleBase
The base used for converting strings to numbers.
Definition: PickleDict.hpp:114
constexpr auto pickleProtocolVersion
The protocol version of Python pickles used.
Definition: PickleDict.hpp:90
std::int64_t bytesToInt64(const Bytes &bytes, std::size_t &pos)
Retrieve a signed 64-bit number from a vector of bytes.
Definition: Bytes.hpp:300
static void append(T &to, const T &from, typename T::size_type startAt, typename T::size_type endAt)
Appends (part of) an iterable container to another container.
Definition: Container.hpp:51
constexpr auto pickleMinSize
The minimum size of a Python pickle to extract a frame.
Definition: PickleDict.hpp:87
std::optional< std::string > getString(const std::string &key) const
Gets a string from the dictionary, if available.
Definition: PickleDict.hpp:446
std::uint64_t bytesToUInt64(const Bytes &bytes, std::size_t &pos)
Retrieve an unsigned 64-bit number from a vector of bytes.
Definition: Bytes.hpp:203
std::array< std::uint8_t, sizeTwo > uInt16ToBytes(std::uint16_t number)
Converts an unsigned 16-bit number to an array of two bytes.
Definition: Bytes.hpp:508
std::array< std::uint8_t, sizeEight > doubleToBytes(double number)
Converts a floating-point number with double precision to an array of eight bytes. ...
Definition: Bytes.hpp:547
constexpr auto pickleFourBytes
Four bytes.
Definition: PickleDict.hpp:78
std::size_t length(std::string_view str)
Definition: Utf8.hpp:327
std::int16_t bytesToInt16(const Bytes &bytes, std::size_t &pos)
Retrieve a signed 16-bit number from a vector of bytes.
Definition: Bytes.hpp:388
constexpr std::uint8_t pickleMaxUOneByteNumber
Maximum number in unsigned one-byte number.
Definition: PickleDict.hpp:105
constexpr auto pickleTwoBytes
Two bytes.
Definition: PickleDict.hpp:75
std::array< std::uint8_t, sizeEight > uInt64ToBytes(std::uint64_t number)
Converts an unsigned 64-bit number to an array of eight bytes.
Definition: Bytes.hpp:432
void readFrom(const Bytes &data)
Creates a simple dictionary from Python pickle data.
Definition: PickleDict.hpp:526
std::array< std::uint8_t, sizeTwo > int16ToBytes(std::int16_t number)
Converts a signed 16-bit number to an array of two bytes.
Definition: Bytes.hpp:527
constexpr std::uint16_t pickleMaxUTwoByteNumber
Maximum number in unsigned two-byte number.
Definition: PickleDict.hpp:108
Namespace for different types of data.
constexpr auto pickleProtoByte
The position of the protocol byte in a Python pickle.
Definition: PickleDict.hpp:93
std::uint32_t bytesToUInt32(const Bytes &bytes, std::size_t &pos)
Retrieve an unsigned 32-bit number from a vector of bytes.
Definition: Bytes.hpp:317
void setString(const std::string &key, const std::string &value)
Adds or overwrites a string in the dictionary.
Definition: PickleDict.hpp:493