|
|
| compress_integer_variable_byte () |
| | Constructor.
|
| |
|
virtual | ~compress_integer_variable_byte () |
| | Destructor.
|
| |
| virtual size_t | encode (void *encoded, size_t encoded_buffer_length, const integer *source, size_t source_integers) |
| | Encode a sequence of integers returning the number of bytes used for the encoding, or 0 if the encoded sequence doesn't fit in the buffer. More...
|
| |
| virtual void | decode (integer *decoded, size_t integers_to_decode, const void *source, size_t source_length) |
| | Decode a sequence of integers encoded with this codex. More...
|
| |
|
virtual long long | compress (unsigned char *destination, long long destination_length, uint32_t *source, long long source_integers) |
| |
|
virtual void | decompress (uint32_t *destination, unsigned char *source, long long destination_integers) |
| |
|
| static void | static_decode (integer *decoded, size_t integers_to_decode, const void *source_as_void, size_t source_length) |
| | Decode a sequence of integers encoded with this codex. More...
|
| |
| static size_t | bytes_needed_for (integer value) |
| | Return the number of bytes needed to encode the given integer value. More...
|
| |
| template<typename DESTINATION > |
| static forceinline void | compress_into (DESTINATION &destination, integer value) |
| | Encode the given integer placing the encoding into destination (whose size is not validated). More...
|
| |
| template<typename SOURCE > |
| static forceinline void | decompress_into (integer *decoded, SOURCE &source) |
| | Decode the next integer from source, placing the decoded value into decoded (whose size is not validated). More...
|
| |
Variable byte compression for integer sequences.
Variable byte compression is a whole suite of different techniques; for details see: A. Trotman (2014), Compression, SIMD, and Postings Lists. In Proceedings of the 2014 Australasian Document Computing Symposium (ADCS 2014), Pages 50-58. DOI=http://dx.doi.org/10.1145/2682862.2682870. This particular version uses a stop-bit in the high bit of the last byte of the encoded integer, stores the integer big-endian (high byte first), and uses loop unwinding for decoding efficiency. The encoding is straightforward. An integer is broken into 7-bit chunks, each stored in a byte whose top bit is 0, except the last byte, which has a 1 in the top bit. So, the integer 1905 (0x771) is the binary sequence 011101110001, which broken into 7-bit chunks is 0001110 1110001. These then get the high bits added, 0 for all except the last byte, giving [0]0001110 [1]1110001, which is then written out high byte first as the byte sequence 0x0E 0xF1. This implementation works with 32-bit and 64-bit integers. To encode 64-bit integers, ensure that #define JASS_COMPRESS_INTEGER_BITS_PER_INTEGER 64 is set at compile time.