Zippy level 2 for denser compression and faster decompression

We also increased the hash table size by 1 bit, because the previous size significantly degraded the compression ratio. As a result, even level 1 might improve slightly.

PiperOrigin-RevId: 621456036
This commit is contained in:
Snappy Team 2024-04-03 09:40:00 +00:00 committed by Danila Kutenin
parent 4f5cf9a8d6
commit 766d24c95e
5 changed files with 309 additions and 50 deletions

View file

@ -334,6 +334,31 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
}
#endif
static inline size_t FindMatchLengthPlain(const char* s1, const char* s2,
                                          const char* s2_limit) {
  // Returns the number of leading bytes on which s1 and s2 agree. s2 is never
  // read at or past s2_limit; s1 is not bounds-checked here.
  // Modelled on the x86-64 FindMatchLength above, but yields only the length.
  assert(s2_limit >= s2);
  int matched = 0;

  // Fast phase: compare one 64-bit word per step while s2 still has a full
  // word available before s2_limit.
  for (; s2 <= s2_limit - 8; s2 += 8, matched += 8) {
    if (UNALIGNED_LOAD64(s2) != UNALIGNED_LOAD64(s1 + matched)) break;
  }

  if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 8) {
    // A full word is still available, so the loop above stopped on a
    // mismatch and the XOR below is non-zero. On little-endian hosts the
    // index of its lowest set bit, divided by 8, is the count of equal bytes.
    const uint64_t diff =
        UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
    const int first_diff_bit = Bits::FindLSBSetNonZero64(diff);
    return matched + (first_diff_bit >> 3);
  }

  // Slow phase: fewer than 8 bytes remain in s2 (or the host is not
  // little-endian), so finish one byte at a time.
  for (; s2 < s2_limit && s1[matched] == *s2; ++s2) {
    ++matched;
  }
  return matched;
}
// Lookup tables for decompression code. Give --snappy_dump_decompression_table
// to the unit test to recompute char_table.

215
snappy.cc
View file

@ -175,6 +175,22 @@ inline uint16_t* TableEntry(uint16_t* table, uint32_t bytes, uint32_t mask) {
(hash & mask));
}
// Maps the 4 input bytes in `bytes` to a slot of `table`.
// The slot is located by multiplying `bytes` by a fixed constant, keeping the
// top kMaxHashTableBits bits of the 32-bit product, and-ing them with `mask`,
// and adding the result to the table's address as a byte offset.
inline uint16_t* TableEntry4ByteMatch(uint16_t* table, uint32_t bytes,
                                      uint32_t mask) {
  constexpr uint32_t kMultiplier = 2654435761U;
  const uint32_t product = kMultiplier * bytes;
  const uint32_t byte_offset = (product >> (32 - kMaxHashTableBits)) & mask;
  const uintptr_t table_address = reinterpret_cast<uintptr_t>(table);
  return reinterpret_cast<uint16_t*>(table_address + byte_offset);
}
// Maps the 8 input bytes in `bytes` to a slot of `table`.
// The slot is located by multiplying `bytes` by a fixed 64-bit constant,
// keeping the top kMaxHashTableBits bits of the product, and-ing them with
// `mask`, and adding the result to the table's address as a byte offset.
inline uint16_t* TableEntry8ByteMatch(uint16_t* table, uint64_t bytes,
                                      uint32_t mask) {
  constexpr uint64_t kMultiplier = 58295818150454627ULL;
  const uint64_t product = kMultiplier * bytes;
  const uint32_t top_bits =
      static_cast<uint32_t>(product >> (64 - kMaxHashTableBits));
  const uintptr_t table_address = reinterpret_cast<uintptr_t>(table);
  return reinterpret_cast<uint16_t*>(table_address + (top_bits & mask));
}
} // namespace
size_t MaxCompressedLength(size_t source_bytes) {
@ -931,6 +947,172 @@ char* CompressFragment(const char* input, size_t input_size, char* op,
}
}
emit_remainder:
// Emit the remaining bytes as a literal
if (ip < ip_end) {
op = EmitLiteral</*allow_fast_path=*/false>(op, ip, ip_end - ip);
}
return op;
}
char* CompressFragmentDoubleHash(const char* input, size_t input_size, char* op,
uint16_t* table, const int table_size,
uint16_t* table2, const int table_size2) {
// "ip" is the input pointer, and "op" is the output pointer.
const char* ip = input;
assert(input_size <= kBlockSize);
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const uint32_t mask = 2 * (table_size - 1);
const char* ip_end = input + input_size;
const char* base_ip = ip;
const size_t kInputMarginBytes = 15;
if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
const char* ip_limit = input + input_size - kInputMarginBytes;
for (;;) {
const char* next_emit = ip++;
uint64_t data = LittleEndian::Load64(ip);
uint32_t skip = 512;
const char* candidate;
uint32_t candidate_length;
while (true) {
assert(static_cast<uint32_t>(data) == LittleEndian::Load32(ip));
uint16_t* table_entry2 = TableEntry8ByteMatch(table2, data, mask);
uint32_t bytes_between_hash_lookups = skip >> 9;
skip++;
const char* next_ip = ip + bytes_between_hash_lookups;
if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
ip = next_emit;
goto emit_remainder;
}
candidate = base_ip + *table_entry2;
assert(candidate >= base_ip);
assert(candidate < ip);
*table_entry2 = ip - base_ip;
if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
LittleEndian::Load32(candidate))) {
candidate_length =
FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
break;
}
uint16_t* table_entry = TableEntry4ByteMatch(table, data, mask);
candidate = base_ip + *table_entry;
assert(candidate >= base_ip);
assert(candidate < ip);
*table_entry = ip - base_ip;
if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
LittleEndian::Load32(candidate))) {
candidate_length =
FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
table_entry2 =
TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask);
auto candidate2 = base_ip + *table_entry2;
size_t candidate_length2 =
FindMatchLengthPlain(candidate2, ip + 1, ip_end);
if (candidate_length2 > candidate_length) {
*table_entry2 = ip - base_ip;
candidate = candidate2;
candidate_length = candidate_length2;
++ip;
}
break;
}
data = LittleEndian::Load64(next_ip);
ip = next_ip;
}
// Backtrack to the point it matches fully.
while (ip > next_emit && candidate > base_ip &&
*(ip - 1) == *(candidate - 1)) {
--ip;
--candidate;
++candidate_length;
}
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask) =
ip - base_ip + 1;
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 2), mask) =
ip - base_ip + 2;
*TableEntry4ByteMatch(table, LittleEndian::Load32(ip + 1), mask) =
ip - base_ip + 1;
// Step 2: A 4-byte or 8-byte match has been found.
// We'll later see if more than 4 bytes match. But, prior to the match,
// input bytes [next_emit, ip) are unmatched. Emit them as
// "literal bytes."
assert(next_emit + 16 <= ip_end);
if (ip - next_emit > 0) {
op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit,
ip - next_emit);
}
// Step 3: Call EmitCopy, and then see if another EmitCopy could
// be our next move. Repeat until we find no match for the
// input immediately after what was consumed by the last EmitCopy call.
//
// If we exit this loop normally then we need to call EmitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can exit
// this loop via goto if we get close to exhausting the input.
do {
// We have a 4-byte match at ip, and no need to emit any
// "literal bytes" prior to ip.
const char* base = ip;
ip += candidate_length;
size_t offset = base - candidate;
if (candidate_length < 12) {
op =
EmitCopy</*len_less_than_12=*/true>(op, offset, candidate_length);
} else {
op = EmitCopy</*len_less_than_12=*/false>(op, offset,
candidate_length);
}
if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We are now looking for a 4-byte match again. We read
// table[Hash(ip, mask)] for that. To improve compression,
// we also update several previous table entries.
if (ip - base_ip > 7) {
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 7), mask) =
ip - base_ip - 7;
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 4), mask) =
ip - base_ip - 4;
}
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 3), mask) =
ip - base_ip - 3;
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 2), mask) =
ip - base_ip - 2;
*TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 2), mask) =
ip - base_ip - 2;
*TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 1), mask) =
ip - base_ip - 1;
uint16_t* table_entry =
TableEntry8ByteMatch(table2, LittleEndian::Load64(ip), mask);
candidate = base_ip + *table_entry;
*table_entry = ip - base_ip;
if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
candidate_length =
FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
continue;
}
table_entry =
TableEntry4ByteMatch(table, LittleEndian::Load32(ip), mask);
candidate = base_ip + *table_entry;
*table_entry = ip - base_ip;
if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
candidate_length =
FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
continue;
}
break;
} while (true);
}
}
emit_remainder:
// Emit the remaining bytes as a literal
if (ip < ip_end) {
@ -1608,7 +1790,8 @@ bool GetUncompressedLength(Source* source, uint32_t* result) {
return decompressor.ReadUncompressedLength(result);
}
size_t Compress(Source* reader, Sink* writer) {
size_t Compress(Source* reader, Sink* writer, CompressionOptions options) {
CHECK(options.level == 1 || options.level == 2);
int token = 0;
size_t written = 0;
size_t N = reader->Available();
@ -1664,8 +1847,15 @@ size_t Compress(Source* reader, Sink* writer) {
// Need a scratch buffer for the output, in case the byte sink doesn't
// have room for us directly.
char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
table_size);
char* end = nullptr;
if (options.level == 1) {
end = internal::CompressFragment(fragment, fragment_size, dest, table,
table_size);
} else if (options.level == 2) {
end = internal::CompressFragmentDoubleHash(
fragment, fragment_size, dest, table, table_size >> 1,
table + (table_size >> 1), table_size >> 1);
}
writer->Append(dest, end - dest);
written += (end - dest);
@ -2107,39 +2297,40 @@ bool IsValidCompressed(Source* compressed) {
}
void RawCompress(const char* input, size_t input_length, char* compressed,
size_t* compressed_length) {
size_t* compressed_length, CompressionOptions options) {
ByteArraySource reader(input, input_length);
UncheckedByteArraySink writer(compressed);
Compress(&reader, &writer);
Compress(&reader, &writer, options);
// Compute how many bytes were added
*compressed_length = (writer.CurrentDestination() - compressed);
}
void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
char* compressed, size_t* compressed_length) {
char* compressed, size_t* compressed_length,
CompressionOptions options) {
SnappyIOVecReader reader(iov, uncompressed_length);
UncheckedByteArraySink writer(compressed);
Compress(&reader, &writer);
Compress(&reader, &writer, options);
// Compute how many bytes were added.
*compressed_length = writer.CurrentDestination() - compressed;
}
size_t Compress(const char* input, size_t input_length,
std::string* compressed) {
size_t Compress(const char* input, size_t input_length, std::string* compressed,
CompressionOptions options) {
// Pre-grow the buffer to the max length of the compressed output
STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
size_t compressed_length;
RawCompress(input, input_length, string_as_array(compressed),
&compressed_length);
&compressed_length, options);
compressed->erase(compressed_length);
return compressed_length;
}
size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
std::string* compressed) {
std::string* compressed, CompressionOptions options) {
// Compute the number of bytes to be compressed.
size_t uncompressed_length = 0;
for (size_t i = 0; i < iov_cnt; ++i) {
@ -2152,7 +2343,7 @@ size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
size_t compressed_length;
RawCompressFromIOVec(iov, uncompressed_length, string_as_array(compressed),
&compressed_length);
&compressed_length, options);
compressed->erase(compressed_length);
return compressed_length;
}

View file

@ -50,13 +50,33 @@ namespace snappy {
class Source;
class Sink;
// Options accepted by the compression entry points.
struct CompressionOptions {
  // Bounds of the supported level range, and the level used when the caller
  // does not choose one.
  static constexpr int MinCompressionLevel() { return 1; }
  static constexpr int MaxCompressionLevel() { return 2; }
  static constexpr int DefaultCompressionLevel() { return 1; }

  // Compression level.
  //   * Level 1 is the fastest.
  //   * Level 2 is a little slower but compresses better. It is
  //     **EXPERIMENTAL** for the time being; we might decide to fall back to
  //     level 1 in the future.
  //   * Levels 3 and above are currently not supported. We plan to support
  //     levels up to 9 in the future.
  // For readers familiar with other compression algorithms: level 1 is
  // equivalent to the fast mode (level 1) of LZ4. Level 2 is equivalent to
  // LZ4's level 2 mode and compresses somewhere around zstd:-3 and zstd:-2,
  // but generally with faster decompression speeds than snappy:1 and zstd:-3.
  int level = DefaultCompressionLevel();
};
// ------------------------------------------------------------------------
// Generic compression/decompression routines.
// ------------------------------------------------------------------------
// Compress the bytes read from "*source" and append to "*sink". Return the
// Compress the bytes read from "*reader" and append to "*writer". Return the
// number of bytes written.
size_t Compress(Source* source, Sink* sink);
size_t Compress(Source* reader, Sink* writer,
CompressionOptions options = {});
// Find the uncompressed length of the given stream, as given by the header.
// Note that the true length could deviate from this; the stream could e.g.
@ -76,14 +96,15 @@ namespace snappy {
//
// REQUIRES: "input[]" is not an alias of "*compressed".
size_t Compress(const char* input, size_t input_length,
std::string* compressed);
std::string* compressed, CompressionOptions options = {});
// Same as `Compress` above but taking an `iovec` array as input. Note that
// this function preprocesses the inputs to compute the sum of
// `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use
// `RawCompressFromIOVec` below.
size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
std::string* compressed);
std::string* compressed,
CompressionOptions options = {});
// Decompresses "compressed[0..compressed_length-1]" to "*uncompressed".
// Original contents of "*uncompressed" are lost.
@ -126,16 +147,15 @@ namespace snappy {
// RawCompress(input, input_length, output, &output_length);
// ... Process(output, output_length) ...
// delete [] output;
void RawCompress(const char* input,
size_t input_length,
char* compressed,
size_t* compressed_length);
void RawCompress(const char* input, size_t input_length, char* compressed,
size_t* compressed_length, CompressionOptions options = {});
// Same as `RawCompress` above but taking an `iovec` array as input. Note that
// `uncompressed_length` is the total number of bytes to be read from the
// elements of `iov` (_not_ the number of elements in `iov`).
void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
char* compressed, size_t* compressed_length);
char* compressed, size_t* compressed_length,
CompressionOptions options = {});
// Given data in "compressed[0..compressed_length-1]" generated by
// calling the Snappy::Compress routine, this routine
@ -215,7 +235,7 @@ namespace snappy {
static constexpr int kMinHashTableBits = 8;
static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
static constexpr int kMaxHashTableBits = 14;
static constexpr int kMaxHashTableBits = 15;
static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
} // end namespace snappy

View file

@ -44,6 +44,15 @@ namespace snappy {
namespace {
// Argument generator for benchmarks: registers one (file index, level) pair
// for every entry of kTestDataFiles combined with every compression level
// from MinCompressionLevel() to MaxCompressionLevel() inclusive.
void FilesAndLevels(::testing::Benchmark* benchmark) {
  constexpr int kFirstLevel = snappy::CompressionOptions::MinCompressionLevel();
  constexpr int kLastLevel = snappy::CompressionOptions::MaxCompressionLevel();
  for (int file_index = 0; file_index < ARRAYSIZE(kTestDataFiles);
       ++file_index) {
    int level = kFirstLevel;
    while (level <= kLastLevel) {
      benchmark->ArgPair(file_index, level);
      ++level;
    }
  }
}
void BM_UFlat(benchmark::State& state) {
// Pick file to process based on state.range(0).
int file_index = state.range(0);
@ -55,7 +64,8 @@ void BM_UFlat(benchmark::State& state) {
kTestDataFiles[file_index].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
snappy::Compress(contents.data(), contents.size(), &zcontents,
snappy::CompressionOptions{.level = state.range(1)});
char* dst = new char[contents.size()];
for (auto s : state) {
@ -68,7 +78,7 @@ void BM_UFlat(benchmark::State& state) {
delete[] dst;
}
BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
BENCHMARK(BM_UFlat)->Apply(FilesAndLevels);
struct SourceFiles {
SourceFiles() {
@ -119,7 +129,8 @@ void BM_UValidate(benchmark::State& state) {
kTestDataFiles[file_index].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
snappy::Compress(contents.data(), contents.size(), &zcontents,
snappy::CompressionOptions{.level = state.range(1)});
for (auto s : state) {
CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
@ -128,7 +139,7 @@ void BM_UValidate(benchmark::State& state) {
static_cast<int64_t>(contents.size()));
state.SetLabel(kTestDataFiles[file_index].label);
}
BENCHMARK(BM_UValidate)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
BENCHMARK(BM_UValidate)->Apply(FilesAndLevels);
void BM_UValidateMedley(benchmark::State& state) {
static const SourceFiles* const source = new SourceFiles();
@ -152,6 +163,7 @@ BENCHMARK(BM_UValidateMedley);
void BM_UIOVecSource(benchmark::State& state) {
// Pick file to process based on state.range(0).
int file_index = state.range(0);
int level = state.range(1);
CHECK_GE(file_index, 0);
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
@ -180,7 +192,8 @@ void BM_UIOVecSource(benchmark::State& state) {
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
size_t zsize = 0;
for (auto s : state) {
snappy::RawCompressFromIOVec(iov, contents.size(), dst, &zsize);
snappy::RawCompressFromIOVec(iov, contents.size(), dst, &zsize,
snappy::CompressionOptions{.level = level});
benchmark::DoNotOptimize(iov);
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
@ -195,7 +208,7 @@ void BM_UIOVecSource(benchmark::State& state) {
delete[] dst;
}
BENCHMARK(BM_UIOVecSource)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
BENCHMARK(BM_UIOVecSource)->Apply(FilesAndLevels);
void BM_UIOVecSink(benchmark::State& state) {
// Pick file to process based on state.range(0).
@ -254,7 +267,8 @@ void BM_UFlatSink(benchmark::State& state) {
kTestDataFiles[file_index].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
snappy::Compress(contents.data(), contents.size(), &zcontents,
snappy::CompressionOptions{.level = state.range(1)});
char* dst = new char[contents.size()];
for (auto s : state) {
@ -273,11 +287,12 @@ void BM_UFlatSink(benchmark::State& state) {
delete[] dst;
}
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
BENCHMARK(BM_UFlatSink)->Apply(FilesAndLevels);
void BM_ZFlat(benchmark::State& state) {
// Pick file to process based on state.range(0).
int file_index = state.range(0);
int level = state.range(1);
CHECK_GE(file_index, 0);
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
@ -288,7 +303,8 @@ void BM_ZFlat(benchmark::State& state) {
size_t zsize = 0;
for (auto s : state) {
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize,
snappy::CompressionOptions{.level = level});
benchmark::DoNotOptimize(dst);
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
@ -302,10 +318,12 @@ void BM_ZFlat(benchmark::State& state) {
zsize);
delete[] dst;
}
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
BENCHMARK(BM_ZFlat)->Apply(FilesAndLevels);
void BM_ZFlatAll(benchmark::State& state) {
const int num_files = ARRAYSIZE(kTestDataFiles);
int level = state.range(0);
std::vector<std::string> contents(num_files);
std::vector<char*> dst(num_files);
@ -322,7 +340,7 @@ void BM_ZFlatAll(benchmark::State& state) {
for (auto s : state) {
for (int i = 0; i < num_files; ++i) {
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
&zsize);
&zsize, snappy::CompressionOptions{.level = level});
benchmark::DoNotOptimize(dst);
}
}
@ -335,10 +353,11 @@ void BM_ZFlatAll(benchmark::State& state) {
}
state.SetLabel(StrFormat("%d kTestDataFiles", num_files));
}
BENCHMARK(BM_ZFlatAll);
BENCHMARK(BM_ZFlatAll)->DenseRange(1, 2);
void BM_ZFlatIncreasingTableSize(benchmark::State& state) {
CHECK_GT(ARRAYSIZE(kTestDataFiles), 0);
int level = state.range(0);
const std::string base_content = ReadTestDataFile(
kTestDataFiles[0].filename, kTestDataFiles[0].size_limit);
@ -358,7 +377,7 @@ void BM_ZFlatIncreasingTableSize(benchmark::State& state) {
for (auto s : state) {
for (size_t i = 0; i < contents.size(); ++i) {
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
&zsize);
&zsize, snappy::CompressionOptions{.level = level});
benchmark::DoNotOptimize(dst);
}
}
@ -371,7 +390,7 @@ void BM_ZFlatIncreasingTableSize(benchmark::State& state) {
}
state.SetLabel(StrFormat("%d tables", contents.size()));
}
BENCHMARK(BM_ZFlatIncreasingTableSize);
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(1, 2);
} // namespace

View file

@ -39,22 +39,26 @@
// Entry point for LibFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
std::string input(reinterpret_cast<const char*>(data), size);
for (int level = snappy::CompressionOptions::MinCompressionLevel();
level <= snappy::CompressionOptions::MaxCompressionLevel(); ++level) {
std::string compressed;
size_t compressed_size =
snappy::Compress(input.data(), input.size(), &compressed,
snappy::CompressionOptions{.level = level});
std::string compressed;
size_t compressed_size =
snappy::Compress(input.data(), input.size(), &compressed);
(void)compressed_size; // Variable only used in debug builds.
assert(compressed_size == compressed.size());
assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
assert(
snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
(void)compressed_size; // Variable only used in debug builds.
assert(compressed_size == compressed.size());
assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
std::string uncompressed_after_compress;
bool uncompress_succeeded = snappy::Uncompress(
compressed.data(), compressed.size(), &uncompressed_after_compress);
std::string uncompressed_after_compress;
bool uncompress_succeeded = snappy::Uncompress(
compressed.data(), compressed.size(), &uncompressed_after_compress);
(void)uncompress_succeeded; // Variable only used in debug builds.
assert(uncompress_succeeded);
assert(input == uncompressed_after_compress);
(void)uncompress_succeeded; // Variable only used in debug builds.
assert(uncompress_succeeded);
assert(input == uncompressed_after_compress);
}
return 0;
}