Add BM_ZFlatAll and BM_ZFlatIncreasingTableSize benchmarks to see how well zippy performs when it processes different data one after the other.

PiperOrigin-RevId: 257518137
Shahriar Rouf authored 2019-07-10 17:38:22 -07:00; committed by Victor Costan
parent 156cd8939c
commit 4c7f2d5dfb
3 changed files with 75 additions and 7 deletions

snappy.cc

@@ -444,12 +444,14 @@ bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
 namespace {
 uint32 CalculateTableSize(uint32 input_size) {
-  assert(kMaxHashTableSize >= 256);
+  static_assert(
+      kMaxHashTableSize >= kMinHashTableSize,
+      "kMaxHashTableSize should be greater or equal to kMinHashTableSize.");
   if (input_size > kMaxHashTableSize) {
     return kMaxHashTableSize;
   }
-  if (input_size < 256) {
-    return 256;
+  if (input_size < kMinHashTableSize) {
+    return kMinHashTableSize;
   }
   // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1.
   // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)).
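
For reference, the clamp-then-round-up behavior described by the two comments above can be sketched in isolation. This is a minimal illustration, not snappy's implementation: RoundUpToPowerOfTwo and TableSizeSketch are made-up names, and the two constants simply mirror the values defined in snappy.h below.

#include <cstdint>

constexpr uint32_t kMinHashTableSize = 1 << 8;   // 256
constexpr uint32_t kMaxHashTableSize = 1 << 14;  // 16384

// Smallest power of two >= x, for x > 1. This is the quantity the real code
// computes as 2 << Log2Floor(x - 1), i.e. 1 << Log2Ceiling(x).
uint32_t RoundUpToPowerOfTwo(uint32_t x) {
  uint32_t p = 1;
  while (p < x) p <<= 1;
  return p;
}

uint32_t TableSizeSketch(uint32_t input_size) {
  if (input_size > kMaxHashTableSize) return kMaxHashTableSize;
  if (input_size < kMinHashTableSize) return kMinHashTableSize;
  return RoundUpToPowerOfTwo(input_size);
}

// Examples: TableSizeSketch(100) == 256, TableSizeSketch(1000) == 1024,
// TableSizeSketch(1 << 20) == kMaxHashTableSize.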

snappy.h

@@ -193,11 +193,14 @@ namespace snappy {
 // Note that there might be older data around that is compressed with larger
 // block sizes, so the decompression code should not rely on the
 // non-existence of long backreferences.
-static const int kBlockLog = 16;
-static const size_t kBlockSize = 1 << kBlockLog;
+static constexpr int kBlockLog = 16;
+static constexpr size_t kBlockSize = 1 << kBlockLog;

-static const int kMaxHashTableBits = 14;
-static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
+static constexpr int kMinHashTableBits = 8;
+static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
+
+static constexpr int kMaxHashTableBits = 14;
+static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
 }  // end namespace snappy

 #endif  // THIRD_PARTY_SNAPPY_SNAPPY_H__
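
The new kMinHashTableBits/kMaxHashTableBits pair defines the sweep range used by the BM_ZFlatIncreasingTableSize benchmark below, which resizes its input to 1 << table_bits bytes for every table size in that range. A rough standalone sketch of the sweep (illustrative only; PrintTableSizeSweep is not a snappy function):

#include <cstdio>

constexpr int kMinHashTableBits = 8;
constexpr int kMaxHashTableBits = 14;

// Enumerates the input sizes the benchmark compresses: 256, 512, ..., 16384
// bytes, so each iteration exercises a different hash table size.
void PrintTableSizeSweep() {
  for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
       ++table_bits) {
    std::printf("table_bits=%d -> %d-byte input\n", table_bits,
                1 << table_bits);
  }
}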

snappy_unittest.cc

@@ -1410,6 +1410,69 @@ static void BM_ZFlat(int iters, int arg) {
 }
 BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);

+static void BM_ZFlatAll(int iters) {
+  StopBenchmarkTiming();
+  const int num_files = ARRAYSIZE(files);
+  std::vector<std::string> contents(num_files);
+  std::vector<char*> dst(num_files);
+  for (int i = 0; i < num_files; ++i) {
+    contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
+    dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
+  }
+  StartBenchmarkTiming();
+  size_t zsize = 0;
+  while (iters-- > 0) {
+    for (int i = 0; i < num_files; ++i) {
+      snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
+                          &zsize);
+    }
+  }
+  StopBenchmarkTiming();
+  for (int i = 0; i < num_files; ++i) {
+    delete[] dst[i];
+  }
+}
+BENCHMARK(BM_ZFlatAll);
+
+static void BM_ZFlatIncreasingTableSize(int iters) {
+  StopBenchmarkTiming();
+  QCHECK_GT(ARRAYSIZE(files), 0);
+  const std::string base_content =
+      ReadTestDataFile(files[0].filename, files[0].size_limit);
+  std::vector<std::string> contents;
+  std::vector<char*> dst;
+  for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
+       ++table_bits) {
+    std::string content = base_content;
+    content.resize(1 << table_bits);
+    dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
+    contents.push_back(std::move(content));
+  }
+  size_t zsize = 0;
+  StartBenchmarkTiming();
+  while (iters-- > 0) {
+    for (int i = 0; i < contents.size(); ++i) {
+      snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
+                          &zsize);
+    }
+  }
+  StopBenchmarkTiming();
+  for (int i = 0; i < dst.size(); ++i) {
+    delete[] dst[i];
+  }
+}
+BENCHMARK(BM_ZFlatIncreasingTableSize);
+
 }  // namespace snappy

 int main(int argc, char** argv) {