Use sized deallocation when releasing Zippy's scratch buffers.

name                                          old time/op             new time/op             delta
BM_UFlat/0      [html             ]            41.7µs ± 0%             41.7µs ± 0%    ~             (p=0.222 n=5+5)
BM_UFlat/1      [urls             ]             587µs ± 0%              574µs ± 0%  -2.31%          (p=0.008 n=5+5)
BM_UFlat/2      [jpg              ]            7.24µs ± 2%             7.25µs ± 2%    ~             (p=0.690 n=5+5)
BM_UFlat/3      [jpg_200          ]              130ns ± 0%              131ns ± 1%    ~             (p=0.556 n=4+5)
BM_UFlat/4      [pdf              ]            8.21µs ± 0%             8.24µs ± 1%    ~             (p=0.278 n=5+5)
BM_UFlat/5      [html4            ]             219µs ± 0%              220µs ± 0%  +0.45%          (p=0.008 n=5+5)
BM_UFlat/6      [txt1             ]             192µs ± 0%              190µs ± 0%  -0.86%          (p=0.008 n=5+5)
BM_UFlat/7      [txt2             ]             169µs ± 0%              168µs ± 0%  -0.54%          (p=0.008 n=5+5)
BM_UFlat/8      [txt3             ]             509µs ± 0%              505µs ± 0%  -0.66%          (p=0.008 n=5+5)
BM_UFlat/9      [txt4             ]             710µs ± 0%              702µs ± 0%  -1.14%          (p=0.008 n=5+5)
BM_UFlat/10     [pb               ]            38.2µs ± 0%             37.9µs ± 0%  -0.82%          (p=0.008 n=5+5)
BM_UFlat/11     [gaviota          ]             189µs ± 0%              189µs ± 0%    ~             (p=0.746 n=5+5)
BM_UFlat/12     [cp               ]            14.2µs ± 0%             14.2µs ± 1%    ~             (p=0.421 n=5+5)
BM_UFlat/13     [c                ]            7.29µs ± 0%             7.34µs ± 1%  +0.69%          (p=0.016 n=5+5)
BM_UFlat/14     [lsp              ]            2.27µs ± 0%             2.28µs ± 0%  +0.34%          (p=0.008 n=5+5)
BM_UFlat/15     [xls              ]             954µs ± 0%              900µs ± 0%  -5.67%          (p=0.008 n=5+5)
BM_UFlat/16     [xls_200          ]              213ns ± 1%              217ns ± 2%    ~             (p=0.056 n=5+5)
BM_UFlat/17     [bin              ]             276µs ± 0%              274µs ± 0%  -0.94%          (p=0.008 n=5+5)
BM_UFlat/18     [bin_200          ]              101ns ± 1%              101ns ± 1%    ~             (p=0.524 n=5+5)
BM_UFlat/19     [sum              ]            29.3µs ± 0%             27.3µs ± 0%  -6.98%          (p=0.008 n=5+5)
BM_UFlat/20     [man              ]            2.95µs ± 0%             2.95µs ± 0%    ~             (p=0.651 n=5+5)

For microbenchmarks, the overhead of allocating/deallocating should be
small (the relevant metadata for TCMalloc's PageMap will be in cache),
but this helps demonstrate that the refactoring does not adversely
impact performance.
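
(Background, not part of the commit: "sized deallocation" means handing the allocation's size back to the allocator when freeing. A delete[] on a plain char* generally cannot pass the size, since no per-array size is stored for trivially destructible element types, so TCMalloc has to recover it from its PageMap; std::allocator<char>().deallocate(ptr, n) already knows n and can forward it to the C++14 sized operator delete. The sketch below is only an illustration of that premise, with illustrative names: it replaces the global allocation functions so the chosen path is visible, and whether the sized overload is actually selected depends on the compiler and standard library, e.g. Clang's -fsized-deallocation.)

  #include <cstdio>
  #include <cstdlib>
  #include <memory>
  #include <new>

  // Replacement global allocation functions that log which deallocation
  // path is taken.  Illustration only; not part of the Snappy/Zippy code.
  void* operator new(std::size_t n) {
    if (void* p = std::malloc(n)) return p;
    throw std::bad_alloc();
  }
  void operator delete(void* p) noexcept {
    std::puts("unsized operator delete");
    std::free(p);
  }
  void operator delete(void* p, std::size_t n) noexcept {
    std::printf("sized operator delete: %zu bytes\n", n);
    std::free(p);
  }

  int main() {
    constexpr std::size_t kBytes = 1 << 16;
    // std::allocator is told the size at deallocation time and may forward
    // it to the sized operator delete above (toolchain-dependent).
    char* p = std::allocator<char>().allocate(kBytes);
    std::allocator<char>().deallocate(p, kBytes);
    return 0;
  }
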
Author: ckennelly, 2018-10-15 13:26:59 -07:00 (committed by Victor Costan)
Parent: 1b7466e143
Commit: df5548c0b3
1 changed file with 21 additions and 11 deletions

@@ -942,6 +942,17 @@ bool GetUncompressedLength(Source* source, uint32* result) {
   return decompressor.ReadUncompressedLength(result);
 }
+struct Deleter {
+  Deleter() : size_(0) {}
+  explicit Deleter(size_t size) : size_(size) {}
+  void operator()(char* ptr) const {
+    std::allocator<char>().deallocate(ptr, size_);
+  }
+  size_t size_;
+};
 size_t Compress(Source* reader, Sink* writer) {
   size_t written = 0;
   size_t N = reader->Available();
@@ -952,8 +963,8 @@ size_t Compress(Source* reader, Sink* writer) {
   written += (p - ulength);
   internal::WorkingMemory wmem;
-  char* scratch = NULL;
-  char* scratch_output = NULL;
+  std::unique_ptr<char, Deleter> scratch;
+  std::unique_ptr<char, Deleter> scratch_output;
   while (N > 0) {
     // Get next block to compress (without copying if possible)
@@ -974,20 +985,21 @@ size_t Compress(Source* reader, Sink* writer) {
         // If this is the last iteration, we want to allocate N bytes
         // of space, otherwise the max possible kBlockSize space.
        // num_to_read contains exactly the correct value
-        scratch = new char[num_to_read];
+        scratch = {
+            std::allocator<char>().allocate(num_to_read), Deleter(num_to_read)};
       }
-      memcpy(scratch, fragment, bytes_read);
+      memcpy(scratch.get(), fragment, bytes_read);
       reader->Skip(bytes_read);
       while (bytes_read < num_to_read) {
         fragment = reader->Peek(&fragment_size);
         size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
-        memcpy(scratch + bytes_read, fragment, n);
+        memcpy(scratch.get() + bytes_read, fragment, n);
         bytes_read += n;
         reader->Skip(n);
       }
       assert(bytes_read == num_to_read);
-      fragment = scratch;
+      fragment = scratch.get();
       fragment_size = num_to_read;
     }
     assert(fragment_size == num_to_read);
@@ -1002,13 +1014,14 @@ size_t Compress(Source* reader, Sink* writer) {
     // Need a scratch buffer for the output, in case the byte sink doesn't
     // have room for us directly.
     if (scratch_output == NULL) {
-      scratch_output = new char[max_output];
+      scratch_output =
+          {std::allocator<char>().allocate(max_output), Deleter(max_output)};
     } else {
       // Since we encode kBlockSize regions followed by a region
       // which is <= kBlockSize in length, a previously allocated
       // scratch_output[] region is big enough for this iteration.
     }
-    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
+    char* dest = writer->GetAppendBuffer(max_output, scratch_output.get());
     char* end = internal::CompressFragment(fragment, fragment_size,
                                            dest, table, table_size);
     writer->Append(dest, end - dest);
@@ -1020,9 +1033,6 @@ size_t Compress(Source* reader, Sink* writer) {
   Report("snappy_compress", written, uncompressed_size);
-  delete[] scratch;
-  delete[] scratch_output;
   return written;
 }
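
(For reference, a minimal standalone sketch of the pattern the patch adopts, with illustrative names rather than the actual Snappy code: the custom deleter records how many bytes were allocated so std::allocator<char>::deallocate() can return the exact size to the allocator, and the default constructor exists so the unique_ptr can start out empty, just as scratch and scratch_output do above.)

  #include <cstddef>
  #include <memory>

  // Illustrative re-creation of the patch's scratch-buffer pattern (not
  // the Snappy source): the deleter carries the allocation size so the
  // sized deallocation path can be used when the buffer is released.
  struct SizedDeleter {
    SizedDeleter() : size_(0) {}  // lets the unique_ptr be declared empty
    explicit SizedDeleter(std::size_t size) : size_(size) {}
    void operator()(char* ptr) const {
      std::allocator<char>().deallocate(ptr, size_);
    }
    std::size_t size_;
  };

  using ScratchBuffer = std::unique_ptr<char, SizedDeleter>;

  ScratchBuffer MakeScratch(std::size_t bytes) {
    return ScratchBuffer(std::allocator<char>().allocate(bytes),
                         SizedDeleter(bytes));
  }

  int main() {
    ScratchBuffer scratch;                         // starts empty, as in Compress()
    if (!scratch) scratch = MakeScratch(1 << 16);  // allocate lazily
    scratch.get()[0] = '\0';                       // raw access goes through .get()
    return 0;                                      // sized deallocation on destruction
  }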