From df5548c0b36f90b71575dd96bc10387893e20b3f Mon Sep 17 00:00:00 2001 From: ckennelly Date: Mon, 15 Oct 2018 13:26:59 -0700 Subject: [PATCH] Use sized deallocation when releasing Zippy's scratch buffers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit name old time/op new time/op delta BM_UFlat/0 [html ] 41.7µs ± 0% 41.7µs ± 0% ~ (p=0.222 n=5+5) BM_UFlat/1 [urls ] 587µs ± 0% 574µs ± 0% -2.31% (p=0.008 n=5+5) BM_UFlat/2 [jpg ] 7.24µs ± 2% 7.25µs ± 2% ~ (p=0.690 n=5+5) BM_UFlat/3 [jpg_200 ] 130ns ± 0% 131ns ± 1% ~ (p=0.556 n=4+5) BM_UFlat/4 [pdf ] 8.21µs ± 0% 8.24µs ± 1% ~ (p=0.278 n=5+5) BM_UFlat/5 [html4 ] 219µs ± 0% 220µs ± 0% +0.45% (p=0.008 n=5+5) BM_UFlat/6 [txt1 ] 192µs ± 0% 190µs ± 0% -0.86% (p=0.008 n=5+5) BM_UFlat/7 [txt2 ] 169µs ± 0% 168µs ± 0% -0.54% (p=0.008 n=5+5) BM_UFlat/8 [txt3 ] 509µs ± 0% 505µs ± 0% -0.66% (p=0.008 n=5+5) BM_UFlat/9 [txt4 ] 710µs ± 0% 702µs ± 0% -1.14% (p=0.008 n=5+5) BM_UFlat/10 [pb ] 38.2µs ± 0% 37.9µs ± 0% -0.82% (p=0.008 n=5+5) BM_UFlat/11 [gaviota ] 189µs ± 0% 189µs ± 0% ~ (p=0.746 n=5+5) BM_UFlat/12 [cp ] 14.2µs ± 0% 14.2µs ± 1% ~ (p=0.421 n=5+5) BM_UFlat/13 [c ] 7.29µs ± 0% 7.34µs ± 1% +0.69% (p=0.016 n=5+5) BM_UFlat/14 [lsp ] 2.27µs ± 0% 2.28µs ± 0% +0.34% (p=0.008 n=5+5) BM_UFlat/15 [xls ] 954µs ± 0% 900µs ± 0% -5.67% (p=0.008 n=5+5) BM_UFlat/16 [xls_200 ] 213ns ± 1% 217ns ± 2% ~ (p=0.056 n=5+5) BM_UFlat/17 [bin ] 276µs ± 0% 274µs ± 0% -0.94% (p=0.008 n=5+5) BM_UFlat/18 [bin_200 ] 101ns ± 1% 101ns ± 1% ~ (p=0.524 n=5+5) BM_UFlat/19 [sum ] 29.3µs ± 0% 27.3µs ± 0% -6.98% (p=0.008 n=5+5) BM_UFlat/20 [man ] 2.95µs ± 0% 2.95µs ± 0% ~ (p=0.651 n=5+5) For microbenchmarks, the overhead of allocating/deallocating should be small (the relevant metadata for TCMalloc's PageMap will be in cache), but this helps demonstrate that the refactoring does not adversely impact performance. --- snappy.cc | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/snappy.cc b/snappy.cc index 5352c24..afb9f6f 100644 --- a/snappy.cc +++ b/snappy.cc @@ -942,6 +942,17 @@ bool GetUncompressedLength(Source* source, uint32* result) { return decompressor.ReadUncompressedLength(result); } +struct Deleter { + Deleter() : size_(0) {} + explicit Deleter(size_t size) : size_(size) {} + + void operator()(char* ptr) const { + std::allocator().deallocate(ptr, size_); + } + + size_t size_; +}; + size_t Compress(Source* reader, Sink* writer) { size_t written = 0; size_t N = reader->Available(); @@ -952,8 +963,8 @@ size_t Compress(Source* reader, Sink* writer) { written += (p - ulength); internal::WorkingMemory wmem; - char* scratch = NULL; - char* scratch_output = NULL; + std::unique_ptr scratch; + std::unique_ptr scratch_output; while (N > 0) { // Get next block to compress (without copying if possible) @@ -974,20 +985,21 @@ size_t Compress(Source* reader, Sink* writer) { // If this is the last iteration, we want to allocate N bytes // of space, otherwise the max possible kBlockSize space. // num_to_read contains exactly the correct value - scratch = new char[num_to_read]; + scratch = { + std::allocator().allocate(num_to_read), Deleter(num_to_read)}; } - memcpy(scratch, fragment, bytes_read); + memcpy(scratch.get(), fragment, bytes_read); reader->Skip(bytes_read); while (bytes_read < num_to_read) { fragment = reader->Peek(&fragment_size); size_t n = std::min(fragment_size, num_to_read - bytes_read); - memcpy(scratch + bytes_read, fragment, n); + memcpy(scratch.get() + bytes_read, fragment, n); bytes_read += n; reader->Skip(n); } assert(bytes_read == num_to_read); - fragment = scratch; + fragment = scratch.get(); fragment_size = num_to_read; } assert(fragment_size == num_to_read); @@ -1002,13 +1014,14 @@ size_t Compress(Source* reader, Sink* writer) { // Need a scratch buffer for the output, in case the byte sink doesn't // have room for us directly. if (scratch_output == NULL) { - scratch_output = new char[max_output]; + scratch_output = + {std::allocator().allocate(max_output), Deleter(max_output)}; } else { // Since we encode kBlockSize regions followed by a region // which is <= kBlockSize in length, a previously allocated // scratch_output[] region is big enough for this iteration. } - char* dest = writer->GetAppendBuffer(max_output, scratch_output); + char* dest = writer->GetAppendBuffer(max_output, scratch_output.get()); char* end = internal::CompressFragment(fragment, fragment_size, dest, table, table_size); writer->Append(dest, end - dest); @@ -1020,9 +1033,6 @@ size_t Compress(Source* reader, Sink* writer) { Report("snappy_compress", written, uncompressed_size); - delete[] scratch; - delete[] scratch_output; - return written; }