mirror of https://github.com/google/snappy.git
Use sized deallocation when releasing Zippy's scratch buffers.
name                       old time/op  new time/op  delta
BM_UFlat/0   [html    ]    41.7µs ± 0%  41.7µs ± 0%     ~    (p=0.222 n=5+5)
BM_UFlat/1   [urls    ]     587µs ± 0%   574µs ± 0%  -2.31%  (p=0.008 n=5+5)
BM_UFlat/2   [jpg     ]    7.24µs ± 2%  7.25µs ± 2%     ~    (p=0.690 n=5+5)
BM_UFlat/3   [jpg_200 ]     130ns ± 0%   131ns ± 1%     ~    (p=0.556 n=4+5)
BM_UFlat/4   [pdf     ]    8.21µs ± 0%  8.24µs ± 1%     ~    (p=0.278 n=5+5)
BM_UFlat/5   [html4   ]     219µs ± 0%   220µs ± 0%  +0.45%  (p=0.008 n=5+5)
BM_UFlat/6   [txt1    ]     192µs ± 0%   190µs ± 0%  -0.86%  (p=0.008 n=5+5)
BM_UFlat/7   [txt2    ]     169µs ± 0%   168µs ± 0%  -0.54%  (p=0.008 n=5+5)
BM_UFlat/8   [txt3    ]     509µs ± 0%   505µs ± 0%  -0.66%  (p=0.008 n=5+5)
BM_UFlat/9   [txt4    ]     710µs ± 0%   702µs ± 0%  -1.14%  (p=0.008 n=5+5)
BM_UFlat/10  [pb      ]    38.2µs ± 0%  37.9µs ± 0%  -0.82%  (p=0.008 n=5+5)
BM_UFlat/11  [gaviota ]     189µs ± 0%   189µs ± 0%     ~    (p=0.746 n=5+5)
BM_UFlat/12  [cp      ]    14.2µs ± 0%  14.2µs ± 1%     ~    (p=0.421 n=5+5)
BM_UFlat/13  [c       ]    7.29µs ± 0%  7.34µs ± 1%  +0.69%  (p=0.016 n=5+5)
BM_UFlat/14  [lsp     ]    2.27µs ± 0%  2.28µs ± 0%  +0.34%  (p=0.008 n=5+5)
BM_UFlat/15  [xls     ]     954µs ± 0%   900µs ± 0%  -5.67%  (p=0.008 n=5+5)
BM_UFlat/16  [xls_200 ]     213ns ± 1%   217ns ± 2%     ~    (p=0.056 n=5+5)
BM_UFlat/17  [bin     ]     276µs ± 0%   274µs ± 0%  -0.94%  (p=0.008 n=5+5)
BM_UFlat/18  [bin_200 ]     101ns ± 1%   101ns ± 1%     ~    (p=0.524 n=5+5)
BM_UFlat/19  [sum     ]    29.3µs ± 0%  27.3µs ± 0%  -6.98%  (p=0.008 n=5+5)
BM_UFlat/20  [man     ]    2.95µs ± 0%  2.95µs ± 0%     ~    (p=0.651 n=5+5)

For microbenchmarks, the overhead of allocating and deallocating should be
small (the relevant metadata for TCMalloc's PageMap will be in cache), but
this helps demonstrate that the refactoring does not adversely impact
performance.
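For context on the technique named in the commit title: an unsized release such as delete[] forces the allocator to look up how large the allocation was before it can free it, while std::allocator<T>::deallocate(ptr, n) hands the size back to the allocator, which implementations like TCMalloc can turn into a cheaper free path. A minimal standalone sketch of the difference (the buffer size here is an arbitrary example, not a value from this commit):

#include <cstddef>
#include <memory>

int main() {
  constexpr std::size_t kSize = 64 * 1024;  // arbitrary example size

  // Unsized: the allocator must recover the allocation size from its own
  // metadata (e.g. TCMalloc's PageMap) before it can free the block.
  char* a = new char[kSize];
  delete[] a;

  // Sized: the caller supplies the size, so the allocator can go straight
  // to the appropriate free list without the metadata lookup.
  char* b = std::allocator<char>().allocate(kSize);
  std::allocator<char>().deallocate(b, kSize);
}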
parent 1b7466e143
commit df5548c0b3
snappy.cc | 32 +++++++++++++++++++++-----------
1 file changed, 21 insertions(+), 11 deletions(-)
@@ -942,6 +942,17 @@ bool GetUncompressedLength(Source* source, uint32* result) {
   return decompressor.ReadUncompressedLength(result);
 }
 
+struct Deleter {
+  Deleter() : size_(0) {}
+  explicit Deleter(size_t size) : size_(size) {}
+
+  void operator()(char* ptr) const {
+    std::allocator<char>().deallocate(ptr, size_);
+  }
+
+  size_t size_;
+};
+
 size_t Compress(Source* reader, Sink* writer) {
   size_t written = 0;
   size_t N = reader->Available();
@@ -952,8 +963,8 @@ size_t Compress(Source* reader, Sink* writer) {
   written += (p - ulength);
 
   internal::WorkingMemory wmem;
-  char* scratch = NULL;
-  char* scratch_output = NULL;
+  std::unique_ptr<char, Deleter> scratch;
+  std::unique_ptr<char, Deleter> scratch_output;
 
   while (N > 0) {
     // Get next block to compress (without copying if possible)
@@ -974,20 +985,21 @@ size_t Compress(Source* reader, Sink* writer) {
         // If this is the last iteration, we want to allocate N bytes
         // of space, otherwise the max possible kBlockSize space.
         // num_to_read contains exactly the correct value
-        scratch = new char[num_to_read];
+        scratch = {
+            std::allocator<char>().allocate(num_to_read), Deleter(num_to_read)};
       }
-      memcpy(scratch, fragment, bytes_read);
+      memcpy(scratch.get(), fragment, bytes_read);
       reader->Skip(bytes_read);
 
       while (bytes_read < num_to_read) {
         fragment = reader->Peek(&fragment_size);
         size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
-        memcpy(scratch + bytes_read, fragment, n);
+        memcpy(scratch.get() + bytes_read, fragment, n);
         bytes_read += n;
         reader->Skip(n);
       }
       assert(bytes_read == num_to_read);
-      fragment = scratch;
+      fragment = scratch.get();
       fragment_size = num_to_read;
     }
     assert(fragment_size == num_to_read);
@@ -1002,13 +1014,14 @@ size_t Compress(Source* reader, Sink* writer) {
     // Need a scratch buffer for the output, in case the byte sink doesn't
     // have room for us directly.
     if (scratch_output == NULL) {
-      scratch_output = new char[max_output];
+      scratch_output =
+          {std::allocator<char>().allocate(max_output), Deleter(max_output)};
     } else {
       // Since we encode kBlockSize regions followed by a region
       // which is <= kBlockSize in length, a previously allocated
       // scratch_output[] region is big enough for this iteration.
     }
-    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
+    char* dest = writer->GetAppendBuffer(max_output, scratch_output.get());
     char* end = internal::CompressFragment(fragment, fragment_size,
                                            dest, table, table_size);
     writer->Append(dest, end - dest);
@@ -1020,9 +1033,6 @@ size_t Compress(Source* reader, Sink* writer) {
 
   Report("snappy_compress", written, uncompressed_size);
 
-  delete[] scratch;
-  delete[] scratch_output;
-
   return written;
 }
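Taken out of the diff, the new ownership pattern reads roughly as follows. The Deleter struct matches the one added above (with size_t spelled std::size_t for self-containment); the main() scaffolding and the size n are illustrative, not snappy code:

#include <cstddef>
#include <memory>

// The deleter carries the allocation size so the buffer can be released
// with a sized deallocation instead of delete[].
struct Deleter {
  Deleter() : size_(0) {}
  explicit Deleter(std::size_t size) : size_(size) {}

  void operator()(char* ptr) const {
    std::allocator<char>().deallocate(ptr, size_);
  }

  std::size_t size_;
};

int main() {
  const std::size_t n = 4096;  // illustrative size

  // A default-constructed pointer is empty, like scratch before the first
  // block that needs copying.
  std::unique_ptr<char, Deleter> scratch;

  // Brace assignment binds the raw buffer and a size-carrying deleter in
  // one step, mirroring `scratch = {allocate(n), Deleter(n)};` in the diff.
  scratch = {std::allocator<char>().allocate(n), Deleter(n)};

  scratch.get()[0] = 'x';  // all access goes through .get(), as in the diff
}  // ~unique_ptr runs Deleter::operator(), i.e. deallocate(ptr, n)

Because both buffers are allocated at most once per Compress() call (note the NULL-style guards kept in the diff), the size each deleter stores always matches its live allocation.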