Merge branch 'master' into explicitly_ask_for_avx2

Andrzej Potepa 2021-09-16 17:10:10 +02:00 committed by GitHub
commit 9627a64a5e
22 changed files with 2223 additions and 1524 deletions


@ -1,36 +0,0 @@
# Build matrix / environment variables are explained on:
# https://www.appveyor.com/docs/appveyor-yml/
# This file can be validated on: https://ci.appveyor.com/tools/validate-yaml
version: "{build}"
environment:
matrix:
# AppVeyor currently has no custom job name feature.
# http://help.appveyor.com/discussions/questions/1623-can-i-provide-a-friendly-name-for-jobs
- JOB: Visual Studio 2019
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
CMAKE_GENERATOR: Visual Studio 16 2019
platform:
- x86
- x64
configuration:
- RelWithDebInfo
- Debug
build_script:
- git submodule update --init --recursive
- mkdir build
- cd build
- if "%platform%"=="x86" (set CMAKE_GENERATOR_PLATFORM="Win32")
else (set CMAKE_GENERATOR_PLATFORM="%platform%")
- cmake --version
- cmake .. -G "%CMAKE_GENERATOR%" -A "%CMAKE_GENERATOR_PLATFORM%"
-DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" -DSNAPPY_REQUIRE_AVX2=ON
- cmake --build . --config %CONFIGURATION%
- cd ..
test_script:
- build\%CONFIGURATION%\snappy_unittest

.github/workflows/build.yml (new file, 135 lines added)

@ -0,0 +1,135 @@
# Copyright 2021 Google Inc. All Rights Reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: ci
on: [push, pull_request]
permissions:
contents: read
jobs:
build-and-test:
name: >-
CI
${{ matrix.os }}
${{ matrix.cpu_level }}
${{ matrix.compiler }}
${{ matrix.optimized && 'release' || 'debug' }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
compiler: [clang, gcc, msvc]
os: [ubuntu-latest, macos-latest, windows-latest]
cpu_level: [baseline, avx, avx2]
optimized: [true, false]
exclude:
# MSVC only works on Windows.
- os: ubuntu-latest
compiler: msvc
- os: macos-latest
compiler: msvc
# GitHub servers seem to run on pre-Haswell CPUs. Attempting to use AVX2
# results in crashes.
- os: macos-latest
cpu_level: avx2
# Not testing with GCC on macOS.
- os: macos-latest
compiler: gcc
# Only testing with MSVC on Windows.
- os: windows-latest
compiler: clang
- os: windows-latest
compiler: gcc
include:
- compiler: clang
CC: clang
CXX: clang++
- compiler: gcc
CC: gcc
CXX: g++
- compiler: msvc
CC:
CXX:
env:
CMAKE_BUILD_DIR: ${{ github.workspace }}/build
CMAKE_BUILD_TYPE: ${{ matrix.optimized && 'RelWithDebInfo' || 'Debug' }}
CC: ${{ matrix.CC }}
CXX: ${{ matrix.CXX }}
SNAPPY_REQUIRE_AVX: ${{ matrix.cpu_level == 'baseline' && '0' || '1' }}
SNAPPY_REQUIRE_AVX2: ${{ matrix.cpu_level == 'avx2' && '1' || '0' }}
SNAPPY_FUZZING_BUILD: >-
${{ (startsWith(matrix.os, 'ubuntu') && matrix.compiler == 'clang' &&
!matrix.optimized) && '1' || '0' }}
BINARY_SUFFIX: ${{ startsWith(matrix.os, 'windows') && '.exe' || '' }}
BINARY_PATH: >-
${{ format(
startsWith(matrix.os, 'windows') && '{0}\build\{1}\' || '{0}/build/',
github.workspace,
matrix.optimized && 'RelWithDebInfo' || 'Debug') }}
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Generate build config
run: >-
cmake -S "${{ github.workspace }}" -B "${{ env.CMAKE_BUILD_DIR }}"
-DCMAKE_BUILD_TYPE=${{ env.CMAKE_BUILD_TYPE }}
-DCMAKE_INSTALL_PREFIX=${{ runner.temp }}/install_test/
-DSNAPPY_FUZZING_BUILD=${{ env.SNAPPY_FUZZING_BUILD }}
-DSNAPPY_REQUIRE_AVX=${{ env.SNAPPY_REQUIRE_AVX }}
-DSNAPPY_REQUIRE_AVX2=${{ env.SNAPPY_REQUIRE_AVX2 }}
- name: Build
run: >-
cmake --build "${{ env.CMAKE_BUILD_DIR }}"
--config "${{ env.CMAKE_BUILD_TYPE }}"
- name: Run C++ API Tests
run: ${{ env.BINARY_PATH }}snappy_unittest${{ env.BINARY_SUFFIX }}
- name: Run Compression Fuzzer
if: ${{ env.SNAPPY_FUZZING_BUILD == '1' }}
run: >-
${{ env.BINARY_PATH }}snappy_compress_fuzzer${{ env.BINARY_SUFFIX }}
-runs=1000 -close_fd_mask=3
- name: Run Decompression Fuzzer
if: ${{ env.SNAPPY_FUZZING_BUILD == '1' }}
run: >-
${{ env.BINARY_PATH }}snappy_uncompress_fuzzer${{ env.BINARY_SUFFIX }}
-runs=1000 -close_fd_mask=3
- name: Run Benchmarks
run: ${{ env.BINARY_PATH }}snappy_benchmark${{ env.BINARY_SUFFIX }}
- name: Test CMake installation
run: cmake --build "${{ env.CMAKE_BUILD_DIR }}" --target install

.gitmodules (new file, 6 lines added)

@ -0,0 +1,6 @@
[submodule "third_party/benchmark"]
path = third_party/benchmark
url = https://github.com/google/benchmark.git
[submodule "third_party/googletest"]
path = third_party/googletest
url = https://github.com/google/googletest.git


@ -1,98 +0,0 @@
# Build matrix / environment variables are explained on:
# http://about.travis-ci.org/docs/user/build-configuration/
# This file can be validated on: http://lint.travis-ci.org/
language: cpp
dist: bionic
osx_image: xcode11.3
compiler:
- gcc
- clang
os:
- linux
- osx
env:
- BUILD_TYPE=Debug CPU_LEVEL=AVX
- BUILD_TYPE=Debug CPU_LEVEL=AVX2
- BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX
- BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX2
jobs:
exclude:
# Travis OSX servers seem to run on pre-Haswell CPUs. Attempting to use AVX2
# results in crashes.
- env: BUILD_TYPE=Debug CPU_LEVEL=AVX2
os: osx
- env: BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX2
os: osx
allow_failures:
# Homebrew's GCC is currently broken on XCode 11.
- compiler: gcc
os: osx
addons:
apt:
sources:
- sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main'
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
- sourceline: 'ppa:ubuntu-toolchain-r/test'
packages:
- clang-10
- cmake
- gcc-9
- g++-9
- ninja-build
homebrew:
packages:
- cmake
- gcc@9
- llvm@10
- ninja
update: true
install:
# The following Homebrew packages aren't linked by default, and need to be
# prepended to the path explicitly.
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then
export PATH="$(brew --prefix llvm)/bin:$PATH";
fi
# Fuzzing is only supported on Clang. Perform fuzzing on Debug builds.
- if [ "$CXX" = "clang++" ] && [ "$BUILD_TYPE" = "Debug" ]; then
export FUZZING=1;
else
export FUZZING=0;
fi
# /usr/bin/gcc points to an older compiler on both Linux and macOS.
- if [ "$CXX" = "g++" ]; then export CXX="g++-9" CC="gcc-9"; fi
# /usr/bin/clang points to an older compiler on both Linux and macOS.
#
# Homebrew's llvm package doesn't ship a versioned clang++ binary, so the values
# below don't work on macOS. Fortunately, the path change above makes the
# default values (clang and clang++) resolve to the correct compiler on macOS.
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then
if [ "$CXX" = "clang++" ]; then export CXX="clang++-10" CC="clang-10"; fi;
fi
- echo ${CC}
- echo ${CXX}
- ${CXX} --version
- cmake --version
before_script:
- mkdir -p build && cd build
- cmake .. -G Ninja -DCMAKE_BUILD_TYPE=$BUILD_TYPE
-DSNAPPY_REQUIRE_${CPU_LEVEL}=ON -DSNAPPY_FUZZING_BUILD=${FUZZING}
-DCMAKE_INSTALL_PREFIX=$HOME/.local
- cmake --build .
- cd ..
script:
- build/snappy_unittest
- if [ -f build/snappy_compress_fuzzer ]; then
build/snappy_compress_fuzzer -runs=1000 -close_fd_mask=3;
fi
- if [ -f build/snappy_uncompress_fuzzer ]; then
build/snappy_uncompress_fuzzer -runs=1000 -close_fd_mask=3;
fi
- cd build && cmake --build . --target install


@ -27,7 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.1)
project(Snappy VERSION 1.1.8 LANGUAGES C CXX)
project(Snappy VERSION 1.1.9 LANGUAGES C CXX)
# C++ standard can be overridden when this is used as a sub-project.
if(NOT CMAKE_CXX_STANDARD)
@ -66,10 +66,12 @@ else(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra")
# Use -Werror for clang and gcc.
if(NOT CMAKE_CXX_FLAGS MATCHES "-Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif(NOT CMAKE_CXX_FLAGS MATCHES "-Werror")
# Use -Werror for clang only.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if(NOT CMAKE_CXX_FLAGS MATCHES "-Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif(NOT CMAKE_CXX_FLAGS MATCHES "-Werror")
endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
# Disable C++ exceptions.
string(REGEX REPLACE "-fexceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
@ -86,6 +88,8 @@ option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF)
option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON)
option(SNAPPY_BUILD_BENCHMARKS "Build Snappy's benchmarks" ON)
option(SNAPPY_FUZZING_BUILD "Build Snappy for fuzzing." OFF)
option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF)
@ -108,6 +112,7 @@ check_include_file("windows.h" HAVE_WINDOWS_H)
include(CheckLibraryExists)
check_library_exists(z zlibVersion "" HAVE_LIBZ)
check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2)
check_library_exists(lz4 LZ4_compress_default "" HAVE_LIBLZ4)
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX)
@ -133,6 +138,10 @@ elseif (SNAPPY_REQUIRE_AVX)
endif(HAVE_CLANG_MAVX)
endif(SNAPPY_REQUIRE_AVX2)
# Used by googletest.
check_cxx_compiler_flag(-Wno-missing-field-initializers
SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
int main() {
@ -171,20 +180,19 @@ if(SNAPPY_REQUIRE_AVX2)
}" SNAPPY_HAVE_BMI2)
endif(SNAPPY_REQUIRE_AVX2)
check_cxx_source_compiles("
#include <arm_neon.h>
int main() {
uint8_t val = 3, dup[8];
uint8x16_t v = vld1q_dup_u8(&val);
vst1q_u8(dup, v);
return 0;
}" SNAPPY_HAVE_NEON)
include(CheckSymbolExists)
check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP)
check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF)
find_package(GTest QUIET)
if(GTEST_FOUND)
set(HAVE_GTEST 1)
endif(GTEST_FOUND)
find_package(Gflags QUIET)
if(GFLAGS_FOUND)
set(HAVE_GFLAGS 1)
endif(GFLAGS_FOUND)
configure_file(
"cmake/config.h.in"
"${PROJECT_BINARY_DIR}/config.h"
@ -252,38 +260,92 @@ if(BUILD_SHARED_LIBS)
set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif(BUILD_SHARED_LIBS)
if(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
add_library(snappy_test_support "")
target_sources(snappy_test_support
PRIVATE
"snappy-test.cc"
"snappy-test.h"
"snappy_test_data.cc"
"snappy_test_data.h"
"${PROJECT_BINARY_DIR}/config.h"
)
# Test files include snappy-test.h, HAVE_CONFIG_H must be defined.
target_compile_definitions(snappy_test_support PUBLIC -DHAVE_CONFIG_H)
target_link_libraries(snappy_test_support snappy)
if(HAVE_LIBZ)
target_link_libraries(snappy_test_support z)
endif(HAVE_LIBZ)
if(HAVE_LIBLZO2)
target_link_libraries(snappy_test_support lzo2)
endif(HAVE_LIBLZO2)
if(HAVE_LIBLZ4)
target_link_libraries(snappy_test_support lz4)
endif(HAVE_LIBLZ4)
target_include_directories(snappy_test_support
BEFORE PUBLIC
"${PROJECT_SOURCE_DIR}"
)
endif(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
if(SNAPPY_BUILD_TESTS)
enable_testing()
# Prevent overriding the parent project's compiler/linker settings on Windows.
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
set(install_gtest OFF)
set(install_gmock OFF)
set(build_gmock ON)
# This project is tested using GoogleTest.
add_subdirectory("third_party/googletest")
# GoogleTest triggers a missing field initializers warning.
if(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
set_property(TARGET gtest
APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
set_property(TARGET gmock
APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
endif(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
add_executable(snappy_unittest "")
target_sources(snappy_unittest
PRIVATE
"snappy_unittest.cc"
"snappy-test.cc"
)
target_compile_definitions(snappy_unittest PRIVATE -DHAVE_CONFIG_H)
target_link_libraries(snappy_unittest snappy ${GFLAGS_LIBRARIES})
if(HAVE_LIBZ)
target_link_libraries(snappy_unittest z)
endif(HAVE_LIBZ)
if(HAVE_LIBLZO2)
target_link_libraries(snappy_unittest lzo2)
endif(HAVE_LIBLZO2)
target_include_directories(snappy_unittest
BEFORE PRIVATE
"${PROJECT_SOURCE_DIR}"
"${GTEST_INCLUDE_DIRS}"
"${GFLAGS_INCLUDE_DIRS}"
)
target_link_libraries(snappy_unittest snappy_test_support gmock_main gtest)
add_test(
NAME snappy_unittest
WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
COMMAND "${PROJECT_BINARY_DIR}/snappy_unittest")
add_executable(snappy_test_tool "")
target_sources(snappy_test_tool
PRIVATE
"snappy_test_tool.cc"
)
target_link_libraries(snappy_test_tool snappy_test_support)
endif(SNAPPY_BUILD_TESTS)
if(SNAPPY_BUILD_BENCHMARKS)
add_executable(snappy_benchmark "")
target_sources(snappy_benchmark
PRIVATE
"snappy_benchmark.cc"
)
target_link_libraries(snappy_benchmark snappy_test_support benchmark_main)
# This project uses Google benchmark for benchmarking.
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE)
add_subdirectory("third_party/benchmark")
endif(SNAPPY_BUILD_BENCHMARKS)
if(SNAPPY_FUZZING_BUILD)
add_executable(snappy_compress_fuzzer "")
target_sources(snappy_compress_fuzzer


@ -3,6 +3,26 @@
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
## Project Goals
In addition to the aims listed at the top of the [README](README.md) Snappy
explicitly supports the following:
1. C++11
2. Clang (gcc and MSVC are best-effort).
3. Low level optimizations (e.g. assembly or equivalent intrinsics) for:
1. [x86](https://en.wikipedia.org/wiki/X86)
2. [x86-64](https://en.wikipedia.org/wiki/X86-64)
3. ARMv7 (32-bit)
4. ARMv8 (AArch64)
4. Supports only the Snappy compression scheme as described in
[format_description.txt](format_description.txt).
5. CMake for building
Changes adding features or dependencies outside of the core area of focus listed
above might not be accepted. If in doubt post a message to the
[Snappy discussion mailing list](https://groups.google.com/g/snappy-compression).
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License

NEWS (6 lines changed)

@ -1,3 +1,9 @@
Snappy v1.1.9, May 4th 2021:
* Performance improvements.
* Google Test and Google Benchmark are now bundled in third_party/.
Snappy v1.1.8, January 15th 2020:
* Small performance improvements.


@ -1,5 +1,6 @@
Snappy, a fast compressor/decompressor.
[![Build Status](https://github.com/google/snappy/actions/workflows/build.yml/badge.svg)](https://github.com/google/snappy/actions/workflows/build.yml)
Introduction
============
@ -69,6 +70,7 @@ You need the CMake version specified in [CMakeLists.txt](./CMakeLists.txt)
or later to build:
```bash
git submodule update --init
mkdir build
cd build && cmake ../ && make
```
@ -107,42 +109,31 @@ information.
Tests and benchmarks
====================
When you compile Snappy, snappy_unittest is compiled in addition to the
library itself. You do not need it to use the compressor from your own library,
but it contains several useful components for Snappy development.
When you compile Snappy, the following binaries are compiled in addition to the
library itself. You do not need them to use the compressor from your own
library, but they are useful for Snappy development.
First of all, it contains unit tests, verifying correctness on your machine in
various scenarios. If you want to change or optimize Snappy, please run the
tests to verify you have not broken anything. Note that if you have the
Google Test library installed, unit test behavior (especially failures) will be
significantly more user-friendly. You can find Google Test at
* `snappy_benchmark` contains microbenchmarks used to tune compression and
decompression performance.
* `snappy_unittest` contains unit tests, verifying correctness on your machine
in various scenarios.
* `snappy_test_tool` can benchmark Snappy against a few other compression
libraries (zlib, LZO, LZF, and QuickLZ), if they were detected at configure
time. To benchmark using a given file, give the compression algorithm you want
to test Snappy against (e.g. --zlib) and then a list of one or more file names
on the command line.
https://github.com/google/googletest
If you want to change or optimize Snappy, please run the tests and benchmarks to
verify you have not broken anything.
You probably also want the gflags library for handling of command-line flags;
you can find it at
https://gflags.github.io/gflags/
In addition to the unit tests, snappy contains microbenchmarks used to
tune compression and decompression performance. These are automatically run
before the unit tests, but you can disable them using the flag
--run_microbenchmarks=false if you have gflags installed (otherwise you will
need to edit the source).
Finally, snappy can benchmark Snappy against a few other compression libraries
(zlib, LZO, LZF, and QuickLZ), if they were detected at configure time.
To benchmark using a given file, give the compression algorithm you want to test
Snappy against (e.g. --zlib) and then a list of one or more file names on the
command line. The testdata/ directory contains the files used by the
microbenchmark, which should provide a reasonably balanced starting point for
benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they
are used to verify correctness in the presence of corrupted data in the unit
test.)
The testdata/ directory contains the files used by the microbenchmarks, which
should provide a reasonably balanced starting point for benchmarking. (Note that
baddata[1-3].snappy are not intended as benchmarks; they are used to verify
correctness in the presence of corrupted data in the unit test.)
Contact
=======
Snappy is distributed through GitHub. For the latest version, a bug tracker,
and other information, see https://github.com/google/snappy.
Snappy is distributed through GitHub. For the latest version and other
information, see https://github.com/google/snappy.


@ -2,49 +2,46 @@
#define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
/* Define to 1 if the compiler supports __attribute__((always_inline)). */
#cmakedefine HAVE_ATTRIBUTE_ALWAYS_INLINE 1
#cmakedefine01 HAVE_ATTRIBUTE_ALWAYS_INLINE
/* Define to 1 if the compiler supports __builtin_ctz and friends. */
#cmakedefine HAVE_BUILTIN_CTZ 1
#cmakedefine01 HAVE_BUILTIN_CTZ
/* Define to 1 if the compiler supports __builtin_expect. */
#cmakedefine HAVE_BUILTIN_EXPECT 1
#cmakedefine01 HAVE_BUILTIN_EXPECT
/* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
#cmakedefine HAVE_FUNC_MMAP 1
#cmakedefine01 HAVE_FUNC_MMAP
/* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
#cmakedefine HAVE_FUNC_SYSCONF 1
/* Define to 1 to use the gflags package for command-line parsing. */
#cmakedefine HAVE_GFLAGS 1
/* Define to 1 if you have Google Test. */
#cmakedefine HAVE_GTEST 1
#cmakedefine01 HAVE_FUNC_SYSCONF
/* Define to 1 if you have the `lzo2' library (-llzo2). */
#cmakedefine HAVE_LIBLZO2 1
#cmakedefine01 HAVE_LIBLZO2
/* Define to 1 if you have the `z' library (-lz). */
#cmakedefine HAVE_LIBZ 1
#cmakedefine01 HAVE_LIBZ
/* Define to 1 if you have the `lz4' library (-llz4). */
#cmakedefine01 HAVE_LIBLZ4
/* Define to 1 if you have the <sys/mman.h> header file. */
#cmakedefine HAVE_SYS_MMAN_H 1
#cmakedefine01 HAVE_SYS_MMAN_H
/* Define to 1 if you have the <sys/resource.h> header file. */
#cmakedefine HAVE_SYS_RESOURCE_H 1
#cmakedefine01 HAVE_SYS_RESOURCE_H
/* Define to 1 if you have the <sys/time.h> header file. */
#cmakedefine HAVE_SYS_TIME_H 1
#cmakedefine01 HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/uio.h> header file. */
#cmakedefine HAVE_SYS_UIO_H 1
#cmakedefine01 HAVE_SYS_UIO_H
/* Define to 1 if you have the <unistd.h> header file. */
#cmakedefine HAVE_UNISTD_H 1
#cmakedefine01 HAVE_UNISTD_H
/* Define to 1 if you have the <windows.h> header file. */
#cmakedefine HAVE_WINDOWS_H 1
#cmakedefine01 HAVE_WINDOWS_H
/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
#cmakedefine01 SNAPPY_HAVE_SSSE3
@ -52,8 +49,11 @@
/* Define if you target processors with BMI2+ and have <bmi2intrin.h>. */
#cmakedefine SNAPPY_HAVE_BMI2
/* Define to 1 if you target processors with NEON and have <arm_neon.h>. */
#cmakedefine01 SNAPPY_HAVE_NEON
/* Define to 1 if your processor stores words with the most significant byte
first (like Motorola and SPARC, unlike Intel and VAX). */
#cmakedefine SNAPPY_IS_BIG_ENDIAN 1
#cmakedefine01 SNAPPY_IS_BIG_ENDIAN
#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
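
The switch from `#cmakedefine` to `#cmakedefine01` above means each feature macro is now always defined, to either 0 or 1, so call sites test it with `#if` rather than `#ifdef` (as the snappy-stubs-internal.h hunks later in this commit do). A minimal sketch of the resulting pattern, using one of the macros defined above:

```cpp
// Sketch only: with #cmakedefine01, HAVE_SYS_MMAN_H expands to 0 or 1 and is
// never left undefined, so this #if cannot silently compile the wrong branch
// just because config.h was not generated for this macro.
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
```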


@ -33,9 +33,84 @@
#include "snappy-stubs-internal.h"
#if SNAPPY_HAVE_SSSE3
// Please do not replace with <x86intrin.h> or with headers that assume more
// advanced SSE versions without checking with all the OWNERS.
#include <emmintrin.h>
#include <tmmintrin.h>
#endif
#if SNAPPY_HAVE_NEON
#include <arm_neon.h>
#endif
#if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON
#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1
#else
#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 0
#endif
namespace snappy {
namespace internal {
#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
#if SNAPPY_HAVE_SSSE3
using V128 = __m128i;
#elif SNAPPY_HAVE_NEON
using V128 = uint8x16_t;
#endif
// Load 128 bits of integer data. `src` must be 16-byte aligned.
inline V128 V128_Load(const V128* src);
// Load 128 bits of integer data. `src` does not need to be aligned.
inline V128 V128_LoadU(const V128* src);
// Store 128 bits of integer data. `dst` does not need to be aligned.
inline void V128_StoreU(V128* dst, V128 val);
// Shuffle packed 8-bit integers using a shuffle mask.
// Each packed integer in the shuffle mask must be in [0,16).
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask);
// Constructs V128 with 16 chars |c|.
inline V128 V128_DupChar(char c);
#if SNAPPY_HAVE_SSSE3
inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); }
inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); }
inline void V128_StoreU(V128* dst, V128 val) { _mm_storeu_si128(dst, val); }
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
return _mm_shuffle_epi8(input, shuffle_mask);
}
inline V128 V128_DupChar(char c) { return _mm_set1_epi8(c); }
#elif SNAPPY_HAVE_NEON
inline V128 V128_Load(const V128* src) {
return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
}
inline V128 V128_LoadU(const V128* src) {
return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
}
inline void V128_StoreU(V128* dst, V128 val) {
vst1q_u8(reinterpret_cast<uint8_t*>(dst), val);
}
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
assert(vminvq_u8(shuffle_mask) >= 0 && vmaxvq_u8(shuffle_mask) <= 15);
return vqtbl1q_u8(input, shuffle_mask);
}
inline V128 V128_DupChar(char c) { return vdupq_n_u8(c); }
#endif
#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
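
As an illustration (not part of the commit), a minimal sketch of using the V128 wrappers declared above portably across SSSE3 and NEON; the function name and reversal mask are hypothetical, and the code assumes the declarations above are in scope:

```cpp
#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
// Sketch: reverse 16 bytes with the V128 helpers. Every mask byte is in
// [0, 16), as V128_Shuffle requires, and the mask is 16-byte aligned so the
// aligned V128_Load is valid.
inline void V128_ReverseBytesExample(const char* src, char* dst) {
  alignas(16) static const uint8_t mask_bytes[16] = {15, 14, 13, 12, 11, 10,
                                                     9,  8,  7,  6,  5,  4,
                                                     3,  2,  1,  0};
  V128 input = V128_LoadU(reinterpret_cast<const V128*>(src));
  V128 mask = V128_Load(reinterpret_cast<const V128*>(mask_bytes));
  V128_StoreU(reinterpret_cast<V128*>(dst), V128_Shuffle(input, mask));
}
#endif  // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
```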
// Working memory performs a single allocation to hold all scratch space
// required for compression.
class WorkingMemory {
@ -95,8 +170,9 @@ char* CompressFragment(const char* input,
// loading from s2 + n.
//
// Separate implementation for 64-bit, little-endian cpus.
#if !defined(SNAPPY_IS_BIG_ENDIAN) && \
(defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
#if !SNAPPY_IS_BIG_ENDIAN && \
(defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \
defined(ARCH_ARM))
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
const char* s2,
const char* s2_limit,
@ -274,7 +350,8 @@ static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual o
// because of efficiency reasons:
// (1) Extracting a byte is faster than a bit-field
// (2) It properly aligns copy offset so we do not need a <<8
static const uint16_t char_table[256] = {
static constexpr uint16_t char_table[256] = {
// clang-format off
0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
@ -306,7 +383,8 @@ static const uint16_t char_table[256] = {
0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040,
// clang-format on
};
} // end namespace internal


@ -146,10 +146,10 @@ class Source {
class ByteArraySource : public Source {
public:
ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { }
virtual ~ByteArraySource();
virtual size_t Available() const;
virtual const char* Peek(size_t* len);
virtual void Skip(size_t n);
~ByteArraySource() override;
size_t Available() const override;
const char* Peek(size_t* len) override;
void Skip(size_t n) override;
private:
const char* ptr_;
size_t left_;
@ -159,15 +159,15 @@ class ByteArraySource : public Source {
class UncheckedByteArraySink : public Sink {
public:
explicit UncheckedByteArraySink(char* dest) : dest_(dest) { }
virtual ~UncheckedByteArraySink();
virtual void Append(const char* data, size_t n);
virtual char* GetAppendBuffer(size_t len, char* scratch);
virtual char* GetAppendBufferVariable(
~UncheckedByteArraySink() override;
void Append(const char* data, size_t n) override;
char* GetAppendBuffer(size_t len, char* scratch) override;
char* GetAppendBufferVariable(
size_t min_size, size_t desired_size_hint, char* scratch,
size_t scratch_size, size_t* allocated_size);
virtual void AppendAndTakeOwnership(
size_t scratch_size, size_t* allocated_size) override;
void AppendAndTakeOwnership(
char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
void *deleter_arg);
void *deleter_arg) override;
// Return the current output pointer so that a caller can see how
// many bytes were produced.
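
For reference, a minimal sketch (assuming the public snappy.h API; the helper name is illustrative) of driving decompression through the Source/Sink classes above, mirroring the BM_UFlatSink benchmark added later in this commit:

```cpp
#include <cstddef>
#include <string>

#include "snappy-sinksource.h"
#include "snappy.h"

// Sketch: wrap a compressed buffer in a ByteArraySource and decompress into a
// pre-sized string through an UncheckedByteArraySink.
bool UncompressWithSink(const std::string& compressed, std::string* output) {
  size_t uncompressed_size = 0;
  if (!snappy::GetUncompressedLength(compressed.data(), compressed.size(),
                                     &uncompressed_size)) {
    return false;
  }
  output->resize(uncompressed_size);
  snappy::ByteArraySource source(compressed.data(), compressed.size());
  snappy::UncheckedByteArraySink sink(&(*output)[0]);
  return snappy::Uncompress(&source, &sink);
}
```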


@ -31,7 +31,7 @@
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
#ifdef HAVE_CONFIG_H
#if HAVE_CONFIG_H
#include "config.h"
#endif
@ -43,11 +43,11 @@
#include <limits>
#include <string>
#ifdef HAVE_SYS_MMAN_H
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#ifdef HAVE_UNISTD_H
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
@ -70,9 +70,7 @@
#include "snappy-stubs-public.h"
// Used to enable 64-bit optimized versions of some routines.
#if defined(__x86_64__) || defined(_M_X64)
#define ARCH_K8 1
#elif defined(__PPC64__) || defined(__powerpc64__)
#if defined(__PPC64__) || defined(__powerpc64__)
#define ARCH_PPC 1
#elif defined(__aarch64__) || defined(_M_ARM64)
#define ARCH_ARM 1
@ -92,32 +90,33 @@
#define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))}
// Static prediction hints.
#ifdef HAVE_BUILTIN_EXPECT
#if HAVE_BUILTIN_EXPECT
#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define SNAPPY_PREDICT_FALSE(x) x
#define SNAPPY_PREDICT_TRUE(x) x
#endif
#endif // HAVE_BUILTIN_EXPECT
// Inlining hints.
#ifdef HAVE_ATTRIBUTE_ALWAYS_INLINE
#if HAVE_ATTRIBUTE_ALWAYS_INLINE
#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
#else
#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE
#endif
#endif // HAVE_ATTRIBUTE_ALWAYS_INLINE
// This is only used for recomputing the tag byte table used during
// decompression; for simplicity we just remove it from the open-source
// version (anyone who wants to regenerate it can just do the call
// themselves within main()).
#define DEFINE_bool(flag_name, default_value, description) \
bool FLAGS_ ## flag_name = default_value
#define DECLARE_bool(flag_name) \
extern bool FLAGS_ ## flag_name
// Stubbed version of ABSL_FLAG.
//
// In the open source version, flags can only be changed at compile time.
#define SNAPPY_FLAG(flag_type, flag_name, default_value, help) \
flag_type FLAGS_ ## flag_name = default_value
namespace snappy {
// Stubbed version of absl::GetFlag().
template <typename T>
inline T GetFlag(T flag) { return flag; }
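
A minimal sketch (the flag name is hypothetical, and snappy-stubs-internal.h is assumed to be included) of the stubbed flag machinery above: SNAPPY_FLAG declares a plain variable initialized to the default, and snappy::GetFlag() simply returns it, so in the open-source build flag values can only change at compile time.

```cpp
// Sketch: expands to "int32_t FLAGS_snappy_example_runs = 100;"; the help
// string is discarded by the stub macro.
SNAPPY_FLAG(int32_t, snappy_example_runs, 100, "Number of runs to execute.");

inline int32_t ExampleRuns() {
  // Always the compile-time default in the open-source build.
  return snappy::GetFlag(FLAGS_snappy_example_runs);
}
```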
static const uint32_t kuint32max = std::numeric_limits<uint32_t>::max();
static const int64_t kint64max = std::numeric_limits<int64_t>::max();
@ -236,11 +235,11 @@ class LittleEndian {
}
static inline constexpr bool IsLittleEndian() {
#if defined(SNAPPY_IS_BIG_ENDIAN)
#if SNAPPY_IS_BIG_ENDIAN
return false;
#else
return true;
#endif // defined(SNAPPY_IS_BIG_ENDIAN)
#endif // SNAPPY_IS_BIG_ENDIAN
}
};
@ -266,7 +265,7 @@ class Bits {
void operator=(const Bits&);
};
#if defined(HAVE_BUILTIN_CTZ)
#if HAVE_BUILTIN_CTZ
inline int Bits::Log2FloorNonZero(uint32_t n) {
assert(n != 0);
@ -355,7 +354,7 @@ inline int Bits::FindLSBSetNonZero(uint32_t n) {
#endif // End portable versions.
#if defined(HAVE_BUILTIN_CTZ)
#if HAVE_BUILTIN_CTZ
inline int Bits::FindLSBSetNonZero64(uint64_t n) {
assert(n != 0);
@ -389,7 +388,7 @@ inline int Bits::FindLSBSetNonZero64(uint64_t n) {
}
}
#endif // End portable version.
#endif // HAVE_BUILTIN_CTZ
// Variable-length integer encoding.
class Varint {


@ -28,23 +28,74 @@
//
// Various stubs for the unit tests for the open-source version of Snappy.
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_WINDOWS_H
// Needed to be able to use std::max without workarounds in the source code.
// https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts
#define NOMINMAX
#include <windows.h>
#endif
#include "snappy-test.h"
#include <algorithm>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
DEFINE_bool(run_microbenchmarks, true,
"Run microbenchmarks before doing anything else.");
namespace file {
OptionsStub::OptionsStub() = default;
OptionsStub::~OptionsStub() = default;
const OptionsStub &Defaults() {
static OptionsStub defaults;
return defaults;
}
StatusStub::StatusStub() = default;
StatusStub::StatusStub(const StatusStub &) = default;
StatusStub &StatusStub::operator=(const StatusStub &) = default;
StatusStub::~StatusStub() = default;
bool StatusStub::ok() { return true; }
StatusStub GetContents(const std::string &filename, std::string *output,
const OptionsStub & /* options */) {
std::FILE *fp = std::fopen(filename.c_str(), "rb");
if (fp == nullptr) {
std::perror(filename.c_str());
std::exit(1);
}
output->clear();
while (!std::feof(fp)) {
char buffer[4096];
size_t bytes_read = std::fread(buffer, 1, sizeof(buffer), fp);
if (bytes_read == 0 && std::ferror(fp)) {
std::perror("fread");
std::exit(1);
}
output->append(buffer, bytes_read);
}
std::fclose(fp);
return StatusStub();
}
StatusStub SetContents(const std::string &file_name, const std::string &content,
const OptionsStub & /* options */) {
std::FILE *fp = std::fopen(file_name.c_str(), "wb");
if (fp == nullptr) {
std::perror(file_name.c_str());
std::exit(1);
}
size_t bytes_written = std::fwrite(content.data(), 1, content.size(), fp);
if (bytes_written != content.size()) {
std::perror("fwrite");
std::exit(1);
}
std::fclose(fp);
return StatusStub();
}
} // namespace file
namespace snappy {
@ -56,212 +107,51 @@ std::string ReadTestDataFile(const std::string& base, size_t size_limit) {
prefix = std::string(srcdir) + "/";
}
file::GetContents(prefix + "testdata/" + base, &contents, file::Defaults()
).CheckSuccess();
).ok();
if (size_limit > 0) {
contents = contents.substr(0, size_limit);
}
return contents;
}
std::string ReadTestDataFile(const std::string& base) {
return ReadTestDataFile(base, 0);
}
std::string StrFormat(const char* format, ...) {
char buf[4096];
char buffer[4096];
std::va_list ap;
va_start(ap, format);
std::vsnprintf(buf, sizeof(buf), format, ap);
std::vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
return buf;
return buffer;
}
bool benchmark_running = false;
int64_t benchmark_real_time_us = 0;
int64_t benchmark_cpu_time_us = 0;
std::string* benchmark_label = nullptr;
int64_t benchmark_bytes_processed = 0;
LogMessage::~LogMessage() { std::cerr << std::endl; }
void ResetBenchmarkTiming() {
benchmark_real_time_us = 0;
benchmark_cpu_time_us = 0;
LogMessage &LogMessage::operator<<(const std::string &message) {
std::cerr << message;
return *this;
}
#ifdef WIN32
LARGE_INTEGER benchmark_start_real;
FILETIME benchmark_start_cpu;
#else // WIN32
struct timeval benchmark_start_real;
struct rusage benchmark_start_cpu;
#endif // WIN32
LogMessage &LogMessage::operator<<(int number) {
std::cerr << number;
return *this;
}
void StartBenchmarkTiming() {
#ifdef WIN32
QueryPerformanceCounter(&benchmark_start_real);
FILETIME dummy;
CHECK(GetProcessTimes(
GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_start_cpu));
#else
gettimeofday(&benchmark_start_real, NULL);
if (getrusage(RUSAGE_SELF, &benchmark_start_cpu) == -1) {
std::perror("getrusage(RUSAGE_SELF)");
std::exit(1);
}
#ifdef _MSC_VER
// ~LogMessageCrash calls std::abort() and therefore never exits. This is by
// design, so temporarily disable warning C4722.
#pragma warning(push)
#pragma warning(disable : 4722)
#endif
benchmark_running = true;
LogMessageCrash::~LogMessageCrash() {
std::cerr << std::endl;
std::abort();
}
void StopBenchmarkTiming() {
if (!benchmark_running) {
return;
}
#ifdef WIN32
LARGE_INTEGER benchmark_stop_real;
LARGE_INTEGER benchmark_frequency;
QueryPerformanceCounter(&benchmark_stop_real);
QueryPerformanceFrequency(&benchmark_frequency);
double elapsed_real = static_cast<double>(
benchmark_stop_real.QuadPart - benchmark_start_real.QuadPart) /
benchmark_frequency.QuadPart;
benchmark_real_time_us += elapsed_real * 1e6 + 0.5;
FILETIME benchmark_stop_cpu, dummy;
CHECK(GetProcessTimes(
GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_stop_cpu));
ULARGE_INTEGER start_ulargeint;
start_ulargeint.LowPart = benchmark_start_cpu.dwLowDateTime;
start_ulargeint.HighPart = benchmark_start_cpu.dwHighDateTime;
ULARGE_INTEGER stop_ulargeint;
stop_ulargeint.LowPart = benchmark_stop_cpu.dwLowDateTime;
stop_ulargeint.HighPart = benchmark_stop_cpu.dwHighDateTime;
benchmark_cpu_time_us +=
(stop_ulargeint.QuadPart - start_ulargeint.QuadPart + 5) / 10;
#else // WIN32
struct timeval benchmark_stop_real;
gettimeofday(&benchmark_stop_real, NULL);
benchmark_real_time_us +=
1000000 * (benchmark_stop_real.tv_sec - benchmark_start_real.tv_sec);
benchmark_real_time_us +=
(benchmark_stop_real.tv_usec - benchmark_start_real.tv_usec);
struct rusage benchmark_stop_cpu;
if (getrusage(RUSAGE_SELF, &benchmark_stop_cpu) == -1) {
std::perror("getrusage(RUSAGE_SELF)");
std::exit(1);
}
benchmark_cpu_time_us += 1000000 * (benchmark_stop_cpu.ru_utime.tv_sec -
benchmark_start_cpu.ru_utime.tv_sec);
benchmark_cpu_time_us += (benchmark_stop_cpu.ru_utime.tv_usec -
benchmark_start_cpu.ru_utime.tv_usec);
#endif // WIN32
benchmark_running = false;
}
void SetBenchmarkLabel(const std::string& str) {
if (benchmark_label) {
delete benchmark_label;
}
benchmark_label = new std::string(str);
}
void SetBenchmarkBytesProcessed(int64_t bytes) {
benchmark_bytes_processed = bytes;
}
struct BenchmarkRun {
int64_t real_time_us;
int64_t cpu_time_us;
};
struct BenchmarkCompareCPUTime {
bool operator() (const BenchmarkRun& a, const BenchmarkRun& b) const {
return a.cpu_time_us < b.cpu_time_us;
}
};
void Benchmark::Run() {
for (int test_case_num = start_; test_case_num <= stop_; ++test_case_num) {
// Run a few iterations first to find out approximately how fast
// the benchmark is.
const int kCalibrateIterations = 100;
ResetBenchmarkTiming();
StartBenchmarkTiming();
(*function_)(kCalibrateIterations, test_case_num);
StopBenchmarkTiming();
// Let each test case run for about 200ms, but at least as many
// as we used to calibrate.
// Run five times and pick the median.
const int kNumRuns = 5;
const int kMedianPos = kNumRuns / 2;
int num_iterations = 0;
if (benchmark_real_time_us > 0) {
num_iterations = 200000 * kCalibrateIterations / benchmark_real_time_us;
}
num_iterations = std::max(num_iterations, kCalibrateIterations);
BenchmarkRun benchmark_runs[kNumRuns];
for (int run = 0; run < kNumRuns; ++run) {
ResetBenchmarkTiming();
StartBenchmarkTiming();
(*function_)(num_iterations, test_case_num);
StopBenchmarkTiming();
benchmark_runs[run].real_time_us = benchmark_real_time_us;
benchmark_runs[run].cpu_time_us = benchmark_cpu_time_us;
}
std::string heading = StrFormat("%s/%d", name_.c_str(), test_case_num);
std::string human_readable_speed;
std::nth_element(benchmark_runs,
benchmark_runs + kMedianPos,
benchmark_runs + kNumRuns,
BenchmarkCompareCPUTime());
int64_t real_time_us = benchmark_runs[kMedianPos].real_time_us;
int64_t cpu_time_us = benchmark_runs[kMedianPos].cpu_time_us;
if (cpu_time_us <= 0) {
human_readable_speed = "?";
} else {
int64_t bytes_per_second =
benchmark_bytes_processed * 1000000 / cpu_time_us;
if (bytes_per_second < 1024) {
human_readable_speed =
StrFormat("%dB/s", static_cast<int>(bytes_per_second));
} else if (bytes_per_second < 1024 * 1024) {
human_readable_speed = StrFormat(
"%.1fkB/s", bytes_per_second / 1024.0f);
} else if (bytes_per_second < 1024 * 1024 * 1024) {
human_readable_speed = StrFormat(
"%.1fMB/s", bytes_per_second / (1024.0f * 1024.0f));
} else {
human_readable_speed = StrFormat(
"%.1fGB/s", bytes_per_second / (1024.0f * 1024.0f * 1024.0f));
}
}
std::fprintf(stderr,
#ifdef WIN32
"%-18s %10I64d %10I64d %10d %s %s\n",
#else
"%-18s %10lld %10lld %10d %s %s\n",
#ifdef _MSC_VER
#pragma warning(pop)
#endif
heading.c_str(),
static_cast<long long>(real_time_us * 1000 / num_iterations),
static_cast<long long>(cpu_time_us * 1000 / num_iterations),
num_iterations,
human_readable_speed.c_str(),
benchmark_label->c_str());
}
}
#ifdef HAVE_LIBZ
#if HAVE_LIBZ
ZLib::ZLib()
: comp_init_(false),


@ -31,159 +31,90 @@
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
#include <cstdarg>
#include <cstdio>
#include <iostream>
#include <string>
#if HAVE_CONFIG_H
#include "config.h"
#endif
#include "snappy-stubs-internal.h"
#ifdef HAVE_SYS_MMAN_H
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#ifdef HAVE_SYS_RESOURCE_H
#if HAVE_SYS_RESOURCE_H
#include <sys/resource.h>
#endif
#ifdef HAVE_SYS_TIME_H
#if HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_WINDOWS_H
#if HAVE_WINDOWS_H
// Needed to be able to use std::max without workarounds in the source code.
// https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts
#define NOMINMAX
#include <windows.h>
#endif
#ifdef HAVE_GTEST
#define InitGoogle(argv0, argc, argv, remove_flags) ((void)(0))
#include <gtest/gtest.h>
#undef TYPED_TEST
#define TYPED_TEST TEST
#define INIT_GTEST(argc, argv) ::testing::InitGoogleTest(argc, *argv)
#else
// Stubs for if the user doesn't have Google Test installed.
#define TEST(test_case, test_subcase) \
void Test_ ## test_case ## _ ## test_subcase()
#define INIT_GTEST(argc, argv)
#define TYPED_TEST TEST
#define EXPECT_EQ CHECK_EQ
#define EXPECT_NE CHECK_NE
#define EXPECT_FALSE(cond) CHECK(!(cond))
#endif
#ifdef HAVE_GFLAGS
#include <gflags/gflags.h>
// This is tricky; both gflags and Google Test want to look at the command line
// arguments. Google Test seems to be the most happy with unknown arguments,
// though, so we call it first and hope for the best.
#define InitGoogle(argv0, argc, argv, remove_flags) \
INIT_GTEST(argc, argv); \
google::ParseCommandLineFlags(argc, argv, remove_flags);
#else
// If we don't have the gflags package installed, these can only be
// changed at compile time.
#define DEFINE_int32(flag_name, default_value, description) \
static int FLAGS_ ## flag_name = default_value;
#define InitGoogle(argv0, argc, argv, remove_flags) \
INIT_GTEST(argc, argv)
#endif
#ifdef HAVE_LIBZ
#if HAVE_LIBZ
#include "zlib.h"
#endif
#ifdef HAVE_LIBLZO2
#if HAVE_LIBLZO2
#include "lzo/lzo1x.h"
#endif
namespace {
#if HAVE_LIBLZ4
#include "lz4.h"
#endif
namespace file {
int Defaults() { return 0; }
class DummyStatus {
public:
void CheckSuccess() { }
};
// Stubs the class file::Options.
//
// This class should not be instantiated explicitly. It should only be used by
// passing file::Defaults() to file::GetContents() / file::SetContents().
class OptionsStub {
public:
OptionsStub();
OptionsStub(const OptionsStub &) = delete;
OptionsStub &operator=(const OptionsStub &) = delete;
~OptionsStub();
};
DummyStatus GetContents(
const std::string& filename, std::string* data, int /*unused*/) {
FILE* fp = std::fopen(filename.c_str(), "rb");
if (fp == NULL) {
std::perror(filename.c_str());
std::exit(1);
}
const OptionsStub &Defaults();
data->clear();
while (!feof(fp)) {
char buf[4096];
size_t ret = fread(buf, 1, 4096, fp);
if (ret == 0 && ferror(fp)) {
std::perror("fread");
std::exit(1);
}
data->append(std::string(buf, ret));
}
// Stubs the class absl::Status.
//
// This class should not be instantiated explicitly. It should only be used by
// passing the result of file::GetContents() / file::SetContents() to
// CHECK_OK().
class StatusStub {
public:
StatusStub();
StatusStub(const StatusStub &);
StatusStub &operator=(const StatusStub &);
~StatusStub();
std::fclose(fp);
bool ok();
};
return DummyStatus();
}
StatusStub GetContents(const std::string &file_name, std::string *output,
const OptionsStub & /* options */);
inline DummyStatus SetContents(
const std::string& filename, const std::string& str, int /*unused*/) {
FILE* fp = std::fopen(filename.c_str(), "wb");
if (fp == NULL) {
std::perror(filename.c_str());
std::exit(1);
}
StatusStub SetContents(const std::string &file_name, const std::string &content,
const OptionsStub & /* options */);
int ret = std::fwrite(str.data(), str.size(), 1, fp);
if (ret != 1) {
std::perror("fwrite");
std::exit(1);
}
std::fclose(fp);
return DummyStatus();
}
} // namespace file
} // namespace
namespace snappy {
#define FLAGS_test_random_seed 301
using TypeParam = std::string;
void Test_CorruptedTest_VerifyCorrupted();
void Test_Snappy_SimpleTests();
void Test_Snappy_MaxBlowup();
void Test_Snappy_RandomData();
void Test_Snappy_FourByteOffset();
void Test_SnappyCorruption_TruncatedVarint();
void Test_SnappyCorruption_UnterminatedVarint();
void Test_SnappyCorruption_OverflowingVarint();
void Test_Snappy_ReadPastEndOfBuffer();
void Test_Snappy_FindMatchLength();
void Test_Snappy_FindMatchLengthRandom();
std::string ReadTestDataFile(const std::string& base, size_t size_limit);
std::string ReadTestDataFile(const std::string& base);
// A std::sprintf() variant that returns a std::string.
// Not safe for general use due to truncation issues.
std::string StrFormat(const char* format, ...);
@ -192,17 +123,18 @@ std::string StrFormat(const char* format, ...);
// system time changing.
class CycleTimer {
public:
CycleTimer() : real_time_us_(0) {}
inline CycleTimer() : real_time_us_(0) {}
inline ~CycleTimer() = default;
void Start() {
inline void Start() {
#ifdef WIN32
QueryPerformanceCounter(&start_);
#else
gettimeofday(&start_, NULL);
::gettimeofday(&start_, nullptr);
#endif
}
void Stop() {
inline void Stop() {
#ifdef WIN32
LARGE_INTEGER stop;
LARGE_INTEGER frequency;
@ -213,67 +145,78 @@ class CycleTimer {
frequency.QuadPart;
real_time_us_ += elapsed * 1e6 + 0.5;
#else
struct timeval stop;
gettimeofday(&stop, NULL);
struct ::timeval stop;
::gettimeofday(&stop, nullptr);
real_time_us_ += 1000000 * (stop.tv_sec - start_.tv_sec);
real_time_us_ += (stop.tv_usec - start_.tv_usec);
#endif
}
double Get() {
return real_time_us_ * 1e-6;
}
inline double Get() { return real_time_us_ * 1e-6; }
private:
int64_t real_time_us_;
#ifdef WIN32
LARGE_INTEGER start_;
#else
struct timeval start_;
struct ::timeval start_;
#endif
};
// Minimalistic microbenchmark framework.
// Logging.
typedef void (*BenchmarkFunction)(int, int);
class Benchmark {
class LogMessage {
public:
Benchmark(const std::string& name, BenchmarkFunction function)
: name_(name), function_(function) {}
inline LogMessage() = default;
~LogMessage();
Benchmark* DenseRange(int start, int stop) {
start_ = start;
stop_ = stop;
return this;
}
void Run();
private:
const std::string name_;
const BenchmarkFunction function_;
int start_, stop_;
LogMessage &operator<<(const std::string &message);
LogMessage &operator<<(int number);
};
#define BENCHMARK(benchmark_name) \
Benchmark* Benchmark_ ## benchmark_name = \
(new Benchmark(#benchmark_name, benchmark_name))
extern Benchmark* Benchmark_BM_UFlat;
extern Benchmark* Benchmark_BM_UIOVec;
extern Benchmark* Benchmark_BM_UValidate;
extern Benchmark* Benchmark_BM_ZFlat;
extern Benchmark* Benchmark_BM_ZFlatAll;
extern Benchmark* Benchmark_BM_ZFlatIncreasingTableSize;
class LogMessageCrash : public LogMessage {
public:
inline LogMessageCrash() = default;
~LogMessageCrash();
};
void ResetBenchmarkTiming();
void StartBenchmarkTiming();
void StopBenchmarkTiming();
void SetBenchmarkLabel(const std::string& str);
void SetBenchmarkBytesProcessed(int64_t bytes);
// This class is used to explicitly ignore values in the conditional
// logging macros. This avoids compiler warnings like "value computed
// is not used" and "statement has no effect".
#ifdef HAVE_LIBZ
class LogMessageVoidify {
public:
inline LogMessageVoidify() = default;
inline ~LogMessageVoidify() = default;
// This has to be an operator with a precedence lower than << but
// higher than ?:
inline void operator&(const LogMessage &) {}
};
// Asserts, both versions activated in debug mode only,
// and ones that are always active.
#define CRASH_UNLESS(condition) \
SNAPPY_PREDICT_TRUE(condition) \
? (void)0 \
: snappy::LogMessageVoidify() & snappy::LogMessageCrash()
#define LOG(level) LogMessage()
#define VLOG(level) \
true ? (void)0 : snappy::LogMessageVoidify() & snappy::LogMessage()
#define CHECK(cond) CRASH_UNLESS(cond)
#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b))
#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b))
#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b))
#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b))
#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b))
#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
#define CHECK_OK(cond) (cond).ok()
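
A minimal sketch (the helper and its arguments are illustrative) of the always-active CHECK macros above in use: when the condition fails, CRASH_UNLESS routes through LogMessageVoidify into LogMessageCrash, whose destructor prints a newline and calls std::abort().

```cpp
#include <cstddef>
#include <cstring>

// Sketch: guard a raw copy with CHECK_LE as defined above.
inline void CheckedCopy(const char* src, char* dst, size_t n,
                        size_t dst_capacity) {
  CHECK_LE(n, dst_capacity);
  std::memcpy(dst, src, n);
}
```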
#if HAVE_LIBZ
// Object-oriented wrapper around zlib.
class ZLib {
@ -396,131 +339,4 @@ class ZLib {
} // namespace snappy
DECLARE_bool(run_microbenchmarks);
static inline void RunSpecifiedBenchmarks() {
if (!FLAGS_run_microbenchmarks) {
return;
}
std::fprintf(stderr, "Running microbenchmarks.\n");
#ifndef NDEBUG
std::fprintf(stderr,
"WARNING: Compiled with assertions enabled, will be slow.\n");
#endif
#ifndef __OPTIMIZE__
std::fprintf(stderr,
"WARNING: Compiled without optimization, will be slow.\n");
#endif
std::fprintf(stderr, "Benchmark Time(ns) CPU(ns) Iterations\n");
std::fprintf(stderr, "---------------------------------------------------\n");
snappy::Benchmark_BM_UFlat->Run();
snappy::Benchmark_BM_UIOVec->Run();
snappy::Benchmark_BM_UValidate->Run();
snappy::Benchmark_BM_ZFlat->Run();
snappy::Benchmark_BM_ZFlatAll->Run();
snappy::Benchmark_BM_ZFlatIncreasingTableSize->Run();
std::fprintf(stderr, "\n");
}
#ifndef HAVE_GTEST
static inline int RUN_ALL_TESTS() {
std::fprintf(stderr, "Running correctness tests.\n");
snappy::Test_CorruptedTest_VerifyCorrupted();
snappy::Test_Snappy_SimpleTests();
snappy::Test_Snappy_MaxBlowup();
snappy::Test_Snappy_RandomData();
snappy::Test_Snappy_FourByteOffset();
snappy::Test_SnappyCorruption_TruncatedVarint();
snappy::Test_SnappyCorruption_UnterminatedVarint();
snappy::Test_SnappyCorruption_OverflowingVarint();
snappy::Test_Snappy_ReadPastEndOfBuffer();
snappy::Test_Snappy_FindMatchLength();
snappy::Test_Snappy_FindMatchLengthRandom();
std::fprintf(stderr, "All tests passed.\n");
return 0;
}
#endif // HAVE_GTEST
// For main().
namespace snappy {
// Logging.
#define LOG(level) LogMessage()
#define VLOG(level) true ? (void)0 : \
snappy::LogMessageVoidify() & snappy::LogMessage()
class LogMessage {
public:
LogMessage() { }
~LogMessage() {
std::cerr << std::endl;
}
LogMessage& operator<<(const std::string& msg) {
std::cerr << msg;
return *this;
}
LogMessage& operator<<(int x) {
std::cerr << x;
return *this;
}
};
// Asserts, both versions activated in debug mode only,
// and ones that are always active.
#define CRASH_UNLESS(condition) \
SNAPPY_PREDICT_TRUE(condition) ? (void)0 : \
snappy::LogMessageVoidify() & snappy::LogMessageCrash()
#ifdef _MSC_VER
// ~LogMessageCrash calls std::abort() and therefore never exits. This is by
// design, so temporarily disable warning C4722.
#pragma warning(push)
#pragma warning(disable:4722)
#endif
class LogMessageCrash : public LogMessage {
public:
LogMessageCrash() { }
~LogMessageCrash() {
std::cerr << std::endl;
std::abort();
}
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif
// This class is used to explicitly ignore values in the conditional
// logging macros. This avoids compiler warnings like "value computed
// is not used" and "statement has no effect".
class LogMessageVoidify {
public:
LogMessageVoidify() { }
// This has to be an operator with a precedence lower than << but
// higher than ?:
void operator&(const LogMessage&) { }
};
#define CHECK(cond) CRASH_UNLESS(cond)
#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b))
#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b))
#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b))
#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b))
#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b))
#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
#define CHECK_OK(cond) (cond).CheckSuccess()
} // namespace snappy
#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_

snappy.cc (792 lines changed; file diff suppressed because it is too large)

snappy_benchmark.cc (new file, 330 lines added)

@ -0,0 +1,330 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
#include "snappy-test.h"
#include "benchmark/benchmark.h"
#include "snappy-internal.h"
#include "snappy-sinksource.h"
#include "snappy.h"
#include "snappy_test_data.h"
namespace snappy {
namespace {
void BM_UFlat(benchmark::State& state) {
// Pick file to process based on state.range(0).
int file_index = state.range(0);
CHECK_GE(file_index, 0);
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
std::string contents =
ReadTestDataFile(kTestDataFiles[file_index].filename,
kTestDataFiles[file_index].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
char* dst = new char[contents.size()];
for (auto s : state) {
CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
benchmark::DoNotOptimize(dst);
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
static_cast<int64_t>(contents.size()));
state.SetLabel(kTestDataFiles[file_index].label);
delete[] dst;
}
BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
struct SourceFiles {
SourceFiles() {
for (int i = 0; i < kFiles; i++) {
std::string contents = ReadTestDataFile(kTestDataFiles[i].filename,
kTestDataFiles[i].size_limit);
max_size = std::max(max_size, contents.size());
sizes[i] = contents.size();
snappy::Compress(contents.data(), contents.size(), &zcontents[i]);
}
}
static constexpr int kFiles = ARRAYSIZE(kTestDataFiles);
std::string zcontents[kFiles];
size_t sizes[kFiles];
size_t max_size = 0;
};
void BM_UFlatMedley(benchmark::State& state) {
static const SourceFiles* const source = new SourceFiles();
std::vector<char> dst(source->max_size);
for (auto s : state) {
for (int i = 0; i < SourceFiles::kFiles; i++) {
CHECK(snappy::RawUncompress(source->zcontents[i].data(),
source->zcontents[i].size(), dst.data()));
benchmark::DoNotOptimize(dst);
}
}
int64_t source_sizes = 0;
for (int i = 0; i < SourceFiles::kFiles; i++) {
source_sizes += static_cast<int64_t>(source->sizes[i]);
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
source_sizes);
}
BENCHMARK(BM_UFlatMedley);
void BM_UValidate(benchmark::State& state) {
// Pick file to process based on state.range(0).
int file_index = state.range(0);
CHECK_GE(file_index, 0);
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
std::string contents =
ReadTestDataFile(kTestDataFiles[file_index].filename,
kTestDataFiles[file_index].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
for (auto s : state) {
CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
static_cast<int64_t>(contents.size()));
state.SetLabel(kTestDataFiles[file_index].label);
}
BENCHMARK(BM_UValidate)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
void BM_UValidateMedley(benchmark::State& state) {
static const SourceFiles* const source = new SourceFiles();
for (auto s : state) {
for (int i = 0; i < SourceFiles::kFiles; i++) {
CHECK(snappy::IsValidCompressedBuffer(source->zcontents[i].data(),
source->zcontents[i].size()));
}
}
int64_t source_sizes = 0;
for (int i = 0; i < SourceFiles::kFiles; i++) {
source_sizes += static_cast<int64_t>(source->sizes[i]);
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
source_sizes);
}
BENCHMARK(BM_UValidateMedley);
void BM_UIOVec(benchmark::State& state) {
// Pick file to process based on state.range(0).
int file_index = state.range(0);
CHECK_GE(file_index, 0);
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
std::string contents =
ReadTestDataFile(kTestDataFiles[file_index].filename,
kTestDataFiles[file_index].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
// Uncompress into an iovec containing ten entries.
const int kNumEntries = 10;
struct iovec iov[kNumEntries];
char *dst = new char[contents.size()];
size_t used_so_far = 0;
for (int i = 0; i < kNumEntries; ++i) {
iov[i].iov_base = dst + used_so_far;
if (used_so_far == contents.size()) {
iov[i].iov_len = 0;
continue;
}
if (i == kNumEntries - 1) {
iov[i].iov_len = contents.size() - used_so_far;
} else {
iov[i].iov_len = contents.size() / kNumEntries;
}
used_so_far += iov[i].iov_len;
}
for (auto s : state) {
CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
kNumEntries));
benchmark::DoNotOptimize(iov);
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
static_cast<int64_t>(contents.size()));
state.SetLabel(kTestDataFiles[file_index].label);
delete[] dst;
}
BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
void BM_UFlatSink(benchmark::State& state) {
// Pick file to process based on state.range(0).
int file_index = state.range(0);
CHECK_GE(file_index, 0);
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
std::string contents =
ReadTestDataFile(kTestDataFiles[file_index].filename,
kTestDataFiles[file_index].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
char* dst = new char[contents.size()];
for (auto s : state) {
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
snappy::UncheckedByteArraySink sink(dst);
CHECK(snappy::Uncompress(&source, &sink));
benchmark::DoNotOptimize(sink);
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
static_cast<int64_t>(contents.size()));
state.SetLabel(kTestDataFiles[file_index].label);
std::string s(dst, contents.size());
CHECK_EQ(contents, s);
delete[] dst;
}
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
void BM_ZFlat(benchmark::State& state) {
// Pick file to process based on state.range(0).
int file_index = state.range(0);
CHECK_GE(file_index, 0);
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
std::string contents =
ReadTestDataFile(kTestDataFiles[file_index].filename,
kTestDataFiles[file_index].size_limit);
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
size_t zsize = 0;
for (auto s : state) {
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
benchmark::DoNotOptimize(dst);
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
static_cast<int64_t>(contents.size()));
const double compression_ratio =
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label,
100.0 * compression_ratio));
VLOG(0) << StrFormat("compression for %s: %d -> %d bytes",
kTestDataFiles[file_index].label,
static_cast<int>(contents.size()), static_cast<int>(zsize));
delete[] dst;
}
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
void BM_ZFlatAll(benchmark::State& state) {
const int num_files = ARRAYSIZE(kTestDataFiles);
std::vector<std::string> contents(num_files);
std::vector<char*> dst(num_files);
int64_t total_contents_size = 0;
for (int i = 0; i < num_files; ++i) {
contents[i] = ReadTestDataFile(kTestDataFiles[i].filename,
kTestDataFiles[i].size_limit);
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
total_contents_size += contents[i].size();
}
size_t zsize = 0;
for (auto s : state) {
for (int i = 0; i < num_files; ++i) {
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
&zsize);
benchmark::DoNotOptimize(dst);
}
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
total_contents_size);
for (char* dst_item : dst) {
delete[] dst_item;
}
state.SetLabel(StrFormat("%d kTestDataFiles", num_files));
}
BENCHMARK(BM_ZFlatAll);
void BM_ZFlatIncreasingTableSize(benchmark::State& state) {
CHECK_GT(ARRAYSIZE(kTestDataFiles), 0);
const std::string base_content = ReadTestDataFile(
kTestDataFiles[0].filename, kTestDataFiles[0].size_limit);
std::vector<std::string> contents;
std::vector<char*> dst;
int64_t total_contents_size = 0;
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
++table_bits) {
std::string content = base_content;
content.resize(1 << table_bits);
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
total_contents_size += content.size();
contents.push_back(std::move(content));
}
size_t zsize = 0;
for (auto s : state) {
for (size_t i = 0; i < contents.size(); ++i) {
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
&zsize);
benchmark::DoNotOptimize(dst);
}
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
total_contents_size);
for (char* dst_item : dst) {
delete[] dst_item;
}
state.SetLabel(StrFormat("%d tables", contents.size()));
}
BENCHMARK(BM_ZFlatIncreasingTableSize);
} // namespace
} // namespace snappy

57
snappy_test_data.cc Normal file
View file

@ -0,0 +1,57 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Support code for reading test data.
#include "snappy_test_data.h"
#include <cstddef>
#include <cstdlib>
#include <string>
#include "snappy-test.h"
namespace snappy {
std::string ReadTestDataFile(const char* base, size_t size_limit) {
std::string srcdir;
const char* srcdir_env = std::getenv("srcdir"); // This is set by Automake.
if (srcdir_env) {
srcdir = std::string(srcdir_env) + "/";
}
std::string contents;
CHECK_OK(file::GetContents(srcdir + "testdata/" + base, &contents,
file::Defaults()));
if (size_limit > 0) {
contents = contents.substr(0, size_limit);
}
return contents;
}
} // namespace snappy

68
snappy_test_data.h Normal file
View file

@ -0,0 +1,68 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// List of test case files.
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
#define THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
#include <cstddef>
#include <string>
namespace snappy {
std::string ReadTestDataFile(const char* base, size_t size_limit);
// TODO: Replace anonymous namespace with inline variable when we can
// rely on C++17.
namespace {
constexpr struct {
const char* label;
const char* filename;
size_t size_limit;
} kTestDataFiles[] = {
{ "html", "html", 0 },
{ "urls", "urls.10K", 0 },
{ "jpg", "fireworks.jpeg", 0 },
{ "jpg_200", "fireworks.jpeg", 200 },
{ "pdf", "paper-100k.pdf", 0 },
{ "html4", "html_x_4", 0 },
{ "txt1", "alice29.txt", 0 },
{ "txt2", "asyoulik.txt", 0 },
{ "txt3", "lcet10.txt", 0 },
{ "txt4", "plrabn12.txt", 0 },
{ "pb", "geo.protodata", 0 },
{ "gaviota", "kppkn.gtb", 0 },
};
} // namespace
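// Sketch of the C++17 replacement the TODO above refers to: an inline
// constexpr variable gives every translation unit the same array without the
// anonymous namespace. The type and variable names here are illustrative, and
// the entries are elided except for the first two.
#if __cplusplus >= 201703L
struct TestDataFile {
  const char* label;
  const char* filename;
  size_t size_limit;
};
inline constexpr TestDataFile kTestDataFilesCxx17[] = {
    {"html", "html", 0},
    {"urls", "urls.10K", 0},
    // ... remaining entries as in kTestDataFiles above.
};
#endif  // __cplusplus >= 201703L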
} // namespace snappy
#endif // THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__

471
snappy_test_tool.cc Normal file
View file

@ -0,0 +1,471 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include "snappy-test.h"
#include "snappy-internal.h"
#include "snappy-sinksource.h"
#include "snappy.h"
#include "snappy_test_data.h"
SNAPPY_FLAG(int32_t, start_len, -1,
"Starting prefix size for testing (-1: just full file contents)");
SNAPPY_FLAG(int32_t, end_len, -1,
"Starting prefix size for testing (-1: just full file contents)");
SNAPPY_FLAG(int32_t, bytes, 10485760,
"How many bytes to compress/uncompress per file for timing");
SNAPPY_FLAG(bool, zlib, true,
"Run zlib compression (http://www.zlib.net)");
SNAPPY_FLAG(bool, lzo, true,
"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
SNAPPY_FLAG(bool, lz4, true,
"Run LZ4 compression (https://github.com/lz4/lz4)");
SNAPPY_FLAG(bool, snappy, true, "Run snappy compression");
SNAPPY_FLAG(bool, write_compressed, false,
"Write compressed versions of each file to <file>.comp");
SNAPPY_FLAG(bool, write_uncompressed, false,
"Write uncompressed versions of each file to <file>.uncomp");
namespace snappy {
namespace {
#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
// To test against code that reads beyond its input, this class copies a
// string to a newly allocated group of pages, the last of which
// is made unreadable via mprotect. Note that we need to allocate the
// memory with mmap(), as POSIX allows mprotect() only on memory allocated
// with mmap(), and some malloc/posix_memalign implementations expect to
// be able to read previously allocated memory while doing heap allocations.
class DataEndingAtUnreadablePage {
public:
explicit DataEndingAtUnreadablePage(const std::string& s) {
const size_t page_size = sysconf(_SC_PAGESIZE);
const size_t size = s.size();
// Round up space for string to a multiple of page_size.
size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
alloc_size_ = space_for_string + page_size;
mem_ = mmap(NULL, alloc_size_,
PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
CHECK_NE(MAP_FAILED, mem_);
protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
char* dst = protected_page_ - size;
std::memcpy(dst, s.data(), size);
data_ = dst;
size_ = size;
// Make guard page unreadable.
CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
}
~DataEndingAtUnreadablePage() {
const size_t page_size = sysconf(_SC_PAGESIZE);
// Undo the mprotect.
CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
CHECK_EQ(0, munmap(mem_, alloc_size_));
}
const char* data() const { return data_; }
size_t size() const { return size_; }
private:
size_t alloc_size_;
void* mem_;
char* protected_page_;
const char* data_;
size_t size_;
};
#else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
// Fallback for systems without mmap.
using DataEndingAtUnreadablePage = std::string;
#endif
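// Sketch (illustrative only, not called anywhere): how a test could wrap its
// input in DataEndingAtUnreadablePage so that any read past the end of the
// source buffer faults immediately. Only the snappy::Compress / Uncompress
// overloads from snappy.h are assumed; the function name is hypothetical.
bool RoundTripWithGuardPage(const std::string& input) {
  DataEndingAtUnreadablePage guarded(input);
  std::string compressed;
  snappy::Compress(guarded.data(), guarded.size(), &compressed);
  std::string uncompressed;
  if (!snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed))
    return false;
  return uncompressed == input;
}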
enum CompressorType { ZLIB, LZO, LZ4, SNAPPY };
const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"};
size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) {
switch (comp) {
#ifdef ZLIB_VERSION
case ZLIB:
return ZLib::MinCompressbufSize(input_size);
#endif // ZLIB_VERSION
#ifdef LZO_VERSION
case LZO:
return input_size + input_size/64 + 16 + 3;
#endif // LZO_VERSION
#ifdef LZ4_VERSION_NUMBER
case LZ4:
return LZ4_compressBound(input_size);
#endif // LZ4_VERSION_NUMBER
case SNAPPY:
return snappy::MaxCompressedLength(input_size);
default:
LOG(FATAL) << "Unknown compression type number " << comp;
return 0;
}
}
// Returns true if we successfully compressed, false otherwise.
//
// If compressed_is_preallocated is set, do not resize the compressed buffer.
// This is typically what you want for a benchmark, in order to not spend
// time in the memory allocator. If you do set this flag, however,
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
// number of bytes, and may contain junk bytes at the end after return.
bool Compress(const char* input, size_t input_size, CompressorType comp,
std::string* compressed, bool compressed_is_preallocated) {
if (!compressed_is_preallocated) {
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
}
switch (comp) {
#ifdef ZLIB_VERSION
case ZLIB: {
ZLib zlib;
uLongf destlen = compressed->size();
int ret = zlib.Compress(
reinterpret_cast<Bytef*>(string_as_array(compressed)),
&destlen,
reinterpret_cast<const Bytef*>(input),
input_size);
CHECK_EQ(Z_OK, ret);
if (!compressed_is_preallocated) {
compressed->resize(destlen);
}
return true;
}
#endif // ZLIB_VERSION
#ifdef LZO_VERSION
case LZO: {
unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
lzo_uint destlen;
int ret = lzo1x_1_15_compress(
reinterpret_cast<const uint8_t*>(input),
input_size,
reinterpret_cast<uint8_t*>(string_as_array(compressed)),
&destlen,
mem);
CHECK_EQ(LZO_E_OK, ret);
delete[] mem;
if (!compressed_is_preallocated) {
compressed->resize(destlen);
}
break;
}
#endif // LZO_VERSION
#ifdef LZ4_VERSION_NUMBER
case LZ4: {
int destlen = compressed->size();
destlen = LZ4_compress_default(input, string_as_array(compressed),
input_size, destlen);
CHECK_NE(destlen, 0);
if (!compressed_is_preallocated) {
compressed->resize(destlen);
}
break;
}
#endif // LZ4_VERSION_NUMBER
case SNAPPY: {
size_t destlen;
snappy::RawCompress(input, input_size,
string_as_array(compressed),
&destlen);
CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
if (!compressed_is_preallocated) {
compressed->resize(destlen);
}
break;
}
default: {
return false; // the asked-for library wasn't compiled in
}
}
return true;
}
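// Sketch of the contract documented above Compress(): inside a timed loop the
// buffer is preallocated once and compressed_is_preallocated is true, so no
// time is spent in the allocator; a final call with false trims the buffer to
// the real compressed size. The function name is illustrative only.
void ExampleCompressPreallocated(const char* input, size_t input_size) {
  std::string compressed;
  compressed.resize(MinimumRequiredOutputSpace(input_size, SNAPPY));
  for (int i = 0; i < 10; ++i) {
    // Timed region: the buffer may hold junk past the real compressed size.
    Compress(input, input_size, SNAPPY, &compressed,
             /*compressed_is_preallocated=*/true);
  }
  // Untimed: resize so the buffer can be handed to a decompressor.
  Compress(input, input_size, SNAPPY, &compressed,
           /*compressed_is_preallocated=*/false);
}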
bool Uncompress(const std::string& compressed, CompressorType comp, int size,
std::string* output) {
// TODO: Switch to [[maybe_unused]] when we can assume C++17.
(void)size;
switch (comp) {
#ifdef ZLIB_VERSION
case ZLIB: {
output->resize(size);
ZLib zlib;
uLongf destlen = output->size();
int ret = zlib.Uncompress(
reinterpret_cast<Bytef*>(string_as_array(output)),
&destlen,
reinterpret_cast<const Bytef*>(compressed.data()),
compressed.size());
CHECK_EQ(Z_OK, ret);
CHECK_EQ(static_cast<uLongf>(size), destlen);
break;
}
#endif // ZLIB_VERSION
#ifdef LZO_VERSION
case LZO: {
output->resize(size);
lzo_uint destlen;
int ret = lzo1x_decompress(
reinterpret_cast<const uint8_t*>(compressed.data()),
compressed.size(),
reinterpret_cast<uint8_t*>(string_as_array(output)),
&destlen,
NULL);
CHECK_EQ(LZO_E_OK, ret);
CHECK_EQ(static_cast<lzo_uint>(size), destlen);
break;
}
#endif // LZO_VERSION
#ifdef LZ4_VERSION_NUMBER
case LZ4: {
output->resize(size);
int destlen = output->size();
destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output),
compressed.size(), destlen);
CHECK_NE(destlen, 0);
CHECK_EQ(size, destlen);
break;
}
#endif // LZ4_VERSION_NUMBER
case SNAPPY: {
snappy::RawUncompress(compressed.data(), compressed.size(),
string_as_array(output));
break;
}
default: {
return false; // the asked-for library wasn't compiled in
}
}
return true;
}
void Measure(const char* data, size_t length, CompressorType comp, int repeats,
int block_size) {
// Run tests a few times and pick the median running time.
static const int kRuns = 5;
double ctime[kRuns];
double utime[kRuns];
int compressed_size = 0;
{
// Chop the input into blocks
int num_blocks = (length + block_size - 1) / block_size;
std::vector<const char*> input(num_blocks);
std::vector<size_t> input_length(num_blocks);
std::vector<std::string> compressed(num_blocks);
std::vector<std::string> output(num_blocks);
for (int b = 0; b < num_blocks; ++b) {
int input_start = b * block_size;
int input_limit = std::min<int>((b+1)*block_size, length);
input[b] = data+input_start;
input_length[b] = input_limit-input_start;
}
// Pre-grow the output buffers so we don't measure string append time.
for (std::string& compressed_block : compressed) {
compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
}
// First, try one trial compression to make sure the code is compiled in
if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
LOG(WARNING) << "Skipping " << names[comp] << ": "
<< "library not compiled in";
return;
}
for (int run = 0; run < kRuns; ++run) {
CycleTimer ctimer, utimer;
// Pre-grow the output buffers so we don't measure string append time.
for (std::string& compressed_block : compressed) {
compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
}
ctimer.Start();
for (int b = 0; b < num_blocks; ++b) {
for (int i = 0; i < repeats; ++i)
Compress(input[b], input_length[b], comp, &compressed[b], true);
}
ctimer.Stop();
// Compress once more, with resizing, so we don't leave junk
// at the end that will confuse the decompressor.
for (int b = 0; b < num_blocks; ++b) {
Compress(input[b], input_length[b], comp, &compressed[b], false);
}
for (int b = 0; b < num_blocks; ++b) {
output[b].resize(input_length[b]);
}
utimer.Start();
for (int i = 0; i < repeats; ++i) {
for (int b = 0; b < num_blocks; ++b)
Uncompress(compressed[b], comp, input_length[b], &output[b]);
}
utimer.Stop();
ctime[run] = ctimer.Get();
utime[run] = utimer.Get();
}
compressed_size = 0;
for (const std::string& compressed_item : compressed) {
compressed_size += compressed_item.size();
}
}
std::sort(ctime, ctime + kRuns);
std::sort(utime, utime + kRuns);
const int med = kRuns/2;
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
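// For example, length = 1 MiB, repeats = 10 and a median compression time of
// 0.02 s give comp_rate = (1048576 / 0.02) * 10 / 1048576 = 500 MB/s.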
std::string x = names[comp];
x += ":";
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
: std::string("?");
std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
"comp %5.1f MB/s uncomp %5s MB/s\n",
x.c_str(),
block_size/(1<<20),
static_cast<int>(length), static_cast<uint32_t>(compressed_size),
(compressed_size * 100.0) / std::max<int>(1, length),
comp_rate,
urate.c_str());
}
void CompressFile(const char* fname) {
std::string fullinput;
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
std::string compressed;
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
file::Defaults()));
}
void UncompressFile(const char* fname) {
std::string fullinput;
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
size_t uncompLength;
CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(),
&uncompLength));
std::string uncompressed;
uncompressed.resize(uncompLength);
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
file::Defaults()));
}
void MeasureFile(const char* fname) {
std::string fullinput;
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
std::printf("%-40s :\n", fname);
int start_len = (snappy::GetFlag(FLAGS_start_len) < 0)
? fullinput.size()
: snappy::GetFlag(FLAGS_start_len);
int end_len = fullinput.size();
if (snappy::GetFlag(FLAGS_end_len) >= 0) {
end_len = std::min<int>(fullinput.size(), snappy::GetFlag(FLAGS_end_len));
}
for (int len = start_len; len <= end_len; ++len) {
const char* const input = fullinput.data();
int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1);
if (snappy::GetFlag(FLAGS_zlib))
Measure(input, len, ZLIB, repeats, 1024 << 10);
if (snappy::GetFlag(FLAGS_lzo))
Measure(input, len, LZO, repeats, 1024 << 10);
if (snappy::GetFlag(FLAGS_lz4))
Measure(input, len, LZ4, repeats, 1024 << 10);
if (snappy::GetFlag(FLAGS_snappy))
Measure(input, len, SNAPPY, repeats, 4096 << 10);
// For block-size based measurements
if (0 && snappy::GetFlag(FLAGS_snappy)) {
Measure(input, len, SNAPPY, repeats, 8<<10);
Measure(input, len, SNAPPY, repeats, 16<<10);
Measure(input, len, SNAPPY, repeats, 32<<10);
Measure(input, len, SNAPPY, repeats, 64<<10);
Measure(input, len, SNAPPY, repeats, 256<<10);
Measure(input, len, SNAPPY, repeats, 1024<<10);
}
}
}
} // namespace
} // namespace snappy
int main(int argc, char** argv) {
InitGoogle(argv[0], &argc, &argv, true);
for (int arg = 1; arg < argc; ++arg) {
if (snappy::GetFlag(FLAGS_write_compressed)) {
snappy::CompressFile(argv[arg]);
} else if (snappy::GetFlag(FLAGS_write_uncompressed)) {
snappy::UncompressFile(argv[arg]);
} else {
snappy::MeasureFile(argv[arg]);
}
}
return 0;
}

View file

@ -26,44 +26,31 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <algorithm>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include "snappy.h"
#include "snappy-internal.h"
#include "snappy-test.h"
#include "gtest/gtest.h"
#include "snappy-internal.h"
#include "snappy-sinksource.h"
#include "snappy.h"
#include "snappy_test_data.h"
DEFINE_int32(start_len, -1,
"Starting prefix size for testing (-1: just full file contents)");
DEFINE_int32(end_len, -1,
"Starting prefix size for testing (-1: just full file contents)");
DEFINE_int32(bytes, 10485760,
"How many bytes to compress/uncompress per file for timing");
DEFINE_bool(zlib, false,
"Run zlib compression (http://www.zlib.net)");
DEFINE_bool(lzo, false,
"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
DEFINE_bool(snappy, true, "Run snappy compression");
DEFINE_bool(write_compressed, false,
"Write compressed versions of each file to <file>.comp");
DEFINE_bool(write_uncompressed, false,
"Write uncompressed versions of each file to <file>.uncomp");
DEFINE_bool(snappy_dump_decompression_table, false,
SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
"If true, we print the decompression table during tests.");
namespace snappy {
#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
namespace {
#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
// To test against code that reads beyond its input, this class copies a
// string to a newly allocated group of pages, the last of which
@ -109,260 +96,14 @@ class DataEndingAtUnreadablePage {
size_t size_;
};
#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
#else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
// Fallback for systems without mmap.
using DataEndingAtUnreadablePage = std::string;
#endif
enum CompressorType {
ZLIB, LZO, SNAPPY
};
const char* names[] = {
"ZLIB", "LZO", "SNAPPY"
};
static size_t MinimumRequiredOutputSpace(size_t input_size,
CompressorType comp) {
switch (comp) {
#ifdef ZLIB_VERSION
case ZLIB:
return ZLib::MinCompressbufSize(input_size);
#endif // ZLIB_VERSION
#ifdef LZO_VERSION
case LZO:
return input_size + input_size/64 + 16 + 3;
#endif // LZO_VERSION
case SNAPPY:
return snappy::MaxCompressedLength(input_size);
default:
LOG(FATAL) << "Unknown compression type number " << comp;
return 0;
}
}
// Returns true if we successfully compressed, false otherwise.
//
// If compressed_is_preallocated is set, do not resize the compressed buffer.
// This is typically what you want for a benchmark, in order to not spend
// time in the memory allocator. If you do set this flag, however,
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
// number of bytes, and may contain junk bytes at the end after return.
static bool Compress(const char* input, size_t input_size, CompressorType comp,
std::string* compressed, bool compressed_is_preallocated) {
if (!compressed_is_preallocated) {
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
}
switch (comp) {
#ifdef ZLIB_VERSION
case ZLIB: {
ZLib zlib;
uLongf destlen = compressed->size();
int ret = zlib.Compress(
reinterpret_cast<Bytef*>(string_as_array(compressed)),
&destlen,
reinterpret_cast<const Bytef*>(input),
input_size);
CHECK_EQ(Z_OK, ret);
if (!compressed_is_preallocated) {
compressed->resize(destlen);
}
return true;
}
#endif // ZLIB_VERSION
#ifdef LZO_VERSION
case LZO: {
unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
lzo_uint destlen;
int ret = lzo1x_1_15_compress(
reinterpret_cast<const uint8_t*>(input),
input_size,
reinterpret_cast<uint8_t*>(string_as_array(compressed)),
&destlen,
mem);
CHECK_EQ(LZO_E_OK, ret);
delete[] mem;
if (!compressed_is_preallocated) {
compressed->resize(destlen);
}
break;
}
#endif // LZO_VERSION
case SNAPPY: {
size_t destlen;
snappy::RawCompress(input, input_size,
string_as_array(compressed),
&destlen);
CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
if (!compressed_is_preallocated) {
compressed->resize(destlen);
}
break;
}
default: {
return false; // the asked-for library wasn't compiled in
}
}
return true;
}
static bool Uncompress(const std::string& compressed, CompressorType comp,
int size, std::string* output) {
switch (comp) {
#ifdef ZLIB_VERSION
case ZLIB: {
output->resize(size);
ZLib zlib;
uLongf destlen = output->size();
int ret = zlib.Uncompress(
reinterpret_cast<Bytef*>(string_as_array(output)),
&destlen,
reinterpret_cast<const Bytef*>(compressed.data()),
compressed.size());
CHECK_EQ(Z_OK, ret);
CHECK_EQ(static_cast<uLongf>(size), destlen);
break;
}
#endif // ZLIB_VERSION
#ifdef LZO_VERSION
case LZO: {
output->resize(size);
lzo_uint destlen;
int ret = lzo1x_decompress(
reinterpret_cast<const uint8_t*>(compressed.data()),
compressed.size(),
reinterpret_cast<uint8_t*>(string_as_array(output)),
&destlen,
NULL);
CHECK_EQ(LZO_E_OK, ret);
CHECK_EQ(static_cast<lzo_uint>(size), destlen);
break;
}
#endif // LZO_VERSION
case SNAPPY: {
snappy::RawUncompress(compressed.data(), compressed.size(),
string_as_array(output));
break;
}
default: {
return false; // the asked-for library wasn't compiled in
}
}
return true;
}
static void Measure(const char* data,
size_t length,
CompressorType comp,
int repeats,
int block_size) {
// Run tests a few time and pick median running times
static const int kRuns = 5;
double ctime[kRuns];
double utime[kRuns];
int compressed_size = 0;
{
// Chop the input into blocks
int num_blocks = (length + block_size - 1) / block_size;
std::vector<const char*> input(num_blocks);
std::vector<size_t> input_length(num_blocks);
std::vector<std::string> compressed(num_blocks);
std::vector<std::string> output(num_blocks);
for (int b = 0; b < num_blocks; ++b) {
int input_start = b * block_size;
int input_limit = std::min<int>((b+1)*block_size, length);
input[b] = data+input_start;
input_length[b] = input_limit-input_start;
}
// Pre-grow the output buffers so we don't measure string append time.
for (std::string& compressed_block : compressed) {
compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
}
// First, try one trial compression to make sure the code is compiled in
if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
LOG(WARNING) << "Skipping " << names[comp] << ": "
<< "library not compiled in";
return;
}
for (int run = 0; run < kRuns; ++run) {
CycleTimer ctimer, utimer;
// Pre-grow the output buffers so we don't measure string append time.
for (std::string& compressed_block : compressed) {
compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
}
ctimer.Start();
for (int b = 0; b < num_blocks; ++b) {
for (int i = 0; i < repeats; ++i)
Compress(input[b], input_length[b], comp, &compressed[b], true);
}
ctimer.Stop();
// Compress once more, with resizing, so we don't leave junk
// at the end that will confuse the decompressor.
for (int b = 0; b < num_blocks; ++b) {
Compress(input[b], input_length[b], comp, &compressed[b], false);
}
for (int b = 0; b < num_blocks; ++b) {
output[b].resize(input_length[b]);
}
utimer.Start();
for (int i = 0; i < repeats; ++i) {
for (int b = 0; b < num_blocks; ++b)
Uncompress(compressed[b], comp, input_length[b], &output[b]);
}
utimer.Stop();
ctime[run] = ctimer.Get();
utime[run] = utimer.Get();
}
compressed_size = 0;
for (const std::string& compressed_item : compressed) {
compressed_size += compressed_item.size();
}
}
std::sort(ctime, ctime + kRuns);
std::sort(utime, utime + kRuns);
const int med = kRuns/2;
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
std::string x = names[comp];
x += ":";
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
: std::string("?");
std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
"comp %5.1f MB/s uncomp %5s MB/s\n",
x.c_str(),
block_size/(1<<20),
static_cast<int>(length), static_cast<uint32_t>(compressed_size),
(compressed_size * 100.0) / std::max<int>(1, length),
comp_rate,
urate.c_str());
}
static int VerifyString(const std::string& input) {
int VerifyString(const std::string& input) {
std::string compressed;
DataEndingAtUnreadablePage i(input);
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
@ -378,7 +119,7 @@ static int VerifyString(const std::string& input) {
return uncompressed.size();
}
static void VerifyStringSink(const std::string& input) {
void VerifyStringSink(const std::string& input) {
std::string compressed;
DataEndingAtUnreadablePage i(input);
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
@ -396,7 +137,7 @@ static void VerifyStringSink(const std::string& input) {
CHECK_EQ(uncompressed, input);
}
static void VerifyIOVec(const std::string& input) {
void VerifyIOVec(const std::string& input) {
std::string compressed;
DataEndingAtUnreadablePage i(input);
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
@ -443,7 +184,7 @@ static void VerifyIOVec(const std::string& input) {
// Test that data compressed by a compressor that does not
// obey block sizes is uncompressed properly.
static void VerifyNonBlockedCompression(const std::string& input) {
void VerifyNonBlockedCompression(const std::string& input) {
if (input.length() > snappy::kBlockSize) {
// We cannot test larger blocks than the maximum block size, obviously.
return;
@ -496,7 +237,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
}
// Expand the input so that it is at least K times as big as block size
static std::string Expand(const std::string& input) {
std::string Expand(const std::string& input) {
static const int K = 3;
std::string data = input;
while (data.size() < K * snappy::kBlockSize) {
@ -505,7 +246,7 @@ static std::string Expand(const std::string& input) {
return data;
}
static int Verify(const std::string& input) {
int Verify(const std::string& input) {
VLOG(1) << "Verifying input of size " << input.size();
// Compress using string based routines
@ -525,10 +266,10 @@ static int Verify(const std::string& input) {
return result;
}
static bool IsValidCompressedBuffer(const std::string& c) {
bool IsValidCompressedBuffer(const std::string& c) {
return snappy::IsValidCompressedBuffer(c.data(), c.size());
}
static bool Uncompress(const std::string& c, std::string* u) {
bool Uncompress(const std::string& c, std::string* u) {
return snappy::Uncompress(c.data(), c.size(), u);
}
@ -605,7 +346,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
// These mirror the compression code in snappy.cc, but are copied
// here so that we can bypass some limitations in the how snappy.cc
// invokes these routines.
static void AppendLiteral(std::string* dst, const std::string& literal) {
void AppendLiteral(std::string* dst, const std::string& literal) {
if (literal.empty()) return;
int n = literal.size() - 1;
if (n < 60) {
@ -625,7 +366,7 @@ static void AppendLiteral(std::string* dst, const std::string& literal) {
*dst += literal;
}
static void AppendCopy(std::string* dst, int offset, int length) {
void AppendCopy(std::string* dst, int offset, int length) {
while (length > 0) {
// Figure out how much to copy in one shot
int to_copy;
@ -669,6 +410,41 @@ TEST(Snappy, SimpleTests) {
Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
}
// Regression test for cr/345340892.
TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
Verify("abcabcabcabcabcabcab");
Verify("abcabcabcabcabcabcab0123456789ABCDEF");
Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
}
// Regression test for cr/345340892.
TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
std::mt19937 rng;
std::uniform_int_distribution<int> uniform_byte(0, 255);
for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
for (int length = 1; length <= 64; ++length) {
for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
const int size = pattern_size + length + extra_bytes_after_pattern;
std::string input;
input.resize(size);
for (int i = 0; i < pattern_size; ++i) {
input[i] = 'a' + i;
}
for (int i = 0; i < length; ++i) {
input[pattern_size + i] = input[i];
}
for (int i = 0; i < extra_bytes_after_pattern; ++i) {
input[pattern_size + length + i] =
static_cast<char>(uniform_byte(rng));
}
Verify(input);
}
}
}
}
// Verify max blowup (lots of four-byte copies)
TEST(Snappy, MaxBlowup) {
std::mt19937 rng;
@ -685,7 +461,7 @@ TEST(Snappy, MaxBlowup) {
}
TEST(Snappy, RandomData) {
std::minstd_rand0 rng(FLAGS_test_random_seed);
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
std::uniform_int_distribution<int> uniform_byte(0, 255);
@ -876,8 +652,7 @@ TEST(Snappy, IOVecCopyOverflow) {
}
}
static bool CheckUncompressedLength(const std::string& compressed,
size_t* ulength) {
bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
compressed.size(),
ulength);
@ -958,8 +733,6 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
}
namespace {
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
uint64_t data;
std::pair<size_t, bool> p =
@ -968,8 +741,6 @@ int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
return p.first;
}
} // namespace
TEST(Snappy, FindMatchLength) {
// Exercise all different code paths through the function.
// 64-bit version:
@ -1063,7 +834,7 @@ TEST(Snappy, FindMatchLength) {
TEST(Snappy, FindMatchLengthRandom) {
constexpr int kNumTrials = 10000;
constexpr int kTypicalLength = 10;
std::minstd_rand0 rng(FLAGS_test_random_seed);
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
std::uniform_int_distribution<int> uniform_byte(0, 255);
std::bernoulli_distribution one_in_two(1.0 / 2);
std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
@ -1090,9 +861,8 @@ TEST(Snappy, FindMatchLengthRandom) {
}
}
static uint16_t MakeEntry(unsigned int extra,
unsigned int len,
unsigned int copy_offset) {
uint16_t MakeEntry(unsigned int extra, unsigned int len,
unsigned int copy_offset) {
// Check that all of the fields fit within the allocated space
assert(extra == (extra & 0x7)); // At most 3 bits
assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
@ -1135,7 +905,7 @@ TEST(Snappy, VerifyCharTable) {
// COPY_1_BYTE_OFFSET.
//
// The tag byte in the compressed data stores len-4 in 3 bits, and
// offset/256 in 5 bits. offset%256 is stored in the next byte.
// offset/256 in 3 bits. offset%256 is stored in the next byte.
//
// This format is used for length in range [4..11] and offset in
// range [0..2047]
@ -1168,7 +938,7 @@ TEST(Snappy, VerifyCharTable) {
EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
}
if (FLAGS_snappy_dump_decompression_table) {
if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
std::printf("static const uint16_t char_table[256] = {\n ");
for (int i = 0; i < 256; ++i) {
std::printf("0x%04x%s",
@ -1184,329 +954,13 @@ TEST(Snappy, VerifyCharTable) {
}
}
static void CompressFile(const char* fname) {
std::string fullinput;
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
std::string compressed;
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
file::Defaults()));
}
static void UncompressFile(const char* fname) {
std::string fullinput;
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
size_t uncompLength;
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
std::string uncompressed;
uncompressed.resize(uncompLength);
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
file::Defaults()));
}
static void MeasureFile(const char* fname) {
std::string fullinput;
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
std::printf("%-40s :\n", fname);
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
int end_len = fullinput.size();
if (FLAGS_end_len >= 0) {
end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
}
for (int len = start_len; len <= end_len; ++len) {
const char* const input = fullinput.data();
int repeats = (FLAGS_bytes + len) / (len + 1);
if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
// For block-size based measurements
if (0 && FLAGS_snappy) {
Measure(input, len, SNAPPY, repeats, 8<<10);
Measure(input, len, SNAPPY, repeats, 16<<10);
Measure(input, len, SNAPPY, repeats, 32<<10);
Measure(input, len, SNAPPY, repeats, 64<<10);
Measure(input, len, SNAPPY, repeats, 256<<10);
Measure(input, len, SNAPPY, repeats, 1024<<10);
}
TEST(Snappy, TestBenchmarkFiles) {
for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
Verify(ReadTestDataFile(kTestDataFiles[i].filename,
kTestDataFiles[i].size_limit));
}
}
static struct {
const char* label;
const char* filename;
size_t size_limit;
} files[] = {
{ "html", "html", 0 },
{ "urls", "urls.10K", 0 },
{ "jpg", "fireworks.jpeg", 0 },
{ "jpg_200", "fireworks.jpeg", 200 },
{ "pdf", "paper-100k.pdf", 0 },
{ "html4", "html_x_4", 0 },
{ "txt1", "alice29.txt", 0 },
{ "txt2", "asyoulik.txt", 0 },
{ "txt3", "lcet10.txt", 0 },
{ "txt4", "plrabn12.txt", 0 },
{ "pb", "geo.protodata", 0 },
{ "gaviota", "kppkn.gtb", 0 },
};
static void BM_UFlat(int iters, int arg) {
StopBenchmarkTiming();
// Pick file to process based on "arg"
CHECK_GE(arg, 0);
CHECK_LT(arg, ARRAYSIZE(files));
std::string contents =
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
char* dst = new char[contents.size()];
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
static_cast<int64_t>(contents.size()));
SetBenchmarkLabel(files[arg].label);
StartBenchmarkTiming();
while (iters-- > 0) {
CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
}
StopBenchmarkTiming();
delete[] dst;
}
BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
static void BM_UValidate(int iters, int arg) {
StopBenchmarkTiming();
// Pick file to process based on "arg"
CHECK_GE(arg, 0);
CHECK_LT(arg, ARRAYSIZE(files));
std::string contents =
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
static_cast<int64_t>(contents.size()));
SetBenchmarkLabel(files[arg].label);
StartBenchmarkTiming();
while (iters-- > 0) {
CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
}
StopBenchmarkTiming();
}
BENCHMARK(BM_UValidate)->DenseRange(0, 4);
static void BM_UIOVec(int iters, int arg) {
StopBenchmarkTiming();
// Pick file to process based on "arg"
CHECK_GE(arg, 0);
CHECK_LT(arg, ARRAYSIZE(files));
std::string contents =
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
// Uncompress into an iovec containing ten entries.
const int kNumEntries = 10;
struct iovec iov[kNumEntries];
char *dst = new char[contents.size()];
size_t used_so_far = 0;
for (int i = 0; i < kNumEntries; ++i) {
iov[i].iov_base = dst + used_so_far;
if (used_so_far == contents.size()) {
iov[i].iov_len = 0;
continue;
}
if (i == kNumEntries - 1) {
iov[i].iov_len = contents.size() - used_so_far;
} else {
iov[i].iov_len = contents.size() / kNumEntries;
}
used_so_far += iov[i].iov_len;
}
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
static_cast<int64_t>(contents.size()));
SetBenchmarkLabel(files[arg].label);
StartBenchmarkTiming();
while (iters-- > 0) {
CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
kNumEntries));
}
StopBenchmarkTiming();
delete[] dst;
}
BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
static void BM_UFlatSink(int iters, int arg) {
StopBenchmarkTiming();
// Pick file to process based on "arg"
CHECK_GE(arg, 0);
CHECK_LT(arg, ARRAYSIZE(files));
std::string contents =
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
std::string zcontents;
snappy::Compress(contents.data(), contents.size(), &zcontents);
char* dst = new char[contents.size()];
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
static_cast<int64_t>(contents.size()));
SetBenchmarkLabel(files[arg].label);
StartBenchmarkTiming();
while (iters-- > 0) {
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
snappy::UncheckedByteArraySink sink(dst);
CHECK(snappy::Uncompress(&source, &sink));
}
StopBenchmarkTiming();
std::string s(dst, contents.size());
CHECK_EQ(contents, s);
delete[] dst;
}
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
static void BM_ZFlat(int iters, int arg) {
StopBenchmarkTiming();
// Pick file to process based on "arg"
CHECK_GE(arg, 0);
CHECK_LT(arg, ARRAYSIZE(files));
std::string contents =
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
static_cast<int64_t>(contents.size()));
StartBenchmarkTiming();
size_t zsize = 0;
while (iters-- > 0) {
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
}
StopBenchmarkTiming();
const double compression_ratio =
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
100.0 * compression_ratio));
VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
files[arg].label, static_cast<int>(contents.size()),
static_cast<int>(zsize));
delete[] dst;
}
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
static void BM_ZFlatAll(int iters, int arg) {
StopBenchmarkTiming();
CHECK_EQ(arg, 0);
const int num_files = ARRAYSIZE(files);
std::vector<std::string> contents(num_files);
std::vector<char*> dst(num_files);
int64_t total_contents_size = 0;
for (int i = 0; i < num_files; ++i) {
contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
total_contents_size += contents[i].size();
}
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) * total_contents_size);
StartBenchmarkTiming();
size_t zsize = 0;
while (iters-- > 0) {
for (int i = 0; i < num_files; ++i) {
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
&zsize);
}
}
StopBenchmarkTiming();
for (char* dst_item : dst) {
delete[] dst_item;
}
SetBenchmarkLabel(StrFormat("%d files", num_files));
}
BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
StopBenchmarkTiming();
CHECK_EQ(arg, 0);
CHECK_GT(ARRAYSIZE(files), 0);
const std::string base_content =
ReadTestDataFile(files[0].filename, files[0].size_limit);
std::vector<std::string> contents;
std::vector<char*> dst;
int64_t total_contents_size = 0;
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
++table_bits) {
std::string content = base_content;
content.resize(1 << table_bits);
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
total_contents_size += content.size();
contents.push_back(std::move(content));
}
size_t zsize = 0;
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) * total_contents_size);
StartBenchmarkTiming();
while (iters-- > 0) {
for (size_t i = 0; i < contents.size(); ++i) {
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
&zsize);
}
}
StopBenchmarkTiming();
for (char* dst_item : dst) {
delete[] dst_item;
}
SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
}
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
} // namespace
} // namespace snappy
int main(int argc, char** argv) {
InitGoogle(argv[0], &argc, &argv, true);
RunSpecifiedBenchmarks();
if (argc >= 2) {
for (int arg = 1; arg < argc; ++arg) {
if (FLAGS_write_compressed) {
snappy::CompressFile(argv[arg]);
} else if (FLAGS_write_uncompressed) {
snappy::UncompressFile(argv[arg]);
} else {
snappy::MeasureFile(argv[arg]);
}
}
return 0;
}
return RUN_ALL_TESTS();
}

1
third_party/benchmark vendored Submodule

@ -0,0 +1 @@
Subproject commit bf585a2789e30585b4e3ce6baf11ef2750b54677

1
third_party/googletest vendored Submodule

@ -0,0 +1 @@
Subproject commit 18f8200e3079b0e54fa00cb7ac55d4c39dcf6da6