diff --git a/.appveyor.yml b/.appveyor.yml index 441a2b6..7229a25 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -34,3 +34,4 @@ build_script: test_script: - build\%CONFIGURATION%\snappy_unittest + - build\%CONFIGURATION%\snappy_benchmark diff --git a/.travis.yml b/.travis.yml index e10c7d4..975899a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -90,6 +90,7 @@ before_script: script: - build/snappy_unittest +- build/snappy_benchmark - if [ -f build/snappy_compress_fuzzer ]; then build/snappy_compress_fuzzer -runs=1000 -close_fd_mask=3; fi diff --git a/CMakeLists.txt b/CMakeLists.txt index ebaed22..7e252f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,372 +1,404 @@ -# Copyright 2019 Google Inc. All Rights Reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -cmake_minimum_required(VERSION 3.1) -project(Snappy VERSION 1.1.8 LANGUAGES C CXX) - -# C++ standard can be overridden when this is used as a sub-project. -if(NOT CMAKE_CXX_STANDARD) - # This project requires C++11. - set(CMAKE_CXX_STANDARD 11) - set(CMAKE_CXX_STANDARD_REQUIRED ON) - set(CMAKE_CXX_EXTENSIONS OFF) -endif(NOT CMAKE_CXX_STANDARD) - -# https://github.com/izenecloud/cmake/blob/master/SetCompilerWarningAll.cmake -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - # Use the highest warning level for Visual Studio. - set(CMAKE_CXX_WARNING_LEVEL 4) - if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") - string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - else(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") - endif(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") - - # Disable C++ exceptions. - string(REGEX REPLACE "/EH[a-z]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHs-c-") - add_definitions(-D_HAS_EXCEPTIONS=0) - - # Disable RTTI. - string(REGEX REPLACE "/GR" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-") -else(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - # Use -Wall for clang and gcc. - if(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") - endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") - - # Use -Wextra for clang and gcc. 
- if(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") - endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") - - # Use -Werror for clang only. - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - if(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") - endif(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") - endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - - # Disable C++ exceptions. - string(REGEX REPLACE "-fexceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions") - - # Disable RTTI. - string(REGEX REPLACE "-frtti" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") -endif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - -# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make -# it prominent in the GUI. -option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF) - -option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON) - -option(SNAPPY_FUZZING_BUILD "Build Snappy for fuzzing." OFF) - -option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF) - -option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." 
OFF) - -option(SNAPPY_INSTALL "Install Snappy's header and library" ON) - -include(TestBigEndian) -test_big_endian(SNAPPY_IS_BIG_ENDIAN) - -include(CheckIncludeFile) -check_include_file("sys/mman.h" HAVE_SYS_MMAN_H) -check_include_file("sys/resource.h" HAVE_SYS_RESOURCE_H) -check_include_file("sys/time.h" HAVE_SYS_TIME_H) -check_include_file("sys/uio.h" HAVE_SYS_UIO_H) -check_include_file("unistd.h" HAVE_UNISTD_H) -check_include_file("windows.h" HAVE_WINDOWS_H) - -include(CheckLibraryExists) -check_library_exists(z zlibVersion "" HAVE_LIBZ) -check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2) -check_library_exists(lz4 LZ4_compress_default "" HAVE_LIBLZ4) - -include(CheckCXXCompilerFlag) -CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX) -CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2) -CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX) -CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2) -if(SNAPPY_REQUIRE_AVX2) - if(HAVE_VISUAL_STUDIO_ARCH_AVX2) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") - endif(HAVE_VISUAL_STUDIO_ARCH_AVX2) - if(HAVE_CLANG_MAVX) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") - endif(HAVE_CLANG_MAVX) - if(HAVE_CLANG_MBMI2) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2") - endif(HAVE_CLANG_MBMI2) -elseif (SNAPPY_REQUIRE_AVX) - if(HAVE_VISUAL_STUDIO_ARCH_AVX) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX") - endif(HAVE_VISUAL_STUDIO_ARCH_AVX) - if(HAVE_CLANG_MAVX) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") - endif(HAVE_CLANG_MAVX) -endif(SNAPPY_REQUIRE_AVX2) - -# Used by googletest. 
-check_cxx_compiler_flag(-Wno-missing-field-initializers - SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS) - -include(CheckCXXSourceCompiles) -check_cxx_source_compiles(" -int main() { - return __builtin_expect(0, 1); -}" HAVE_BUILTIN_EXPECT) - -check_cxx_source_compiles(" -int main() { - return __builtin_ctzll(0); -}" HAVE_BUILTIN_CTZ) - -check_cxx_source_compiles(" -__attribute__((always_inline)) int zero() { return 0; } - -int main() { - return zero(); -}" HAVE_ATTRIBUTE_ALWAYS_INLINE) - -check_cxx_source_compiles(" -#include - -int main() { - const __m128i *src = 0; - __m128i dest; - const __m128i shuffle_mask = _mm_load_si128(src); - const __m128i pattern = _mm_shuffle_epi8(_mm_loadl_epi64(src), shuffle_mask); - _mm_storeu_si128(&dest, pattern); - return 0; -}" SNAPPY_HAVE_SSSE3) - -check_cxx_source_compiles(" -#include -int main() { - return _bzhi_u32(0, 1); -}" SNAPPY_HAVE_BMI2) - -include(CheckSymbolExists) -check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP) -check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF) - -find_package(Gflags QUIET) -if(GFLAGS_FOUND OR GFLAGS_TARGET) - set(HAVE_GFLAGS 1) -endif(GFLAGS_FOUND OR GFLAGS_TARGET) - -configure_file( - "cmake/config.h.in" - "${PROJECT_BINARY_DIR}/config.h" -) - -# We don't want to define HAVE_ macros in public headers. Instead, we use -# CMake's variable substitution with 0/1 variables, which will be seen by the -# preprocessor as constants. 
-set(HAVE_SYS_UIO_H_01 ${HAVE_SYS_UIO_H}) -if(NOT HAVE_SYS_UIO_H_01) - set(HAVE_SYS_UIO_H_01 0) -endif(NOT HAVE_SYS_UIO_H_01) - -if (SNAPPY_FUZZING_BUILD) - if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - message(WARNING "Fuzzing builds are only supported with Clang") - endif (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - - if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") - endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address") - - if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer-no-link") - endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link") -endif (SNAPPY_FUZZING_BUILD) - -configure_file( - "snappy-stubs-public.h.in" - "${PROJECT_BINARY_DIR}/snappy-stubs-public.h") - -add_library(snappy "") -target_sources(snappy - PRIVATE - "snappy-internal.h" - "snappy-stubs-internal.h" - "snappy-c.cc" - "snappy-sinksource.cc" - "snappy-stubs-internal.cc" - "snappy.cc" - "${PROJECT_BINARY_DIR}/config.h" - - # Only CMake 3.3+ supports PUBLIC sources in targets exported by "install". - $<$:PUBLIC> - $ - $ - $ - $ - $ - $ - $ - $ -) -target_include_directories(snappy - PUBLIC - $ - $ - $ -) -set_target_properties(snappy - PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) - -target_compile_definitions(snappy PRIVATE -DHAVE_CONFIG_H) -if(BUILD_SHARED_LIBS) - set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) -endif(BUILD_SHARED_LIBS) - -if(SNAPPY_BUILD_TESTS) - enable_testing() - - # Prevent overriding the parent project's compiler/linker settings on Windows. - set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) - set(install_gtest OFF) - set(install_gmock OFF) - set(build_gmock ON) - - # This project is tested using GoogleTest. - add_subdirectory("third_party/googletest") - - # This project uses Google benchmark for benchmarking. 
- set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) - set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE) - add_subdirectory("third_party/benchmark") - - # GoogleTest triggers a missing field initializers warning. - if(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS) - set_property(TARGET gtest - APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) - set_property(TARGET gmock - APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) - endif(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS) - - add_executable(snappy_unittest "") - target_sources(snappy_unittest - PRIVATE - "snappy_unittest.cc" - "snappy-test.cc" - ) - target_compile_definitions(snappy_unittest PRIVATE -DHAVE_CONFIG_H) - target_link_libraries(snappy_unittest - snappy ${GFLAGS_LIBRARIES} gmock gtest benchmark) - - if(HAVE_LIBZ) - target_link_libraries(snappy_unittest z) - endif(HAVE_LIBZ) - if(HAVE_LIBLZO2) - target_link_libraries(snappy_unittest lzo2) - endif(HAVE_LIBLZO2) - if(HAVE_LIBLZ4) - target_link_libraries(snappy_unittest lz4) - endif(HAVE_LIBLZ4) - - target_include_directories(snappy_unittest - BEFORE PRIVATE - "${PROJECT_SOURCE_DIR}" - "${GFLAGS_INCLUDE_DIRS}" - ) - - add_test( - NAME snappy_unittest - WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" - COMMAND "${PROJECT_BINARY_DIR}/snappy_unittest") -endif(SNAPPY_BUILD_TESTS) - -if(SNAPPY_FUZZING_BUILD) - add_executable(snappy_compress_fuzzer "") - target_sources(snappy_compress_fuzzer - PRIVATE "snappy_compress_fuzzer.cc" - ) - target_link_libraries(snappy_compress_fuzzer snappy) - set_target_properties(snappy_compress_fuzzer - PROPERTIES LINK_FLAGS "-fsanitize=fuzzer" - ) - - add_executable(snappy_uncompress_fuzzer "") - target_sources(snappy_uncompress_fuzzer - PRIVATE "snappy_uncompress_fuzzer.cc" - ) - target_link_libraries(snappy_uncompress_fuzzer snappy) - set_target_properties(snappy_uncompress_fuzzer - PROPERTIES LINK_FLAGS "-fsanitize=fuzzer" - ) -endif(SNAPPY_FUZZING_BUILD) - -# Must be included before 
CMAKE_INSTALL_INCLUDEDIR is used. -include(GNUInstallDirs) - -if(SNAPPY_INSTALL) - install(TARGETS snappy - EXPORT SnappyTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - ) - install( - FILES - "snappy-c.h" - "snappy-sinksource.h" - "snappy.h" - "${PROJECT_BINARY_DIR}/snappy-stubs-public.h" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - ) - - include(CMakePackageConfigHelpers) - configure_package_config_file( - "cmake/${PROJECT_NAME}Config.cmake.in" - "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" - INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" - ) - write_basic_package_version_file( - "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" - COMPATIBILITY SameMajorVersion - ) - install( - EXPORT SnappyTargets - NAMESPACE Snappy:: - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" - ) - install( - FILES - "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" - "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" - ) -endif(SNAPPY_INSTALL) +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +cmake_minimum_required(VERSION 3.1) +project(Snappy VERSION 1.1.8 LANGUAGES C CXX) + +# C++ standard can be overridden when this is used as a sub-project. +if(NOT CMAKE_CXX_STANDARD) + # This project requires C++11. + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CXX_EXTENSIONS OFF) +endif(NOT CMAKE_CXX_STANDARD) + +# https://github.com/izenecloud/cmake/blob/master/SetCompilerWarningAll.cmake +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # Use the highest warning level for Visual Studio. + set(CMAKE_CXX_WARNING_LEVEL 4) + if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + else(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") + endif(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + + # Disable C++ exceptions. + string(REGEX REPLACE "/EH[a-z]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHs-c-") + add_definitions(-D_HAS_EXCEPTIONS=0) + + # Disable RTTI. 
+ string(REGEX REPLACE "/GR" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-") +else(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # Use -Wall for clang and gcc. + if(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") + + # Use -Wextra for clang and gcc. + if(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") + + # Use -Werror for clang only. + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") + endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + + # Disable C++ exceptions. + string(REGEX REPLACE "-fexceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions") + + # Disable RTTI. + string(REGEX REPLACE "-frtti" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") +endif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + +# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make +# it prominent in the GUI. +option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF) + +option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON) + +option(SNAPPY_BUILD_BENCHMARKS "Build Snappy's benchmarks" ON) + +option(SNAPPY_FUZZING_BUILD "Build Snappy for fuzzing." OFF) + +option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF) + +option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." 
OFF) + +option(SNAPPY_INSTALL "Install Snappy's header and library" ON) + +include(TestBigEndian) +test_big_endian(SNAPPY_IS_BIG_ENDIAN) + +include(CheckIncludeFile) +check_include_file("sys/mman.h" HAVE_SYS_MMAN_H) +check_include_file("sys/resource.h" HAVE_SYS_RESOURCE_H) +check_include_file("sys/time.h" HAVE_SYS_TIME_H) +check_include_file("sys/uio.h" HAVE_SYS_UIO_H) +check_include_file("unistd.h" HAVE_UNISTD_H) +check_include_file("windows.h" HAVE_WINDOWS_H) + +include(CheckLibraryExists) +check_library_exists(z zlibVersion "" HAVE_LIBZ) +check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2) +check_library_exists(lz4 LZ4_compress_default "" HAVE_LIBLZ4) + +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX) +CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2) +CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX) +CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2) +if(SNAPPY_REQUIRE_AVX2) + if(HAVE_VISUAL_STUDIO_ARCH_AVX2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") + endif(HAVE_VISUAL_STUDIO_ARCH_AVX2) + if(HAVE_CLANG_MAVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") + endif(HAVE_CLANG_MAVX) + if(HAVE_CLANG_MBMI2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2") + endif(HAVE_CLANG_MBMI2) +elseif (SNAPPY_REQUIRE_AVX) + if(HAVE_VISUAL_STUDIO_ARCH_AVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX") + endif(HAVE_VISUAL_STUDIO_ARCH_AVX) + if(HAVE_CLANG_MAVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") + endif(HAVE_CLANG_MAVX) +endif(SNAPPY_REQUIRE_AVX2) + +# Used by googletest. 
+check_cxx_compiler_flag(-Wno-missing-field-initializers + SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS) + +include(CheckCXXSourceCompiles) +check_cxx_source_compiles(" +int main() { + return __builtin_expect(0, 1); +}" HAVE_BUILTIN_EXPECT) + +check_cxx_source_compiles(" +int main() { + return __builtin_ctzll(0); +}" HAVE_BUILTIN_CTZ) + +check_cxx_source_compiles(" +__attribute__((always_inline)) int zero() { return 0; } + +int main() { + return zero(); +}" HAVE_ATTRIBUTE_ALWAYS_INLINE) + +check_cxx_source_compiles(" +#include + +int main() { + const __m128i *src = 0; + __m128i dest; + const __m128i shuffle_mask = _mm_load_si128(src); + const __m128i pattern = _mm_shuffle_epi8(_mm_loadl_epi64(src), shuffle_mask); + _mm_storeu_si128(&dest, pattern); + return 0; +}" SNAPPY_HAVE_SSSE3) + +check_cxx_source_compiles(" +#include +int main() { + return _bzhi_u32(0, 1); +}" SNAPPY_HAVE_BMI2) + +include(CheckSymbolExists) +check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP) +check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF) + +find_package(Gflags QUIET) +if(GFLAGS_FOUND OR GFLAGS_TARGET) + set(HAVE_GFLAGS 1) +endif(GFLAGS_FOUND OR GFLAGS_TARGET) + +configure_file( + "cmake/config.h.in" + "${PROJECT_BINARY_DIR}/config.h" +) + +# We don't want to define HAVE_ macros in public headers. Instead, we use +# CMake's variable substitution with 0/1 variables, which will be seen by the +# preprocessor as constants. 
+set(HAVE_SYS_UIO_H_01 ${HAVE_SYS_UIO_H}) +if(NOT HAVE_SYS_UIO_H_01) + set(HAVE_SYS_UIO_H_01 0) +endif(NOT HAVE_SYS_UIO_H_01) + +if (SNAPPY_FUZZING_BUILD) + if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + message(WARNING "Fuzzing builds are only supported with Clang") + endif (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + + if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address") + + if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer-no-link") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link") +endif (SNAPPY_FUZZING_BUILD) + +configure_file( + "snappy-stubs-public.h.in" + "${PROJECT_BINARY_DIR}/snappy-stubs-public.h") + +add_library(snappy "") +target_sources(snappy + PRIVATE + "snappy-internal.h" + "snappy-stubs-internal.h" + "snappy-c.cc" + "snappy-sinksource.cc" + "snappy-stubs-internal.cc" + "snappy.cc" + "${PROJECT_BINARY_DIR}/config.h" + + # Only CMake 3.3+ supports PUBLIC sources in targets exported by "install". + $<$:PUBLIC> + $ + $ + $ + $ + $ + $ + $ + $ +) +target_include_directories(snappy + PUBLIC + $ + $ + $ +) +set_target_properties(snappy + PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) + +target_compile_definitions(snappy PRIVATE -DHAVE_CONFIG_H) +if(BUILD_SHARED_LIBS) + set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) +endif(BUILD_SHARED_LIBS) + +if(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS) + add_library(snappy_test_support "") + target_sources(snappy_test_support + PRIVATE + "snappy-test.cc" + "snappy-test.h" + "snappy_test_data.cc" + "snappy_test_data.h" + "${PROJECT_BINARY_DIR}/config.h" + ) + + # Test files include snappy-test.h, HAVE_CONFIG_H must be defined. 
+ target_compile_definitions(snappy_test_support PUBLIC -DHAVE_CONFIG_H) + + target_link_libraries(snappy_test_support snappy ${GFLAGS_LIBRARIES}) + + if(HAVE_LIBZ) + target_link_libraries(snappy_test_support z) + endif(HAVE_LIBZ) + if(HAVE_LIBLZO2) + target_link_libraries(snappy_test_support lzo2) + endif(HAVE_LIBLZO2) + if(HAVE_LIBLZ4) + target_link_libraries(snappy_test_support lz4) + endif(HAVE_LIBLZ4) + + target_include_directories(snappy_test_support + BEFORE PUBLIC + "${PROJECT_SOURCE_DIR}" + "${GFLAGS_INCLUDE_DIRS}" + ) +endif(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS) + +if(SNAPPY_BUILD_TESTS) + enable_testing() + + # Prevent overriding the parent project's compiler/linker settings on Windows. + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + set(install_gtest OFF) + set(install_gmock OFF) + set(build_gmock ON) + + # This project is tested using GoogleTest. + add_subdirectory("third_party/googletest") + + # GoogleTest triggers a missing field initializers warning. + if(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS) + set_property(TARGET gtest + APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) + set_property(TARGET gmock + APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) + endif(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS) + + add_executable(snappy_unittest "") + target_sources(snappy_unittest + PRIVATE + "snappy_unittest.cc" + ) + target_link_libraries(snappy_unittest snappy_test_support gmock_main gtest) + + add_test( + NAME snappy_unittest + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + COMMAND "${PROJECT_BINARY_DIR}/snappy_unittest") + + add_executable(snappy_test_tool "") + target_sources(snappy_test_tool + PRIVATE + "snappy_test_tool.cc" + ) + target_link_libraries(snappy_test_tool snappy_test_support) +endif(SNAPPY_BUILD_TESTS) + +if(SNAPPY_BUILD_BENCHMARKS) + add_executable(snappy_benchmark "") + target_sources(snappy_benchmark + PRIVATE + "snappy_benchmark.cc" + ) + target_link_libraries(snappy_benchmark 
snappy_test_support benchmark_main) + + # This project uses Google benchmark for benchmarking. + set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) + set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE) + add_subdirectory("third_party/benchmark") +endif(SNAPPY_BUILD_BENCHMARKS) + +if(SNAPPY_FUZZING_BUILD) + add_executable(snappy_compress_fuzzer "") + target_sources(snappy_compress_fuzzer + PRIVATE "snappy_compress_fuzzer.cc" + ) + target_link_libraries(snappy_compress_fuzzer snappy) + set_target_properties(snappy_compress_fuzzer + PROPERTIES LINK_FLAGS "-fsanitize=fuzzer" + ) + + add_executable(snappy_uncompress_fuzzer "") + target_sources(snappy_uncompress_fuzzer + PRIVATE "snappy_uncompress_fuzzer.cc" + ) + target_link_libraries(snappy_uncompress_fuzzer snappy) + set_target_properties(snappy_uncompress_fuzzer + PROPERTIES LINK_FLAGS "-fsanitize=fuzzer" + ) +endif(SNAPPY_FUZZING_BUILD) + +# Must be included before CMAKE_INSTALL_INCLUDEDIR is used. +include(GNUInstallDirs) + +if(SNAPPY_INSTALL) + install(TARGETS snappy + EXPORT SnappyTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + install( + FILES + "snappy-c.h" + "snappy-sinksource.h" + "snappy.h" + "${PROJECT_BINARY_DIR}/snappy-stubs-public.h" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" + ) + + include(CMakePackageConfigHelpers) + configure_package_config_file( + "cmake/${PROJECT_NAME}Config.cmake.in" + "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + ) + write_basic_package_version_file( + "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" + COMPATIBILITY SameMajorVersion + ) + install( + EXPORT SnappyTargets + NAMESPACE Snappy:: + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + ) + install( + FILES + "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" + 
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + ) +endif(SNAPPY_INSTALL) diff --git a/README.md b/README.md index 0d2549d..2602a4a 100644 --- a/README.md +++ b/README.md @@ -109,35 +109,33 @@ information. Tests and benchmarks ==================== -When you compile Snappy, snappy_unittest is compiled in addition to the -library itself. You do not need it to use the compressor from your own library, -but it contains several useful components for Snappy development. +When you compile Snappy, the following binaries are compiled in addition to the +library itself. You do not need them to use the compressor from your own +library, but they are useful for Snappy development. -First of all, it contains unit tests, verifying correctness on your machine in -various scenarios. If you want to change or optimize Snappy, please run the -tests to verify you have not broken anything. +* `snappy_benchmark` contains microbenchmarks used to tune compression and + decompression performance. +* `snappy_unittest` contains unit tests, verifying correctness on your machine + in various scenarios. +* `snappy_test_tool` can benchmark Snappy against a few other compression + libraries (zlib, LZO, LZF, and QuickLZ), if they were detected at configure + time. To benchmark using a given file, give the compression algorithm you want + to test Snappy against (e.g. --zlib) and then a list of one or more file names + on the command line. + +If you want to change or optimize Snappy, please run the tests and benchmarks to +verify you have not broken anything. + +The testdata/ directory contains the files used by the microbenchmarks, which +should provide a reasonably balanced starting point for benchmarking. (Note that +baddata[1-3].snappy are not intended as benchmarks; they are used to verify +correctness in the presence of corrupted data in the unit test.) 
The gflags library for handling of command-line flags is used if it's installed. You can find it at https://gflags.github.io/gflags/ -In addition to the unit tests, snappy contains microbenchmarks used to -tune compression and decompression performance. These are automatically run -before the unit tests, but you can disable them using the flag ---run_microbenchmarks=false if you have gflags installed (otherwise you will -need to edit the source). - -Finally, snappy can benchmark Snappy against a few other compression libraries -(zlib, LZO, LZF, and QuickLZ), if they were detected at configure time. -To benchmark using a given file, give the compression algorithm you want to test -Snappy against (e.g. --zlib) and then a list of one or more file names on the -command line. The testdata/ directory contains the files used by the -microbenchmark, which should provide a reasonably balanced starting point for -benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they -are used to verify correctness in the presence of corrupted data in the unit -test.) - Contact ======= diff --git a/cmake/config.h.in b/cmake/config.h.in index 86b415c..76b24f7 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -19,9 +19,6 @@ /* Define to 1 to use the gflags package for command-line parsing. */ #cmakedefine HAVE_GFLAGS 1 -/* Define to 1 if you have Google Test. */ -#cmakedefine HAVE_GTEST 1 - /* Define to 1 if you have the `lzo2' library (-llzo2). */ #cmakedefine HAVE_LIBLZO2 1 diff --git a/snappy_benchmark.cc b/snappy_benchmark.cc new file mode 100644 index 0000000..df7f4e3 --- /dev/null +++ b/snappy_benchmark.cc @@ -0,0 +1,329 @@ +// Copyright 2020 Google Inc. All Rights Reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "gtest/gtest.h"
+#include "snappy-internal.h"
+#include "snappy-sinksource.h"
+#include "snappy-test.h"
+#include "snappy.h"
+#include "snappy_test_data.h"
+
+namespace snappy {
+
+namespace {
+
+void BM_UFlat(benchmark::State& state) {
+  // Pick file to process based on state.range(0).
+  int file_index = state.range(0);
+
+  CHECK_GE(file_index, 0);
+  CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
+  std::string contents =
+      ReadTestDataFile(kTestDataFiles[file_index].filename,
+                       kTestDataFiles[file_index].size_limit);
+
+  std::string zcontents;
+  snappy::Compress(contents.data(), contents.size(), &zcontents);
+  char* dst = new char[contents.size()];
+
+  for (auto s : state) {
+    CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
+    benchmark::DoNotOptimize(dst);
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          static_cast<int64_t>(contents.size()));
+  state.SetLabel(kTestDataFiles[file_index].label);
+
+  delete[] dst;
+}
+BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
+
+struct SourceFiles {
+  SourceFiles() {
+    for (int i = 0; i < kFiles; i++) {
+      std::string contents = ReadTestDataFile(kTestDataFiles[i].filename,
+                                              kTestDataFiles[i].size_limit);
+      max_size = std::max(max_size, contents.size());
+      sizes[i] = contents.size();
+      snappy::Compress(contents.data(), contents.size(), &zcontents[i]);
+    }
+  }
+  static constexpr int kFiles = ARRAYSIZE(kTestDataFiles);
+  std::string zcontents[kFiles];
+  size_t sizes[kFiles];
+  size_t max_size = 0;
+};
+
+void BM_UFlatMedley(benchmark::State& state) {
+  static const SourceFiles* const source = new SourceFiles();  // never freed
+
+  std::vector<char> dst(source->max_size);
+
+  for (auto s : state) {
+    for (int i = 0; i < SourceFiles::kFiles; i++) {
+      CHECK(snappy::RawUncompress(source->zcontents[i].data(),
+                                  source->zcontents[i].size(), dst.data()));
+      benchmark::DoNotOptimize(dst);
+    }
+  }
+
+  int64_t source_sizes = 0;
+  for (int i = 0; i < SourceFiles::kFiles; i++) {
+    source_sizes += static_cast<int64_t>(source->sizes[i]);
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          source_sizes);
+}
+BENCHMARK(BM_UFlatMedley);
+
+void BM_UValidate(benchmark::State& state) {
+  // Pick file to process based on state.range(0).
+  int file_index = state.range(0);
+
+  CHECK_GE(file_index, 0);
+  CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
+  std::string contents =
+      ReadTestDataFile(kTestDataFiles[file_index].filename,
+                       kTestDataFiles[file_index].size_limit);
+
+  std::string zcontents;
+  snappy::Compress(contents.data(), contents.size(), &zcontents);
+
+  for (auto s : state) {
+    CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          static_cast<int64_t>(contents.size()));
+  state.SetLabel(kTestDataFiles[file_index].label);
+}
+BENCHMARK(BM_UValidate)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
+
+void BM_UValidateMedley(benchmark::State& state) {
+  static const SourceFiles* const source = new SourceFiles();  // never freed
+
+  for (auto s : state) {
+    for (int i = 0; i < SourceFiles::kFiles; i++) {
+      CHECK(snappy::IsValidCompressedBuffer(source->zcontents[i].data(),
+                                            source->zcontents[i].size()));
+    }
+  }
+
+  int64_t source_sizes = 0;
+  for (int i = 0; i < SourceFiles::kFiles; i++) {
+    source_sizes += static_cast<int64_t>(source->sizes[i]);
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          source_sizes);
+}
+BENCHMARK(BM_UValidateMedley);
+
+void BM_UIOVec(benchmark::State& state) {
+  // Pick file to process based on state.range(0).
+  int file_index = state.range(0);
+
+  CHECK_GE(file_index, 0);
+  CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
+  std::string contents =
+      ReadTestDataFile(kTestDataFiles[file_index].filename,
+                       kTestDataFiles[file_index].size_limit);
+
+  std::string zcontents;
+  snappy::Compress(contents.data(), contents.size(), &zcontents);
+
+  // Uncompress into an iovec containing ten entries.
+  const int kNumEntries = 10;
+  struct iovec iov[kNumEntries];
+  char *dst = new char[contents.size()];
+  size_t used_so_far = 0;
+  for (int i = 0; i < kNumEntries; ++i) {
+    iov[i].iov_base = dst + used_so_far;
+    if (used_so_far == contents.size()) {
+      iov[i].iov_len = 0;  // output already fully assigned
+      continue;
+    }
+
+    if (i == kNumEntries - 1) {
+      iov[i].iov_len = contents.size() - used_so_far;  // take the remainder
+    } else {
+      iov[i].iov_len = contents.size() / kNumEntries;
+    }
+    used_so_far += iov[i].iov_len;
+  }
+
+  for (auto s : state) {
+    CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
+                                       kNumEntries));
+    benchmark::DoNotOptimize(iov);
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          static_cast<int64_t>(contents.size()));
+  state.SetLabel(kTestDataFiles[file_index].label);
+
+  delete[] dst;
+}
+BENCHMARK(BM_UIOVec)->DenseRange(0, 4);  // file indices 0 through 4 only
+
+void BM_UFlatSink(benchmark::State& state) {
+  // Pick file to process based on state.range(0).
+  int file_index = state.range(0);
+
+  CHECK_GE(file_index, 0);
+  CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
+  std::string contents =
+      ReadTestDataFile(kTestDataFiles[file_index].filename,
+                       kTestDataFiles[file_index].size_limit);
+
+  std::string zcontents;
+  snappy::Compress(contents.data(), contents.size(), &zcontents);
+  char* dst = new char[contents.size()];
+
+  for (auto s : state) {
+    snappy::ByteArraySource source(zcontents.data(), zcontents.size());
+    snappy::UncheckedByteArraySink sink(dst);
+    CHECK(snappy::Uncompress(&source, &sink));
+    benchmark::DoNotOptimize(sink);
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          static_cast<int64_t>(contents.size()));
+  state.SetLabel(kTestDataFiles[file_index].label);
+
+  std::string s(dst, contents.size());
+  CHECK_EQ(contents, s);  // round-trip must reproduce the input
+
+  delete[] dst;
+}
+
+BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
+
+void BM_ZFlat(benchmark::State& state) {
+  // Pick file to process based on state.range(0).
+ int file_index = state.range(0); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + char* dst = new char[snappy::MaxCompressedLength(contents.size())]; + + size_t zsize = 0; + for (auto s : state) { + snappy::RawCompress(contents.data(), contents.size(), dst, &zsize); + benchmark::DoNotOptimize(dst); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + const double compression_ratio = + static_cast(zsize) / std::max(1, contents.size()); + state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label, + 100.0 * compression_ratio)); + VLOG(0) << StrFormat("compression for %s: %d -> %d bytes", + kTestDataFiles[file_index].label, contents.size(), + zsize); + delete[] dst; +} +BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1); + +void BM_ZFlatAll(benchmark::State& state) { + const int num_files = ARRAYSIZE(kTestDataFiles); + + std::vector contents(num_files); + std::vector dst(num_files); + + int64_t total_contents_size = 0; + for (int i = 0; i < num_files; ++i) { + contents[i] = ReadTestDataFile(kTestDataFiles[i].filename, + kTestDataFiles[i].size_limit); + dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())]; + total_contents_size += contents[i].size(); + } + + size_t zsize = 0; + for (auto s : state) { + for (int i = 0; i < num_files; ++i) { + snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], + &zsize); + benchmark::DoNotOptimize(dst); + } + } + + state.SetBytesProcessed(static_cast(state.iterations()) * + total_contents_size); + + for (char* dst_item : dst) { + delete[] dst_item; + } + state.SetLabel(StrFormat("%d kTestDataFiles", num_files)); +} +BENCHMARK(BM_ZFlatAll); + +void BM_ZFlatIncreasingTableSize(benchmark::State& state) { + CHECK_GT(ARRAYSIZE(kTestDataFiles), 0); + const std::string base_content = 
ReadTestDataFile( + kTestDataFiles[0].filename, kTestDataFiles[0].size_limit); + + std::vector contents; + std::vector dst; + int64_t total_contents_size = 0; + for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits; + ++table_bits) { + std::string content = base_content; + content.resize(1 << table_bits); + dst.push_back(new char[snappy::MaxCompressedLength(content.size())]); + total_contents_size += content.size(); + contents.push_back(std::move(content)); + } + + size_t zsize = 0; + for (auto s : state) { + for (size_t i = 0; i < contents.size(); ++i) { + snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], + &zsize); + benchmark::DoNotOptimize(dst); + } + } + + state.SetBytesProcessed(static_cast(state.iterations()) * + total_contents_size); + + for (char* dst_item : dst) { + delete[] dst_item; + } + state.SetLabel(StrFormat("%d tables", contents.size())); +} +BENCHMARK(BM_ZFlatIncreasingTableSize); + +} // namespace + +} // namespace snappy diff --git a/snappy_test_data.cc b/snappy_test_data.cc new file mode 100644 index 0000000..8b54153 --- /dev/null +++ b/snappy_test_data.cc @@ -0,0 +1,57 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Support code for reading test data. + +#include "snappy_test_data.h" + +#include +#include +#include + +#include "snappy-test.h" + +namespace snappy { + +std::string ReadTestDataFile(const char* base, size_t size_limit) { + std::string srcdir; + const char* srcdir_env = std::getenv("srcdir"); // This is set by Automake. + if (srcdir_env) { + srcdir = std::string(srcdir_env) + "/"; + } + + std::string contents; + CHECK_OK(file::GetContents(srcdir + "testdata/" + base, &contents, + file::Defaults())); + if (size_limit > 0) { + contents = contents.substr(0, size_limit); + } + return contents; +} + +} // namespace snappy diff --git a/snappy_test_data.h b/snappy_test_data.h new file mode 100644 index 0000000..b01f74b --- /dev/null +++ b/snappy_test_data.h @@ -0,0 +1,68 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// List of test case files. + +#ifndef THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__ +#define THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__ + +#include +#include + +namespace snappy { + +std::string ReadTestDataFile(const char* base, size_t size_limit); + +// TODO: Replace anonymous namespace with inline variable when we can +// rely on C++17. 
+namespace { + +constexpr struct { + const char* label; + const char* filename; + size_t size_limit; +} kTestDataFiles[] = { + { "html", "html", 0 }, + { "urls", "urls.10K", 0 }, + { "jpg", "fireworks.jpeg", 0 }, + { "jpg_200", "fireworks.jpeg", 200 }, + { "pdf", "paper-100k.pdf", 0 }, + { "html4", "html_x_4", 0 }, + { "txt1", "alice29.txt", 0 }, + { "txt2", "asyoulik.txt", 0 }, + { "txt3", "lcet10.txt", 0 }, + { "txt4", "plrabn12.txt", 0 }, + { "pb", "geo.protodata", 0 }, + { "gaviota", "kppkn.gtb", 0 }, +}; + +} // namespace + +} // namespace snappy + +#endif // THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__ diff --git a/snappy_test_tool.cc b/snappy_test_tool.cc new file mode 100644 index 0000000..c5c195a --- /dev/null +++ b/snappy_test_tool.cc @@ -0,0 +1,461 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include +#include +#include +#include +#include + +#include "snappy-test.h" + +#include "snappy-internal.h" +#include "snappy-sinksource.h" +#include "snappy.h" +#include "snappy_test_data.h" + +DEFINE_int32(start_len, -1, + "Starting prefix size for testing (-1: just full file contents)"); +DEFINE_int32(end_len, -1, + "Starting prefix size for testing (-1: just full file contents)"); +DEFINE_int32(bytes, 10485760, + "How many bytes to compress/uncompress per file for timing"); + +DEFINE_bool(zlib, true, + "Run zlib compression (http://www.zlib.net)"); +DEFINE_bool(lzo, true, + "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)"); +DEFINE_bool(lz4, true, "Run LZ4 compression (https://github.com/lz4/lz4)"); +DEFINE_bool(snappy, true, "Run snappy compression"); + +DEFINE_bool(write_compressed, false, + "Write compressed versions of each file to .comp"); +DEFINE_bool(write_uncompressed, false, + "Write uncompressed versions of each file to .uncomp"); + +namespace snappy { + +namespace { + +#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF) + +// To test against code that reads beyond its input, this class copies a +// string to a newly allocated group of pages, the last of which +// is made unreadable via mprotect. 
Note that we need to allocate the +// memory with mmap(), as POSIX allows mprotect() only on memory allocated +// with mmap(), and some malloc/posix_memalign implementations expect to +// be able to read previously allocated memory while doing heap allocations. +class DataEndingAtUnreadablePage { + public: + explicit DataEndingAtUnreadablePage(const std::string& s) { + const size_t page_size = sysconf(_SC_PAGESIZE); + const size_t size = s.size(); + // Round up space for string to a multiple of page_size. + size_t space_for_string = (size + page_size - 1) & ~(page_size - 1); + alloc_size_ = space_for_string + page_size; + mem_ = mmap(NULL, alloc_size_, + PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + CHECK_NE(MAP_FAILED, mem_); + protected_page_ = reinterpret_cast(mem_) + space_for_string; + char* dst = protected_page_ - size; + std::memcpy(dst, s.data(), size); + data_ = dst; + size_ = size; + // Make guard page unreadable. + CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE)); + } + + ~DataEndingAtUnreadablePage() { + const size_t page_size = sysconf(_SC_PAGESIZE); + // Undo the mprotect. + CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE)); + CHECK_EQ(0, munmap(mem_, alloc_size_)); + } + + const char* data() const { return data_; } + size_t size() const { return size_; } + + private: + size_t alloc_size_; + void* mem_; + char* protected_page_; + const char* data_; + size_t size_; +}; + +#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF) + +// Fallback for systems without mmap. 
+using DataEndingAtUnreadablePage = std::string; + +#endif + +enum CompressorType { ZLIB, LZO, LZ4, SNAPPY }; + +const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"}; + +size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) { + switch (comp) { +#ifdef ZLIB_VERSION + case ZLIB: + return ZLib::MinCompressbufSize(input_size); +#endif // ZLIB_VERSION + +#ifdef LZO_VERSION + case LZO: + return input_size + input_size/64 + 16 + 3; +#endif // LZO_VERSION + +#ifdef LZ4_VERSION_NUMBER + case LZ4: + return LZ4_compressBound(input_size); +#endif // LZ4_VERSION_NUMBER + + case SNAPPY: + return snappy::MaxCompressedLength(input_size); + + default: + LOG(FATAL) << "Unknown compression type number " << comp; + return 0; + } +} + +// Returns true if we successfully compressed, false otherwise. +// +// If compressed_is_preallocated is set, do not resize the compressed buffer. +// This is typically what you want for a benchmark, in order to not spend +// time in the memory allocator. If you do set this flag, however, +// "compressed" must be preinitialized to at least MinCompressbufSize(comp) +// number of bytes, and may contain junk bytes at the end after return. 
+bool Compress(const char* input, size_t input_size, CompressorType comp, + std::string* compressed, bool compressed_is_preallocated) { + if (!compressed_is_preallocated) { + compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); + } + + switch (comp) { +#ifdef ZLIB_VERSION + case ZLIB: { + ZLib zlib; + uLongf destlen = compressed->size(); + int ret = zlib.Compress( + reinterpret_cast(string_as_array(compressed)), + &destlen, + reinterpret_cast(input), + input_size); + CHECK_EQ(Z_OK, ret); + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + return true; + } +#endif // ZLIB_VERSION + +#ifdef LZO_VERSION + case LZO: { + unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; + lzo_uint destlen; + int ret = lzo1x_1_15_compress( + reinterpret_cast(input), + input_size, + reinterpret_cast(string_as_array(compressed)), + &destlen, + mem); + CHECK_EQ(LZO_E_OK, ret); + delete[] mem; + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + break; + } +#endif // LZO_VERSION + +#ifdef LZ4_VERSION_NUMBER + case LZ4: { + int destlen = compressed->size(); + destlen = LZ4_compress_default(input, string_as_array(compressed), + input_size, destlen); + CHECK(destlen != 0); + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + break; + } +#endif // LZ4_VERSION_NUMBER + + case SNAPPY: { + size_t destlen; + snappy::RawCompress(input, input_size, + string_as_array(compressed), + &destlen); + CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + break; + } + + default: { + return false; // the asked-for library wasn't compiled in + } + } + return true; +} + +bool Uncompress(const std::string& compressed, CompressorType comp, int size, + std::string* output) { + switch (comp) { +#ifdef ZLIB_VERSION + case ZLIB: { + output->resize(size); + ZLib zlib; + uLongf destlen = output->size(); + int ret = zlib.Uncompress( + 
reinterpret_cast(string_as_array(output)), + &destlen, + reinterpret_cast(compressed.data()), + compressed.size()); + CHECK_EQ(Z_OK, ret); + CHECK_EQ(static_cast(size), destlen); + break; + } +#endif // ZLIB_VERSION + +#ifdef LZO_VERSION + case LZO: { + output->resize(size); + lzo_uint destlen; + int ret = lzo1x_decompress( + reinterpret_cast(compressed.data()), + compressed.size(), + reinterpret_cast(string_as_array(output)), + &destlen, + NULL); + CHECK_EQ(LZO_E_OK, ret); + CHECK_EQ(static_cast(size), destlen); + break; + } +#endif // LZO_VERSION + +#ifdef LZ4_VERSION_NUMBER + case LZ4: { + output->resize(size); + int destlen = output->size(); + destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output), + compressed.size(), destlen); + CHECK(destlen != 0); + CHECK_EQ(size, destlen); + break; + } +#endif // LZ4_VERSION_NUMBER + case SNAPPY: { + snappy::RawUncompress(compressed.data(), compressed.size(), + string_as_array(output)); + break; + } + + default: { + return false; // the asked-for library wasn't compiled in + } + } + return true; +} + +void Measure(const char* data, size_t length, CompressorType comp, int repeats, + int block_size) { + // Run tests a few time and pick median running times + static const int kRuns = 5; + double ctime[kRuns]; + double utime[kRuns]; + int compressed_size = 0; + + { + // Chop the input into blocks + int num_blocks = (length + block_size - 1) / block_size; + std::vector input(num_blocks); + std::vector input_length(num_blocks); + std::vector compressed(num_blocks); + std::vector output(num_blocks); + for (int b = 0; b < num_blocks; ++b) { + int input_start = b * block_size; + int input_limit = std::min((b+1)*block_size, length); + input[b] = data+input_start; + input_length[b] = input_limit-input_start; + } + + // Pre-grow the output buffers so we don't measure string append time. 
+ for (std::string& compressed_block : compressed) { + compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); + } + + // First, try one trial compression to make sure the code is compiled in + if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) { + LOG(WARNING) << "Skipping " << names[comp] << ": " + << "library not compiled in"; + return; + } + + for (int run = 0; run < kRuns; ++run) { + CycleTimer ctimer, utimer; + + // Pre-grow the output buffers so we don't measure string append time. + for (std::string& compressed_block : compressed) { + compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); + } + + ctimer.Start(); + for (int b = 0; b < num_blocks; ++b) { + for (int i = 0; i < repeats; ++i) + Compress(input[b], input_length[b], comp, &compressed[b], true); + } + ctimer.Stop(); + + // Compress once more, with resizing, so we don't leave junk + // at the end that will confuse the decompressor. + for (int b = 0; b < num_blocks; ++b) { + Compress(input[b], input_length[b], comp, &compressed[b], false); + } + + for (int b = 0; b < num_blocks; ++b) { + output[b].resize(input_length[b]); + } + + utimer.Start(); + for (int i = 0; i < repeats; ++i) { + for (int b = 0; b < num_blocks; ++b) + Uncompress(compressed[b], comp, input_length[b], &output[b]); + } + utimer.Stop(); + + ctime[run] = ctimer.Get(); + utime[run] = utimer.Get(); + } + + compressed_size = 0; + for (const std::string& compressed_item : compressed) { + compressed_size += compressed_item.size(); + } + } + + std::sort(ctime, ctime + kRuns); + std::sort(utime, utime + kRuns); + const int med = kRuns/2; + + float comp_rate = (length / ctime[med]) * repeats / 1048576.0; + float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; + std::string x = names[comp]; + x += ":"; + std::string urate = (uncomp_rate >= 0) ? 
StrFormat("%.1f", uncomp_rate) + : std::string("?"); + std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " + "comp %5.1f MB/s uncomp %5s MB/s\n", + x.c_str(), + block_size/(1<<20), + static_cast(length), static_cast(compressed_size), + (compressed_size * 100.0) / std::max(1, length), + comp_rate, + urate.c_str()); +} + +void CompressFile(const char* fname) { + std::string fullinput; + CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); + + std::string compressed; + Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false); + + CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed, + file::Defaults())); +} + +void UncompressFile(const char* fname) { + std::string fullinput; + CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); + + size_t uncompLength; + CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(), + &uncompLength)); + + std::string uncompressed; + uncompressed.resize(uncompLength); + CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed)); + + CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed, + file::Defaults())); +} + +void MeasureFile(const char* fname) { + std::string fullinput; + CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); + std::printf("%-40s :\n", fname); + + int start_len = (FLAGS_start_len < 0) ? 
fullinput.size() : FLAGS_start_len; + int end_len = fullinput.size(); + if (FLAGS_end_len >= 0) { + end_len = std::min(fullinput.size(), FLAGS_end_len); + } + for (int len = start_len; len <= end_len; ++len) { + const char* const input = fullinput.data(); + int repeats = (FLAGS_bytes + len) / (len + 1); + if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024 << 10); + if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024 << 10); + if (FLAGS_lz4) Measure(input, len, LZ4, repeats, 1024 << 10); + if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096 << 10); + + // For block-size based measurements + if (0 && FLAGS_snappy) { + Measure(input, len, SNAPPY, repeats, 8<<10); + Measure(input, len, SNAPPY, repeats, 16<<10); + Measure(input, len, SNAPPY, repeats, 32<<10); + Measure(input, len, SNAPPY, repeats, 64<<10); + Measure(input, len, SNAPPY, repeats, 256<<10); + Measure(input, len, SNAPPY, repeats, 1024<<10); + } + } +} + +} // namespace + +} // namespace snappy + +int main(int argc, char** argv) { + InitGoogle(argv[0], &argc, &argv, true); + + for (int arg = 1; arg < argc; ++arg) { + if (FLAGS_write_compressed) { + snappy::CompressFile(argv[arg]); + } else if (FLAGS_write_uncompressed) { + snappy::UncompressFile(argv[arg]); + } else { + snappy::MeasureFile(argv[arg]); + } + } + return 0; +} diff --git a/snappy_unittest.cc b/snappy_unittest.cc index 9fae7af..0adacb7 100644 --- a/snappy_unittest.cc +++ b/snappy_unittest.cc @@ -26,10 +26,9 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#include #include #include - -#include #include #include #include @@ -37,31 +36,12 @@ #include "snappy-test.h" -#include "benchmark/benchmark.h" #include "gtest/gtest.h" -#include "snappy.h" #include "snappy-internal.h" #include "snappy-sinksource.h" - -DEFINE_int32(start_len, -1, - "Starting prefix size for testing (-1: just full file contents)"); -DEFINE_int32(end_len, -1, - "Starting prefix size for testing (-1: just full file contents)"); -DEFINE_int32(bytes, 10485760, - "How many bytes to compress/uncompress per file for timing"); - -DEFINE_bool(zlib, false, - "Run zlib compression (http://www.zlib.net)"); -DEFINE_bool(lzo, false, - "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)"); -DEFINE_bool(lz4, false, "Run LZ4 compression (https://github.com/lz4/lz4)"); -DEFINE_bool(snappy, true, "Run snappy compression"); - -DEFINE_bool(write_compressed, false, - "Write compressed versions of each file to .comp"); -DEFINE_bool(write_uncompressed, false, - "Write uncompressed versions of each file to .uncomp"); +#include "snappy.h" +#include "snappy_test_data.h" DEFINE_bool(snappy_dump_decompression_table, false, "If true, we print the decompression table during tests."); @@ -123,273 +103,6 @@ using DataEndingAtUnreadablePage = std::string; #endif -enum CompressorType { ZLIB, LZO, LZ4, SNAPPY }; - -const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"}; - -size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) { - switch (comp) { -#ifdef ZLIB_VERSION - case ZLIB: - return ZLib::MinCompressbufSize(input_size); -#endif // ZLIB_VERSION - -#ifdef LZO_VERSION - case LZO: - return input_size + input_size/64 + 16 + 3; -#endif // LZO_VERSION - -#ifdef LZ4_VERSION_NUMBER - case LZ4: - return LZ4_compressBound(input_size); -#endif // LZ4_VERSION_NUMBER - - case SNAPPY: - return snappy::MaxCompressedLength(input_size); - - default: - LOG(FATAL) << "Unknown compression type number " << comp; - return 0; - } -} - -// Returns true if we successfully 
compressed, false otherwise. -// -// If compressed_is_preallocated is set, do not resize the compressed buffer. -// This is typically what you want for a benchmark, in order to not spend -// time in the memory allocator. If you do set this flag, however, -// "compressed" must be preinitialized to at least MinCompressbufSize(comp) -// number of bytes, and may contain junk bytes at the end after return. -bool Compress(const char* input, size_t input_size, CompressorType comp, - std::string* compressed, bool compressed_is_preallocated) { - if (!compressed_is_preallocated) { - compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); - } - - switch (comp) { -#ifdef ZLIB_VERSION - case ZLIB: { - ZLib zlib; - uLongf destlen = compressed->size(); - int ret = zlib.Compress( - reinterpret_cast(string_as_array(compressed)), - &destlen, - reinterpret_cast(input), - input_size); - CHECK_EQ(Z_OK, ret); - if (!compressed_is_preallocated) { - compressed->resize(destlen); - } - return true; - } -#endif // ZLIB_VERSION - -#ifdef LZO_VERSION - case LZO: { - unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; - lzo_uint destlen; - int ret = lzo1x_1_15_compress( - reinterpret_cast(input), - input_size, - reinterpret_cast(string_as_array(compressed)), - &destlen, - mem); - CHECK_EQ(LZO_E_OK, ret); - delete[] mem; - if (!compressed_is_preallocated) { - compressed->resize(destlen); - } - break; - } -#endif // LZO_VERSION - -#ifdef LZ4_VERSION_NUMBER - case LZ4: { - int destlen = compressed->size(); - destlen = LZ4_compress_default(input, string_as_array(compressed), - input_size, destlen); - CHECK(destlen != 0); - if (!compressed_is_preallocated) { - compressed->resize(destlen); - } - break; - } -#endif // LZ4_VERSION_NUMBER - - case SNAPPY: { - size_t destlen; - snappy::RawCompress(input, input_size, - string_as_array(compressed), - &destlen); - CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); - if (!compressed_is_preallocated) { - 
compressed->resize(destlen); - } - break; - } - - default: { - return false; // the asked-for library wasn't compiled in - } - } - return true; -} - -bool Uncompress(const std::string& compressed, CompressorType comp, int size, - std::string* output) { - switch (comp) { -#ifdef ZLIB_VERSION - case ZLIB: { - output->resize(size); - ZLib zlib; - uLongf destlen = output->size(); - int ret = zlib.Uncompress( - reinterpret_cast(string_as_array(output)), - &destlen, - reinterpret_cast(compressed.data()), - compressed.size()); - CHECK_EQ(Z_OK, ret); - CHECK_EQ(static_cast(size), destlen); - break; - } -#endif // ZLIB_VERSION - -#ifdef LZO_VERSION - case LZO: { - output->resize(size); - lzo_uint destlen; - int ret = lzo1x_decompress( - reinterpret_cast(compressed.data()), - compressed.size(), - reinterpret_cast(string_as_array(output)), - &destlen, - NULL); - CHECK_EQ(LZO_E_OK, ret); - CHECK_EQ(static_cast(size), destlen); - break; - } -#endif // LZO_VERSION - -#ifdef LZ4_VERSION_NUMBER - case LZ4: { - output->resize(size); - int destlen = output->size(); - destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output), - compressed.size(), destlen); - CHECK(destlen != 0); - CHECK_EQ(size, destlen); - break; - } -#endif // LZ4_VERSION_NUMBER - case SNAPPY: { - snappy::RawUncompress(compressed.data(), compressed.size(), - string_as_array(output)); - break; - } - - default: { - return false; // the asked-for library wasn't compiled in - } - } - return true; -} - -void Measure(const char* data, size_t length, CompressorType comp, int repeats, - int block_size) { - // Run tests a few time and pick median running times - static const int kRuns = 5; - double ctime[kRuns]; - double utime[kRuns]; - int compressed_size = 0; - - { - // Chop the input into blocks - int num_blocks = (length + block_size - 1) / block_size; - std::vector input(num_blocks); - std::vector input_length(num_blocks); - std::vector compressed(num_blocks); - std::vector output(num_blocks); - for (int 
b = 0; b < num_blocks; ++b) { - int input_start = b * block_size; - int input_limit = std::min((b+1)*block_size, length); - input[b] = data+input_start; - input_length[b] = input_limit-input_start; - } - - // Pre-grow the output buffers so we don't measure string append time. - for (std::string& compressed_block : compressed) { - compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); - } - - // First, try one trial compression to make sure the code is compiled in - if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) { - LOG(WARNING) << "Skipping " << names[comp] << ": " - << "library not compiled in"; - return; - } - - for (int run = 0; run < kRuns; ++run) { - CycleTimer ctimer, utimer; - - // Pre-grow the output buffers so we don't measure string append time. - for (std::string& compressed_block : compressed) { - compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); - } - - ctimer.Start(); - for (int b = 0; b < num_blocks; ++b) { - for (int i = 0; i < repeats; ++i) - Compress(input[b], input_length[b], comp, &compressed[b], true); - } - ctimer.Stop(); - - // Compress once more, with resizing, so we don't leave junk - // at the end that will confuse the decompressor. 
- for (int b = 0; b < num_blocks; ++b) { - Compress(input[b], input_length[b], comp, &compressed[b], false); - } - - for (int b = 0; b < num_blocks; ++b) { - output[b].resize(input_length[b]); - } - - utimer.Start(); - for (int i = 0; i < repeats; ++i) { - for (int b = 0; b < num_blocks; ++b) - Uncompress(compressed[b], comp, input_length[b], &output[b]); - } - utimer.Stop(); - - ctime[run] = ctimer.Get(); - utime[run] = utimer.Get(); - } - - compressed_size = 0; - for (const std::string& compressed_item : compressed) { - compressed_size += compressed_item.size(); - } - } - - std::sort(ctime, ctime + kRuns); - std::sort(utime, utime + kRuns); - const int med = kRuns/2; - - float comp_rate = (length / ctime[med]) * repeats / 1048576.0; - float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; - std::string x = names[comp]; - x += ":"; - std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate) - : std::string("?"); - std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " - "comp %5.1f MB/s uncomp %5s MB/s\n", - x.c_str(), - block_size/(1<<20), - static_cast(length), static_cast(compressed_size), - (compressed_size * 100.0) / std::max(1, length), - comp_rate, - urate.c_str()); -} - int VerifyString(const std::string& input) { std::string compressed; DataEndingAtUnreadablePage i(input); @@ -633,7 +346,7 @@ TEST(CorruptedTest, VerifyCorrupted) { // These mirror the compression code in snappy.cc, but are copied // here so that we can bypass some limitations in the how snappy.cc // invokes these routines. 
-static void AppendLiteral(std::string* dst, const std::string& literal) { +void AppendLiteral(std::string* dst, const std::string& literal) { if (literal.empty()) return; int n = literal.size() - 1; if (n < 60) { @@ -653,7 +366,7 @@ static void AppendLiteral(std::string* dst, const std::string& literal) { *dst += literal; } -static void AppendCopy(std::string* dst, int offset, int length) { +void AppendCopy(std::string* dst, int offset, int length) { while (length > 0) { // Figure out how much to copy in one shot int to_copy; @@ -1241,382 +954,13 @@ TEST(Snappy, VerifyCharTable) { } } -void CompressFile(const char* fname) { - std::string fullinput; - CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); - - std::string compressed; - Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false); - - CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed, - file::Defaults())); -} - -void UncompressFile(const char* fname) { - std::string fullinput; - CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); - - size_t uncompLength; - CHECK(CheckUncompressedLength(fullinput, &uncompLength)); - - std::string uncompressed; - uncompressed.resize(uncompLength); - CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed)); - - CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed, - file::Defaults())); -} - -void MeasureFile(const char* fname) { - std::string fullinput; - CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); - std::printf("%-40s :\n", fname); - - int start_len = (FLAGS_start_len < 0) ? 
fullinput.size() : FLAGS_start_len; - int end_len = fullinput.size(); - if (FLAGS_end_len >= 0) { - end_len = std::min(fullinput.size(), FLAGS_end_len); - } - for (int len = start_len; len <= end_len; ++len) { - const char* const input = fullinput.data(); - int repeats = (FLAGS_bytes + len) / (len + 1); - if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024 << 10); - if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024 << 10); - if (FLAGS_lz4) Measure(input, len, LZ4, repeats, 1024 << 10); - if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096 << 10); - - // For block-size based measurements - if (0 && FLAGS_snappy) { - Measure(input, len, SNAPPY, repeats, 8<<10); - Measure(input, len, SNAPPY, repeats, 16<<10); - Measure(input, len, SNAPPY, repeats, 32<<10); - Measure(input, len, SNAPPY, repeats, 64<<10); - Measure(input, len, SNAPPY, repeats, 256<<10); - Measure(input, len, SNAPPY, repeats, 1024<<10); - } - } -} - -static struct { - const char* label; - const char* filename; - size_t size_limit; -} files[] = { - { "html", "html", 0 }, - { "urls", "urls.10K", 0 }, - { "jpg", "fireworks.jpeg", 0 }, - { "jpg_200", "fireworks.jpeg", 200 }, - { "pdf", "paper-100k.pdf", 0 }, - { "html4", "html_x_4", 0 }, - { "txt1", "alice29.txt", 0 }, - { "txt2", "asyoulik.txt", 0 }, - { "txt3", "lcet10.txt", 0 }, - { "txt4", "plrabn12.txt", 0 }, - { "pb", "geo.protodata", 0 }, - { "gaviota", "kppkn.gtb", 0 }, -}; - TEST(Snappy, TestBenchmarkFiles) { - for (int i = 0; i < ARRAYSIZE(files); ++i) { - Verify(ReadTestDataFile(files[i].filename, files[i].size_limit)); + for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) { + Verify(ReadTestDataFile(kTestDataFiles[i].filename, + kTestDataFiles[i].size_limit)); } } -void BM_UFlat(benchmark::State& state) { - // Pick file to process based on state.range(0). 
- int file_index = state.range(0); - - CHECK_GE(file_index, 0); - CHECK_LT(file_index, ARRAYSIZE(files)); - std::string contents = ReadTestDataFile(files[file_index].filename, - files[file_index].size_limit); - - std::string zcontents; - snappy::Compress(contents.data(), contents.size(), &zcontents); - char* dst = new char[contents.size()]; - - for (auto s : state) { - CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst)); - benchmark::DoNotOptimize(dst); - } - state.SetBytesProcessed(static_cast(state.iterations()) * - static_cast(contents.size())); - state.SetLabel(files[file_index].label); - - delete[] dst; -} -BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1); - -struct SourceFiles { - SourceFiles() { - for (int i = 0; i < kFiles; i++) { - std::string contents = - ReadTestDataFile(files[i].filename, files[i].size_limit); - max_size = std::max(max_size, contents.size()); - sizes[i] = contents.size(); - snappy::Compress(contents.data(), contents.size(), &zcontents[i]); - } - } - static constexpr int kFiles = ARRAYSIZE(files); - std::string zcontents[kFiles]; - size_t sizes[kFiles]; - size_t max_size = 0; -}; - -void BM_UFlatMedley(benchmark::State& state) { - static const SourceFiles* const source = new SourceFiles(); - - std::vector dst(source->max_size); - - for (auto s : state) { - for (int i = 0; i < SourceFiles::kFiles; i++) { - CHECK(snappy::RawUncompress(source->zcontents[i].data(), - source->zcontents[i].size(), dst.data())); - benchmark::DoNotOptimize(dst); - } - } - - int64_t source_sizes = 0; - for (int i = 0; i < SourceFiles::kFiles; i++) { - source_sizes += static_cast(source->sizes[i]); - } - state.SetBytesProcessed(static_cast(state.iterations()) * - source_sizes); -} -BENCHMARK(BM_UFlatMedley); - -void BM_UValidate(benchmark::State& state) { - // Pick file to process based on state.range(0). 
- int file_index = state.range(0); - - CHECK_GE(file_index, 0); - CHECK_LT(file_index, ARRAYSIZE(files)); - std::string contents = ReadTestDataFile(files[file_index].filename, - files[file_index].size_limit); - - std::string zcontents; - snappy::Compress(contents.data(), contents.size(), &zcontents); - - for (auto s : state) { - CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size())); - } - state.SetBytesProcessed(static_cast(state.iterations()) * - static_cast(contents.size())); - state.SetLabel(files[file_index].label); -} -BENCHMARK(BM_UValidate)->DenseRange(0, ARRAYSIZE(files) - 1); - -void BM_UValidateMedley(benchmark::State& state) { - static const SourceFiles* const source = new SourceFiles(); - - for (auto s : state) { - for (int i = 0; i < SourceFiles::kFiles; i++) { - CHECK(snappy::IsValidCompressedBuffer(source->zcontents[i].data(), - source->zcontents[i].size())); - } - } - - int64_t source_sizes = 0; - for (int i = 0; i < SourceFiles::kFiles; i++) { - source_sizes += static_cast(source->sizes[i]); - } - state.SetBytesProcessed(static_cast(state.iterations()) * - source_sizes); -} -BENCHMARK(BM_UValidateMedley); - -void BM_UIOVec(benchmark::State& state) { - // Pick file to process based on state.range(0). - int file_index = state.range(0); - - CHECK_GE(file_index, 0); - CHECK_LT(file_index, ARRAYSIZE(files)); - std::string contents = ReadTestDataFile(files[file_index].filename, - files[file_index].size_limit); - - std::string zcontents; - snappy::Compress(contents.data(), contents.size(), &zcontents); - - // Uncompress into an iovec containing ten entries. 
- const int kNumEntries = 10; - struct iovec iov[kNumEntries]; - char *dst = new char[contents.size()]; - size_t used_so_far = 0; - for (int i = 0; i < kNumEntries; ++i) { - iov[i].iov_base = dst + used_so_far; - if (used_so_far == contents.size()) { - iov[i].iov_len = 0; - continue; - } - - if (i == kNumEntries - 1) { - iov[i].iov_len = contents.size() - used_so_far; - } else { - iov[i].iov_len = contents.size() / kNumEntries; - } - used_so_far += iov[i].iov_len; - } - - for (auto s : state) { - CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov, - kNumEntries)); - benchmark::DoNotOptimize(iov); - } - state.SetBytesProcessed(static_cast(state.iterations()) * - static_cast(contents.size())); - state.SetLabel(files[file_index].label); - - delete[] dst; -} -BENCHMARK(BM_UIOVec)->DenseRange(0, 4); - -void BM_UFlatSink(benchmark::State& state) { - // Pick file to process based on state.range(0). - int file_index = state.range(0); - - CHECK_GE(file_index, 0); - CHECK_LT(file_index, ARRAYSIZE(files)); - std::string contents = ReadTestDataFile(files[file_index].filename, - files[file_index].size_limit); - - std::string zcontents; - snappy::Compress(contents.data(), contents.size(), &zcontents); - char* dst = new char[contents.size()]; - - for (auto s : state) { - snappy::ByteArraySource source(zcontents.data(), zcontents.size()); - snappy::UncheckedByteArraySink sink(dst); - CHECK(snappy::Uncompress(&source, &sink)); - benchmark::DoNotOptimize(sink); - } - state.SetBytesProcessed(static_cast(state.iterations()) * - static_cast(contents.size())); - state.SetLabel(files[file_index].label); - - std::string s(dst, contents.size()); - CHECK_EQ(contents, s); - - delete[] dst; -} - -BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1); - -void BM_ZFlat(benchmark::State& state) { - // Pick file to process based on state.range(0). 
- int file_index = state.range(0); - - CHECK_GE(file_index, 0); - CHECK_LT(file_index, ARRAYSIZE(files)); - std::string contents = ReadTestDataFile(files[file_index].filename, - files[file_index].size_limit); - char* dst = new char[snappy::MaxCompressedLength(contents.size())]; - - size_t zsize = 0; - for (auto s : state) { - snappy::RawCompress(contents.data(), contents.size(), dst, &zsize); - benchmark::DoNotOptimize(dst); - } - state.SetBytesProcessed(static_cast(state.iterations()) * - static_cast(contents.size())); - const double compression_ratio = - static_cast(zsize) / std::max(1, contents.size()); - state.SetLabel(StrFormat("%s (%.2f %%)", files[file_index].label, - 100.0 * compression_ratio)); - VLOG(0) << StrFormat("compression for %s: %d -> %d bytes", - files[file_index].label, contents.size(), zsize); - delete[] dst; -} -BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1); - -void BM_ZFlatAll(benchmark::State& state) { - const int num_files = ARRAYSIZE(files); - - std::vector contents(num_files); - std::vector dst(num_files); - - int64_t total_contents_size = 0; - for (int i = 0; i < num_files; ++i) { - contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit); - dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())]; - total_contents_size += contents[i].size(); - } - - size_t zsize = 0; - for (auto s : state) { - for (int i = 0; i < num_files; ++i) { - snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], - &zsize); - benchmark::DoNotOptimize(dst); - } - } - - state.SetBytesProcessed(static_cast(state.iterations()) * - total_contents_size); - - for (char* dst_item : dst) { - delete[] dst_item; - } - state.SetLabel(StrFormat("%d files", num_files)); -} -BENCHMARK(BM_ZFlatAll); - -void BM_ZFlatIncreasingTableSize(benchmark::State& state) { - CHECK_GT(ARRAYSIZE(files), 0); - const std::string base_content = - ReadTestDataFile(files[0].filename, files[0].size_limit); - - std::vector contents; - std::vector dst; 
- int64_t total_contents_size = 0; - for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits; - ++table_bits) { - std::string content = base_content; - content.resize(1 << table_bits); - dst.push_back(new char[snappy::MaxCompressedLength(content.size())]); - total_contents_size += content.size(); - contents.push_back(std::move(content)); - } - - size_t zsize = 0; - for (auto s : state) { - for (size_t i = 0; i < contents.size(); ++i) { - snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], - &zsize); - benchmark::DoNotOptimize(dst); - } - } - - state.SetBytesProcessed(static_cast(state.iterations()) * - total_contents_size); - - for (char* dst_item : dst) { - delete[] dst_item; - } - state.SetLabel(StrFormat("%d tables", contents.size())); -} -BENCHMARK(BM_ZFlatIncreasingTableSize); - } // namespace } // namespace snappy - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - InitGoogle(argv[0], &argc, &argv, true); - ::benchmark::RunSpecifiedBenchmarks(); - - if (argc >= 2) { - for (int arg = 1; arg < argc; ++arg) { - if (FLAGS_write_compressed) { - snappy::CompressFile(argv[arg]); - } else if (FLAGS_write_uncompressed) { - snappy::UncompressFile(argv[arg]); - } else { - snappy::MeasureFile(argv[arg]); - } - } - return 0; - } - - return RUN_ALL_TESTS(); -}