From 4f0adca400ed519c463f8bbc3031296e38dd746d Mon Sep 17 00:00:00 2001 From: costan Date: Tue, 8 Jan 2019 06:06:34 -0800 Subject: [PATCH] Wrap BMI2 instruction usage in support checks. A previous version of this was submitted and rolled back due to breakage -- an attempt to accommodate Visual Studio resulted in compiler errors on GCC/Clang with -mavx2 but without -mbmi2. This version makes the BMI2 support check more strict, to avoid the errors. A previous CL introduced _bzhi_u32 (part of Intel's BMI2 instruction set, released in Haswell) gated by a check for the __BMI2__ preprocessor macro. This works for Clang and GCC, but does not work on Visual Studio, and may not work on other compilers. This CL plumbs the BMI2 support checks through the CMake configuration used by the open source build. It also replaces the header, which does not exist on Visual Studio, with the more scoped headers (for SSSE3) and (for BMI2/AVX2). Asides from fixing the open source build, the more scoped headers make it slightly less likely that newer intrinsics will creep in without proper gating. --- .appveyor.yml | 2 +- .travis.yml | 15 ++++++++++++--- CMakeLists.txt | 24 ++++++++++++++++++++++-- cmake/config.h.in | 3 +++ snappy.cc | 27 +++++++++++++++++++++++++-- 5 files changed, 63 insertions(+), 8 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index f2329ec..bc0d01e 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -30,7 +30,7 @@ build_script: - if "%platform%"=="x64" set CMAKE_GENERATOR=%CMAKE_GENERATOR% Win64 - cmake --version - cmake .. -G "%CMAKE_GENERATOR%" - -DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" -DSNAPPY_REQUIRE_AVX=ON + -DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" -DSNAPPY_REQUIRE_AVX2=ON - cmake --build . --config %CONFIGURATION% - cd .. diff --git a/.travis.yml b/.travis.yml index 9bbf346..1f67471 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,8 +13,10 @@ os: - osx env: - - BUILD_TYPE=Debug - - BUILD_TYPE=RelWithDebInfo + - BUILD_TYPE=Debug CPU_LEVEL=AVX + - BUILD_TYPE=Debug CPU_LEVEL=AVX2 + - BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX + - BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX2 matrix: exclude: @@ -22,6 +24,12 @@ matrix: # https://github.com/travis-ci/travis-ci/issues/9640 - compiler: gcc os: osx + # Travis OSX servers seem to run on pre-Haswell CPUs. Attempting to use AVX2 + # results in crashes. + - env: BUILD_TYPE=Debug CPU_LEVEL=AVX2 + os: osx + - env: BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX2 + os: osx addons: apt: @@ -61,7 +69,8 @@ install: before_script: - mkdir -p build && cd build -- cmake .. -G Ninja -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSNAPPY_REQUIRE_AVX=ON +- cmake .. -G Ninja -DCMAKE_BUILD_TYPE=$BUILD_TYPE + -DSNAPPY_REQUIRE_${CPU_LEVEL}=ON - cmake --build . - cd .. diff --git a/CMakeLists.txt b/CMakeLists.txt index 41a2124..6d3a157 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,8 @@ option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON) option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF) +option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." OFF) + include(TestBigEndian) test_big_endian(SNAPPY_IS_BIG_ENDIAN) @@ -33,15 +35,27 @@ check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2) include(CheckCXXCompilerFlag) CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX) +CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2) CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX) -if (SNAPPY_REQUIRE_AVX) +CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2) +if(SNAPPY_REQUIRE_AVX2) + if(HAVE_VISUAL_STUDIO_ARCH_AVX2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") + endif(HAVE_VISUAL_STUDIO_ARCH_AVX2) + if(HAVE_CLANG_MAVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") + endif(HAVE_CLANG_MAVX) + if(HAVE_CLANG_MBMI2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2") + endif(HAVE_CLANG_MBMI2) +elseif (SNAPPY_REQUIRE_AVX) if(HAVE_VISUAL_STUDIO_ARCH_AVX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX") endif(HAVE_VISUAL_STUDIO_ARCH_AVX) if(HAVE_CLANG_MAVX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") endif(HAVE_CLANG_MAVX) -endif(SNAPPY_REQUIRE_AVX) +endif(SNAPPY_REQUIRE_AVX2) include(CheckCXXSourceCompiles) check_cxx_source_compiles(" @@ -66,6 +80,12 @@ int main() { return 0; }" SNAPPY_HAVE_SSSE3) +check_cxx_source_compiles(" +#include +int main() { + return _bzhi_u32(0, 1); +}" SNAPPY_HAVE_BMI2) + include(CheckSymbolExists) check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP) check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF) diff --git a/cmake/config.h.in b/cmake/config.h.in index 3027b62..24f27ef 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -52,6 +52,9 @@ /* Define to 1 if you target processors with SSSE3+ and have . */ #cmakedefine01 SNAPPY_HAVE_SSSE3 +/* Define to 1 if you target processors with BMI2+ and have . */ +#cmakedefine01 SNAPPY_HAVE_BMI2 + /* Define to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel and VAX). */ #cmakedefine SNAPPY_IS_BIG_ENDIAN 1 diff --git a/snappy.cc b/snappy.cc index e35eb7c..4ab26d7 100644 --- a/snappy.cc +++ b/snappy.cc @@ -41,8 +41,31 @@ #endif #endif // !defined(SNAPPY_HAVE_SSSE3) +#if !defined(SNAPPY_HAVE_BMI2) +// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2 +// specifically, but it does define __AVX2__ when AVX2 support is available. +// Fortunately, AVX2 was introduced in Haswell, just like BMI2. +// +// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So, +// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which +// case issuing BMI2 instructions results in a compiler error. +#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) +#define SNAPPY_HAVE_BMI2 1 +#else +#define SNAPPY_HAVE_BMI2 0 +#endif +#endif // !defined(SNAPPY_HAVE_BMI2) + #if SNAPPY_HAVE_SSSE3 -#include +// Please do not replace with . or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include +#endif + +#if SNAPPY_HAVE_BMI2 +// Please do not replace with . or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include #endif #include @@ -699,7 +722,7 @@ static inline uint32 ExtractLowBytes(uint32 v, int n) { assert(n <= 4); // TODO(b/121042345): Remove !defined(MEMORY_SANITIZER) once MSan // handles _bzhi_u32() correctly. -#if defined(__BMI2__) && !defined(MEMORY_SANITIZER) +#if SNAPPY_HAVE_BMI2 && !defined(MEMORY_SANITIZER) return _bzhi_u32(v, 8 * n); #else // This needs to be wider than uint32 otherwise `mask << 32` will be