mirror of https://github.com/google/snappy.git
Wrap BMI2 instruction usage in support checks.
A previous version of this was submitted and rolled back due to breakage -- an attempt to accommodate Visual Studio resulted in compiler errors on GCC/Clang with -mavx2 but without -mbmi2. This version makes the BMI2 support check more strict, to avoid the errors. A previous CL introduced _bzhi_u32 (part of Intel's BMI2 instruction set, released in Haswell) gated by a check for the __BMI2__ preprocessor macro. This works for Clang and GCC, but does not work on Visual Studio, and may not work on other compilers. This CL plumbs the BMI2 support checks through the CMake configuration used by the open source build. It also replaces the <x86intrin.h> header, which does not exist on Visual Studio, with the more scoped headers <tmmintrin.h> (for SSSE3) and <immintrin.h> (for BMI2/AVX2). Asides from fixing the open source build, the more scoped headers make it slightly less likely that newer intrinsics will creep in without proper gating.
This commit is contained in:
parent
46768e335d
commit
4f0adca400
|
@ -30,7 +30,7 @@ build_script:
|
|||
- if "%platform%"=="x64" set CMAKE_GENERATOR=%CMAKE_GENERATOR% Win64
|
||||
- cmake --version
|
||||
- cmake .. -G "%CMAKE_GENERATOR%"
|
||||
-DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" -DSNAPPY_REQUIRE_AVX=ON
|
||||
-DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" -DSNAPPY_REQUIRE_AVX2=ON
|
||||
- cmake --build . --config %CONFIGURATION%
|
||||
- cd ..
|
||||
|
||||
|
|
15
.travis.yml
15
.travis.yml
|
@ -13,8 +13,10 @@ os:
|
|||
- osx
|
||||
|
||||
env:
|
||||
- BUILD_TYPE=Debug
|
||||
- BUILD_TYPE=RelWithDebInfo
|
||||
- BUILD_TYPE=Debug CPU_LEVEL=AVX
|
||||
- BUILD_TYPE=Debug CPU_LEVEL=AVX2
|
||||
- BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX
|
||||
- BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX2
|
||||
|
||||
matrix:
|
||||
exclude:
|
||||
|
@ -22,6 +24,12 @@ matrix:
|
|||
# https://github.com/travis-ci/travis-ci/issues/9640
|
||||
- compiler: gcc
|
||||
os: osx
|
||||
# Travis OSX servers seem to run on pre-Haswell CPUs. Attempting to use AVX2
|
||||
# results in crashes.
|
||||
- env: BUILD_TYPE=Debug CPU_LEVEL=AVX2
|
||||
os: osx
|
||||
- env: BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX2
|
||||
os: osx
|
||||
|
||||
addons:
|
||||
apt:
|
||||
|
@ -61,7 +69,8 @@ install:
|
|||
|
||||
before_script:
|
||||
- mkdir -p build && cd build
|
||||
- cmake .. -G Ninja -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSNAPPY_REQUIRE_AVX=ON
|
||||
- cmake .. -G Ninja -DCMAKE_BUILD_TYPE=$BUILD_TYPE
|
||||
-DSNAPPY_REQUIRE_${CPU_LEVEL}=ON
|
||||
- cmake --build .
|
||||
- cd ..
|
||||
|
||||
|
|
|
@ -14,6 +14,8 @@ option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON)
|
|||
|
||||
option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF)
|
||||
|
||||
option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." OFF)
|
||||
|
||||
include(TestBigEndian)
|
||||
test_big_endian(SNAPPY_IS_BIG_ENDIAN)
|
||||
|
||||
|
@ -33,15 +35,27 @@ check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2)
|
|||
|
||||
include(CheckCXXCompilerFlag)
|
||||
CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX)
|
||||
CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2)
|
||||
CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX)
|
||||
if (SNAPPY_REQUIRE_AVX)
|
||||
CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2)
|
||||
if(SNAPPY_REQUIRE_AVX2)
|
||||
if(HAVE_VISUAL_STUDIO_ARCH_AVX2)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
||||
endif(HAVE_VISUAL_STUDIO_ARCH_AVX2)
|
||||
if(HAVE_CLANG_MAVX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
|
||||
endif(HAVE_CLANG_MAVX)
|
||||
if(HAVE_CLANG_MBMI2)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2")
|
||||
endif(HAVE_CLANG_MBMI2)
|
||||
elseif (SNAPPY_REQUIRE_AVX)
|
||||
if(HAVE_VISUAL_STUDIO_ARCH_AVX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
|
||||
endif(HAVE_VISUAL_STUDIO_ARCH_AVX)
|
||||
if(HAVE_CLANG_MAVX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
|
||||
endif(HAVE_CLANG_MAVX)
|
||||
endif(SNAPPY_REQUIRE_AVX)
|
||||
endif(SNAPPY_REQUIRE_AVX2)
|
||||
|
||||
include(CheckCXXSourceCompiles)
|
||||
check_cxx_source_compiles("
|
||||
|
@ -66,6 +80,12 @@ int main() {
|
|||
return 0;
|
||||
}" SNAPPY_HAVE_SSSE3)
|
||||
|
||||
check_cxx_source_compiles("
|
||||
#include <immintrin.h>
|
||||
int main() {
|
||||
return _bzhi_u32(0, 1);
|
||||
}" SNAPPY_HAVE_BMI2)
|
||||
|
||||
include(CheckSymbolExists)
|
||||
check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP)
|
||||
check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF)
|
||||
|
|
|
@ -52,6 +52,9 @@
|
|||
/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
|
||||
#cmakedefine01 SNAPPY_HAVE_SSSE3
|
||||
|
||||
/* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */
|
||||
#cmakedefine01 SNAPPY_HAVE_BMI2
|
||||
|
||||
/* Define to 1 if your processor stores words with the most significant byte
|
||||
first (like Motorola and SPARC, unlike Intel and VAX). */
|
||||
#cmakedefine SNAPPY_IS_BIG_ENDIAN 1
|
||||
|
|
27
snappy.cc
27
snappy.cc
|
@ -41,8 +41,31 @@
|
|||
#endif
|
||||
#endif // !defined(SNAPPY_HAVE_SSSE3)
|
||||
|
||||
#if !defined(SNAPPY_HAVE_BMI2)
|
||||
// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2
|
||||
// specifically, but it does define __AVX2__ when AVX2 support is available.
|
||||
// Fortunately, AVX2 was introduced in Haswell, just like BMI2.
|
||||
//
|
||||
// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So,
|
||||
// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which
|
||||
// case issuing BMI2 instructions results in a compiler error.
|
||||
#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
|
||||
#define SNAPPY_HAVE_BMI2 1
|
||||
#else
|
||||
#define SNAPPY_HAVE_BMI2 0
|
||||
#endif
|
||||
#endif // !defined(SNAPPY_HAVE_BMI2)
|
||||
|
||||
#if SNAPPY_HAVE_SSSE3
|
||||
#include <x86intrin.h>
|
||||
// Please do not replace with <x86intrin.h>. or with headers that assume more
|
||||
// advanced SSE versions without checking with all the OWNERS.
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
#if SNAPPY_HAVE_BMI2
|
||||
// Please do not replace with <x86intrin.h>. or with headers that assume more
|
||||
// advanced SSE versions without checking with all the OWNERS.
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -699,7 +722,7 @@ static inline uint32 ExtractLowBytes(uint32 v, int n) {
|
|||
assert(n <= 4);
|
||||
// TODO(b/121042345): Remove !defined(MEMORY_SANITIZER) once MSan
|
||||
// handles _bzhi_u32() correctly.
|
||||
#if defined(__BMI2__) && !defined(MEMORY_SANITIZER)
|
||||
#if SNAPPY_HAVE_BMI2 && !defined(MEMORY_SANITIZER)
|
||||
return _bzhi_u32(v, 8 * n);
|
||||
#else
|
||||
// This needs to be wider than uint32 otherwise `mask << 32` will be
|
||||
|
|
Loading…
Reference in New Issue