Wrap BMI2 instruction usage in support checks.

A previous version of this was submitted and rolled back due to breakage
-- an attempt to accommodate Visual Studio resulted in compiler errors
on GCC/Clang with -mavx2 but without -mbmi2. This version makes the BMI2
support check more strict, to avoid the errors.

A previous CL introduced _bzhi_u32 (part of Intel's BMI2 instruction
set, released in Haswell) gated by a check for the __BMI2__ preprocessor
macro. This works for Clang and GCC, but does not work on Visual Studio,
and may not work on other compilers.

This CL plumbs the BMI2 support checks through the CMake configuration
used by the open source build. It also replaces the <x86intrin.h>
header, which does not exist on Visual Studio, with the more scoped
headers <tmmintrin.h> (for SSSE3) and <immintrin.h> (for BMI2/AVX2).
Asides from fixing the open source build, the more scoped headers make
it slightly less likely that newer intrinsics will creep in without
proper gating.
This commit is contained in:
costan 2019-01-08 06:06:34 -08:00 committed by Victor Costan
parent 46768e335d
commit 4f0adca400
5 changed files with 63 additions and 8 deletions

View File

@ -30,7 +30,7 @@ build_script:
- if "%platform%"=="x64" set CMAKE_GENERATOR=%CMAKE_GENERATOR% Win64 - if "%platform%"=="x64" set CMAKE_GENERATOR=%CMAKE_GENERATOR% Win64
- cmake --version - cmake --version
- cmake .. -G "%CMAKE_GENERATOR%" - cmake .. -G "%CMAKE_GENERATOR%"
-DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" -DSNAPPY_REQUIRE_AVX=ON -DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" -DSNAPPY_REQUIRE_AVX2=ON
- cmake --build . --config %CONFIGURATION% - cmake --build . --config %CONFIGURATION%
- cd .. - cd ..

View File

@ -13,8 +13,10 @@ os:
- osx - osx
env: env:
- BUILD_TYPE=Debug - BUILD_TYPE=Debug CPU_LEVEL=AVX
- BUILD_TYPE=RelWithDebInfo - BUILD_TYPE=Debug CPU_LEVEL=AVX2
- BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX
- BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX2
matrix: matrix:
exclude: exclude:
@ -22,6 +24,12 @@ matrix:
# https://github.com/travis-ci/travis-ci/issues/9640 # https://github.com/travis-ci/travis-ci/issues/9640
- compiler: gcc - compiler: gcc
os: osx os: osx
# Travis OSX servers seem to run on pre-Haswell CPUs. Attempting to use AVX2
# results in crashes.
- env: BUILD_TYPE=Debug CPU_LEVEL=AVX2
os: osx
- env: BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX2
os: osx
addons: addons:
apt: apt:
@ -61,7 +69,8 @@ install:
before_script: before_script:
- mkdir -p build && cd build - mkdir -p build && cd build
- cmake .. -G Ninja -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSNAPPY_REQUIRE_AVX=ON - cmake .. -G Ninja -DCMAKE_BUILD_TYPE=$BUILD_TYPE
-DSNAPPY_REQUIRE_${CPU_LEVEL}=ON
- cmake --build . - cmake --build .
- cd .. - cd ..

View File

@ -14,6 +14,8 @@ option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON)
option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF) option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF)
option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." OFF)
include(TestBigEndian) include(TestBigEndian)
test_big_endian(SNAPPY_IS_BIG_ENDIAN) test_big_endian(SNAPPY_IS_BIG_ENDIAN)
@ -33,15 +35,27 @@ check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX) CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX)
CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2)
CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX) CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX)
if (SNAPPY_REQUIRE_AVX) CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2)
if(SNAPPY_REQUIRE_AVX2)
if(HAVE_VISUAL_STUDIO_ARCH_AVX2)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
endif(HAVE_VISUAL_STUDIO_ARCH_AVX2)
if(HAVE_CLANG_MAVX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
endif(HAVE_CLANG_MAVX)
if(HAVE_CLANG_MBMI2)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2")
endif(HAVE_CLANG_MBMI2)
elseif (SNAPPY_REQUIRE_AVX)
if(HAVE_VISUAL_STUDIO_ARCH_AVX) if(HAVE_VISUAL_STUDIO_ARCH_AVX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
endif(HAVE_VISUAL_STUDIO_ARCH_AVX) endif(HAVE_VISUAL_STUDIO_ARCH_AVX)
if(HAVE_CLANG_MAVX) if(HAVE_CLANG_MAVX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
endif(HAVE_CLANG_MAVX) endif(HAVE_CLANG_MAVX)
endif(SNAPPY_REQUIRE_AVX) endif(SNAPPY_REQUIRE_AVX2)
include(CheckCXXSourceCompiles) include(CheckCXXSourceCompiles)
check_cxx_source_compiles(" check_cxx_source_compiles("
@ -66,6 +80,12 @@ int main() {
return 0; return 0;
}" SNAPPY_HAVE_SSSE3) }" SNAPPY_HAVE_SSSE3)
check_cxx_source_compiles("
#include <immintrin.h>
int main() {
return _bzhi_u32(0, 1);
}" SNAPPY_HAVE_BMI2)
include(CheckSymbolExists) include(CheckSymbolExists)
check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP) check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP)
check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF) check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF)

View File

@ -52,6 +52,9 @@
/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */ /* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
#cmakedefine01 SNAPPY_HAVE_SSSE3 #cmakedefine01 SNAPPY_HAVE_SSSE3
/* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */
#cmakedefine01 SNAPPY_HAVE_BMI2
/* Define to 1 if your processor stores words with the most significant byte /* Define to 1 if your processor stores words with the most significant byte
first (like Motorola and SPARC, unlike Intel and VAX). */ first (like Motorola and SPARC, unlike Intel and VAX). */
#cmakedefine SNAPPY_IS_BIG_ENDIAN 1 #cmakedefine SNAPPY_IS_BIG_ENDIAN 1

View File

@ -41,8 +41,31 @@
#endif #endif
#endif // !defined(SNAPPY_HAVE_SSSE3) #endif // !defined(SNAPPY_HAVE_SSSE3)
#if !defined(SNAPPY_HAVE_BMI2)
// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2
// specifically, but it does define __AVX2__ when AVX2 support is available.
// Fortunately, AVX2 was introduced in Haswell, just like BMI2.
//
// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So,
// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which
// case issuing BMI2 instructions results in a compiler error.
#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
#define SNAPPY_HAVE_BMI2 1
#else
#define SNAPPY_HAVE_BMI2 0
#endif
#endif // !defined(SNAPPY_HAVE_BMI2)
#if SNAPPY_HAVE_SSSE3 #if SNAPPY_HAVE_SSSE3
#include <x86intrin.h> // Please do not replace with <x86intrin.h>. or with headers that assume more
// advanced SSE versions without checking with all the OWNERS.
#include <tmmintrin.h>
#endif
#if SNAPPY_HAVE_BMI2
// Please do not replace with <x86intrin.h>. or with headers that assume more
// advanced SSE versions without checking with all the OWNERS.
#include <immintrin.h>
#endif #endif
#include <stdio.h> #include <stdio.h>
@ -699,7 +722,7 @@ static inline uint32 ExtractLowBytes(uint32 v, int n) {
assert(n <= 4); assert(n <= 4);
// TODO(b/121042345): Remove !defined(MEMORY_SANITIZER) once MSan // TODO(b/121042345): Remove !defined(MEMORY_SANITIZER) once MSan
// handles _bzhi_u32() correctly. // handles _bzhi_u32() correctly.
#if defined(__BMI2__) && !defined(MEMORY_SANITIZER) #if SNAPPY_HAVE_BMI2 && !defined(MEMORY_SANITIZER)
return _bzhi_u32(v, 8 * n); return _bzhi_u32(v, 8 * n);
#else #else
// This needs to be wider than uint32 otherwise `mask << 32` will be // This needs to be wider than uint32 otherwise `mask << 32` will be