From 73c31e824c2640b78d7dcf02bb83b21f53c843c8 Mon Sep 17 00:00:00 2001 From: costan Date: Wed, 8 Aug 2018 14:41:36 -0700 Subject: [PATCH] Fix Visual Studio build. Commit 8f469d97e2ccbd87e5c00d25b1969daebfbfa350 introduced SSSE3 fast paths that are gated by __SSSE3__ macro checks and the <x86intrin.h> header, neither of which exists in Visual Studio. This commit adds logic for detecting SSSE3 compiler support that works for all compilers supported by the open source release. The commit also replaces the <x86intrin.h> header with <tmmintrin.h>, which only defines intrinsics supported by SSSE3 and below. This should help flag any use of SIMD instructions that require more advanced SSE support, so the uses can be gated by checks that also work in the open source release. Last, this commit requires C++11 support for the open source build. This is needed by the alignas specifier, which was also introduced in commit 8f469d97e2ccbd87e5c00d25b1969daebfbfa350. --- .appveyor.yml | 2 +- .travis.yml | 2 +- CMakeLists.txt | 45 ++++++++++++++++++++++++++++++++++++++++----- cmake/config.h.in | 3 +++ snappy.cc | 27 ++++++++++++++++++++++----- 5 files changed, 67 insertions(+), 12 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index f1b3e20..f2329ec 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -30,7 +30,7 @@ build_script: - if "%platform%"=="x64" set CMAKE_GENERATOR=%CMAKE_GENERATOR% Win64 - cmake --version - cmake .. -G "%CMAKE_GENERATOR%" - -DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" + -DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" -DSNAPPY_REQUIRE_AVX=ON - cmake --build . --config %CONFIGURATION% - cd .. diff --git a/.travis.yml b/.travis.yml index 29fcae2..adb9a6c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,7 +58,7 @@ install: before_script: - mkdir -p build && cd build -- cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE +- cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSNAPPY_REQUIRE_AVX=ON - cmake --build . - cd .. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f534b1..62ecd09 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,19 @@ cmake_minimum_required(VERSION 3.1) project(Snappy VERSION 1.1.7 LANGUAGES C CXX) +# This project requires C++11. +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + # BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make # it prominent in the GUI. option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF) option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON) +option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF) + include(TestBigEndian) test_big_endian(SNAPPY_IS_BIG_ENDIAN) @@ -26,12 +33,40 @@ include(CheckLibraryExists) check_library_exists(z zlibVersion "" HAVE_LIBZ) check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2) -include(CheckCXXSourceCompiles) -check_cxx_source_compiles( - "int main(void) { return __builtin_expect(0, 1); }" HAVE_BUILTIN_EXPECT) +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX) +CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX) +if (SNAPPY_REQUIRE_AVX) + if(HAVE_VISUAL_STUDIO_ARCH_AVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX") + endif(HAVE_VISUAL_STUDIO_ARCH_AVX) + if(HAVE_CLANG_MAVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") + endif(HAVE_CLANG_MAVX) +endif(SNAPPY_REQUIRE_AVX) -check_cxx_source_compiles( - "int main(void) { return __builtin_ctzll(0); }" HAVE_BUILTIN_CTZ) +include(CheckCXXSourceCompiles) +check_cxx_source_compiles(" +int main() { + return __builtin_expect(0, 1); +}" HAVE_BUILTIN_EXPECT) + +check_cxx_source_compiles(" +int main() { + return __builtin_ctzll(0); +}" HAVE_BUILTIN_CTZ) + +check_cxx_source_compiles(" +#include <tmmintrin.h> + +int main() { + const __m128i *src = 0; + __m128i dest; + const __m128i shuffle_mask = _mm_load_si128(src); + const __m128i pattern = _mm_shuffle_epi8(_mm_loadl_epi64(src), shuffle_mask); + 
_mm_storeu_si128(&dest, pattern); + return 0; +}" SNAPPY_HAVE_SSSE3) include(CheckSymbolExists) check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP) diff --git a/cmake/config.h.in b/cmake/config.h.in index 97cd818..088ff6f 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -55,6 +55,9 @@ /* Define to 1 if you have the <windows.h> header file. */ #cmakedefine HAVE_WINDOWS_H 1 +/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */ +#cmakedefine01 SNAPPY_HAVE_SSSE3 + /* Define to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel and VAX). */ #cmakedefine SNAPPY_IS_BIG_ENDIAN 1 diff --git a/snappy.cc b/snappy.cc index a5703a3..b89d627 100644 --- a/snappy.cc +++ b/snappy.cc @@ -30,9 +30,23 @@ #include "snappy-internal.h" #include "snappy-sinksource.h" -#if defined(__SSSE3__) -#include <x86intrin.h> +#if !defined(SNAPPY_HAVE_SSSE3) +// __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD +// support between SSE2 and AVX (so SSSE3 instructions require AVX support), and +// defines __AVX__ when AVX support is available. +#if defined(__SSSE3__) || defined(__AVX__) +#define SNAPPY_HAVE_SSSE3 1 +#else +#define SNAPPY_HAVE_SSSE3 0 #endif +#endif // !defined(SNAPPY_HAVE_SSSE3) + +#if SNAPPY_HAVE_SSSE3 +// Please do not replace with <x86intrin.h>. or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include <tmmintrin.h> +#endif + #include #include @@ -95,6 +109,9 @@ void UnalignedCopy64(const void* src, void* dst) { } void UnalignedCopy128(const void* src, void* dst) { + // memcpy gets vectorized when the appropriate compiler options are used. + // For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load + // and store. char tmp[16]; memcpy(tmp, src, 16); memcpy(dst, tmp, 16); @@ -167,7 +184,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, // Handle the uncommon case where pattern is less than 8 bytes. 
if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) { -#if defined(__SSSE3__) +#if SNAPPY_HAVE_SSSE3 // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB // to permute the register's contents in-place into a repeating sequence of // the first "pattern_size" bytes. @@ -197,7 +214,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; } return IncrementalCopySlow(src, op, op_limit); -#else +#else // !SNAPPY_HAVE_SSSE3 // If plenty of buffer space remains, expand the pattern to at least 8 // bytes. The way the following loop is written, we need 8 bytes of buffer // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10 @@ -214,7 +231,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, } else { return IncrementalCopySlow(src, op, op_limit); } -#endif +#endif // SNAPPY_HAVE_SSSE3 } assert(pattern_size >= 8);