Fix Visual Studio build.

Commit 8f469d97e2 introduced SSSE3 fast
paths that are gated by __SSSE3__ macro checks and the <x86intrin.h>
header, neither of which exists in Visual Studio. This commit adds logic
for detecting SSSE3 compiler support that works for all compilers
supported by the open source release.

The commit also replaces the header with <tmmintrin.h>, which only
defines intrinsics supported by SSSE3 and below. This should help flag
any use of SIMD instructions that require more advanced SSE support, so
the uses can be gated by checks that also work in the open source
release.

Last, this commit requires C++11 support for the open source build. This is
needed by the alignas specifier, which was also introduced in commit
8f469d97e2.
This commit is contained in:
costan 2018-08-08 14:41:36 -07:00 committed by Victor Costan
parent 27ff0af12a
commit 73c31e824c
5 changed files with 67 additions and 12 deletions

View File

@ -30,7 +30,7 @@ build_script:
- if "%platform%"=="x64" set CMAKE_GENERATOR=%CMAKE_GENERATOR% Win64
- cmake --version
- cmake .. -G "%CMAKE_GENERATOR%"
-DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%"
-DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" -DSNAPPY_REQUIRE_AVX=ON
- cmake --build . --config %CONFIGURATION%
- cd ..

View File

@ -58,7 +58,7 @@ install:
before_script:
- mkdir -p build && cd build
- cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE
- cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSNAPPY_REQUIRE_AVX=ON
- cmake --build .
- cd ..

View File

@ -1,12 +1,19 @@
cmake_minimum_required(VERSION 3.1)
project(Snappy VERSION 1.1.7 LANGUAGES C CXX)
# This project requires C++11.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make
# it prominent in the GUI.
option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF)
option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON)
option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF)
include(TestBigEndian)
test_big_endian(SNAPPY_IS_BIG_ENDIAN)
@ -26,12 +33,40 @@ include(CheckLibraryExists)
check_library_exists(z zlibVersion "" HAVE_LIBZ)
check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2)
include(CheckCXXSourceCompiles)
check_cxx_source_compiles(
"int main(void) { return __builtin_expect(0, 1); }" HAVE_BUILTIN_EXPECT)
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX)
CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX)
if (SNAPPY_REQUIRE_AVX)
if(HAVE_VISUAL_STUDIO_ARCH_AVX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
endif(HAVE_VISUAL_STUDIO_ARCH_AVX)
if(HAVE_CLANG_MAVX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
endif(HAVE_CLANG_MAVX)
endif(SNAPPY_REQUIRE_AVX)
check_cxx_source_compiles(
"int main(void) { return __builtin_ctzll(0); }" HAVE_BUILTIN_CTZ)
include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
int main() {
return __builtin_expect(0, 1);
}" HAVE_BUILTIN_EXPECT)
check_cxx_source_compiles("
int main() {
return __builtin_ctzll(0);
}" HAVE_BUILTIN_CTZ)
check_cxx_source_compiles("
#include <tmmintrin.h>
int main() {
const __m128i *src = 0;
__m128i dest;
const __m128i shuffle_mask = _mm_load_si128(src);
const __m128i pattern = _mm_shuffle_epi8(_mm_loadl_epi64(src), shuffle_mask);
_mm_storeu_si128(&dest, pattern);
return 0;
}" SNAPPY_HAVE_SSSE3)
include(CheckSymbolExists)
check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP)

View File

@ -55,6 +55,9 @@
/* Define to 1 if you have the <windows.h> header file. */
#cmakedefine HAVE_WINDOWS_H 1
/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
#cmakedefine01 SNAPPY_HAVE_SSSE3
/* Define to 1 if your processor stores words with the most significant byte
first (like Motorola and SPARC, unlike Intel and VAX). */
#cmakedefine SNAPPY_IS_BIG_ENDIAN 1

View File

@ -30,9 +30,23 @@
#include "snappy-internal.h"
#include "snappy-sinksource.h"
#if defined(__SSSE3__)
#include <x86intrin.h>
#if !defined(SNAPPY_HAVE_SSSE3)
// __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD
// support between SSE2 and AVX (so SSSE3 instructions require AVX support), and
// defines __AVX__ when AVX support is available.
#if defined(__SSSE3__) || defined(__AVX__)
#define SNAPPY_HAVE_SSSE3 1
#else
#define SNAPPY_HAVE_SSSE3 0
#endif
#endif // !defined(SNAPPY_HAVE_SSSE3)
#if SNAPPY_HAVE_SSSE3
// Please do not replace with <x86intrin.h> or with headers that assume more
// advanced SSE versions without checking with all the OWNERS.
#include <tmmintrin.h>
#endif
#include <stdio.h>
#include <algorithm>
@ -95,6 +109,9 @@ void UnalignedCopy64(const void* src, void* dst) {
}
void UnalignedCopy128(const void* src, void* dst) {
// memcpy gets vectorized when the appropriate compiler options are used.
// For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load
// and store.
char tmp[16];
memcpy(tmp, src, 16);
memcpy(dst, tmp, 16);
@ -167,7 +184,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
// Handle the uncommon case where pattern is less than 8 bytes.
if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
#if defined(__SSSE3__)
#if SNAPPY_HAVE_SSSE3
// Load the first eight bytes into an 128-bit XMM register, then use PSHUFB
// to permute the register's contents in-place into a repeating sequence of
// the first "pattern_size" bytes.
@ -197,7 +214,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
}
return IncrementalCopySlow(src, op, op_limit);
#else
#else // !SNAPPY_HAVE_SSSE3
// If plenty of buffer space remains, expand the pattern to at least 8
// bytes. The way the following loop is written, we need 8 bytes of buffer
// space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10
@ -214,7 +231,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
} else {
return IncrementalCopySlow(src, op, op_limit);
}
#endif
#endif // SNAPPY_HAVE_SSSE3
}
assert(pattern_size >= 8);