mirror of https://github.com/google/snappy.git
Make UnalignedCopy64 not exhibit undefined behavior when src and dst overlap.
name          old speed       new speed       delta
BM_UFlat/0    3.09GB/s ± 3%   3.07GB/s ± 2%   -0.78%  (p=0.009 n=19+19)
BM_UFlat/1    1.63GB/s ± 2%   1.62GB/s ± 2%   ~       (p=0.099 n=19+20)
BM_UFlat/2    19.7GB/s ±19%   20.7GB/s ±11%   ~       (p=0.054 n=20+19)
BM_UFlat/3    1.61GB/s ± 2%   1.60GB/s ± 1%   -0.48%  (p=0.049 n=20+17)
BM_UFlat/4    15.8GB/s ± 7%   15.6GB/s ±10%   ~       (p=0.234 n=20+20)
BM_UFlat/5    2.47GB/s ± 1%   2.46GB/s ± 2%   ~       (p=0.608 n=19+19)
BM_UFlat/6    1.07GB/s ± 2%   1.07GB/s ± 1%   ~       (p=0.128 n=20+19)
BM_UFlat/7    1.01GB/s ± 1%   1.00GB/s ± 2%   ~       (p=0.656 n=15+19)
BM_UFlat/8    1.13GB/s ± 1%   1.13GB/s ± 1%   ~       (p=0.532 n=18+19)
BM_UFlat/9    918MB/s ± 1%    916MB/s ± 1%    ~       (p=0.443 n=19+18)
BM_UFlat/10   3.90GB/s ± 1%   3.90GB/s ± 1%   ~       (p=0.895 n=20+19)
BM_UFlat/11   1.30GB/s ± 1%   1.29GB/s ± 2%   ~       (p=0.156 n=19+19)
BM_UFlat/12   2.35GB/s ± 2%   2.34GB/s ± 1%   ~       (p=0.349 n=19+17)
BM_UFlat/13   2.07GB/s ± 1%   2.06GB/s ± 2%   ~       (p=0.475 n=18+19)
BM_UFlat/14   2.23GB/s ± 1%   2.23GB/s ± 1%   ~       (p=0.983 n=19+19)
BM_UFlat/15   1.55GB/s ± 1%   1.55GB/s ± 1%   ~       (p=0.314 n=19+19)
BM_UFlat/16   1.26GB/s ± 1%   1.26GB/s ± 1%   ~       (p=0.907 n=15+18)
BM_UFlat/17   2.32GB/s ± 1%   2.32GB/s ± 1%   ~       (p=0.604 n=18+19)
BM_UFlat/18   1.61GB/s ± 1%   1.61GB/s ± 1%   ~       (p=0.212 n=18+19)
BM_UFlat/19   1.78GB/s ± 1%   1.78GB/s ± 2%   ~       (p=0.350 n=19+19)
BM_UFlat/20   1.89GB/s ± 1%   1.90GB/s ± 2%   ~       (p=0.092 n=19+19)

Also tested the current version against UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src)); there is no difference (old is memcpy, new is UNALIGNED*):

name          old speed       new speed       delta
BM_UFlat/0    3.14GB/s ± 1%   3.16GB/s ± 2%   ~       (p=0.156 n=19+19)
BM_UFlat/1    1.62GB/s ± 1%   1.61GB/s ± 2%   ~       (p=0.102 n=19+20)
BM_UFlat/2    18.8GB/s ±17%   19.1GB/s ±11%   ~       (p=0.390 n=20+16)
BM_UFlat/3    1.59GB/s ± 1%   1.58GB/s ± 1%   -1.06%  (p=0.000 n=18+18)
BM_UFlat/4    15.8GB/s ± 6%   15.6GB/s ± 7%   ~       (p=0.184 n=19+20)
BM_UFlat/5    2.46GB/s ± 1%   2.44GB/s ± 1%   -0.95%  (p=0.000 n=19+18)
BM_UFlat/6    1.08GB/s ± 1%   1.06GB/s ± 1%   -1.17%  (p=0.000 n=19+18)
BM_UFlat/7    1.00GB/s ± 1%   0.99GB/s ± 1%   -1.16%  (p=0.000 n=19+18)
BM_UFlat/8    1.14GB/s ± 2%   1.12GB/s ± 1%   -1.12%  (p=0.000 n=19+18)
BM_UFlat/9    921MB/s ± 1%    914MB/s ± 1%    -0.84%  (p=0.000 n=20+17)
BM_UFlat/10   3.94GB/s ± 2%   3.92GB/s ± 1%   ~       (p=0.058 n=19+17)
BM_UFlat/11   1.29GB/s ± 1%   1.28GB/s ± 1%   -0.77%  (p=0.001 n=19+17)
BM_UFlat/12   2.34GB/s ± 1%   2.31GB/s ± 1%   -1.10%  (p=0.000 n=18+18)
BM_UFlat/13   2.06GB/s ± 1%   2.05GB/s ± 1%   -0.73%  (p=0.001 n=19+18)
BM_UFlat/14   2.22GB/s ± 1%   2.20GB/s ± 1%   -0.73%  (p=0.000 n=18+18)
BM_UFlat/15   1.55GB/s ± 1%   1.53GB/s ± 1%   -1.07%  (p=0.000 n=19+18)
BM_UFlat/16   1.26GB/s ± 1%   1.25GB/s ± 1%   -0.79%  (p=0.000 n=18+18)
BM_UFlat/17   2.31GB/s ± 1%   2.29GB/s ± 1%   -0.98%  (p=0.000 n=20+18)
BM_UFlat/18   1.61GB/s ± 1%   1.60GB/s ± 2%   -0.71%  (p=0.001 n=20+19)
BM_UFlat/19   1.77GB/s ± 1%   1.76GB/s ± 1%   -0.61%  (p=0.007 n=19+18)
BM_UFlat/20   1.89GB/s ± 1%   1.88GB/s ± 1%   -0.75%  (p=0.000 n=20+18)
This commit is contained in:
parent
d3c6d20d0a
commit
3c706d2230
|
@ -89,7 +89,9 @@ size_t MaxCompressedLength(size_t source_len) {
|
|||
namespace {
|
||||
|
||||
void UnalignedCopy64(const void* src, void* dst) {
|
||||
memcpy(dst, src, 8);
|
||||
char tmp[8];
|
||||
memcpy(tmp, src, 8);
|
||||
memcpy(dst, tmp, 8);
|
||||
}
|
||||
|
||||
void UnalignedCopy128(const void* src, void* dst) {
|
||||
|
@ -99,7 +101,9 @@ void UnalignedCopy128(const void* src, void* dst) {
|
|||
__m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
|
||||
_mm_storeu_si128(static_cast<__m128i*>(dst), x);
|
||||
#else
|
||||
memcpy(dst, src, 16);
|
||||
char tmp[16];
|
||||
memcpy(tmp, src, 16);
|
||||
memcpy(dst, tmp, 16);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue