mirror of https://github.com/google/snappy.git
Change a few branch annotations that profiling found to be wrong.
Overall performance is neutral or slightly positive. Westmere (64-bit, opt): Benchmark Base (ns) New (ns) Improvement -------------------------------------------------------------------------------------- BM_UFlat/0 73798 71464 1.3GB/s html +3.3% BM_UFlat/1 715223 704318 953.5MB/s urls +1.5% BM_UFlat/2 8137 8871 13.0GB/s jpg -8.3% BM_UFlat/3 200 204 935.5MB/s jpg_200 -2.0% BM_UFlat/4 21627 21281 4.5GB/s pdf +1.6% BM_UFlat/5 302806 290350 1.3GB/s html4 +4.3% BM_UFlat/6 218920 219017 664.1MB/s txt1 -0.0% BM_UFlat/7 190437 191212 626.1MB/s txt2 -0.4% BM_UFlat/8 584192 580484 703.4MB/s txt3 +0.6% BM_UFlat/9 776537 779055 591.6MB/s txt4 -0.3% BM_UFlat/10 76056 72606 1.5GB/s pb +4.8% BM_UFlat/11 235962 239043 737.4MB/s gaviota -1.3% BM_UFlat/12 28049 28000 840.1MB/s cp +0.2% BM_UFlat/13 12225 12021 886.9MB/s c +1.7% BM_UFlat/14 3362 3544 1004.0MB/s lsp -5.1% BM_UFlat/15 937015 939206 1048.9MB/s xls -0.2% BM_UFlat/16 236 233 823.1MB/s xls_200 +1.3% BM_UFlat/17 373170 361947 1.3GB/s bin +3.1% BM_UFlat/18 264 264 725.5MB/s bin_200 +0.0% BM_UFlat/19 42834 43577 839.2MB/s sum -1.7% BM_UFlat/20 4770 4736 853.6MB/s man +0.7% BM_UValidate/0 39671 39944 2.4GB/s html -0.7% BM_UValidate/1 443391 443391 1.5GB/s urls +0.0% BM_UValidate/2 163 163 703.3GB/s jpg +0.0% BM_UValidate/3 113 112 1.7GB/s jpg_200 +0.9% BM_UValidate/4 7555 7608 12.6GB/s pdf -0.7% BM_ZFlat/0 157616 157568 621.5MB/s html (22.31 %) +0.0% BM_ZFlat/1 1997290 2014486 333.4MB/s urls (47.77 %) -0.9% BM_ZFlat/2 23035 22237 5.2GB/s jpg (99.95 %) +3.6% BM_ZFlat/3 539 540 354.5MB/s jpg_200 (73.00 %) -0.2% BM_ZFlat/4 80709 81369 1.2GB/s pdf (81.85 %) -0.8% BM_ZFlat/5 639059 639220 613.0MB/s html4 (22.51 %) -0.0% BM_ZFlat/6 577203 583370 249.3MB/s txt1 (57.87 %) -1.1% BM_ZFlat/7 510887 516094 232.0MB/s txt2 (61.93 %) -1.0% BM_ZFlat/8 1535843 1556973 262.2MB/s txt3 (54.92 %) -1.4% BM_ZFlat/9 2070068 2102380 219.3MB/s txt4 (66.22 %) -1.5% BM_ZFlat/10 152396 152148 745.5MB/s pb (19.64 %) +0.2% BM_ZFlat/11 447367 445859 395.4MB/s gaviota (37.72 %) +0.3% BM_ZFlat/12 76375 76797 306.3MB/s cp (48.12 %) -0.5% BM_ZFlat/13 31518 31987 333.3MB/s c (42.40 %) -1.5% BM_ZFlat/14 10598 10827 328.6MB/s lsp (48.37 %) -2.1% BM_ZFlat/15 1782243 1802728 546.5MB/s xls (41.23 %) -1.1% BM_ZFlat/16 526 539 355.0MB/s xls_200 (78.00 %) -2.4% BM_ZFlat/17 598141 597311 822.1MB/s bin (18.11 %) +0.1% BM_ZFlat/18 121 120 1.6GB/s bin_200 (7.50 %) +0.8% BM_ZFlat/19 109981 112173 326.0MB/s sum (48.96 %) -2.0% BM_ZFlat/20 14355 14575 277.4MB/s man (59.36 %) -1.5% Sum of all benchmarks 33882722 33879325 +0.0% Sandy Bridge (64-bit, opt): Benchmark Base (ns) New (ns) Improvement -------------------------------------------------------------------------------------- BM_UFlat/0 43764 41600 2.3GB/s html +5.2% BM_UFlat/1 517990 507058 1.3GB/s urls +2.2% BM_UFlat/2 6625 5529 20.8GB/s jpg +19.8% BM_UFlat/3 154 155 1.2GB/s jpg_200 -0.6% BM_UFlat/4 12795 11747 8.1GB/s pdf +8.9% BM_UFlat/5 200335 193413 2.0GB/s html4 +3.6% BM_UFlat/6 156574 156426 929.2MB/s txt1 +0.1% BM_UFlat/7 137574 137464 870.4MB/s txt2 +0.1% BM_UFlat/8 422551 421603 967.4MB/s txt3 +0.2% BM_UFlat/9 577749 578985 795.6MB/s txt4 -0.2% BM_UFlat/10 42329 39362 2.8GB/s pb +7.5% BM_UFlat/11 170615 169751 1037.9MB/s gaviota +0.5% BM_UFlat/12 12800 12719 1.8GB/s cp +0.6% BM_UFlat/13 6585 6579 1.6GB/s c +0.1% BM_UFlat/14 2066 2044 1.7GB/s lsp +1.1% BM_UFlat/15 750861 746911 1.3GB/s xls +0.5% BM_UFlat/16 188 192 996.0MB/s xls_200 -2.1% BM_UFlat/17 271622 264333 1.8GB/s bin +2.8% BM_UFlat/18 208 207 923.6MB/s bin_200 +0.5% BM_UFlat/19 24667 24845 1.4GB/s sum -0.7% BM_UFlat/20 2663 2662 1.5GB/s man +0.0% BM_ZFlat/0 115173 115624 846.5MB/s html (22.31 %) -0.4% BM_ZFlat/1 1530331 1537769 436.5MB/s urls (47.77 %) -0.5% BM_ZFlat/2 17503 17013 6.8GB/s jpg (99.95 %) +2.9% BM_ZFlat/3 385 385 496.3MB/s jpg_200 (73.00 %) +0.0% BM_ZFlat/4 61753 61540 1.6GB/s pdf (81.85 %) +0.3% BM_ZFlat/5 484806 483356 810.1MB/s html4 (22.51 %) +0.3% BM_ZFlat/6 464143 467609 310.9MB/s txt1 (57.87 %) -0.7% BM_ZFlat/7 410315 413319 289.5MB/s txt2 (61.93 %) -0.7% BM_ZFlat/8 1244082 1249381 326.5MB/s txt3 (54.92 %) -0.4% BM_ZFlat/9 1696914 1709685 269.4MB/s txt4 (66.22 %) -0.7% BM_ZFlat/10 104148 103372 1096.7MB/s pb (19.64 %) +0.8% BM_ZFlat/11 363522 359722 489.8MB/s gaviota (37.72 %) +1.1% BM_ZFlat/12 47021 50095 469.3MB/s cp (48.12 %) -6.1% BM_ZFlat/13 16888 16985 627.4MB/s c (42.40 %) -0.6% BM_ZFlat/14 5496 5469 650.3MB/s lsp (48.37 %) +0.5% BM_ZFlat/15 1460713 1448760 679.5MB/s xls (41.23 %) +0.8% BM_ZFlat/16 387 393 486.8MB/s xls_200 (78.00 %) -1.5% BM_ZFlat/17 457654 451462 1086.6MB/s bin (18.11 %) +1.4% BM_ZFlat/18 97 87 2.1GB/s bin_200 (7.50 %) +11.5% BM_ZFlat/19 77904 80924 451.7MB/s sum (48.96 %) -3.7% BM_ZFlat/20 7648 7663 527.1MB/s man (59.36 %) -0.2% Sum of all benchmarks 25493635 25482069 +0.0% A=dehao R=sesse
This commit is contained in:
parent
11ccdfb868
commit
86eb8b152b
|
@ -93,7 +93,7 @@ static inline int FindMatchLength(const char* s1,
|
||||||
// the first non-matching bit and use that to calculate the total
|
// the first non-matching bit and use that to calculate the total
|
||||||
// length of the match.
|
// length of the match.
|
||||||
while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
|
while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
|
||||||
if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) {
|
if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
|
||||||
s2 += 8;
|
s2 += 8;
|
||||||
matched += 8;
|
matched += 8;
|
||||||
} else {
|
} else {
|
||||||
|
@ -108,7 +108,7 @@ static inline int FindMatchLength(const char* s1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (PREDICT_TRUE(s2 < s2_limit)) {
|
while (PREDICT_TRUE(s2 < s2_limit)) {
|
||||||
if (PREDICT_TRUE(s1[matched] == *s2)) {
|
if (s1[matched] == *s2) {
|
||||||
++s2;
|
++s2;
|
||||||
++matched;
|
++matched;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -138,7 +138,7 @@ namespace {
|
||||||
const int kMaxIncrementCopyOverflow = 10;
|
const int kMaxIncrementCopyOverflow = 10;
|
||||||
|
|
||||||
inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
|
inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
|
||||||
while (op - src < 8) {
|
while (PREDICT_FALSE(op - src < 8)) {
|
||||||
UnalignedCopy64(src, op);
|
UnalignedCopy64(src, op);
|
||||||
len -= op - src;
|
len -= op - src;
|
||||||
op += op - src;
|
op += op - src;
|
||||||
|
@ -215,7 +215,7 @@ static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
|
||||||
|
|
||||||
static inline char* EmitCopy(char* op, size_t offset, int len) {
|
static inline char* EmitCopy(char* op, size_t offset, int len) {
|
||||||
// Emit 64 byte copies but make sure to keep at least four bytes reserved
|
// Emit 64 byte copies but make sure to keep at least four bytes reserved
|
||||||
while (len >= 68) {
|
while (PREDICT_FALSE(len >= 68)) {
|
||||||
op = EmitCopyLessThan64(op, offset, 64);
|
op = EmitCopyLessThan64(op, offset, 64);
|
||||||
len -= 64;
|
len -= 64;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue