Change a few branch annotations that profiling found to be wrong.

Overall performance is neutral or slightly positive.

Westmere (64-bit, opt):

Benchmark               Base (ns)  New (ns)                                Improvement
--------------------------------------------------------------------------------------
BM_UFlat/0                  73798     71464  1.3GB/s  html                    +3.3%
BM_UFlat/1                 715223    704318  953.5MB/s  urls                  +1.5%
BM_UFlat/2                   8137      8871  13.0GB/s  jpg                    -8.3%
BM_UFlat/3                    200       204  935.5MB/s  jpg_200               -2.0%
BM_UFlat/4                  21627     21281  4.5GB/s  pdf                     +1.6%
BM_UFlat/5                 302806    290350  1.3GB/s  html4                   +4.3%
BM_UFlat/6                 218920    219017  664.1MB/s  txt1                  -0.0%
BM_UFlat/7                 190437    191212  626.1MB/s  txt2                  -0.4%
BM_UFlat/8                 584192    580484  703.4MB/s  txt3                  +0.6%
BM_UFlat/9                 776537    779055  591.6MB/s  txt4                  -0.3%
BM_UFlat/10                 76056     72606  1.5GB/s  pb                      +4.8%
BM_UFlat/11                235962    239043  737.4MB/s  gaviota               -1.3%
BM_UFlat/12                 28049     28000  840.1MB/s  cp                    +0.2%
BM_UFlat/13                 12225     12021  886.9MB/s  c                     +1.7%
BM_UFlat/14                  3362      3544  1004.0MB/s  lsp                  -5.1%
BM_UFlat/15                937015    939206  1048.9MB/s  xls                  -0.2%
BM_UFlat/16                   236       233  823.1MB/s  xls_200               +1.3%
BM_UFlat/17                373170    361947  1.3GB/s  bin                     +3.1%
BM_UFlat/18                   264       264  725.5MB/s  bin_200               +0.0%
BM_UFlat/19                 42834     43577  839.2MB/s  sum                   -1.7%
BM_UFlat/20                  4770      4736  853.6MB/s  man                   +0.7%
BM_UValidate/0              39671     39944  2.4GB/s  html                    -0.7%
BM_UValidate/1             443391    443391  1.5GB/s  urls                    +0.0%
BM_UValidate/2                163       163  703.3GB/s  jpg                   +0.0%
BM_UValidate/3                113       112  1.7GB/s  jpg_200                 +0.9%
BM_UValidate/4               7555      7608  12.6GB/s  pdf                    -0.7%
BM_ZFlat/0                 157616    157568  621.5MB/s  html (22.31 %)        +0.0%
BM_ZFlat/1                1997290   2014486  333.4MB/s  urls (47.77 %)        -0.9%
BM_ZFlat/2                  23035     22237  5.2GB/s  jpg (99.95 %)           +3.6%
BM_ZFlat/3                    539       540  354.5MB/s  jpg_200 (73.00 %)     -0.2%
BM_ZFlat/4                  80709     81369  1.2GB/s  pdf (81.85 %)           -0.8%
BM_ZFlat/5                 639059    639220  613.0MB/s  html4 (22.51 %)       -0.0%
BM_ZFlat/6                 577203    583370  249.3MB/s  txt1 (57.87 %)        -1.1%
BM_ZFlat/7                 510887    516094  232.0MB/s  txt2 (61.93 %)        -1.0%
BM_ZFlat/8                1535843   1556973  262.2MB/s  txt3 (54.92 %)        -1.4%
BM_ZFlat/9                2070068   2102380  219.3MB/s  txt4 (66.22 %)        -1.5%
BM_ZFlat/10                152396    152148  745.5MB/s  pb (19.64 %)          +0.2%
BM_ZFlat/11                447367    445859  395.4MB/s  gaviota (37.72 %)     +0.3%
BM_ZFlat/12                 76375     76797  306.3MB/s  cp (48.12 %)          -0.5%
BM_ZFlat/13                 31518     31987  333.3MB/s  c (42.40 %)           -1.5%
BM_ZFlat/14                 10598     10827  328.6MB/s  lsp (48.37 %)         -2.1%
BM_ZFlat/15               1782243   1802728  546.5MB/s  xls (41.23 %)         -1.1%
BM_ZFlat/16                   526       539  355.0MB/s  xls_200 (78.00 %)     -2.4%
BM_ZFlat/17                598141    597311  822.1MB/s  bin (18.11 %)         +0.1%
BM_ZFlat/18                   121       120  1.6GB/s  bin_200 (7.50 %)        +0.8%
BM_ZFlat/19                109981    112173  326.0MB/s  sum (48.96 %)         -2.0%
BM_ZFlat/20                 14355     14575  277.4MB/s  man (59.36 %)         -1.5%
Sum of all benchmarks    33882722  33879325                                   +0.0%

Sandy Bridge (64-bit, opt):

Benchmark               Base (ns)  New (ns)                                Improvement
--------------------------------------------------------------------------------------
BM_UFlat/0                  43764     41600  2.3GB/s  html                    +5.2%
BM_UFlat/1                 517990    507058  1.3GB/s  urls                    +2.2%
BM_UFlat/2                   6625      5529  20.8GB/s  jpg                   +19.8%
BM_UFlat/3                    154       155  1.2GB/s  jpg_200                 -0.6%
BM_UFlat/4                  12795     11747  8.1GB/s  pdf                     +8.9%
BM_UFlat/5                 200335    193413  2.0GB/s  html4                   +3.6%
BM_UFlat/6                 156574    156426  929.2MB/s  txt1                  +0.1%
BM_UFlat/7                 137574    137464  870.4MB/s  txt2                  +0.1%
BM_UFlat/8                 422551    421603  967.4MB/s  txt3                  +0.2%
BM_UFlat/9                 577749    578985  795.6MB/s  txt4                  -0.2%
BM_UFlat/10                 42329     39362  2.8GB/s  pb                      +7.5%
BM_UFlat/11                170615    169751  1037.9MB/s  gaviota              +0.5%
BM_UFlat/12                 12800     12719  1.8GB/s  cp                      +0.6%
BM_UFlat/13                  6585      6579  1.6GB/s  c                       +0.1%
BM_UFlat/14                  2066      2044  1.7GB/s  lsp                     +1.1%
BM_UFlat/15                750861    746911  1.3GB/s  xls                     +0.5%
BM_UFlat/16                   188       192  996.0MB/s  xls_200               -2.1%
BM_UFlat/17                271622    264333  1.8GB/s  bin                     +2.8%
BM_UFlat/18                   208       207  923.6MB/s  bin_200               +0.5%
BM_UFlat/19                 24667     24845  1.4GB/s  sum                     -0.7%
BM_UFlat/20                  2663      2662  1.5GB/s  man                     +0.0%
BM_ZFlat/0                 115173    115624  846.5MB/s  html (22.31 %)        -0.4%
BM_ZFlat/1                1530331   1537769  436.5MB/s  urls (47.77 %)        -0.5%
BM_ZFlat/2                  17503     17013  6.8GB/s  jpg (99.95 %)           +2.9%
BM_ZFlat/3                    385       385  496.3MB/s  jpg_200 (73.00 %)     +0.0%
BM_ZFlat/4                  61753     61540  1.6GB/s  pdf (81.85 %)           +0.3%
BM_ZFlat/5                 484806    483356  810.1MB/s  html4 (22.51 %)       +0.3%
BM_ZFlat/6                 464143    467609  310.9MB/s  txt1 (57.87 %)        -0.7%
BM_ZFlat/7                 410315    413319  289.5MB/s  txt2 (61.93 %)        -0.7%
BM_ZFlat/8                1244082   1249381  326.5MB/s  txt3 (54.92 %)        -0.4%
BM_ZFlat/9                1696914   1709685  269.4MB/s  txt4 (66.22 %)        -0.7%
BM_ZFlat/10                104148    103372  1096.7MB/s  pb (19.64 %)         +0.8%
BM_ZFlat/11                363522    359722  489.8MB/s  gaviota (37.72 %)     +1.1%
BM_ZFlat/12                 47021     50095  469.3MB/s  cp (48.12 %)          -6.1%
BM_ZFlat/13                 16888     16985  627.4MB/s  c (42.40 %)           -0.6%
BM_ZFlat/14                  5496      5469  650.3MB/s  lsp (48.37 %)         +0.5%
BM_ZFlat/15               1460713   1448760  679.5MB/s  xls (41.23 %)         +0.8%
BM_ZFlat/16                   387       393  486.8MB/s  xls_200 (78.00 %)     -1.5%
BM_ZFlat/17                457654    451462  1086.6MB/s  bin (18.11 %)        +1.4%
BM_ZFlat/18                    97        87  2.1GB/s  bin_200 (7.50 %)       +11.5%
BM_ZFlat/19                 77904     80924  451.7MB/s  sum (48.96 %)         -3.7%
BM_ZFlat/20                  7648      7663  527.1MB/s  man (59.36 %)         -0.2%
Sum of all benchmarks    25493635  25482069                                   +0.0%

A=dehao
R=sesse
This commit is contained in:
Steinar H. Gunderson 2015-06-22 15:41:30 +02:00
parent 11ccdfb868
commit 86eb8b152b
2 changed files with 4 additions and 4 deletions

View File

@ -93,7 +93,7 @@ static inline int FindMatchLength(const char* s1,
// the first non-matching bit and use that to calculate the total // the first non-matching bit and use that to calculate the total
// length of the match. // length of the match.
while (PREDICT_TRUE(s2 <= s2_limit - 8)) { while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) { if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
s2 += 8; s2 += 8;
matched += 8; matched += 8;
} else { } else {
@ -108,7 +108,7 @@ static inline int FindMatchLength(const char* s1,
} }
} }
while (PREDICT_TRUE(s2 < s2_limit)) { while (PREDICT_TRUE(s2 < s2_limit)) {
if (PREDICT_TRUE(s1[matched] == *s2)) { if (s1[matched] == *s2) {
++s2; ++s2;
++matched; ++matched;
} else { } else {

View File

@ -138,7 +138,7 @@ namespace {
const int kMaxIncrementCopyOverflow = 10; const int kMaxIncrementCopyOverflow = 10;
inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) { inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
while (op - src < 8) { while (PREDICT_FALSE(op - src < 8)) {
UnalignedCopy64(src, op); UnalignedCopy64(src, op);
len -= op - src; len -= op - src;
op += op - src; op += op - src;
@ -215,7 +215,7 @@ static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
static inline char* EmitCopy(char* op, size_t offset, int len) { static inline char* EmitCopy(char* op, size_t offset, int len) {
// Emit 64 byte copies but make sure to keep at least four bytes reserved // Emit 64 byte copies but make sure to keep at least four bytes reserved
while (len >= 68) { while (PREDICT_FALSE(len >= 68)) {
op = EmitCopyLessThan64(op, offset, 64); op = EmitCopyLessThan64(op, offset, 64);
len -= 64; len -= 64;
} }