From fa341e51cb7f6bce69a7577f4000381a03f61c70 Mon Sep 17 00:00:00 2001 From: Leo Koppel Date: Tue, 31 Oct 2017 14:00:39 -0400 Subject: [PATCH] Improve BM_SetInsert example (#465) * Fix BM_SetInsert example Move declaration of `std::set data` outside the timing loop, so that the destructor is not timed. * Speed up BM_SetInsert test Since the time taken to ConstructRandomSet() is so large compared to the time to insert one element, but only the latter is used to determine number of iterations, this benchmark now takes an extremely long time to run in benchmark_test. Speed it up two ways: - Increase the Ranges() parameters - Cache ConstructRandomSet() result (it's not random anyway), and do only O(N) copy every iteration * Fix same issue in BM_MapLookup test * Make BM_SetInsert test consistent with README - Use the same Ranges everywhere, but increase the 2nd range - Change order of Args() calls in README to more closely match the result of Ranges - Don't cache ConstructRandomSet, since it doesn't make sense in README - Get a smaller optimization inside it, by giving a hint to insert() --- README.md | 17 +++++++++-------- include/benchmark/benchmark.h | 17 +++++++++-------- test/benchmark_test.cc | 10 +++++++--- test/map_test.cc | 3 ++- 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 09ab18c6..6eb63f20 100644 --- a/README.md +++ b/README.md @@ -84,22 +84,23 @@ insertion. 
```c++ static void BM_SetInsert(benchmark::State& state) { + std::set data; for (auto _ : state) { state.PauseTiming(); - std::set data = ConstructRandomSet(state.range(0)); + data = ConstructRandomSet(state.range(0)); state.ResumeTiming(); for (int j = 0; j < state.range(1); ++j) data.insert(RandomNumber()); } } BENCHMARK(BM_SetInsert) - ->Args({1<<10, 1}) - ->Args({1<<10, 8}) - ->Args({1<<10, 64}) + ->Args({1<<10, 128}) + ->Args({2<<10, 128}) + ->Args({4<<10, 128}) + ->Args({8<<10, 128}) ->Args({1<<10, 512}) - ->Args({8<<10, 1}) - ->Args({8<<10, 8}) - ->Args({8<<10, 64}) + ->Args({2<<10, 512}) + ->Args({4<<10, 512}) ->Args({8<<10, 512}); ``` @@ -109,7 +110,7 @@ product of the two specified ranges and will generate a benchmark for each such pair. ```c++ -BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {1, 512}}); +BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); ``` For more complex patterns of inputs, passing a custom function to `Apply` allows diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 357e2424..d529e4bf 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -72,29 +72,30 @@ BENCHMARK(BM_memcpy)->Range(8, 8<<10); // example, the following code defines a family of microbenchmarks for // measuring the speed of set insertion. 
static void BM_SetInsert(benchmark::State& state) { + set data; for (auto _ : state) { state.PauseTiming(); - set data = ConstructRandomSet(state.range(0)); + data = ConstructRandomSet(state.range(0)); state.ResumeTiming(); for (int j = 0; j < state.range(1); ++j) data.insert(RandomNumber()); } } BENCHMARK(BM_SetInsert) - ->Args({1<<10, 1}) - ->Args({1<<10, 8}) - ->Args({1<<10, 64}) + ->Args({1<<10, 128}) + ->Args({2<<10, 128}) + ->Args({4<<10, 128}) + ->Args({8<<10, 128}) ->Args({1<<10, 512}) - ->Args({8<<10, 1}) - ->Args({8<<10, 8}) - ->Args({8<<10, 64}) + ->Args({2<<10, 512}) + ->Args({4<<10, 512}) ->Args({8<<10, 512}); // The preceding code is quite repetitive, and can be replaced with // the following short-hand. The following macro will pick a few // appropriate arguments in the product of the two specified ranges // and will generate a microbenchmark for each such pair. -BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {1, 512}}); +BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); // For more complex patterns of inputs, passing a custom function // to Apply allows programmatic specification of an diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc index 39590816..d4326012 100644 --- a/test/benchmark_test.cc +++ b/test/benchmark_test.cc @@ -42,7 +42,7 @@ double CalculatePi(int depth) { std::set ConstructRandomSet(int size) { std::set s; - for (int i = 0; i < size; ++i) s.insert(i); + for (int i = 0; i < size; ++i) s.insert(s.end(), i); return s; } @@ -82,16 +82,20 @@ BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32); BENCHMARK(BM_CalculatePi)->ThreadPerCpu(); static void BM_SetInsert(benchmark::State& state) { + std::set data; for (auto _ : state) { state.PauseTiming(); - std::set data = ConstructRandomSet(state.range(0)); + data = ConstructRandomSet(state.range(0)); state.ResumeTiming(); for (int j = 0; j < state.range(1); ++j) data.insert(rand()); } state.SetItemsProcessed(state.iterations() * state.range(1)); 
state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int)); } -BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {1, 10}}); + +// Test many inserts at once to reduce the total iterations needed. Otherwise, the slower, +// non-timed part of each iteration will make the benchmark take forever. +BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}}); template diff --git a/test/map_test.cc b/test/map_test.cc index e7a26de1..0f8238d9 100644 --- a/test/map_test.cc +++ b/test/map_test.cc @@ -18,9 +18,10 @@ std::map ConstructRandomMap(int size) { // Basic version. static void BM_MapLookup(benchmark::State& state) { const int size = state.range(0); + std::map m; for (auto _ : state) { state.PauseTiming(); - std::map m = ConstructRandomMap(size); + m = ConstructRandomMap(size); state.ResumeTiming(); for (int i = 0; i < size; ++i) { benchmark::DoNotOptimize(m.find(rand() % size));