#include "benchmark/benchmark.h"
|
2015-03-12 22:03:33 +00:00
|
|
|
|
2016-10-07 18:04:50 +00:00
|
|
|
#define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192)
|
2015-03-12 22:03:33 +00:00
|
|
|
|
|
|
|
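// An empty timing loop; DoNotOptimize(state.iterations()) keeps the loop body
// from being optimized away entirely.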
void BM_empty(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_empty);
BENCHMARK(BM_empty)->ThreadPerCpu();

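// Spins over state.range(0) values inside the timing loop; BASIC_BENCHMARK_TEST
// registers it with arguments 8, 512 and 8192.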
void BM_spin_empty(benchmark::State& state) {
  for (auto _ : state) {
    for (int x = 0; x < state.range(0); ++x) {
      benchmark::DoNotOptimize(x);
    }
  }
}
BASIC_BENCHMARK_TEST(BM_spin_empty);
BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu();

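// Performs the spin work once before entering the timing loop, then again
// inside it.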
void BM_spin_pause_before(benchmark::State& state) {
  for (int i = 0; i < state.range(0); ++i) {
    benchmark::DoNotOptimize(i);
  }
  for (auto _ : state) {
    for (int i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
}
BASIC_BENCHMARK_TEST(BM_spin_pause_before);
BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu();

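// Pauses and resumes the timer within each iteration: the first spin runs
// while timing is paused, the second after timing resumes.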
void BM_spin_pause_during(benchmark::State& state) {
  for (auto _ : state) {
    state.PauseTiming();
    for (int i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
    state.ResumeTiming();
    for (int i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
}
BASIC_BENCHMARK_TEST(BM_spin_pause_during);
BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu();

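// Pause/resume with no work in between; also registered with UseRealTime()
// to cover the wall-clock timing path.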
void BM_pause_during(benchmark::State& state) {
  for (auto _ : state) {
    state.PauseTiming();
    state.ResumeTiming();
  }
}
BENCHMARK(BM_pause_during);
BENCHMARK(BM_pause_during)->ThreadPerCpu();
BENCHMARK(BM_pause_during)->UseRealTime();
BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu();

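// Spins inside the timing loop and once more after the loop has finished.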
void BM_spin_pause_after(benchmark::State& state) {
  for (auto _ : state) {
    for (int i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
  for (int i = 0; i < state.range(0); ++i) {
    benchmark::DoNotOptimize(i);
  }
}
BASIC_BENCHMARK_TEST(BM_spin_pause_after);
BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu();

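// Combines the two cases above: spin work before, inside, and after the
// timing loop.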
void BM_spin_pause_before_and_after(benchmark::State& state) {
  for (int i = 0; i < state.range(0); ++i) {
    benchmark::DoNotOptimize(i);
  }
  for (auto _ : state) {
    for (int i = 0; i < state.range(0); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
  for (int i = 0; i < state.range(0); ++i) {
    benchmark::DoNotOptimize(i);
  }
}
BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after);
BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu();

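// A timing loop with a completely empty body.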
void BM_empty_stop_start(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_empty_stop_start);
BENCHMARK(BM_empty_stop_start)->ThreadPerCpu();

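// Verifies that state.iterations() matches a manually maintained count when
// the benchmark is driven through the KeepRunning() interface.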
void BM_KeepRunning(benchmark::State& state) {
  benchmark::IterationCount iter_count = 0;
  assert(iter_count == state.iterations());
  while (state.KeepRunning()) {
    ++iter_count;
  }
  assert(iter_count == state.iterations());
}
BENCHMARK(BM_KeepRunning);

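// Same verification for the batched interface, KeepRunningBatch().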
void BM_KeepRunningBatch(benchmark::State& state) {
  // Choose a prime batch size to avoid evenly dividing max_iterations.
  const benchmark::IterationCount batch_size = 101;
  benchmark::IterationCount iter_count = 0;
  while (state.KeepRunningBatch(batch_size)) {
    iter_count += batch_size;
  }
  assert(state.iterations() == iter_count);
}
BENCHMARK(BM_KeepRunningBatch);

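// Same verification for the range-based for interface; the loop must run
// exactly state.max_iterations times.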
void BM_RangedFor(benchmark::State& state) {
  benchmark::IterationCount iter_count = 0;
  for (auto _ : state) {
    ++iter_count;
  }
  assert(iter_count == state.max_iterations);
}
BENCHMARK(BM_RangedFor);

// Ensure that StateIterator provides all the necessary typedefs required to
// instantiate std::iterator_traits.
static_assert(std::is_same<
  typename std::iterator_traits<benchmark::State::StateIterator>::value_type,
  typename benchmark::State::StateIterator::value_type>::value, "");

BENCHMARK_MAIN();