diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h index ed55c88a..a99ae9bc 100644 --- a/include/benchmark/benchmark_api.h +++ b/include/benchmark/benchmark_api.h @@ -113,6 +113,14 @@ template int BM_Sequential(benchmark::State& state) { } BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); +Use `Benchmark::MinTime(double t)` to set the minimum time used to run the +benchmark. This option overrides the `benchmark_min_time` flag. + +void BM_test(benchmark::State& state) { + ... body ... +} +BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds. + In a multithreaded test, it is guaranteed that none of the threads will start until all have called KeepRunning, and all will have finished before KeepRunning returns false. As such, any global setup or teardown you want to do can be @@ -247,14 +255,6 @@ public: // within each benchmark iteration, if possible. void ResumeTiming(); - // If a particular benchmark is I/O bound, or if for some reason CPU - // timings are not representative, call this method from within the - // benchmark routine. If called, the elapsed time will be used to - // control how many iterations are run, and in the printing of - // items/second or MB/seconds values. If not called, the cpu time - // used by the benchmark will be used. - void UseRealTime(); - // Set the number of bytes processed by the current benchmark // execution. This routine is typically called once at the end of a // throughput oriented benchmark. If this routine is called with a @@ -401,6 +401,17 @@ class Benchmark { // Threads, etc. Benchmark* Apply(void (*func)(Benchmark* benchmark)); + // Set the minimum amount of time to use when running this benchmark. This + // option overrides the `benchmark_min_time` flag. + Benchmark* MinTime(double t); + + // If a particular benchmark is I/O bound, or if for some reason CPU + // timings are not representative, call this method. If called, the elapsed + // time will be used to control how many iterations are run, and in the + // printing of items/second or MB/seconds values. If not called, the cpu + // time used by the benchmark will be used. + Benchmark* UseRealTime(); + // Support for running multiple copies of the same benchmark concurrently // in multiple threads. This may be useful when measuring the scaling // of some piece of code. diff --git a/src/benchmark.cc b/src/benchmark.cc index 9643a2ab..01ef5595 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -43,10 +43,6 @@ DEFINE_string(benchmark_filter, ".", "If this flag is the string \"all\", all benchmarks linked " "into the process are run."); -DEFINE_int32(benchmark_iterations, 0, - "Total number of iterations per benchmark. 0 means the benchmarks " - "are time-based."); - DEFINE_double(benchmark_min_time, 0.5, "Minimum number of seconds we should run benchmark before " "results are considered significant. For cpu-time based " @@ -92,6 +88,10 @@ GetBenchmarkLock() namespace { +bool IsZero(double n) { + return std::abs(n) < std::numeric_limits::epsilon(); +} + // For non-dense Range, intermediate values are powers of kRangeMultiplier. static const int kRangeMultiplier = 8; static const int kMaxIterations = 1000000000; @@ -104,10 +104,6 @@ std::string* GetReportLabel() { return &label; } -// Should this benchmark base decisions off of real time rather than -// cpu time? -bool use_real_time GUARDED_BY(GetBenchmarkLock()); - // TODO(ericwf): support MallocCounter. //static benchmark::MallocCounter *benchmark_mc; @@ -257,6 +253,8 @@ struct Benchmark::Instance { int arg1; bool has_arg2; int arg2; + bool use_real_time; + double min_time; int threads; // Number of concurrent threads to use bool multithreaded; // Is benchmark multi-threaded? }; @@ -296,6 +294,8 @@ public: void DenseRange(int start, int limit); void ArgPair(int start, int limit); void RangePair(int lo1, int hi1, int lo2, int hi2); + void MinTime(double n); + void UseRealTime(); void Threads(int t); void ThreadRange(int min_threads, int max_threads); void ThreadPerCpu(); @@ -309,6 +309,8 @@ private: Function* function_; int arg_count_; std::vector< std::pair > args_; // Args for all benchmark runs + double min_time_; + bool use_real_time_; std::vector thread_counts_; std::size_t registration_index_; @@ -388,6 +390,8 @@ bool BenchmarkFamilies::FindBenchmarks( instance.arg1 = args.first; instance.has_arg2 = family->arg_count_ == 2; instance.arg2 = args.second; + instance.min_time = family->min_time_; + instance.use_real_time = family->use_real_time_; instance.threads = num_threads; instance.multithreaded = !(family->thread_counts_.empty()); @@ -398,6 +402,12 @@ bool BenchmarkFamilies::FindBenchmarks( if (family->arg_count_ >= 2) { AppendHumanReadable(instance.arg2, &instance.name); } + if (!IsZero(family->min_time_)) { + instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); + } + if (family->use_real_time_) { + instance.name += "/real_time"; + } // Add the number of threads used to the name if (!family->thread_counts_.empty()) { @@ -412,7 +422,8 @@ bool BenchmarkFamilies::FindBenchmarks( } BenchmarkImp::BenchmarkImp(const char* name, Function* func) - : name_(name), function_(func), arg_count_(-1) { + : name_(name), function_(func), arg_count_(-1), + min_time_(0.0), use_real_time_(false) { registration_index_ = BenchmarkFamilies::GetInstance()->AddBenchmark(this); } @@ -467,6 +478,15 @@ void BenchmarkImp::RangePair(int lo1, int hi1, int lo2, int hi2) { } } +void BenchmarkImp::MinTime(double t) { + CHECK(t > 0.0); + min_time_ = t; +} + +void BenchmarkImp::UseRealTime() { + use_real_time_ = true; +} + void BenchmarkImp::Threads(int t) { CHECK_GT(t, 0); thread_counts_.push_back(t); @@ -545,6 +565,16 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { return this; } +Benchmark* Benchmark::MinTime(double t) { + imp_->MinTime(t); + return this; +} + +Benchmark* Benchmark::UseRealTime() { + imp_->UseRealTime(); + return this; +} + Benchmark* Benchmark::Threads(int t) { imp_->Threads(t); return this; @@ -585,8 +615,8 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, BenchmarkReporter* br) EXCLUDES(GetBenchmarkLock()) { - int iters = FLAGS_benchmark_iterations ? FLAGS_benchmark_iterations - : 1; + int iters = 1; + std::vector reports; std::vector pool; @@ -602,7 +632,6 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, { MutexLock l(GetBenchmarkLock()); GetReportLabel()->clear(); - use_real_time = false; } Notification done; @@ -637,22 +666,25 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, // Base decisions off of real time if requested by this benchmark. double seconds = cpu_accumulated_time; + if (b.use_real_time) { + seconds = real_accumulated_time; + } + std::string label; { MutexLock l(GetBenchmarkLock()); label = *GetReportLabel(); - if (use_real_time) { - seconds = real_accumulated_time; - } } + const double min_time = !IsZero(b.min_time) ? b.min_time + : FLAGS_benchmark_min_time; + // If this was the first run, was elapsed time or cpu time large enough? // If this is not the first run, go with the current value of iter. if ((i > 0) || - (iters == FLAGS_benchmark_iterations) || (iters >= kMaxIterations) || - (seconds >= FLAGS_benchmark_min_time) || - (real_accumulated_time >= 5*FLAGS_benchmark_min_time)) { + (seconds >= min_time) || + (real_accumulated_time >= 5*min_time)) { double bytes_per_second = 0; if (total.bytes_processed > 0 && seconds > 0.0) { bytes_per_second = (total.bytes_processed / seconds); @@ -678,13 +710,13 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, // See how much iterations should be increased by // Note: Avoid division by zero with max(seconds, 1ns). - double multiplier = FLAGS_benchmark_min_time * 1.4 / std::max(seconds, 1e-9); + double multiplier = min_time * 1.4 / std::max(seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. Otherwise we use at most 10 times // expansion. // NOTE: When the last run was at least 10% of the min time the max // expansion should be 14x. - bool is_significant = (seconds / FLAGS_benchmark_min_time) > 0.1; + bool is_significant = (seconds / min_time) > 0.1; multiplier = is_significant ? multiplier : std::min(10.0, multiplier); if (multiplier <= 1.0) multiplier = 2.0; double next_iters = std::max(multiplier * iters, iters + 1.0); @@ -727,11 +759,6 @@ void State::ResumeTiming() { timer_manager->StartTimer(); } -void State::UseRealTime() { - MutexLock l(GetBenchmarkLock()); - use_real_time = true; -} - void State::SetLabel(const char* label) { CHECK(running_benchmark); MutexLock l(GetBenchmarkLock()); @@ -749,26 +776,14 @@ void RunMatchingBenchmarks(const std::string& spec, auto families = benchmark::internal::BenchmarkFamilies::GetInstance(); if (!families->FindBenchmarks(spec, &benchmarks)) return; - // Determine the width of the name field using a minimum width of 10. - // Also determine max number of threads needed. size_t name_field_width = 10; for (const internal::Benchmark::Instance& benchmark : benchmarks) { - // Add width for _stddev and threads:XX - if (benchmark.threads > 1 && FLAGS_benchmark_repetitions > 1) { - name_field_width = - std::max(name_field_width, benchmark.name.size() + 17); - } else if (benchmark.threads > 1) { - name_field_width = - std::max(name_field_width, benchmark.name.size() + 10); - } else if (FLAGS_benchmark_repetitions > 1) { - name_field_width = - std::max(name_field_width, benchmark.name.size() + 7); - } else { - name_field_width = - std::max(name_field_width, benchmark.name.size()); - } + name_field_width = + std::max(name_field_width, benchmark.name.size()); } + if (FLAGS_benchmark_repetitions > 1) + name_field_width += std::strlen("_stddev"); // Print header here BenchmarkReporter::Context context; @@ -824,7 +839,6 @@ void PrintUsageAndExit() { fprintf(stdout, "benchmark" " [--benchmark_filter=]\n" - " [--benchmark_iterations=]\n" " [--benchmark_min_time=]\n" " [--benchmark_repetitions=]\n" " [--benchmark_format=]\n" @@ -839,8 +853,6 @@ void ParseCommandLineFlags(int* argc, const char** argv) { if ( ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - ParseInt32Flag(argv[i], "benchmark_iterations", - &FLAGS_benchmark_iterations) || ParseDoubleFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8fdb6f3e..4c217c12 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -25,8 +25,11 @@ add_test(filter_regex_wildcard filter_test --benchmark_filter=.*Calculate.* 16) add_test(filter_regex_begin filter_test --benchmark_filter=^BM_Calculate.* 16) add_test(filter_regex_end filter_test --benchmark_filter=.*Pi$ 8) +compile_benchmark_test(options_test) +add_test(options_benchmarks options_test) + compile_benchmark_test(basic_test) -add_test(basic basic_test) +add_test(basic_benchmark basic_test) compile_benchmark_test(cxx03_test) set_target_properties(cxx03_test diff --git a/test/basic_test.cc b/test/basic_test.cc index f7b45be7..e3d114ad 100644 --- a/test/basic_test.cc +++ b/test/basic_test.cc @@ -61,16 +61,8 @@ void BM_pause_during(benchmark::State& state) { } BENCHMARK(BM_pause_during); BENCHMARK(BM_pause_during)->ThreadPerCpu(); - -void BM_pause_during_realtime(benchmark::State& state) { - state.UseRealTime(); - while(state.KeepRunning()) { - state.PauseTiming(); - state.ResumeTiming(); - } -} -BENCHMARK(BM_pause_during_realtime); -BENCHMARK(BM_pause_during_realtime)->ThreadPerCpu(); +BENCHMARK(BM_pause_during)->UseRealTime(); +BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu(); void BM_spin_pause_after(benchmark::State& state) { while(state.KeepRunning()) { diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc index 2fb33558..a23f82f3 100644 --- a/test/benchmark_test.cc +++ b/test/benchmark_test.cc @@ -58,19 +58,7 @@ static void BM_Factorial(benchmark::State& state) { state.SetLabel(ss.str()); } BENCHMARK(BM_Factorial); - -static void BM_FactorialRealTime(benchmark::State& state) { - state.UseRealTime(); - - int fac_42 = 0; - while (state.KeepRunning()) - fac_42 = Factorial(8); - // Prevent compiler optimizations - std::stringstream ss; - ss << fac_42; - state.SetLabel(ss.str()); -} -BENCHMARK(BM_FactorialRealTime); +BENCHMARK(BM_Factorial)->UseRealTime(); static void BM_CalculatePiRange(benchmark::State& state) { double pi = 0.0; diff --git a/test/options_test.cc b/test/options_test.cc new file mode 100644 index 00000000..ec8c2a16 --- /dev/null +++ b/test/options_test.cc @@ -0,0 +1,18 @@ +#include "benchmark/benchmark_api.h" + +void BM_basic(benchmark::State& state) { + while (state.KeepRunning()) { + } +} +BENCHMARK(BM_basic); +BENCHMARK(BM_basic)->Arg(42); +BENCHMARK(BM_basic)->Range(1, 8); +BENCHMARK(BM_basic)->DenseRange(10, 15); +BENCHMARK(BM_basic)->ArgPair(42, 42); +BENCHMARK(BM_basic)->RangePair(64, 512, 64, 512); +BENCHMARK(BM_basic)->MinTime(0.7); +BENCHMARK(BM_basic)->UseRealTime(); +BENCHMARK(BM_basic)->ThreadRange(2, 4); +BENCHMARK(BM_basic)->ThreadPerCpu(); + +BENCHMARK_MAIN()