From a94b0a67414991936ef8871fe0b8bf8541107989 Mon Sep 17 00:00:00 2001 From: Dominic Hamon Date: Thu, 19 Dec 2013 16:45:53 -0800 Subject: [PATCH 1/4] Remove duplicated macros header --- src/cycleclock.h | 6 ++- src/macros.h | 110 ----------------------------------------------- src/sysinfo.cc | 2 +- src/walltime.cc | 2 +- 4 files changed, 6 insertions(+), 114 deletions(-) delete mode 100644 src/macros.h diff --git a/src/cycleclock.h b/src/cycleclock.h index 1b7d4c99..be917003 100644 --- a/src/cycleclock.h +++ b/src/cycleclock.h @@ -39,15 +39,17 @@ extern "C" uint64_t __rdtsc(); #endif #include +#include "benchmark/macros.h" + +namespace benchmark { // NOTE: only i386 and x86_64 have been well tested. // PPC, sparc, alpha, and ia64 are based on // http://peter.kuscsik.com/wordpress/?p=14 // with modifications by m3b. See also // https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h -namespace benchmark { namespace cycleclock { // This should return the number of cycles since power-on. Thread-safe. - static inline int64_t Now() { + inline ATTRIBUTE_ALWAYS_INLINE int64_t Now() { #if defined(OS_MACOSX) // this goes at the top because we need ALL Macs, regardless of // architecture, to return the number of "mach time units" that diff --git a/src/macros.h b/src/macros.h deleted file mode 100644 index b4703282..00000000 --- a/src/macros.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef BENCHMARK_MACROS_H_ -#define BENCHMARK_MACROS_H_ - -#include - -#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&); - -// The arraysize(arr) macro returns the # of elements in an array arr. -// The expression is a compile-time constant, and therefore can be -// used in defining new arrays, for example. If you use arraysize on -// a pointer by mistake, you will get a compile-time error. -// -// One caveat is that, for C++03, arraysize() doesn't accept any array of -// an anonymous type or a type defined inside a function. In these rare -// cases, you have to use the unsafe ARRAYSIZE() macro below. This is -// due to a limitation in C++03's template system. The limitation has -// been removed in C++11. - -// This template function declaration is used in defining arraysize. -// Note that the function doesn't need an implementation, as we only -// use its type. -template -char (&ArraySizeHelper(T (&array)[N]))[N]; - -// That gcc wants both of these prototypes seems mysterious. VC, for -// its part, can't decide which to use (another mystery). Matching of -// template overloads: the final frontier. -#ifndef COMPILER_MSVC -template -char (&ArraySizeHelper(const T (&array)[N]))[N]; -#endif - -#define arraysize(array) (sizeof(ArraySizeHelper(array))) - -// The STATIC_ASSERT macro can be used to verify that a compile time -// expression is true. For example, you could use it to verify the -// size of a static array: -// -// STATIC_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES, -// content_type_names_incorrect_size); -// -// or to make sure a struct is smaller than a certain size: -// -// STATIC_ASSERT(sizeof(foo) < 128, foo_too_large); -// -// The second argument to the macro is the name of the variable. If -// the expression is false, most compilers will issue a warning/error -// containing the name of the variable. - -template -struct StaticAssert { -}; - -#define STATIC_ASSERT(expr, msg) \ - typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] - -// Implementation details of STATIC_ASSERT: -// -// - STATIC_ASSERT works by defining an array type that has -1 -// elements (and thus is invalid) when the expression is false. -// -// - The simpler definition -// -// #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1] -// -// does not work, as gcc supports variable-length arrays whose sizes -// are determined at run-time (this is gcc's extension and not part -// of the C++ standard). As a result, gcc fails to reject the -// following code with the simple definition: -// -// int foo; -// STATIC_ASSERT(foo, msg); // not supposed to compile as foo is -// // not a compile-time constant. -// -// - By using the type StaticAssert<(bool(expr))>, we ensures that -// expr is a compile-time constant. (Template arguments must be -// determined at compile-time.) -// -// - The outer parentheses in StaticAssert<(bool(expr))> are necessary -// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written -// -// StaticAssert -// -// instead, these compilers will refuse to compile -// -// STATIC_ASSERT(5 > 0, some_message); -// -// (They seem to think the ">" in "5 > 0" marks the end of the -// template argument list.) -// -// - The array size is (bool(expr) ? 1 : -1), instead of simply -// -// ((expr) ? 1 : -1). -// -// This is to avoid running into a bug in MS VC 7.1, which -// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. - -#define CHECK(b) do { if (!(b)) assert(false); } while(0) -#define CHECK_EQ(a, b) CHECK((a) == (b)) -#define CHECK_GE(a, b) CHECK((a) >= (b)) -#define CHECK_LE(a, b) CHECK((a) <= (b)) -#define CHECK_GT(a, b) CHECK((a) > (b)) -#define CHECK_LT(a, b) CHECK((a) < (b)) - - -#define ATTRIBUTE_UNUSED __attribute__ ((unused)) - -#endif // BENCHMARK_MACROS_H_ diff --git a/src/sysinfo.cc b/src/sysinfo.cc index b8dc3cbd..8e323301 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -13,8 +13,8 @@ #include #include +#include "benchmark/macros.h" #include "cycleclock.h" -#include "macros.h" #include "mutex_lock.h" #include "sleep.h" diff --git a/src/walltime.cc b/src/walltime.cc index 835b973e..d0bec27c 100644 --- a/src/walltime.cc +++ b/src/walltime.cc @@ -8,8 +8,8 @@ #include #include +#include "benchmark/macros.h" #include "cycleclock.h" -#include "macros.h" #include "sysinfo.h" namespace benchmark { From b3f0d71e508828bb4add65a7574c52fef1949998 Mon Sep 17 00:00:00 2001 From: Dominic Hamon Date: Thu, 19 Dec 2013 17:16:40 -0800 Subject: [PATCH 2/4] benchmark_min_time now works as expected --- include/benchmark/benchmark.h | 1 - src/benchmark.cc | 83 +++++++++++++++++++---------------- src/sleep.cc | 11 +---- src/sleep.h | 5 +++ 4 files changed, 50 insertions(+), 50 deletions(-) diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index d5dec23c..a34f8e68 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -233,7 +233,6 @@ class State { bool MaybeStop(); void NewInterval(); bool AllStarting(); - bool RunAnotherInterval() const; void Run(); diff --git a/src/benchmark.cc b/src/benchmark.cc index 0d93eb1e..a43a56e4 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -29,11 +29,9 @@ DEFINE_string(benchmark_filter, ".", "If this flag is the string \"all\", all benchmarks linked " "into the process are run."); -DEFINE_int32(benchmark_min_iters, 100, - "Minimum number of iterations per benchmark"); - -DEFINE_int32(benchmark_max_iters, 1000000000, - "Maximum number of iterations per benchmark"); +DEFINE_int32(benchmark_iterations, 0, + "Total number of iterations per benchmark. 0 means the benchmarks " + "are time-based."); DEFINE_double(benchmark_min_time, 0.5, "Minimum number of seconds we should run benchmark before " @@ -69,15 +67,13 @@ DECLARE_string(heap_check); : NULL ) namespace benchmark { - namespace { - // kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta. -static const char kBigSIUnits[] = "kMGTPEZY"; +const char kBigSIUnits[] = "kMGTPEZY"; // Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi. -static const char kBigIECUnits[] = "KMGTPEZY"; +const char kBigIECUnits[] = "KMGTPEZY"; // milli, micro, nano, pico, femto, atto, zepto, yocto. -static const char kSmallSIUnits[] = "munpfazy"; +const char kSmallSIUnits[] = "munpfazy"; // We require that all three arrays have the same size. static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits), @@ -427,9 +423,8 @@ void UseRealTime() { void PrintUsageAndExit() { fprintf(stdout, "benchmark [--benchmark_filter=]\n" -// TODO " [--benchmark_min_iters=]\n" -// TODO " [--benchmark_max_iters=]\n" -// TODO " [--benchmark_min_time=]\n" +// TODO " [--benchmark_iterations=]\n" + " [--benchmark_min_time=]\n" // " [--benchmark_memory_usage]\n" // TODO " [--benchmark_repetitions=]\n" " [--color_print={true|false}]\n" @@ -441,16 +436,16 @@ void ParseCommandLineFlags(int* argc, const char** argv) { for (int i = 1; i < *argc; ++i) { if (ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - /* TODO(dominic) - ParseInt32Flag(argv[i], "benchmark_min_iters", - &FLAGS_benchmark_min_iters) || - ParseInt32Flag(argv[i], "benchmark_max_iters", - &FLAGS_benchmark_max_iters) || + /* TODO + ParseInt32Flag(argv[i], "benchmark_iterations", + &FLAGS_benchmark_iterations) || + */ ParseDoubleFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || // TODO(dominic) // ParseBoolFlag(argv[i], "gbenchmark_memory_usage", // &FLAGS_gbenchmark_memory_usage) || + /* ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || */ @@ -504,7 +499,7 @@ class State::FastClock { t = MyCPUUsage() + ChildrenCPUUsage(); break; } - return static_cast(t * 1e6); + return static_cast(t * kNumMicrosPerSecond); } // Reinitialize if necessary (since clock type may be change once benchmark @@ -577,6 +572,10 @@ struct State::SharedState { SharedState(const internal::Benchmark::Instance* b, int t) : instance(b), starting(0), stopping(0), threads(t) { + pthread_mutex_init(&mu, nullptr); + } + ~SharedState() { + pthread_mutex_destroy(&mu); } DISALLOW_COPY_AND_ASSIGN(SharedState); }; @@ -907,13 +906,17 @@ State::State(FastClock* clock, SharedState* s, int t) pause_time_(0.0), total_iterations_(0), interval_micros_( - static_cast(1e6 * FLAGS_benchmark_min_time / + static_cast(kNumMicrosPerSecond * FLAGS_benchmark_min_time / FLAGS_benchmark_repetitions)) { + CHECK(clock != nullptr); + CHECK(s != nullptr); } bool State::KeepRunning() { // Fast path - if (!clock_->HasReached(stop_time_micros_ + pause_time_)) { + if ((FLAGS_benchmark_iterations == 0 && + !clock_->HasReached(stop_time_micros_ + pause_time_)) || + iterations_ < FLAGS_benchmark_iterations) { ++iterations_; return true; } @@ -1029,12 +1032,12 @@ void State::NewInterval() { } bool State::FinishInterval() { - if (iterations_ < FLAGS_benchmark_min_iters / FLAGS_benchmark_repetitions && - interval_micros_ < 5000000) { + if (FLAGS_benchmark_iterations != 0 && + iterations_ < FLAGS_benchmark_iterations / FLAGS_benchmark_repetitions) { interval_micros_ *= 2; #ifdef DEBUG - std::cout << "Interval was too short; trying again for " - << interval_micros_ << " useconds.\n"; + std::cout << "Not enough iterations in interval; " + << "Trying again for " << interval_micros_ << " useconds.\n"; #endif is_continuation_ = false; NewInterval(); @@ -1058,11 +1061,25 @@ bool State::FinishInterval() { bool keep_going = false; { mutex_lock l(&shared_->mu); + + // Either replace the last or add a new data point. if (is_continuation_) shared_->runs.back() = data; else shared_->runs.push_back(data); - keep_going = RunAnotherInterval(); + + if (FLAGS_benchmark_iterations != 0) { + // If we need more iterations, run another interval as a continuation. + keep_going = total_iterations_ < FLAGS_benchmark_iterations; + is_continuation_ = keep_going; + } else { + // If this is a repetition, run another interval as a new data point. + keep_going = + shared_->runs.size() < + static_cast(FLAGS_benchmark_repetitions); + is_continuation_ = !keep_going; + } + if (!keep_going) { ++shared_->stopping; if (shared_->stopping < shared_->threads) { @@ -1076,23 +1093,11 @@ bool State::FinishInterval() { } } - if (state_ == STATE_RUNNING) { - is_continuation_ = true; + if (state_ == STATE_RUNNING) NewInterval(); - } return keep_going; } -bool State::RunAnotherInterval() const { - if (total_iterations_ < FLAGS_benchmark_min_iters) - return true; - if (total_iterations_ > FLAGS_benchmark_max_iters) - return false; - if (static_cast(shared_->runs.size()) >= FLAGS_benchmark_repetitions) - return false; - return true; -} - bool State::MaybeStop() { mutex_lock l(&shared_->mu); if (shared_->stopping < shared_->threads) { diff --git a/src/sleep.cc b/src/sleep.cc index f1bdf1fc..27ce2cf0 100644 --- a/src/sleep.cc +++ b/src/sleep.cc @@ -5,22 +5,14 @@ namespace benchmark { #ifdef OS_WINDOWS - // Window's _sleep takes milliseconds argument. void SleepForMilliseconds(int milliseconds) { _sleep(milliseconds); } void SleepForSeconds(double seconds) { - SleepForMilliseconds(static_cast(seconds * 1000)); + SleepForMilliseconds(static_cast(kNumMillisPerSecond * seconds)); } - #else // OS_WINDOWS - -static const int64_t kNumMillisPerSecond = 1000LL; -static const int64_t kNumMicrosPerMilli = 1000LL; -static const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL; -static const int64_t kNumNanosPerMicro = 1000LL; - void SleepForMicroseconds(int64_t microseconds) { struct timespec sleep_time; sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; @@ -36,7 +28,6 @@ void SleepForMilliseconds(int milliseconds) { void SleepForSeconds(double seconds) { SleepForMicroseconds(static_cast(seconds * kNumMicrosPerSecond)); } - #endif // OS_WINDOWS } // end namespace benchmark diff --git a/src/sleep.h b/src/sleep.h index 1b60a45a..437b5c70 100644 --- a/src/sleep.h +++ b/src/sleep.h @@ -4,6 +4,11 @@ #include namespace benchmark { +const int64_t kNumMillisPerSecond = 1000LL; +const int64_t kNumMicrosPerMilli = 1000LL; +const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL; +const int64_t kNumNanosPerMicro = 1000LL; + void SleepForMicroseconds(int64_t microseconds); void SleepForMilliseconds(int milliseconds); void SleepForSeconds(double seconds); From 3a6f24c67be2d76533a541e22e542295aa16a9da Mon Sep 17 00:00:00 2001 From: Dominic Hamon Date: Thu, 19 Dec 2013 17:21:34 -0800 Subject: [PATCH 3/4] benchmark_iterations works as expected --- src/benchmark.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index a43a56e4..4d3ea19e 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -423,7 +423,7 @@ void UseRealTime() { void PrintUsageAndExit() { fprintf(stdout, "benchmark [--benchmark_filter=]\n" -// TODO " [--benchmark_iterations=]\n" + " [--benchmark_iterations=]\n" " [--benchmark_min_time=]\n" // " [--benchmark_memory_usage]\n" // TODO " [--benchmark_repetitions=]\n" @@ -436,10 +436,8 @@ void ParseCommandLineFlags(int* argc, const char** argv) { for (int i = 1; i < *argc; ++i) { if (ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - /* TODO ParseInt32Flag(argv[i], "benchmark_iterations", &FLAGS_benchmark_iterations) || - */ ParseDoubleFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || // TODO(dominic) From 902fb9122625eb30a02be4ce078a00c70603b44e Mon Sep 17 00:00:00 2001 From: Dominic Hamon Date: Fri, 20 Dec 2013 14:38:15 -0800 Subject: [PATCH 4/4] benchmark_repetitions now work --- src/benchmark.cc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 4d3ea19e..fe7c1699 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -269,8 +269,9 @@ void ComputeStats(const std::vector& reports, // Accumulators. Stat1_d real_accumulated_time_stat; Stat1_d cpu_accumulated_time_stat; - Stat1_d bytes_per_second_stat; Stat1_d items_per_second_stat; + Stat1_d bytes_per_second_stat; + Stat1_d iterations_stat; Stat1MinMax_d max_heapbytes_used_stat; int total_iters = 0; @@ -278,20 +279,20 @@ void ComputeStats(const std::vector& reports, for (std::vector::const_iterator it = reports.begin(); it != reports.end(); ++it) { CHECK_EQ(reports[0].benchmark_name, it->benchmark_name); - total_iters += it->iterations; real_accumulated_time_stat += Stat1_d(it->real_accumulated_time/it->iterations, it->iterations); cpu_accumulated_time_stat += Stat1_d(it->cpu_accumulated_time/it->iterations, it->iterations); items_per_second_stat += Stat1_d(it->items_per_second, it->iterations); bytes_per_second_stat += Stat1_d(it->bytes_per_second, it->iterations); + iterations_stat += Stat1_d(it->iterations, it->iterations); max_heapbytes_used_stat += Stat1MinMax_d(it->max_heapbytes_used, it->iterations); } // Get the data from the accumulator to BenchmarkRunData's. mean_data->benchmark_name = reports[0].benchmark_name + "_mean"; - mean_data->iterations = total_iters; + mean_data->iterations = iterations_stat.Mean(); mean_data->real_accumulated_time = real_accumulated_time_stat.Sum(); mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Sum(); mean_data->bytes_per_second = bytes_per_second_stat.Mean(); @@ -309,7 +310,7 @@ void ComputeStats(const std::vector& reports, stddev_data->benchmark_name = reports[0].benchmark_name + "_stddev"; stddev_data->report_label = mean_data->report_label; - stddev_data->iterations = total_iters; + stddev_data->iterations = iterations_stat.StdDev(); // We multiply by total_iters since PrintRunData expects a total time. stddev_data->real_accumulated_time = real_accumulated_time_stat.StdDev() * total_iters; @@ -426,7 +427,7 @@ void PrintUsageAndExit() { " [--benchmark_iterations=]\n" " [--benchmark_min_time=]\n" // " [--benchmark_memory_usage]\n" -// TODO " [--benchmark_repetitions=]\n" + " [--benchmark_repetitions=]\n" " [--color_print={true|false}]\n" " [--v=]\n"); exit(0); @@ -443,10 +444,8 @@ void ParseCommandLineFlags(int* argc, const char** argv) { // TODO(dominic) // ParseBoolFlag(argv[i], "gbenchmark_memory_usage", // &FLAGS_gbenchmark_memory_usage) || - /* ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || - */ ParseBoolFlag(argv[i], "color_print", &FLAGS_color_print) || ParseInt32Flag(argv[i], "v", &FLAGS_v)) { for (int j = i; j != *argc; ++j)