Merging in work from branch

2013-12-20 14:39:09 -08:00 · 2013-12-20 14:39:09 -08:00 · d44b01a9cc
parent d4ed240426 902fb91226
commit d44b01a9cc
8 changed files with 55 additions and 170 deletions
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@ -233,7 +233,6 @@ class State {
  bool MaybeStop();
  void NewInterval();
  bool AllStarting();
-  bool RunAnotherInterval() const;

  void Run();

--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@ -30,11 +30,9 @@ DEFINE_string(benchmark_filter, ".",
              "If this flag is the string \"all\", all benchmarks linked "
              "into the process are run.");

-DEFINE_int32(benchmark_min_iters, 100,
-             "Minimum number of iterations per benchmark");
-
-DEFINE_int32(benchmark_max_iters, 1000000000,
-             "Maximum number of iterations per benchmark");
+DEFINE_int32(benchmark_iterations, 0,
+             "Total number of iterations per benchmark. 0 means the benchmarks "
+             "are time-based.");

 DEFINE_double(benchmark_min_time, 0.5,
              "Minimum number of seconds we should run benchmark before "
@ -70,15 +68,13 @@ DECLARE_string(heap_check);
    : NULL )

 namespace benchmark {
-
 namespace {
-
 // kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta.
-static const char kBigSIUnits[] = "kMGTPEZY";
+const char kBigSIUnits[] = "kMGTPEZY";
 // Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi.
-static const char kBigIECUnits[] = "KMGTPEZY";
+const char kBigIECUnits[] = "KMGTPEZY";
 // milli, micro, nano, pico, femto, atto, zepto, yocto.
-static const char kSmallSIUnits[] = "munpfazy";
+const char kSmallSIUnits[] = "munpfazy";

 // We require that all three arrays have the same size.
 static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
@ -274,8 +270,9 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
  // Accumulators.
  Stat1_d real_accumulated_time_stat;
  Stat1_d cpu_accumulated_time_stat;
-  Stat1_d bytes_per_second_stat;
  Stat1_d items_per_second_stat;
+  Stat1_d bytes_per_second_stat;
+  Stat1_d iterations_stat;
  Stat1MinMax_d max_heapbytes_used_stat;
  int total_iters = 0;

@ -283,20 +280,20 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
  for (std::vector<BenchmarkRunData>::const_iterator it = reports.begin();
       it != reports.end(); ++it) {
    CHECK_EQ(reports[0].benchmark_name, it->benchmark_name);
-    total_iters += it->iterations;
    real_accumulated_time_stat +=
        Stat1_d(it->real_accumulated_time/it->iterations, it->iterations);
    cpu_accumulated_time_stat +=
        Stat1_d(it->cpu_accumulated_time/it->iterations, it->iterations);
    items_per_second_stat += Stat1_d(it->items_per_second, it->iterations);
    bytes_per_second_stat += Stat1_d(it->bytes_per_second, it->iterations);
+    iterations_stat += Stat1_d(it->iterations, it->iterations);
    max_heapbytes_used_stat += Stat1MinMax_d(it->max_heapbytes_used,
                                             it->iterations);
  }

  // Get the data from the accumulator to BenchmarkRunData's.
  mean_data->benchmark_name = reports[0].benchmark_name + "_mean";
-  mean_data->iterations = total_iters;
+  mean_data->iterations = iterations_stat.Mean();
  mean_data->real_accumulated_time = real_accumulated_time_stat.Sum();
  mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Sum();
  mean_data->bytes_per_second = bytes_per_second_stat.Mean();
@ -314,7 +311,7 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,

  stddev_data->benchmark_name = reports[0].benchmark_name + "_stddev";
  stddev_data->report_label = mean_data->report_label;
-  stddev_data->iterations = total_iters;
+  stddev_data->iterations = iterations_stat.StdDev();
  // We multiply by total_iters since PrintRunData expects a total time.
  stddev_data->real_accumulated_time =
      real_accumulated_time_stat.StdDev() * total_iters;
@ -428,11 +425,10 @@ void UseRealTime() {

 void PrintUsageAndExit() {
  fprintf(stdout, "benchmark [--benchmark_filter=<regex>]\n"
-// TODO           "          [--benchmark_min_iters=<min_iters>]\n"
-// TODO           "          [--benchmark_max_iters=<max_iters>]\n"
-// TODO           "          [--benchmark_min_time=<min_time>]\n"
+                  "          [--benchmark_iterations=<iterations>]\n"
+                  "          [--benchmark_min_time=<min_time>]\n"
 //                "          [--benchmark_memory_usage]\n"
-// TODO           "          [--benchmark_repetitions=<num_repetitions>]\n"
+                  "          [--benchmark_repetitions=<num_repetitions>]\n"
                  "          [--color_print={true|false}]\n"
                  "          [--v=<verbosity>]\n");
  exit(0);
@ -442,11 +438,8 @@ void ParseCommandLineFlags(int* argc, const char** argv) {
  for (int i = 1; i < *argc; ++i) {
    if (ParseStringFlag(argv[i], "benchmark_filter",
                        &FLAGS_benchmark_filter) ||
-        /* TODO(dominic)
-        ParseInt32Flag(argv[i], "benchmark_min_iters",
-                       &FLAGS_benchmark_min_iters) ||
-        ParseInt32Flag(argv[i], "benchmark_max_iters",
-                       &FLAGS_benchmark_max_iters) ||
+        ParseInt32Flag(argv[i], "benchmark_iterations",
+                       &FLAGS_benchmark_iterations) ||
        ParseDoubleFlag(argv[i], "benchmark_min_time",
                        &FLAGS_benchmark_min_time) ||
        // TODO(dominic)
@ -454,7 +447,6 @@ void ParseCommandLineFlags(int* argc, const char** argv) {
 //                      &FLAGS_gbenchmark_memory_usage) ||
        ParseInt32Flag(argv[i], "benchmark_repetitions",
                       &FLAGS_benchmark_repetitions) ||
-                       */
        ParseBoolFlag(argv[i], "color_print", &FLAGS_color_print) ||
        ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
      for (int j = i; j != *argc; ++j)
@ -504,7 +496,7 @@ class State::FastClock {
        t = MyCPUUsage() + ChildrenCPUUsage();
        break;
    }
-    return static_cast<int64_t>(t * 1e6);
+    return static_cast<int64_t>(t * kNumMicrosPerSecond);
  }

  // Reinitialize if necessary (since clock type may be changed once benchmark
@ -912,13 +904,17 @@ State::State(FastClock* clock, SharedState* s, int t)
      pause_time_(0.0),
      total_iterations_(0),
      interval_micros_(
-          static_cast<int64_t>(1e6 * FLAGS_benchmark_min_time /
+          static_cast<int64_t>(kNumMicrosPerSecond * FLAGS_benchmark_min_time /
                               FLAGS_benchmark_repetitions)) {
+  CHECK(clock != nullptr);
+  CHECK(s != nullptr);
 }

 bool State::KeepRunning() {
  // Fast path
-  if (!clock_->HasReached(stop_time_micros_ + pause_time_)) {
+  if ((FLAGS_benchmark_iterations == 0 &&
+       !clock_->HasReached(stop_time_micros_ + pause_time_)) ||
+      iterations_ < FLAGS_benchmark_iterations) {
    ++iterations_;
    return true;
  }
@ -1034,12 +1030,12 @@ void State::NewInterval() {
 }

 bool State::FinishInterval() {
-  if (iterations_ < FLAGS_benchmark_min_iters / FLAGS_benchmark_repetitions &&
-      interval_micros_ < 5000000) {
+  if (FLAGS_benchmark_iterations != 0 &&
+      iterations_ < FLAGS_benchmark_iterations / FLAGS_benchmark_repetitions) {
    interval_micros_ *= 2;
 #ifdef DEBUG
-    std::cout << "Interval was too short; trying again for "
-              << interval_micros_ << " useconds.\n";
+    std::cout << "Not enough iterations in interval; "
+              << "Trying again for " << interval_micros_ << " useconds.\n";
 #endif
    is_continuation_ = false;
    NewInterval();
@ -1063,11 +1059,25 @@ bool State::FinishInterval() {
  bool keep_going = false;
  {
    mutex_lock l(&shared_->mu);
+
+    // Either replace the last or add a new data point.
    if (is_continuation_)
      shared_->runs.back() = data;
    else
      shared_->runs.push_back(data);
-    keep_going = RunAnotherInterval();
+
+    if (FLAGS_benchmark_iterations != 0) {
+      // If we need more iterations, run another interval as a continuation.
+      keep_going = total_iterations_ < FLAGS_benchmark_iterations;
+      is_continuation_ = keep_going;
+    } else {
+      // If this is a repetition, run another interval as a new data point.
+      keep_going =
+          shared_->runs.size() <
+              static_cast<size_t>(FLAGS_benchmark_repetitions);
+      is_continuation_ = !keep_going;
+    }
+
    if (!keep_going) {
      ++shared_->stopping;
      if (shared_->stopping < shared_->threads) {
@ -1081,23 +1091,11 @@ bool State::FinishInterval() {
    }
  }

-  if (state_ == STATE_RUNNING) {
-    is_continuation_ = true;
+  if (state_ == STATE_RUNNING)
    NewInterval();
-  }
  return keep_going;
 }

-bool State::RunAnotherInterval() const {
-  if (total_iterations_ < FLAGS_benchmark_min_iters)
-    return true;
-  if (total_iterations_ > FLAGS_benchmark_max_iters)
-    return false;
-  if (static_cast<int>(shared_->runs.size()) >= FLAGS_benchmark_repetitions)
-    return false;
-  return true;
-}
-
 bool State::MaybeStop() {
  mutex_lock l(&shared_->mu);
  if (shared_->stopping < shared_->threads) {
--- a/src/cycleclock.h
+++ b/src/cycleclock.h
@ -39,15 +39,17 @@ extern "C" uint64_t __rdtsc();
 #endif
 #include <sys/time.h>

+#include "benchmark/macros.h"
+
+namespace benchmark {
 // NOTE: only i386 and x86_64 have been well tested.
 // PPC, sparc, alpha, and ia64 are based on
 //    http://peter.kuscsik.com/wordpress/?p=14
 // with modifications by m3b.  See also
 //    https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
-namespace benchmark {
 namespace cycleclock {
  // This should return the number of cycles since power-on.  Thread-safe.
-  static inline int64_t Now() {
+  inline ATTRIBUTE_ALWAYS_INLINE int64_t Now() {
 #if defined(OS_MACOSX)
    // this goes at the top because we need ALL Macs, regardless of
    // architecture, to return the number of "mach time units" that
--- a/src/macros.h
+++ b/src/macros.h
@ -1,110 +0,0 @@
-#ifndef BENCHMARK_MACROS_H_
-#define BENCHMARK_MACROS_H_
-
-#include <assert.h>
-
-#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
-  TypeName(const TypeName&);               \
-  void operator=(const TypeName&);
-
-// The arraysize(arr) macro returns the # of elements in an array arr.
-// The expression is a compile-time constant, and therefore can be
-// used in defining new arrays, for example.  If you use arraysize on
-// a pointer by mistake, you will get a compile-time error.
-//
-// One caveat is that, for C++03, arraysize() doesn't accept any array of
-// an anonymous type or a type defined inside a function.  In these rare
-// cases, you have to use the unsafe ARRAYSIZE() macro below.  This is
-// due to a limitation in C++03's template system.  The limitation has
-// been removed in C++11.
-
-// This template function declaration is used in defining arraysize.
-// Note that the function doesn't need an implementation, as we only
-// use its type.
-template <typename T, size_t N>
-char (&ArraySizeHelper(T (&array)[N]))[N];
-
-// That gcc wants both of these prototypes seems mysterious. VC, for
-// its part, can't decide which to use (another mystery). Matching of
-// template overloads: the final frontier.
-#ifndef COMPILER_MSVC
-template <typename T, size_t N>
-char (&ArraySizeHelper(const T (&array)[N]))[N];
-#endif
-
-#define arraysize(array) (sizeof(ArraySizeHelper(array)))
-
-// The STATIC_ASSERT macro can be used to verify that a compile time
-// expression is true. For example, you could use it to verify the
-// size of a static array:
-//
-//   STATIC_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
-//                  content_type_names_incorrect_size);
-//
-// or to make sure a struct is smaller than a certain size:
-//
-//   STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
-//
-// The second argument to the macro is the name of the variable. If
-// the expression is false, most compilers will issue a warning/error
-// containing the name of the variable.
-
-template <bool>
-struct StaticAssert {
-};
-
-#define STATIC_ASSERT(expr, msg) \
-  typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
-
-// Implementation details of STATIC_ASSERT:
-//
-// - STATIC_ASSERT works by defining an array type that has -1
-//   elements (and thus is invalid) when the expression is false.
-//
-// - The simpler definition
-//
-//     #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
-//
-//   does not work, as gcc supports variable-length arrays whose sizes
-//   are determined at run-time (this is gcc's extension and not part
-//   of the C++ standard).  As a result, gcc fails to reject the
-//   following code with the simple definition:
-//
-//     int foo;
-//     STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
-//                               // not a compile-time constant.
-//
-// - By using the type StaticAssert<(bool(expr))>, we ensures that
-//   expr is a compile-time constant.  (Template arguments must be
-//   determined at compile-time.)
-//
-// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
-//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
-//
-//     StaticAssert<bool(expr)>
-//
-//   instead, these compilers will refuse to compile
-//
-//     STATIC_ASSERT(5 > 0, some_message);
-//
-//   (They seem to think the ">" in "5 > 0" marks the end of the
-//   template argument list.)
-//
-// - The array size is (bool(expr) ? 1 : -1), instead of simply
-//
-//     ((expr) ? 1 : -1).
-//
-//   This is to avoid running into a bug in MS VC 7.1, which
-//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
-
-#define CHECK(b) do { if (!(b)) assert(false); } while(0)
-#define CHECK_EQ(a, b) CHECK((a) == (b))
-#define CHECK_GE(a, b) CHECK((a) >= (b))
-#define CHECK_LE(a, b) CHECK((a) <= (b))
-#define CHECK_GT(a, b) CHECK((a) > (b))
-#define CHECK_LT(a, b) CHECK((a) < (b))
-
-
-#define ATTRIBUTE_UNUSED  __attribute__ ((unused))
-
-#endif  // BENCHMARK_MACROS_H_
--- a/src/sleep.cc
+++ b/src/sleep.cc
@ -5,22 +5,14 @@

 namespace benchmark {
 #ifdef OS_WINDOWS
-
 // Windows' _sleep takes a milliseconds argument.
 void SleepForMilliseconds(int milliseconds) {
  _sleep(milliseconds);
 }
 void SleepForSeconds(double seconds) {
-  SleepForMilliseconds(static_cast<int>(seconds * 1000));
+  SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
 }
-
 #else  // OS_WINDOWS
-
-static const int64_t kNumMillisPerSecond = 1000LL;
-static const int64_t kNumMicrosPerMilli = 1000LL;
-static const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
-static const int64_t kNumNanosPerMicro = 1000LL;
-
 void SleepForMicroseconds(int64_t microseconds) {
  struct timespec sleep_time;
  sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
@ -36,7 +28,6 @@ void SleepForMilliseconds(int milliseconds) {
 void SleepForSeconds(double seconds) {
  SleepForMicroseconds(static_cast<int64_t>(seconds * kNumMicrosPerSecond));
 }
-
 #endif  // OS_WINDOWS
 }  // end namespace benchmark

--- a/src/sleep.h
+++ b/src/sleep.h
@ -4,6 +4,11 @@
 #include <stdint.h>

 namespace benchmark {
+const int64_t kNumMillisPerSecond = 1000LL;
+const int64_t kNumMicrosPerMilli = 1000LL;
+const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
+const int64_t kNumNanosPerMicro = 1000LL;
+
 void SleepForMicroseconds(int64_t microseconds);
 void SleepForMilliseconds(int milliseconds);
 void SleepForSeconds(double seconds);
--- a/src/sysinfo.cc
+++ b/src/sysinfo.cc
@ -13,8 +13,8 @@
 #include <iostream>
 #include <limits>

+#include "benchmark/macros.h"
 #include "cycleclock.h"
-#include "macros.h"
 #include "mutex_lock.h"
 #include "sleep.h"

--- a/src/walltime.cc
+++ b/src/walltime.cc
@ -8,8 +8,8 @@
 #include <atomic>
 #include <limits>

+#include "benchmark/macros.h"
 #include "cycleclock.h"
-#include "macros.h"
 #include "sysinfo.h"

 namespace benchmark {