Adopt new benchmark timing internals.

This patch adopts a new internal structure for how timings are performed. Currently every iteration of a benchmark checks to see if it has been running for an appropriate amount of time. Checking the clock introduces noise into the timings, and this can cause inconsistent output from each benchmark. Now every iteration of a benchmark only checks an iteration count to see if it should stop running. The iteration count is determined beforehand by testing the benchmark on a series of increasing iteration counts until a suitable count is found. This increases the amount of time it takes to run the actual benchmarks, but it also greatly increases the accuracy of the results. This patch introduces some breaking changes. The notable breaking changes are: 1. Benchmarks run on multiple threads no longer generate a report per thread. Instead only a single report is generated. 2. ::benchmark::UseRealTime() was removed and replaced with State::UseRealTime().
2015-03-12 18:03:33 -04:00 · 2015-03-12 18:03:33 -04:00 · 7a767012f1
parent 7c6a7e3084
commit 7a767012f1
11 changed files with 1044 additions and 1007 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -44,6 +44,10 @@ add_cxx_compiler_flag(-pedantic-errors)
 add_cxx_compiler_flag(-fno-strict-aliasing RELEASE)

 add_cxx_compiler_flag(-Wthread-safety)
+if (HAVE_WTHREAD_SAFETY)
+  add_definitions(-DHAVE_WTHREAD_SAFETY)
+  cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
+endif()

 # C++ feature checks
 cxx_feature_check(STD_REGEX)
--- a/cmake/thread_safety_attributes.cpp
+++ b/cmake/thread_safety_attributes.cpp
@ -0,0 +1,4 @@
+#define HAVE_THREAD_SAFETY_ATTRIBUTES
+#include "../src/mutex.h"
+
+int main() {}
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@ -135,7 +135,8 @@ BENCHMARK(BM_MultiThreaded)->Threads(4);
 #ifndef BENCHMARK_BENCHMARK_H_
 #define BENCHMARK_BENCHMARK_H_

-#include <stdint.h>
+#include <cassert>
+#include <cstdint>

 #include <functional>
 #include <memory>
@ -153,10 +154,7 @@ void Initialize(int* argc, const char** argv);

 // Otherwise, run all benchmarks specified by the --benchmark_filter flag,
 // and exit after running the benchmarks.
-void RunSpecifiedBenchmarks(const BenchmarkReporter* reporter = nullptr);
-
-// ------------------------------------------------------
-// Routines that can be called from within a benchmark
+void RunSpecifiedBenchmarks(const BenchmarkReporter* reporter = NULL);

 // If this routine is called, peak memory allocation past this point in the
 // benchmark is reported at the end of the benchmark report line. (It is
@ -165,14 +163,6 @@ void RunSpecifiedBenchmarks(const BenchmarkReporter* reporter = nullptr);
 // TODO(dominic)
 // void MemoryUsage();

-// If a particular benchmark is I/O bound, or if for some reason CPU
-// timings are not representative, call this method from within the
-// benchmark routine.  If called, the elapsed time will be used to
-// control how many iterations are run, and in the printing of
-// items/second or MB/seconds values.  If not called, the cpu time
-// used by the benchmark will be used.
-void UseRealTime();
-
 namespace internal {
 class Benchmark;
 class BenchmarkFamilies;
@ -181,13 +171,63 @@ class BenchmarkFamilies;
 // State is passed to a running Benchmark and contains state for the
 // benchmark to use.
 class State {
- public:
-  // Returns true iff the benchmark should continue through another iteration.
-  bool KeepRunning();
+public:
+  State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i);

+  // Returns true iff the benchmark should continue through another iteration.
+  // NOTE: A benchmark may not return from the test until KeepRunning() has
+  // returned false.
+  bool KeepRunning() {
+    if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
+        ResumeTiming();
+        started_ = true;
+    }
+    bool const res = total_iterations_++ < max_iterations;
+    if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
+        assert(started_);
+        PauseTiming();
+        // Total iterations now is one greater than max iterations. Fix this.
+        total_iterations_ = max_iterations;
+    }
+    return res;
+  }
+
+  // REQUIRES: timer is running
+  // Stop the benchmark timer.  If not called, the timer will be
+  // automatically stopped after KeepRunning() returns false for the first time.
+  //
+  // For threaded benchmarks the PauseTiming() function acts
+  // like a barrier.  I.e., the ith call by a particular thread to this
+  // function will block until all threads have made their ith call.
+  // The timer will stop when the last thread has called this function.
+  //
+  // NOTE: PauseTiming()/ResumeTiming() are relatively
+  // heavyweight, and so their use should generally be avoided
+  // within each benchmark iteration, if possible.
  void PauseTiming();
+
+  // REQUIRES: timer is not running
+  // Start the benchmark timer.  The timer is NOT running on entrance to the
+  // benchmark function. It begins running after the first call to KeepRunning().
+  //
+  // For threaded benchmarks the ResumeTiming() function acts
+  // like a barrier.  I.e., the ith call by a particular thread to this
+  // function will block until all threads have made their ith call.
+  // The timer will start when the last thread has called this function.
+  //
+  // NOTE: PauseTiming()/ResumeTiming() are relatively
+  // heavyweight, and so their use should generally be avoided
+  // within each benchmark iteration, if possible.
  void ResumeTiming();

+  // If a particular benchmark is I/O bound, or if for some reason CPU
+  // timings are not representative, call this method from within the
+  // benchmark routine.  If called, the elapsed time will be used to
+  // control how many iterations are run, and in the printing of
+  // items/second or MB/seconds values.  If not called, the cpu time
+  // used by the benchmark will be used.
+  void UseRealTime();
+
  // Set the number of bytes processed by the current benchmark
  // execution.  This routine is typically called once at the end of a
  // throughput oriented benchmark.  If this routine is called with a
@ -195,7 +235,15 @@ class State {
  // per iteration.
  //
  // REQUIRES: a benchmark has exited its KeepRunning loop.
-  void SetBytesProcessed(int64_t bytes);
+  BENCHMARK_ALWAYS_INLINE
+  void SetBytesProcessed(size_t bytes) {
+    bytes_processed_ = bytes;
+  }
+
+  BENCHMARK_ALWAYS_INLINE
+  size_t bytes_processed() const {
+    return bytes_processed_;
+  }

  // If this routine is called with items > 0, then an items/s
  // label is printed on the benchmark report line for the currently
@ -203,94 +251,76 @@ class State {
  // benchmark where a processing items/second output is desired.
  //
  // REQUIRES: a benchmark has exited its KeepRunning loop.
-  void SetItemsProcessed(int64_t items);
+  BENCHMARK_ALWAYS_INLINE
+  void SetItemsProcessed(size_t items) {
+    items_processed_ = items;
+  }
+
+  BENCHMARK_ALWAYS_INLINE
+  size_t items_processed() const {
+    return items_processed_;
+  }

  // If this routine is called, the specified label is printed at the
  // end of the benchmark report line for the currently executing
  // benchmark.  Example:
-  //  static void BM_Compress(benchmark::State& state) {
+  //  static void BM_Compress(int iters) {
  //    ...
  //    double compress = input_size / output_size;
-  //    state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
+  //    benchmark::SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
  //  }
  // Produces output that looks like:
  //  BM_Compress   50         50   14115038  compress:27.3%
  //
  // REQUIRES: a benchmark has exited its KeepRunning loop.
-  void SetLabel(const std::string& label);
+  void SetLabel(const char* label);
+
+  // Allow the use of std::string without actually including <string>.
+  // This function does not participate in overload resolution unless StringType
+  // has the nested typename `basic_string`. This typename should be provided
+  // as an injected class name in the case of std::string.
+  template <class StringType>
+  void SetLabel(StringType const & str,
+                typename StringType::basic_string* = 0) {
+    this->SetLabel(str.c_str());
+  }

  // Range arguments for this run. CHECKs if the argument has been set.
-  int range_x() const;
-  int range_y() const;
+  BENCHMARK_ALWAYS_INLINE
+  int range_x() const {
+    assert(has_range_x_);
+    ((void)has_range_x_); // Prevent unused warning.
+    return range_x_;
+  }

-  int64_t iterations() const { return total_iterations_; }
+  BENCHMARK_ALWAYS_INLINE
+  int range_y() const {
+    assert(has_range_y_);
+    ((void)has_range_y_); // Prevent unused warning.
+    return range_y_;
+  }

+  BENCHMARK_ALWAYS_INLINE
+  size_t iterations() const { return total_iterations_; }
+
+private:
+  bool started_;
+  size_t total_iterations_;
+
+  bool has_range_x_;
+  int range_x_;
+
+  bool has_range_y_;
+  int range_y_;
+
+  size_t bytes_processed_;
+  size_t items_processed_;
+
+public:
  const int thread_index;
+  const size_t max_iterations;

- private:
-  class FastClock;
-  struct SharedState;
-  struct ThreadStats;
-
-  State(FastClock* clock, SharedState* s, int t);
-  bool StartRunning();
-  bool FinishInterval();
-  bool MaybeStop();
-  void NewInterval();
-  bool AllStarting();
-
-  static void* RunWrapper(void* arg);
-  void Run();
-  void RunAsThread();
-  void Wait();
-
-  enum EState {
-    STATE_INITIAL,   // KeepRunning hasn't been called
-    STATE_STARTING,  // KeepRunning called, waiting for other threads
-    STATE_RUNNING,   // Running and being timed
-    STATE_STOPPING,  // Not being timed but waiting for other threads
-    STATE_STOPPED    // Stopped
-  };
-
-  EState state_;
-
-  FastClock* clock_;
-
-  // State shared by all BenchmarkRun objects that belong to the same
-  // BenchmarkInstance
-  SharedState* shared_;
-
-  std::thread thread_;
-
-  // Custom label set by the user.
-  std::string label_;
-
-  // Each State object goes through a sequence of measurement intervals. By
-  // default each interval is approx. 100ms in length. The following stats are
-  // kept for each interval.
-  int64_t iterations_;
-  double start_cpu_;
-  double start_time_;
-  int64_t stop_time_micros_;
-
-  double start_pause_cpu_;
-  double pause_cpu_time_;
-  double start_pause_real_;
-  double pause_real_time_;
-
-  // Total number of iterations for all finished runs.
-  int64_t total_iterations_;
-
-  // Approximate time in microseconds for one interval of execution.
-  // Dynamically adjusted as needed.
-  int64_t interval_micros_;
-
-  // True if the current interval is the continuation of a previous one.
-  bool is_continuation_;
-
-  std::unique_ptr<ThreadStats> stats_;
-
-  friend class internal::Benchmark;
+private:
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
 };

@ -304,7 +334,6 @@ class BenchmarkReporter {
  struct Context {
    int num_cpus;
    double mhz_per_cpu;
-    // std::string cpu_info;
    bool cpu_scaling_enabled;

    // The number of chars in the longest benchmark name.
@ -312,19 +341,17 @@ class BenchmarkReporter {
  };

  struct Run {
-    Run()
-        : thread_index(-1),
-          iterations(1),
-          real_accumulated_time(0),
-          cpu_accumulated_time(0),
-          bytes_per_second(0),
-          items_per_second(0),
-          max_heapbytes_used(0) {}
+    Run() :
+      iterations(1),
+      real_accumulated_time(0),
+      cpu_accumulated_time(0),
+      bytes_per_second(0),
+      items_per_second(0),
+      max_heapbytes_used(0) {}

    std::string benchmark_name;
-    std::string report_label;
-    int thread_index;
-    int64_t iterations;
+    std::string report_label;  // Empty if not set by benchmark.
+    size_t iterations;
    double real_accumulated_time;
    double cpu_accumulated_time;

@ -350,22 +377,12 @@ class BenchmarkReporter {
  // benchmark, thus have the same name.
  virtual void ReportRuns(const std::vector<Run>& report) const = 0;

-  virtual ~BenchmarkReporter() {}
+  virtual ~BenchmarkReporter();
 };

 namespace internal {

-typedef std::function<void(State&)> BenchmarkFunction;
-
-// Run all benchmarks whose name is a partial match for the regular
-// expression in "spec". The results of benchmark runs are fed to "reporter".
-void RunMatchingBenchmarks(const std::string& spec,
-                           const BenchmarkReporter* reporter);
-
-// Extract the list of benchmark names that match the specified regular
-// expression.
-void FindMatchingBenchmarkNames(const std::string& re,
-                                std::vector<std::string>* benchmark_names);
+typedef void(Function)(State&);

 // ------------------------------------------------------
 // Benchmark registration object.  The BENCHMARK() macro expands
@ -375,8 +392,7 @@ void FindMatchingBenchmarkNames(const std::string& re,
 // chained into one expression.
 class Benchmark {
 public:
-  // The Benchmark takes ownership of the Callback pointed to by f.
-  Benchmark(const char* name, BenchmarkFunction f);
+  Benchmark(const char* name, Function* f);

  ~Benchmark();

@ -444,40 +460,25 @@ class Benchmark {
  // Used inside the benchmark implementation
  struct Instance;

-  // Measure the overhead of an empty benchmark to subtract later.
-  static void MeasureOverhead();
-
 private:
-  friend class BenchmarkFamilies;
-
-  std::vector<Benchmark::Instance> CreateBenchmarkInstances(size_t rangeXindex,
-                                                            size_t rangeYindex);
-
  std::string name_;
-  BenchmarkFunction function_;
-  size_t registration_index_;
-  std::vector<int> rangeX_;
-  std::vector<int> rangeY_;
+  Function* function_;
+  std::size_t registration_index_;
+  int arg_count_;
+  std::vector< std::pair<int, int> > args_;  // Args for all benchmark runs
  std::vector<int> thread_counts_;
-  std::mutex mutex_;

  // Special value placed in thread_counts_ to stand for NumCPUs()
  static const int kNumCpuMarker = -1;

-  // Special value used to indicate that no range is required.
-  static const size_t kNoRangeIndex = std::numeric_limits<size_t>::max();
-  static const int kNoRange = std::numeric_limits<int>::max();
-
  static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
-  static double MeasurePeakHeapMemory(const Instance& b);
-  static void RunInstance(const Instance& b, const BenchmarkReporter* br);
-  friend class ::benchmark::State;
-  friend struct ::benchmark::internal::Benchmark::Instance;
-  friend void ::benchmark::internal::RunMatchingBenchmarks(
-      const std::string&, const BenchmarkReporter*);
+
+  friend class BenchmarkFamilies;
+
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(Benchmark);
 };

+
 // ------------------------------------------------------
 // Internal implementation details follow; please ignore

@ -487,16 +488,16 @@ class ConsoleReporter : public BenchmarkReporter {
 public:
  virtual bool ReportContext(const Context& context) const;
  virtual void ReportRuns(const std::vector<Run>& reports) const;
-
 private:
-  std::string PrintMemoryUsage(double bytes) const;
  virtual void PrintRunData(const Run& report) const;
+  // TODO(ericwf): Find a better way to share this information.
  mutable size_t name_field_width_;
 };

 }  // end namespace internal
 }  // end namespace benchmark

+
 // ------------------------------------------------------
 // Macro to register benchmarks

@ -534,4 +535,11 @@ class ConsoleReporter : public BenchmarkReporter {
      __benchmark_, n, __LINE__) BENCHMARK_UNUSED =          \
      (new ::benchmark::internal::Benchmark(#n "<" #a "," #b ">", n<a, b>))

+// Helper macro to create a main routine in a test that runs the benchmarks
+#define BENCHMARK_MAIN()                             \
+  int main(int argc, const char** argv) {            \
+    ::benchmark::Initialize(&argc, argv);            \
+    ::benchmark::RunSpecifiedBenchmarks();           \
+  }
+
 #endif  // BENCHMARK_BENCHMARK_H_
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -2,9 +2,8 @@
 include_directories(${PROJECT_SOURCE_DIR}/src)

 # Define the source files
-set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc"
-                 "log.cc" "sleep.cc" "string_util.cc" "sysinfo.cc"
-                 "walltime.cc")
+set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc" "log.cc"
+                 "sleep.cc" "string_util.cc" "sysinfo.cc" "walltime.cc")
 # Determine the correct regular expression engine to use
 if(HAVE_STD_REGEX)
  set(RE_FILES "re_std.cc")
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
--- a/src/mutex.h
+++ b/src/mutex.h
@ -0,0 +1,142 @@
+#ifndef BENCHMARK_MUTEX_H_
+#define BENCHMARK_MUTEX_H_
+
+#include <mutex>
+#include <condition_variable>
+
+// Enable thread safety attributes only with clang.
+// The attributes can be safely erased when compiling with other compilers.
+#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
+#define THREAD_ANNOTATION_ATTRIBUTE__(x)   __attribute__((x))
+#else
+#define THREAD_ANNOTATION_ATTRIBUTE__(x)   // no-op
+#endif
+
+#define CAPABILITY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(capability(x))
+
+#define SCOPED_CAPABILITY \
+  THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
+
+#define GUARDED_BY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
+
+#define PT_GUARDED_BY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
+
+#define ACQUIRED_BEFORE(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
+
+#define ACQUIRED_AFTER(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
+
+#define REQUIRES(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__))
+
+#define REQUIRES_SHARED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))
+
+#define ACQUIRE(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__))
+
+#define ACQUIRE_SHARED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__))
+
+#define RELEASE(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__))
+
+#define RELEASE_SHARED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__))
+
+#define TRY_ACQUIRE(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__))
+
+#define TRY_ACQUIRE_SHARED(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__))
+
+#define EXCLUDES(...) \
+  THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
+
+#define ASSERT_CAPABILITY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x))
+
+#define ASSERT_SHARED_CAPABILITY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x))
+
+#define RETURN_CAPABILITY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
+
+#define NO_THREAD_SAFETY_ANALYSIS \
+  THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
+
+
+namespace benchmark {
+
+typedef std::condition_variable Condition;
+
+// NOTE: Wrappers for std::mutex and std::unique_lock are provided so that
+// we can annotate them with thread safety attributes and use the
+// -Wthread-safety warning with clang. The standard library types cannot be
+// used directly because they do not provide the required annotations.
+class CAPABILITY("mutex") Mutex
+{
+public:
+  Mutex() {}
+
+  void lock() ACQUIRE() { mut_.lock(); }
+  void unlock() RELEASE() { mut_.unlock(); }
+  std::mutex& native_handle() {
+    return mut_;
+  }
+private:
+  std::mutex mut_;
+};
+
+
+class SCOPED_CAPABILITY MutexLock
+{
+  typedef std::unique_lock<std::mutex> MutexLockImp;
+public:
+  MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle())
+  { }
+  ~MutexLock() RELEASE() {}
+  MutexLockImp& native_handle() { return ml_; }
+private:
+  MutexLockImp ml_;
+};
+
+
+class Notification
+{
+public:
+  Notification() : notified_yet_(false) { }
+
+  void WaitForNotification() const EXCLUDES(mutex_) {
+    MutexLock m_lock(mutex_);
+    auto notified_fn = [this]() REQUIRES(mutex_) {
+                            return this->HasBeenNotified();
+                        };
+    cv_.wait(m_lock.native_handle(), notified_fn);
+  }
+
+  void Notify() EXCLUDES(mutex_) {
+    {
+      MutexLock lock(mutex_);
+      notified_yet_ = 1;
+    }
+    cv_.notify_all();
+  }
+
+private:
+  bool HasBeenNotified() const REQUIRES(mutex_) {
+    return notified_yet_;
+  }
+
+  mutable Mutex mutex_;
+  mutable std::condition_variable cv_;
+  bool notified_yet_ GUARDED_BY(mutex_);
+};
+
+} // end namespace benchmark
+
+#endif // BENCHMARK_MUTEX_H_
--- a/src/string_util.cc
+++ b/src/string_util.cc
@ -24,13 +24,13 @@ static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
 static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
              "Small SI and Big SI unit arrays must be the same size");

-static const int kUnitsSize = arraysize(kBigSIUnits);
+static const int64_t kUnitsSize = arraysize(kBigSIUnits);

 } // end anonymous namespace

 void ToExponentAndMantissa(double val, double thresh, int precision,
                           double one_k, std::string* mantissa,
-                           int* exponent) {
+                           int64_t* exponent) {
  std::stringstream mantissa_stream;

  if (val < 0) {
@ -80,10 +80,10 @@ void ToExponentAndMantissa(double val, double thresh, int precision,
  *mantissa = mantissa_stream.str();
 }

-std::string ExponentToPrefix(int exponent, bool iec) {
+std::string ExponentToPrefix(int64_t exponent, bool iec) {
  if (exponent == 0) return "";

-  const int index = (exponent > 0 ? exponent - 1 : -exponent - 1);
+  const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1);
  if (index >= kUnitsSize) return "";

  const char* array =
@ -97,7 +97,7 @@ std::string ExponentToPrefix(int exponent, bool iec) {
 std::string ToBinaryStringFullySpecified(double value, double threshold,
                                         int precision) {
  std::string mantissa;
-  int exponent;
+  int64_t exponent;
  ToExponentAndMantissa(value, threshold, precision, 1024.0, &mantissa,
                        &exponent);
  return mantissa + ExponentToPrefix(exponent, false);
--- a/src/sysinfo.cc
+++ b/src/sysinfo.cc
@ -34,6 +34,7 @@
 #include "check.h"
 #include "cycleclock.h"
 #include "internal_macros.h"
+#include "log.h"
 #include "sleep.h"

 namespace benchmark {
@ -322,7 +323,7 @@ double MyCPUUsage() {
        return value;
      }
      // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
-      std::cout << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
+      VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
      use_cputime_ns = false;
    }
  }
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -20,3 +20,6 @@ add_test(filter_regex_none filter_test --benchmark_filter=monkey 0)
 add_test(filter_regex_wildcard filter_test --benchmark_filter=.*Calculate.* 16)
 add_test(filter_regex_begin filter_test --benchmark_filter=^BM_Calculate.* 16)
 add_test(filter_regex_end filter_test --benchmark_filter=.*Pi$ 8)
+
+compile_benchmark_test(basic_test)
+add_test(basic basic_test)
--- a/test/basic_test.cc
+++ b/test/basic_test.cc
@ -0,0 +1,105 @@
+
+#include <cstddef>
+
+#include "benchmark/benchmark.h"
+
+#define BASIC_BENCHMARK_TEST(x) \
+    BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192)
+
+void BM_empty(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    volatile std::size_t x = state.iterations();
+    ((void)x);
+  }
+}
+BENCHMARK(BM_empty);
+BENCHMARK(BM_empty)->ThreadPerCpu();
+
+void BM_spin_empty(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    for (int x = 0; x < state.range_x(); ++x) {
+      volatile int dummy = x;
+      ((void)dummy);
+    }
+  }
+}
+BASIC_BENCHMARK_TEST(BM_spin_empty);
+BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu();
+
+void BM_spin_pause_before(benchmark::State& state) {
+  for (int i = 0; i < state.range_x(); ++i) {
+    volatile int dummy = i;
+    ((void)dummy);
+  }
+  while(state.KeepRunning()) {
+    for (int i = 0; i < state.range_x(); ++i) {
+      volatile int dummy = i;
+      ((void)dummy);
+    }
+  }
+}
+BASIC_BENCHMARK_TEST(BM_spin_pause_before);
+BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu();
+
+
+void BM_spin_pause_during(benchmark::State& state) {
+  while(state.KeepRunning()) {
+    state.PauseTiming();
+    for (int i = 0; i < state.range_x(); ++i) {
+      volatile int dummy = i;
+      ((void)dummy);
+    }
+    state.ResumeTiming();
+    for (int i = 0; i < state.range_x(); ++i) {
+      volatile int dummy = i;
+      ((void)dummy);
+    }
+  }
+}
+BASIC_BENCHMARK_TEST(BM_spin_pause_during);
+BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu();
+
+
+void BM_spin_pause_after(benchmark::State& state) {
+  while(state.KeepRunning()) {
+    for (int i = 0; i < state.range_x(); ++i) {
+      volatile int dummy = i;
+      ((void)dummy);
+    }
+  }
+  for (int i = 0; i < state.range_x(); ++i) {
+    volatile int dummy = i;
+    ((void)dummy);
+  }
+}
+BASIC_BENCHMARK_TEST(BM_spin_pause_after);
+BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu();
+
+
+void BM_spin_pause_before_and_after(benchmark::State& state) {
+  for (int i = 0; i < state.range_x(); ++i) {
+    volatile int dummy = i;
+    ((void)dummy);
+  }
+  while(state.KeepRunning()) {
+    for (int i = 0; i < state.range_x(); ++i) {
+      volatile int dummy = i;
+      ((void)dummy);
+    }
+  }
+  for (int i = 0; i < state.range_x(); ++i) {
+    volatile int dummy = i;
+    ((void)dummy);
+  }
+}
+BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after);
+BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu();
+
+
+void BM_empty_stop_start(benchmark::State& state) {
+  while (state.KeepRunning()) { }
+}
+BENCHMARK(BM_empty_stop_start);
+BENCHMARK(BM_empty_stop_start)->ThreadPerCpu();
+
+BENCHMARK_MAIN()
--- a/test/benchmark_test.cc
+++ b/test/benchmark_test.cc
@ -53,18 +53,22 @@ static void BM_Factorial(benchmark::State& state) {
  while (state.KeepRunning())
    fac_42 = Factorial(8);
  // Prevent compiler optimizations
-  std::cout << fac_42;
+  std::stringstream ss;
+  ss << fac_42;
+  state.SetLabel(ss.str());
 }
 BENCHMARK(BM_Factorial);

 static void BM_FactorialRealTime(benchmark::State& state) {
-  benchmark::UseRealTime();
+  state.UseRealTime();

  int fac_42 = 0;
  while (state.KeepRunning())
    fac_42 = Factorial(8);
  // Prevent compiler optimizations
-  std::cout << fac_42;
+  std::stringstream ss;
+  ss << fac_42;
+  state.SetLabel(ss.str());
 }
 BENCHMARK(BM_FactorialRealTime);

@ -158,12 +162,5 @@ static void BM_LongTest(benchmark::State& state) {
 }
 BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);

-int main(int argc, const char* argv[]) {
-  benchmark::Initialize(&argc, argv);
-
-  assert(Factorial(8) == 40320);
-  assert(CalculatePi(1) == 0.0);
-
-  benchmark::RunSpecifiedBenchmarks();
-}
+BENCHMARK_MAIN()