From cba945e37dd8f336c7c8f5367f3c7d9498d5e09b Mon Sep 17 00:00:00 2001 From: Eric Date: Fri, 2 Sep 2016 21:34:34 -0600 Subject: [PATCH] Make `PauseTiming()` and `ResumeTiming()` per thread. (#286) * Change to using per-thread timers * fix bad assertions * fix copy paste error on windows * Fix thread safety annotations * Make null-log thread safe * remove remaining globals * use chrono for walltime since it is thread safe * consolidate timer functions * Add missing ctime include * Rename to be consistent with Google style * Format patch using clang-format * cleanup -Wthread-safety configuration * Don't trust _POSIX_FEATURE macros because OS X lies. * Fix OS X thread timings * attempt to fix mingw build * Attempt to make mingw work again * Revert old mingw workaround * improve diagnostics * Drastically improve OS X measurements * Use average real time instead of max --- CMakeLists.txt | 4 +- include/benchmark/benchmark_api.h | 59 +++-- src/CMakeLists.txt | 9 +- src/benchmark.cc | 404 +++++++++++------------------- src/check.h | 6 +- src/console_reporter.cc | 2 +- src/csv_reporter.cc | 2 +- src/json_reporter.cc | 2 +- src/log.cc | 40 --- src/log.h | 56 ++++- src/mutex.h | 70 ++++-- src/reporter.cc | 2 +- src/sysinfo.cc | 96 +------ src/sysinfo.h | 2 - src/timers.cc | 195 ++++++++++++++ src/timers.h | 48 ++++ src/walltime.cc | 263 ------------------- src/walltime.h | 17 -- tools/gbench/report.py | 9 +- 19 files changed, 539 insertions(+), 747 deletions(-) delete mode 100644 src/log.cc create mode 100644 src/timers.cc create mode 100644 src/timers.h delete mode 100644 src/walltime.cc delete mode 100644 src/walltime.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ef83007..86f61429 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,8 +87,7 @@ else() add_cxx_compiler_flag(-Wstrict-aliasing) endif() add_cxx_compiler_flag(-Wthread-safety) - if (HAVE_WTHREAD_SAFETY) - add_definitions(-DHAVE_WTHREAD_SAFETY) + if (HAVE_CXX_FLAG_WTHREAD_SAFETY) cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) endif() @@ -152,7 +151,6 @@ cxx_feature_check(STD_REGEX) cxx_feature_check(GNU_POSIX_REGEX) cxx_feature_check(POSIX_REGEX) cxx_feature_check(STEADY_CLOCK) - # Ensure we have pthreads find_package(Threads REQUIRED) diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h index 2357e884..7762a42f 100644 --- a/include/benchmark/benchmark_api.h +++ b/include/benchmark/benchmark_api.h @@ -270,31 +270,25 @@ enum BigO { // computational complexity for the benchmark. typedef double(BigOFunc)(int); +namespace internal { +class ThreadTimer; +class ThreadManager; +} + // State is passed to a running Benchmark and contains state for the // benchmark to use. class State { public: - State(size_t max_iters, const std::vector& ranges, - int thread_i, int n_threads); - // Returns true if the benchmark should continue through another iteration. // NOTE: A benchmark may not return from the test until KeepRunning() has // returned false. bool KeepRunning() { if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { - assert(!finished_); - started_ = true; - ResumeTiming(); + StartKeepRunning(); } bool const res = total_iterations_++ < max_iterations; if (BENCHMARK_BUILTIN_EXPECT(!res, false)) { - assert(started_ && (!finished_ || error_occurred_)); - if (!error_occurred_) { - PauseTiming(); - } - // Total iterations now is one greater than max iterations. Fix this. - total_iterations_ = max_iterations; - finished_ = true; + FinishKeepRunning(); } return res; } @@ -304,10 +298,11 @@ public: // Stop the benchmark timer. If not called, the timer will be // automatically stopped after KeepRunning() returns false for the first time. // - // For threaded benchmarks the PauseTiming() function acts - // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all active threads have made their ith call. - // The timer will stop when the last thread has called this function. + // For threaded benchmarks the PauseTiming() function only pauses the timing + // for the current thread. + // + // NOTE: The "real time" measurement is per-thread. If different threads + // report different measurements the largest one is reported. // // NOTE: PauseTiming()/ResumeTiming() are relatively // heavyweight, and so their use should generally be avoided @@ -319,11 +314,6 @@ public: // Start the benchmark timer. The timer is NOT running on entrance to the // benchmark function. It begins running after the first call to KeepRunning() // - // For threaded benchmarks the ResumeTiming() function acts - // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all active threads have made their ith call. - // The timer will start when the last thread has called this function. - // // NOTE: PauseTiming()/ResumeTiming() are relatively // heavyweight, and so their use should generally be avoided // within each benchmark iteration, if possible. @@ -335,10 +325,10 @@ public: // thread and report an error with the specified 'msg'. After this call // the user may explicitly 'return' from the benchmark. // - // For threaded benchmarks only the current thread stops executing. If - // multiple threads report an error only the first error message is used. - // The current thread is no longer considered 'active' by - // 'PauseTiming()' and 'ResumingTiming()'. + // For threaded benchmarks only the current thread stops executing and future + // calls to `KeepRunning()` will block until all threads have completed + // the `KeepRunning()` loop. If multiple threads report an error only the + // first error message is used. // // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit // the current scope immediately. If the function is called from within @@ -351,10 +341,8 @@ public: // is used instead of automatically measured time if UseManualTime() was // specified. // - // For threaded benchmarks the SetIterationTime() function acts - // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all threads have made their ith call. - // The time will be set by the last thread to call this function. + // For threaded benchmarks the final value will be set to the largest + // reported values. void SetIterationTime(double seconds); // Set the number of bytes processed by the current benchmark @@ -465,7 +453,16 @@ public: const int threads; const size_t max_iterations; -private: + // TODO make me private + State(size_t max_iters, const std::vector& ranges, int thread_i, + int n_threads, internal::ThreadTimer* timer, + internal::ThreadManager* manager); + + private: + void StartKeepRunning(); + void FinishKeepRunning(); + internal::ThreadTimer* timer_; + internal::ThreadManager* manager_; BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State); }; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 144ff924..f78a1fd3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,9 +8,9 @@ endif() # Define the source files set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc" - "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc" - "log.cc" "reporter.cc" "sleep.cc" "string_util.cc" - "sysinfo.cc" "walltime.cc" "complexity.cc") + "console_reporter.cc" "csv_reporter.cc" + "json_reporter.cc" "reporter.cc" "sleep.cc" + "string_util.cc" "sysinfo.cc" "complexity.cc" "timers.cc") # Add headers to the list of source files. cmake does not require this, # but IDEs such as Visual Studio need this to add the headers # to the generated project. @@ -19,8 +19,7 @@ list(APPEND SOURCE_FILES "${_d}/benchmark.h" "${_d}/benchmark_api.h" "${_d}/macros.h" "${_d}/reporter.h" "arraysize.h" "check.h" "colorprint.h" "commandlineflags.h" "complexity.h" "cycleclock.h" "internal_macros.h" "log.h" "mutex.h" - "re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h" - "walltime.h") + "re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h" "timers.h") unset(_d) # Determine the correct regular expression engine to use diff --git a/src/benchmark.cc b/src/benchmark.cc index bfac28c2..f0f0a43d 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -41,7 +41,7 @@ #include "stat.h" #include "string_util.h" #include "sysinfo.h" -#include "walltime.h" +#include "timers.h" DEFINE_bool(benchmark_list_tests, false, "Print a list of benchmarks. This option overrides all other " @@ -92,20 +92,7 @@ namespace internal { void UseCharPointer(char const volatile*) {} -// NOTE: This is a dummy "mutex" type used to denote the actual mutex -// returned by GetBenchmarkLock(). This is only used to placate the thread -// safety warnings by giving the return of GetBenchmarkLock() a name. -struct CAPABILITY("mutex") BenchmarkLockType {}; -BenchmarkLockType BenchmarkLockVar; - -} // end namespace internal - -inline Mutex& RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) -GetBenchmarkLock() -{ - static Mutex lock; - return lock; -} +} // end namespace internal namespace { @@ -120,148 +107,111 @@ static const int kRangeMultiplier = 8; static const size_t kMaxFamilySize = 100; static const size_t kMaxIterations = 1000000000; -bool running_benchmark = false; +} // end namespace -// Global variable so that a benchmark can cause a little extra printing -std::string* GetReportLabel() { - static std::string label GUARDED_BY(GetBenchmarkLock()); - return &label; -} +namespace internal { -// Global variable so that a benchmark can report an error as a human readable -// string. If error_message is null no error occurred. -#if defined(_MSC_VER) && _MSC_VER <= 1800 -typedef char* error_message_type; -#else -typedef const char* error_message_type; -#endif +// NOTE: This is a dummy "mutex" type used to denote the actual mutex +// returned by GetBenchmarkMutex(). This is only used to placate the thread +// safety warnings by giving the return of GetBenchmarkLock() a name. +struct CAPABILITY("mutex") BenchmarkLockType {}; +BenchmarkLockType BenchmarkLockVar; -static std::atomic error_message = ATOMIC_VAR_INIT(nullptr); +class ThreadManager { + public: + ThreadManager(int num_threads) + : alive_threads_(num_threads), start_stop_barrier_(num_threads) {} -// TODO(ericwf): support MallocCounter. -//static benchmark::MallocCounter *benchmark_mc; + Mutex& GetBenchmarkMutex() const + RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) { + return benchmark_mutex_; + } -struct ThreadStats { - ThreadStats() : bytes_processed(0), items_processed(0), complexity_n(0) {} - int64_t bytes_processed; - int64_t items_processed; - int complexity_n; + bool StartStopBarrier() EXCLUDES(end_cond_mutex_) { + return start_stop_barrier_.wait(); + } + + void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) { + start_stop_barrier_.removeThread(); + if (--alive_threads_ == 0) { + MutexLock lock(end_cond_mutex_); + end_condition_.notify_all(); + } + } + + void WaitForAllThreads() EXCLUDES(end_cond_mutex_) { + MutexLock lock(end_cond_mutex_); + end_condition_.wait(lock.native_handle(), + [this]() { return alive_threads_ == 0; }); + } + + public: + GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0; + GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0; + GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0; + GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0; + GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0; + GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0; + GUARDED_BY(GetBenchmarkMutex()) std::string report_label_; + GUARDED_BY(GetBenchmarkMutex()) std::string error_message_; + GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false; + + private: + mutable Mutex benchmark_mutex_; + std::atomic alive_threads_; + Barrier start_stop_barrier_; + Mutex end_cond_mutex_; + Condition end_condition_; }; // Timer management class -class TimerManager { +class ThreadTimer { public: - TimerManager(int num_threads, Notification* done) - : num_threads_(num_threads), - running_threads_(num_threads), - done_(done), - running_(false), + ThreadTimer() + : running_(false), real_time_used_(0), cpu_time_used_(0), - manual_time_used_(0), - num_finalized_(0), - phase_number_(0), - entered_(0) - { + manual_time_used_(0) {} + + // Called by each thread + void StartTimer() { + running_ = true; + start_real_time_ = ChronoClockNow(); + start_cpu_time_ = ThreadCPUUsage(); } // Called by each thread - void StartTimer() EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = Barrier(ml); - if (last_thread) { - CHECK(!running_) << "Called StartTimer when timer is already running"; - running_ = true; - start_real_time_ = walltime::Now(); - start_cpu_time_ = MyCPUUsage() + ChildrenCPUUsage(); - } - } - if (last_thread) { - phase_condition_.notify_all(); - } + void StopTimer() { + CHECK(running_); + running_ = false; + real_time_used_ += ChronoClockNow() - start_real_time_; + cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_; } // Called by each thread - void StopTimer() EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = Barrier(ml); - if (last_thread) { - CHECK(running_) << "Called StopTimer when timer is already stopped"; - InternalStop(); - } - } - if (last_thread) { - phase_condition_.notify_all(); - } - } + void SetIterationTime(double seconds) { manual_time_used_ += seconds; } - // Called by each thread - void SetIterationTime(double seconds) EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = Barrier(ml); - if (last_thread) { - manual_time_used_ += seconds; - } - } - if (last_thread) { - phase_condition_.notify_all(); - } - } - - // Called by each thread - void Finalize() EXCLUDES(lock_) { - MutexLock l(lock_); - num_finalized_++; - if (num_finalized_ == num_threads_) { - CHECK(!running_) << - "The timer should be stopped before the timer is finalized"; - done_->Notify(); - } - } - - void RemoveErroredThread() EXCLUDES(lock_) { - MutexLock ml(lock_); - int last_thread = --running_threads_ == 0; - if (last_thread && running_) - InternalStop(); - else if (!last_thread) - phase_condition_.notify_all(); - } + bool running() const { return running_; } // REQUIRES: timer is not running - double real_time_used() EXCLUDES(lock_) { - MutexLock l(lock_); + double real_time_used() { CHECK(!running_); return real_time_used_; } // REQUIRES: timer is not running - double cpu_time_used() EXCLUDES(lock_) { - MutexLock l(lock_); + double cpu_time_used() { CHECK(!running_); return cpu_time_used_; } // REQUIRES: timer is not running - double manual_time_used() EXCLUDES(lock_) { - MutexLock l(lock_); + double manual_time_used() { CHECK(!running_); return manual_time_used_; } private: - Mutex lock_; - Condition phase_condition_; - int num_threads_; - int running_threads_; - Notification* done_; - bool running_; // Is the timer running double start_real_time_; // If running_ double start_cpu_time_; // If running_ @@ -272,52 +222,8 @@ class TimerManager { // Manually set iteration time. User sets this with SetIterationTime(seconds). double manual_time_used_; - // How many threads have called Finalize() - int num_finalized_; - - // State for barrier management - int phase_number_; - int entered_; // Number of threads that have entered this barrier - - void InternalStop() REQUIRES(lock_) { - CHECK(running_); - running_ = false; - real_time_used_ += walltime::Now() - start_real_time_; - cpu_time_used_ += ((MyCPUUsage() + ChildrenCPUUsage()) - - start_cpu_time_); - } - - // Enter the barrier and wait until all other threads have also - // entered the barrier. Returns iff this is the last thread to - // enter the barrier. - bool Barrier(MutexLock& ml) REQUIRES(lock_) { - CHECK_LT(entered_, running_threads_); - entered_++; - if (entered_ < running_threads_) { - // Wait for all threads to enter - int phase_number_cp = phase_number_; - auto cb = [this, phase_number_cp]() { - return this->phase_number_ > phase_number_cp || - entered_ == running_threads_; // A thread has aborted in error - }; - phase_condition_.wait(ml.native_handle(), cb); - if (phase_number_ > phase_number_cp) - return false; - // else (running_threads_ == entered_) and we are the last thread. - } - // Last thread has reached the barrier - phase_number_++; - entered_ = 0; - return true; - } }; -// TimerManager for current run. -static std::unique_ptr timer_manager = nullptr; - -} // end namespace - -namespace internal { enum ReportMode : unsigned { RM_Unspecified, // The mode has not been manually specified @@ -802,32 +708,33 @@ namespace { // Adds the stats collected for the thread into *total. void RunInThread(const benchmark::internal::Benchmark::Instance* b, size_t iters, int thread_id, - ThreadStats* total) EXCLUDES(GetBenchmarkLock()) { - State st(iters, b->arg, thread_id, b->threads); + internal::ThreadManager* manager) { + internal::ThreadTimer timer; + State st(iters, b->arg, thread_id, b->threads, &timer, manager); b->benchmark->Run(st); CHECK(st.iterations() == st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { - MutexLock l(GetBenchmarkLock()); - total->bytes_processed += st.bytes_processed(); - total->items_processed += st.items_processed(); - total->complexity_n += st.complexity_length_n(); + MutexLock l(manager->GetBenchmarkMutex()); + manager->cpu_time_used += timer.cpu_time_used(); + manager->real_time_used += timer.real_time_used(); + manager->manual_time_used += timer.manual_time_used(); + manager->bytes_processed += st.bytes_processed(); + manager->items_processed += st.items_processed(); + manager->complexity_n += st.complexity_length_n(); } - - timer_manager->Finalize(); + manager->NotifyThreadComplete(); } -std::vector -RunBenchmark(const benchmark::internal::Benchmark::Instance& b, - std::vector* complexity_reports) - EXCLUDES(GetBenchmarkLock()) { +std::vector RunBenchmark( + const benchmark::internal::Benchmark::Instance& b, + std::vector* complexity_reports) { std::vector reports; // return value size_t iters = 1; - + const int num_threads = b.multithreaded ? b.threads : 1; std::vector pool; - if (b.multithreaded) - pool.resize(b.threads); + if (num_threads > 1) pool.resize(num_threads -1); const int repeats = b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions; @@ -841,17 +748,7 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, // Try benchmark VLOG(2) << "Running " << b.name << " for " << iters << "\n"; - { - MutexLock l(GetBenchmarkLock()); - GetReportLabel()->clear(); - } - error_message = nullptr; - - Notification done; - timer_manager = std::unique_ptr(new TimerManager(b.threads, &done)); - - ThreadStats total; - running_benchmark = true; + internal::ThreadManager manager(num_threads); if (b.multithreaded) { // If this is out first iteration of the while(true) loop then the // threads haven't been started and can't be joined. Otherwise we need @@ -861,19 +758,17 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, thread.join(); } for (std::size_t ti = 0; ti < pool.size(); ++ti) { - pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti), &total); + pool[ti] = std::thread(&RunInThread, &b, iters, + static_cast(ti + 1), &manager); } - } else { - // Run directly in this thread - RunInThread(&b, iters, 0, &total); } - done.WaitForNotification(); - running_benchmark = false; + RunInThread(&b, iters, 0, &manager); + manager.WaitForAllThreads(); + MutexLock l(manager.GetBenchmarkMutex()); - const double cpu_accumulated_time = timer_manager->cpu_time_used(); - const double real_accumulated_time = timer_manager->real_time_used(); - const double manual_accumulated_time = timer_manager->manual_time_used(); - timer_manager.reset(); + const double cpu_accumulated_time = manager.cpu_time_used; + const double real_accumulated_time = manager.real_time_used / num_threads; + const double manual_accumulated_time = manager.manual_time_used / num_threads; VLOG(2) << "Ran in " << cpu_accumulated_time << "/" << real_accumulated_time << "\n"; @@ -886,41 +781,30 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, seconds = real_accumulated_time; } - std::string label; - { - MutexLock l(GetBenchmarkLock()); - label = *GetReportLabel(); - } - error_message_type error_msg = error_message; - const double min_time = !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time; - // If this was the first run, was elapsed time or cpu time large enough? // If this is not the first run, go with the current value of iter. - if ((i > 0) || (error_msg != nullptr) || - (iters >= kMaxIterations) || - (seconds >= min_time) || - (real_accumulated_time >= 5*min_time)) { - + if ((i > 0) || manager.has_error_ || (iters >= kMaxIterations) || + (seconds >= min_time) || (real_accumulated_time >= 5 * min_time)) { // Create report about this benchmark run. BenchmarkReporter::Run report; report.benchmark_name = b.name; - report.error_occurred = error_msg != nullptr; - report.error_message = error_msg != nullptr ? error_msg : ""; - report.report_label = label; + report.error_occurred = manager.has_error_; + report.error_message = manager.error_message_; + report.report_label = manager.report_label_; // Report the total iterations across all threads. report.iterations = static_cast(iters) * b.threads; report.time_unit = b.time_unit; if (!report.error_occurred) { double bytes_per_second = 0; - if (total.bytes_processed > 0 && seconds > 0.0) { - bytes_per_second = (total.bytes_processed / seconds); + if (manager.bytes_processed > 0 && seconds > 0.0) { + bytes_per_second = (manager.bytes_processed / seconds); } double items_per_second = 0; - if (total.items_processed > 0 && seconds > 0.0) { - items_per_second = (total.items_processed / seconds); + if (manager.items_processed > 0 && seconds > 0.0) { + items_per_second = (manager.items_processed / seconds); } if (b.use_manual_time) { @@ -931,7 +815,7 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, report.cpu_accumulated_time = cpu_accumulated_time; report.bytes_per_second = bytes_per_second; report.items_per_second = items_per_second; - report.complexity_n = total.complexity_n; + report.complexity_n = manager.complexity_n; report.complexity = b.complexity; report.complexity_lambda = b.complexity_lambda; if(report.complexity != oNone) @@ -981,55 +865,77 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, } // namespace -State::State(size_t max_iters, const std::vector& ranges, - int thread_i, int n_threads) - : started_(false), finished_(false), total_iterations_(0), +State::State(size_t max_iters, const std::vector& ranges, int thread_i, + int n_threads, internal::ThreadTimer* timer, + internal::ThreadManager* manager) + : started_(false), + finished_(false), + total_iterations_(0), range_(ranges), - bytes_processed_(0), items_processed_(0), + bytes_processed_(0), + items_processed_(0), complexity_n_(0), error_occurred_(false), thread_index(thread_i), threads(n_threads), - max_iterations(max_iters) -{ - CHECK(max_iterations != 0) << "At least one iteration must be run"; - CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; + max_iterations(max_iters), + timer_(timer), + manager_(manager) { + CHECK(max_iterations != 0) << "At least one iteration must be run"; + CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; } void State::PauseTiming() { // Add in time accumulated so far - CHECK(running_benchmark); CHECK(started_ && !finished_ && !error_occurred_); - timer_manager->StopTimer(); + timer_->StopTimer(); } void State::ResumeTiming() { - CHECK(running_benchmark); CHECK(started_ && !finished_ && !error_occurred_); - timer_manager->StartTimer(); + timer_->StartTimer(); } void State::SkipWithError(const char* msg) { CHECK(msg); error_occurred_ = true; - error_message_type expected_no_error_msg = nullptr; - error_message.compare_exchange_weak(expected_no_error_msg, - const_cast(msg)); - started_ = finished_ = true; + { + MutexLock l(manager_->GetBenchmarkMutex()); + if (manager_->has_error_ == false) { + manager_->error_message_ = msg; + manager_->has_error_ = true; + } + } total_iterations_ = max_iterations; - timer_manager->RemoveErroredThread(); + if (timer_->running()) timer_->StopTimer(); } void State::SetIterationTime(double seconds) { - CHECK(running_benchmark); - timer_manager->SetIterationTime(seconds); + timer_->SetIterationTime(seconds); } void State::SetLabel(const char* label) { - CHECK(running_benchmark); - MutexLock l(GetBenchmarkLock()); - *GetReportLabel() = label; + MutexLock l(manager_->GetBenchmarkMutex()); + manager_->report_label_ = label; +} + +void State::StartKeepRunning() { + CHECK(!started_ && !finished_); + started_ = true; + manager_->StartStopBarrier(); + if (!error_occurred_) ResumeTiming(); +} + +void State::FinishKeepRunning() { + CHECK(started_ && (!finished_ || error_occurred_)); + if (!error_occurred_) { + PauseTiming(); + } + // Total iterations now is one greater than max iterations. Fix this. + total_iterations_ = max_iterations; + finished_ = true; + manager_->StartStopBarrier(); } namespace internal { @@ -1231,13 +1137,7 @@ int InitializeStreams() { void Initialize(int* argc, char** argv) { internal::ParseCommandLineFlags(argc, argv); - internal::SetLogLevel(FLAGS_v); - // TODO remove this. It prints some output the first time it is called. - // We don't want to have this ouput printed during benchmarking. - MyCPUUsage(); - // The first call to walltime::Now initialized it. Call it once to - // prevent the initialization from happening in a benchmark. - walltime::Now(); + internal::LogLevel() = FLAGS_v; } } // end namespace benchmark diff --git a/src/check.h b/src/check.h index 4572babb..a17799bf 100644 --- a/src/check.h +++ b/src/check.h @@ -33,9 +33,7 @@ public: << check << "' failed. "; } - std::ostream& GetLog() { - return log_; - } + LogType& GetLog() { return log_; } BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { log_ << std::endl; @@ -46,7 +44,7 @@ public: CheckHandler(const CheckHandler&) = delete; CheckHandler() = delete; private: - std::ostream& log_; + LogType& log_; }; } // end namespace internal diff --git a/src/console_reporter.cc b/src/console_reporter.cc index a663b936..2a35b787 100644 --- a/src/console_reporter.cc +++ b/src/console_reporter.cc @@ -28,7 +28,7 @@ #include "commandlineflags.h" #include "internal_macros.h" #include "string_util.h" -#include "walltime.h" +#include "timers.h" namespace benchmark { diff --git a/src/csv_reporter.cc b/src/csv_reporter.cc index 7bc7ef3d..1c137f3f 100644 --- a/src/csv_reporter.cc +++ b/src/csv_reporter.cc @@ -23,7 +23,7 @@ #include #include "string_util.h" -#include "walltime.h" +#include "timers.h" // File format reference: http://edoceo.com/utilitas/csv-file-format. diff --git a/src/json_reporter.cc b/src/json_reporter.cc index 485d3052..092b4254 100644 --- a/src/json_reporter.cc +++ b/src/json_reporter.cc @@ -23,7 +23,7 @@ #include #include "string_util.h" -#include "walltime.h" +#include "timers.h" namespace benchmark { diff --git a/src/log.cc b/src/log.cc deleted file mode 100644 index b660309d..00000000 --- a/src/log.cc +++ /dev/null @@ -1,40 +0,0 @@ -#include "log.h" - -#include - -namespace benchmark { -namespace internal { - -int& LoggingLevelImp() { - static int level = 0; - return level; -} - -void SetLogLevel(int value) { - LoggingLevelImp() = value; -} - -int GetLogLevel() { - return LoggingLevelImp(); -} - -class NullLogBuffer : public std::streambuf -{ -public: - int overflow(int c) { - return c; - } -}; - -std::ostream& GetNullLogInstance() { - static NullLogBuffer log_buff; - static std::ostream null_log(&log_buff); - return null_log; -} - -std::ostream& GetErrorLogInstance() { - return std::clog; -} - -} // end namespace internal -} // end namespace benchmark \ No newline at end of file diff --git a/src/log.h b/src/log.h index 3777810e..e965d361 100644 --- a/src/log.h +++ b/src/log.h @@ -1,19 +1,63 @@ #ifndef BENCHMARK_LOG_H_ #define BENCHMARK_LOG_H_ +#include #include +#include "benchmark/macros.h" + namespace benchmark { namespace internal { -int GetLogLevel(); -void SetLogLevel(int level); +typedef std::basic_ostream&(EndLType)(std::basic_ostream&); -std::ostream& GetNullLogInstance(); -std::ostream& GetErrorLogInstance(); +class LogType { + friend LogType& GetNullLogInstance(); + friend LogType& GetErrorLogInstance(); -inline std::ostream& GetLogInstanceForLevel(int level) { - if (level <= GetLogLevel()) { + // FIXME: Add locking to output. + template + friend LogType& operator<<(LogType&, Tp const&); + friend LogType& operator<<(LogType&, EndLType*); + + private: + LogType(std::ostream* out) : out_(out) {} + std::ostream* out_; + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType); +}; + +template +LogType& operator<<(LogType& log, Tp const& value) { + if (log.out_) { + *log.out_ << value; + } + return log; +} + +inline LogType& operator<<(LogType& log, EndLType* m) { + if (log.out_) { + *log.out_ << m; + } + return log; +} + +inline int& LogLevel() { + static int log_level = 0; + return log_level; +} + +inline LogType& GetNullLogInstance() { + static LogType log(nullptr); + return log; +} + +inline LogType& GetErrorLogInstance() { + static LogType log(&std::clog); + return log; +} + +inline LogType& GetLogInstanceForLevel(int level) { + if (level <= LogLevel()) { return GetErrorLogInstance(); } return GetNullLogInstance(); diff --git a/src/mutex.h b/src/mutex.h index f37ec35b..a6cf7f80 100644 --- a/src/mutex.h +++ b/src/mutex.h @@ -4,6 +4,8 @@ #include #include +#include "check.h" + // Enable thread safety attributes only with clang. // The attributes can be safely erased when compiling with other compilers. #if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) @@ -105,36 +107,58 @@ private: MutexLockImp ml_; }; +class Barrier { + public: + Barrier(int num_threads) : running_threads_(num_threads) {} -class Notification -{ -public: - Notification() : notified_yet_(false) { } - - void WaitForNotification() const EXCLUDES(mutex_) { - MutexLock m_lock(mutex_); - auto notified_fn = [this]() REQUIRES(mutex_) { - return this->HasBeenNotified(); - }; - cv_.wait(m_lock.native_handle(), notified_fn); - } - - void Notify() EXCLUDES(mutex_) { + // Called by each thread + bool wait() EXCLUDES(lock_) { + bool last_thread = false; { - MutexLock lock(mutex_); - notified_yet_ = 1; + MutexLock ml(lock_); + last_thread = createBarrier(ml); } - cv_.notify_all(); + if (last_thread) phase_condition_.notify_all(); + return last_thread; } -private: - bool HasBeenNotified() const REQUIRES(mutex_) { - return notified_yet_; + void removeThread() EXCLUDES(lock_) { + MutexLock ml(lock_); + --running_threads_; + if (entered_ != 0) phase_condition_.notify_all(); } - mutable Mutex mutex_; - mutable std::condition_variable cv_; - bool notified_yet_ GUARDED_BY(mutex_); + private: + Mutex lock_; + Condition phase_condition_; + int running_threads_; + + // State for barrier management + int phase_number_ = 0; + int entered_ = 0; // Number of threads that have entered this barrier + + // Enter the barrier and wait until all other threads have also + // entered the barrier. Returns iff this is the last thread to + // enter the barrier. + bool createBarrier(MutexLock& ml) REQUIRES(lock_) { + CHECK_LT(entered_, running_threads_); + entered_++; + if (entered_ < running_threads_) { + // Wait for all threads to enter + int phase_number_cp = phase_number_; + auto cb = [this, phase_number_cp]() { + return this->phase_number_ > phase_number_cp || + entered_ == running_threads_; // A thread has aborted in error + }; + phase_condition_.wait(ml.native_handle(), cb); + if (phase_number_ > phase_number_cp) return false; + // else (running_threads_ == entered_) and we are the last thread. + } + // Last thread has reached the barrier + phase_number_++; + entered_ = 0; + return true; + } }; } // end namespace benchmark diff --git a/src/reporter.cc b/src/reporter.cc index 51878590..188723f9 100644 --- a/src/reporter.cc +++ b/src/reporter.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "benchmark/reporter.h" -#include "walltime.h" +#include "timers.h" #include diff --git a/src/sysinfo.cc b/src/sysinfo.cc index b5b9851f..483ea8a9 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -52,7 +52,6 @@ namespace { std::once_flag cpuinfo_init; double cpuinfo_cycles_per_second = 1.0; int cpuinfo_num_cpus = 1; // Conservative guess -std::mutex cputimens_mutex; #if !defined BENCHMARK_OS_MACOSX const int64_t estimate_time_ms = 1000; @@ -288,102 +287,9 @@ void InitializeSystemInfo() { cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); #endif } + } // end namespace -// getrusage() based implementation of MyCPUUsage -static double MyCPUUsageRUsage() { -#ifndef BENCHMARK_OS_WINDOWS - struct rusage ru; - if (getrusage(RUSAGE_SELF, &ru) == 0) { - return (static_cast(ru.ru_utime.tv_sec) + - static_cast(ru.ru_utime.tv_usec) * 1e-6 + - static_cast(ru.ru_stime.tv_sec) + - static_cast(ru.ru_stime.tv_usec) * 1e-6); - } else { - return 0.0; - } -#else - HANDLE proc = GetCurrentProcess(); - FILETIME creation_time; - FILETIME exit_time; - FILETIME kernel_time; - FILETIME user_time; - ULARGE_INTEGER kernel; - ULARGE_INTEGER user; - GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time); - kernel.HighPart = kernel_time.dwHighDateTime; - kernel.LowPart = kernel_time.dwLowDateTime; - user.HighPart = user_time.dwHighDateTime; - user.LowPart = user_time.dwLowDateTime; - return (static_cast(kernel.QuadPart) + - static_cast(user.QuadPart)) * 1e-7; -#endif // OS_WINDOWS -} - -#ifndef BENCHMARK_OS_WINDOWS -static bool MyCPUUsageCPUTimeNsLocked(double* cputime) { - static int cputime_fd = -1; - if (cputime_fd == -1) { - cputime_fd = open("/proc/self/cputime_ns", O_RDONLY); - if (cputime_fd < 0) { - cputime_fd = -1; - return false; - } - } - char buff[64]; - memset(buff, 0, sizeof(buff)); - if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) { - close(cputime_fd); - cputime_fd = -1; - return false; - } - unsigned long long result = strtoull(buff, nullptr, 0); - if (result == (std::numeric_limits::max)()) { - close(cputime_fd); - cputime_fd = -1; - return false; - } - *cputime = static_cast(result) / 1e9; - return true; -} -#endif // OS_WINDOWS - -double MyCPUUsage() { -#ifndef BENCHMARK_OS_WINDOWS - { - std::lock_guard l(cputimens_mutex); - static bool use_cputime_ns = true; - if (use_cputime_ns) { - double value; - if (MyCPUUsageCPUTimeNsLocked(&value)) { - return value; - } - // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage(). - VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n"; - use_cputime_ns = false; - } - } -#endif // OS_WINDOWS - return MyCPUUsageRUsage(); -} - -double ChildrenCPUUsage() { -#ifndef BENCHMARK_OS_WINDOWS - struct rusage ru; - if (getrusage(RUSAGE_CHILDREN, &ru) == 0) { - return (static_cast(ru.ru_utime.tv_sec) + - static_cast(ru.ru_utime.tv_usec) * 1e-6 + - static_cast(ru.ru_stime.tv_sec) + - static_cast(ru.ru_stime.tv_usec) * 1e-6); - } else { - return 0.0; - } -#else - // TODO: Not sure what this even means on Windows - return 0.0; -#endif // OS_WINDOWS -} - double CyclesPerSecond(void) { std::call_once(cpuinfo_init, InitializeSystemInfo); return cpuinfo_cycles_per_second; diff --git a/src/sysinfo.h b/src/sysinfo.h index eaf77e07..c5d9916d 100644 --- a/src/sysinfo.h +++ b/src/sysinfo.h @@ -2,8 +2,6 @@ #define BENCHMARK_SYSINFO_H_ namespace benchmark { -double MyCPUUsage(); -double ChildrenCPUUsage(); int NumCPUs(); double CyclesPerSecond(); bool CpuScalingEnabled(); diff --git a/src/timers.cc b/src/timers.cc new file mode 100644 index 00000000..95e6ceb5 --- /dev/null +++ b/src/timers.cc @@ -0,0 +1,195 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "timers.h" +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#include +#include +#else +#include +#include +#include +#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD +#include +#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX +#include +#endif +#if defined(BENCHMARK_OS_MACOSX) +#include +#include +#include +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "log.h" +#include "sleep.h" +#include "string_util.h" + +namespace benchmark { + +// Suppress unused warnings on helper functions. +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +namespace { +#if defined(BENCHMARK_OS_WINDOWS) +double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { + ULARGE_INTEGER kernel; + ULARGE_INTEGER user; + kernel.HighPart = kernel_time.dwHighDateTime; + kernel.LowPart = kernel_time.dwLowDateTime; + user.HighPart = user_time.dwHighDateTime; + user.LowPart = user_time.dwLowDateTime; + return (static_cast(kernel.QuadPart) + + static_cast(user.QuadPart)) * + 1e-7; +} +#else +double MakeTime(struct timespec const& ts) { + return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); +} +double MakeTime(struct rusage ru) { + return (static_cast(ru.ru_utime.tv_sec) + + static_cast(ru.ru_utime.tv_usec) * 1e-6 + + static_cast(ru.ru_stime.tv_sec) + + static_cast(ru.ru_stime.tv_usec) * 1e-6); +} +#endif +#if defined(BENCHMARK_OS_MACOSX) +double MakeTime(thread_basic_info_data_t const& info) { + return (static_cast(info.user_time.seconds) + + static_cast(info.user_time.microseconds) * 1e-6 + + static_cast(info.system_time.seconds) + + static_cast(info.user_time.microseconds) * 1e-6); +} +#endif + +BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) { + std::cerr << "ERROR: " << msg << std::endl; + std::exit(EXIT_FAILURE); +} + +} // end namespace + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +double ProcessCPUUsage() { +#if defined(BENCHMARK_OS_WINDOWS) + HANDLE proc = GetCurrentProcess(); + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time)) + return MakeTime(kernel_time, user_time); + DiagnoseAndExit("GetProccessTimes() failed"); +#elif defined(CLOCK_PROCESS_CPUTIME_ID) + struct timespec spec; + if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) + return MakeTime(spec); + DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); +#else + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) == 0) + return MakeTime(ru); + DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); +#endif +} + +double ThreadCPUUsage() { +#if defined(BENCHMARK_OS_WINDOWS) + HANDLE this_thread = GetCurrentThread(); + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, + &user_time); + return MakeTime(kernel_time, user_time); +#elif defined(CLOCK_THREAD_CPUTIME_ID) + struct timespec ts; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) + return MakeTime(ts); + DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed"); +#elif defined(BENCHMARK_OS_MACOSX) + mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; + thread_basic_info_data_t info; + mach_port_t thread = pthread_mach_thread_np(pthread_self()); + if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t) &info, &count) + == KERN_SUCCESS) { + return MakeTime(info); + } + DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); +#else +#error Per-thread timing is not available on your system. +#endif +} + +namespace { + +std::string DateTimeString(bool local) { + typedef std::chrono::system_clock Clock; + std::time_t now = Clock::to_time_t(Clock::now()); + const std::size_t kStorageSize = 128; + char storage[kStorageSize]; + std::size_t written; + + if (local) { +#if defined(BENCHMARK_OS_WINDOWS) + written = + std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); +#else + std::tm timeinfo; + std::memset(&timeinfo, 0, sizeof(std::tm)); + ::localtime_r(&now, &timeinfo); + written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); +#endif + } else { +#if defined(BENCHMARK_OS_WINDOWS) + written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now)); +#else + std::tm timeinfo; + std::memset(&timeinfo, 0, sizeof(std::tm)); + ::gmtime_r(&now, &timeinfo); + written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); +#endif + } + CHECK(written < kStorageSize); + ((void)written); // prevent unused variable in optimized mode. + return std::string(storage); +} + +} // end namespace + +std::string LocalDateTimeString() { return DateTimeString(true); } + +} // end namespace benchmark diff --git a/src/timers.h b/src/timers.h new file mode 100644 index 00000000..65606ccd --- /dev/null +++ b/src/timers.h @@ -0,0 +1,48 @@ +#ifndef BENCHMARK_TIMERS_H +#define BENCHMARK_TIMERS_H + +#include +#include + +namespace benchmark { + +// Return the CPU usage of the current process +double ProcessCPUUsage(); + +// Return the CPU usage of the children of the current process +double ChildrenCPUUsage(); + +// Return the CPU usage of the current thread +double ThreadCPUUsage(); + +#if defined(HAVE_STEADY_CLOCK) +template +struct ChooseSteadyClock { + typedef std::chrono::high_resolution_clock type; +}; + +template <> +struct ChooseSteadyClock { + typedef std::chrono::steady_clock type; +}; +#endif + +struct ChooseClockType { +#if defined(HAVE_STEADY_CLOCK) + typedef ChooseSteadyClock<>::type type; +#else + typedef std::chrono::high_resolution_clock type; +#endif +}; + +inline double ChronoClockNow() { + typedef ChooseClockType::type ClockType; + using FpSeconds = std::chrono::duration; + return FpSeconds(ClockType::now().time_since_epoch()).count(); +} + +std::string LocalDateTimeString(); + +} // end namespace benchmark + +#endif // BENCHMARK_TIMERS_H diff --git a/src/walltime.cc b/src/walltime.cc deleted file mode 100644 index 4bdbaa59..00000000 --- a/src/walltime.cc +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "benchmark/macros.h" -#include "internal_macros.h" -#include "walltime.h" - -#if defined(BENCHMARK_OS_WINDOWS) -#include -#include // for timeval -#else -#include -#endif - -#include -#include -#include -#include - -#include -#include -#include - -#include "arraysize.h" -#include "check.h" -#include "cycleclock.h" -#include "log.h" -#include "sysinfo.h" - -namespace benchmark { -namespace walltime { - -namespace { - -#if defined(HAVE_STEADY_CLOCK) -template -struct ChooseSteadyClock { - typedef std::chrono::high_resolution_clock type; -}; - -template <> -struct ChooseSteadyClock { - typedef std::chrono::steady_clock type; -}; -#endif - -struct ChooseClockType { -#if defined(HAVE_STEADY_CLOCK) - typedef ChooseSteadyClock<>::type type; -#else - typedef std::chrono::high_resolution_clock type; -#endif -}; - -class WallTimeImp -{ -public: - WallTime Now(); - - static WallTimeImp& GetWallTimeImp() { - static WallTimeImp* imp = new WallTimeImp(); - return *imp; - } - -private: - WallTimeImp(); - // Helper routines to load/store a float from an AtomicWord. Required because - // g++ < 4.7 doesn't support std::atomic correctly. I cannot wait to - // get rid of this horror show. - void SetDrift(float f) { - int32_t w; - memcpy(&w, &f, sizeof(f)); - std::atomic_store(&drift_adjust_, w); - } - - float GetDrift() const { - float f; - int32_t w = std::atomic_load(&drift_adjust_); - memcpy(&f, &w, sizeof(f)); - return f; - } - - WallTime Slow() const { - struct timeval tv; -#if defined(BENCHMARK_OS_WINDOWS) - FILETIME file_time; - SYSTEMTIME system_time; - ULARGE_INTEGER ularge; - const unsigned __int64 epoch = 116444736000000000LL; - - GetSystemTime(&system_time); - SystemTimeToFileTime(&system_time, &file_time); - ularge.LowPart = file_time.dwLowDateTime; - ularge.HighPart = file_time.dwHighDateTime; - - tv.tv_sec = (long)((ularge.QuadPart - epoch) / (10L * 1000 * 1000)); - tv.tv_usec = (long)(system_time.wMilliseconds * 1000); -#else - gettimeofday(&tv, nullptr); -#endif - return tv.tv_sec + tv.tv_usec * 1e-6; - } - -private: - static_assert(sizeof(float) <= sizeof(int32_t), - "type sizes don't allow the drift_adjust hack"); - - WallTime base_walltime_; - int64_t base_cycletime_; - int64_t cycles_per_second_; - double seconds_per_cycle_; - uint32_t last_adjust_time_; - std::atomic drift_adjust_; - int64_t max_interval_cycles_; - - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp); -}; - - -WallTime WallTimeImp::Now() { - WallTime now = 0.0; - WallTime result = 0.0; - int64_t ct = 0; - uint32_t top_bits = 0; - do { - ct = cycleclock::Now(); - int64_t cycle_delta = ct - base_cycletime_; - result = base_walltime_ + cycle_delta * seconds_per_cycle_; - - top_bits = static_cast(uint64_t(ct) >> 32); - // Recompute drift no more often than every 2^32 cycles. - // I.e., @2GHz, ~ every two seconds - if (top_bits == last_adjust_time_) { // don't need to recompute drift - return result + GetDrift(); - } - - now = Slow(); - } while (cycleclock::Now() - ct > max_interval_cycles_); - // We are now sure that "now" and "result" were produced within - // kMaxErrorInterval of one another. - - SetDrift(static_cast(now - result)); - last_adjust_time_ = top_bits; - return now; -} - - -WallTimeImp::WallTimeImp() - : base_walltime_(0.0), base_cycletime_(0), - cycles_per_second_(0), seconds_per_cycle_(0.0), - last_adjust_time_(0), drift_adjust_(0), - max_interval_cycles_(0) { - const double kMaxErrorInterval = 100e-6; - cycles_per_second_ = static_cast(CyclesPerSecond()); - CHECK(cycles_per_second_ != 0); - seconds_per_cycle_ = 1.0 / cycles_per_second_; - max_interval_cycles_ = - static_cast(cycles_per_second_ * kMaxErrorInterval); - do { - base_cycletime_ = cycleclock::Now(); - base_walltime_ = Slow(); - } while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_); - // We are now sure that "base_walltime" and "base_cycletime" were produced - // within kMaxErrorInterval of one another. - - SetDrift(0.0); - last_adjust_time_ = static_cast(uint64_t(base_cycletime_) >> 32); -} - -WallTime CPUWalltimeNow() { - static WallTimeImp& imp = WallTimeImp::GetWallTimeImp(); - return imp.Now(); -} - -WallTime ChronoWalltimeNow() { - typedef ChooseClockType::type Clock; - typedef std::chrono::duration - FPSeconds; - static_assert(std::chrono::treat_as_floating_point::value, - "This type must be treated as a floating point type."); - auto now = Clock::now().time_since_epoch(); - return std::chrono::duration_cast(now).count(); -} - -bool UseCpuCycleClock() { - bool useWallTime = !CpuScalingEnabled(); - if (useWallTime) { - VLOG(1) << "Using the CPU cycle clock to provide walltime::Now().\n"; - } else { - VLOG(1) << "Using std::chrono to provide walltime::Now().\n"; - } - return useWallTime; -} - - -} // end anonymous namespace - -// WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is -// enabled at the start of the program then std::chrono::system_clock is used -// instead. -WallTime Now() -{ - static bool useCPUClock = UseCpuCycleClock(); - if (useCPUClock) { - return CPUWalltimeNow(); - } else { - return ChronoWalltimeNow(); - } -} - -} // end namespace walltime - - -namespace { - -std::string DateTimeString(bool local) { - typedef std::chrono::system_clock Clock; - std::time_t now = Clock::to_time_t(Clock::now()); - char storage[128]; - std::size_t written; - - if (local) { -#if defined(BENCHMARK_OS_WINDOWS) - written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); -#else - std::tm timeinfo; - std::memset(&timeinfo, 0, sizeof(std::tm)); - ::localtime_r(&now, &timeinfo); - written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); -#endif - } else { -#if defined(BENCHMARK_OS_WINDOWS) - written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now)); -#else - std::tm timeinfo; - std::memset(&timeinfo, 0, sizeof(std::tm)); - ::gmtime_r(&now, &timeinfo); - written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); -#endif - } - CHECK(written < arraysize(storage)); - ((void)written); // prevent unused variable in optimized mode. - return std::string(storage); -} - -} // end namespace - -std::string LocalDateTimeString() { - return DateTimeString(true); -} - -} // end namespace benchmark diff --git a/src/walltime.h b/src/walltime.h deleted file mode 100644 index 38c26f33..00000000 --- a/src/walltime.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef BENCHMARK_WALLTIME_H_ -#define BENCHMARK_WALLTIME_H_ - -#include - -namespace benchmark { -typedef double WallTime; - -namespace walltime { -WallTime Now(); -} // end namespace walltime - -std::string LocalDateTimeString(); - -} // end namespace benchmark - -#endif // BENCHMARK_WALLTIME_H_ diff --git a/tools/gbench/report.py b/tools/gbench/report.py index 44fa4a56..ac69b9be 100644 --- a/tools/gbench/report.py +++ b/tools/gbench/report.py @@ -59,6 +59,10 @@ def calculate_change(old_val, new_val): """ Return a float representing the decimal change between old_val and new_val. """ + if old_val == 0 and new_val == 0: + return 0.0 + if old_val == 0: + return float(new_val - old_val) / (float(old_val + new_val) / 2) return float(new_val - old_val) / abs(old_val) @@ -73,7 +77,7 @@ def generate_difference_report(json1, json2, use_color=True): if b['name'] == name: return b return None - first_line = "{:<{}s} Time CPU".format( + first_line = "{:<{}s} Time CPU Old New".format( 'Benchmark', first_col_width) output_strs = [first_line, '-' * len(first_line)] for bn in json1['benchmarks']: @@ -88,12 +92,13 @@ def generate_difference_report(json1, json2, use_color=True): return BC_WHITE else: return BC_CYAN - fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}" + fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc} {:4d} {:4d}" tres = calculate_change(bn['real_time'], other_bench['real_time']) cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time']) output_strs += [color_format(use_color, fmt_str, BC_HEADER, bn['name'], first_col_width, get_color(tres), tres, get_color(cpures), cpures, + bn['cpu_time'], other_bench['cpu_time'], endc=BC_ENDC)] return output_strs