mirror of https://github.com/google/benchmark.git
Make `PauseTiming()` and `ResumeTiming()` per thread. (#286)
* Change to using per-thread timers * fix bad assertions * fix copy paste error on windows * Fix thread safety annotations * Make null-log thread safe * remove remaining globals * use chrono for walltime since it is thread safe * consolidate timer functions * Add missing ctime include * Rename to be consistent with Google style * Format patch using clang-format * cleanup -Wthread-safety configuration * Don't trust _POSIX_FEATURE macros because OS X lies. * Fix OS X thread timings * attempt to fix mingw build * Attempt to make mingw work again * Revert old mingw workaround * improve diagnostics * Drastically improve OS X measurements * Use average real time instead of max
This commit is contained in:
parent
94c2a30a3e
commit
cba945e37d
|
@ -87,8 +87,7 @@ else()
|
|||
add_cxx_compiler_flag(-Wstrict-aliasing)
|
||||
endif()
|
||||
add_cxx_compiler_flag(-Wthread-safety)
|
||||
if (HAVE_WTHREAD_SAFETY)
|
||||
add_definitions(-DHAVE_WTHREAD_SAFETY)
|
||||
if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
|
||||
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
|
||||
endif()
|
||||
|
||||
|
@ -152,7 +151,6 @@ cxx_feature_check(STD_REGEX)
|
|||
cxx_feature_check(GNU_POSIX_REGEX)
|
||||
cxx_feature_check(POSIX_REGEX)
|
||||
cxx_feature_check(STEADY_CLOCK)
|
||||
|
||||
# Ensure we have pthreads
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
|
|
|
@ -270,31 +270,25 @@ enum BigO {
|
|||
// computational complexity for the benchmark.
|
||||
typedef double(BigOFunc)(int);
|
||||
|
||||
namespace internal {
|
||||
class ThreadTimer;
|
||||
class ThreadManager;
|
||||
}
|
||||
|
||||
// State is passed to a running Benchmark and contains state for the
|
||||
// benchmark to use.
|
||||
class State {
|
||||
public:
|
||||
State(size_t max_iters, const std::vector<int>& ranges,
|
||||
int thread_i, int n_threads);
|
||||
|
||||
// Returns true if the benchmark should continue through another iteration.
|
||||
// NOTE: A benchmark may not return from the test until KeepRunning() has
|
||||
// returned false.
|
||||
bool KeepRunning() {
|
||||
if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
|
||||
assert(!finished_);
|
||||
started_ = true;
|
||||
ResumeTiming();
|
||||
StartKeepRunning();
|
||||
}
|
||||
bool const res = total_iterations_++ < max_iterations;
|
||||
if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
|
||||
assert(started_ && (!finished_ || error_occurred_));
|
||||
if (!error_occurred_) {
|
||||
PauseTiming();
|
||||
}
|
||||
// Total iterations now is one greater than max iterations. Fix this.
|
||||
total_iterations_ = max_iterations;
|
||||
finished_ = true;
|
||||
FinishKeepRunning();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
@ -304,10 +298,11 @@ public:
|
|||
// Stop the benchmark timer. If not called, the timer will be
|
||||
// automatically stopped after KeepRunning() returns false for the first time.
|
||||
//
|
||||
// For threaded benchmarks the PauseTiming() function acts
|
||||
// like a barrier. I.e., the ith call by a particular thread to this
|
||||
// function will block until all active threads have made their ith call.
|
||||
// The timer will stop when the last thread has called this function.
|
||||
// For threaded benchmarks the PauseTiming() function only pauses the timing
|
||||
// for the current thread.
|
||||
//
|
||||
// NOTE: The "real time" measurement is per-thread. If different threads
|
||||
// report different measurements the largest one is reported.
|
||||
//
|
||||
// NOTE: PauseTiming()/ResumeTiming() are relatively
|
||||
// heavyweight, and so their use should generally be avoided
|
||||
|
@ -319,11 +314,6 @@ public:
|
|||
// Start the benchmark timer. The timer is NOT running on entrance to the
|
||||
// benchmark function. It begins running after the first call to KeepRunning()
|
||||
//
|
||||
// For threaded benchmarks the ResumeTiming() function acts
|
||||
// like a barrier. I.e., the ith call by a particular thread to this
|
||||
// function will block until all active threads have made their ith call.
|
||||
// The timer will start when the last thread has called this function.
|
||||
//
|
||||
// NOTE: PauseTiming()/ResumeTiming() are relatively
|
||||
// heavyweight, and so their use should generally be avoided
|
||||
// within each benchmark iteration, if possible.
|
||||
|
@ -335,10 +325,10 @@ public:
|
|||
// thread and report an error with the specified 'msg'. After this call
|
||||
// the user may explicitly 'return' from the benchmark.
|
||||
//
|
||||
// For threaded benchmarks only the current thread stops executing. If
|
||||
// multiple threads report an error only the first error message is used.
|
||||
// The current thread is no longer considered 'active' by
|
||||
// 'PauseTiming()' and 'ResumingTiming()'.
|
||||
// For threaded benchmarks only the current thread stops executing and future
|
||||
// calls to `KeepRunning()` will block until all threads have completed
|
||||
// the `KeepRunning()` loop. If multiple threads report an error only the
|
||||
// first error message is used.
|
||||
//
|
||||
// NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
|
||||
// the current scope immediately. If the function is called from within
|
||||
|
@ -351,10 +341,8 @@ public:
|
|||
// is used instead of automatically measured time if UseManualTime() was
|
||||
// specified.
|
||||
//
|
||||
// For threaded benchmarks the SetIterationTime() function acts
|
||||
// like a barrier. I.e., the ith call by a particular thread to this
|
||||
// function will block until all threads have made their ith call.
|
||||
// The time will be set by the last thread to call this function.
|
||||
// For threaded benchmarks the final value will be set to the largest
|
||||
// reported values.
|
||||
void SetIterationTime(double seconds);
|
||||
|
||||
// Set the number of bytes processed by the current benchmark
|
||||
|
@ -465,7 +453,16 @@ public:
|
|||
const int threads;
|
||||
const size_t max_iterations;
|
||||
|
||||
private:
|
||||
// TODO make me private
|
||||
State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
|
||||
int n_threads, internal::ThreadTimer* timer,
|
||||
internal::ThreadManager* manager);
|
||||
|
||||
private:
|
||||
void StartKeepRunning();
|
||||
void FinishKeepRunning();
|
||||
internal::ThreadTimer* timer_;
|
||||
internal::ThreadManager* manager_;
|
||||
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
|
||||
};
|
||||
|
||||
|
|
|
@ -8,9 +8,9 @@ endif()
|
|||
|
||||
# Define the source files
|
||||
set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc"
|
||||
"console_reporter.cc" "csv_reporter.cc" "json_reporter.cc"
|
||||
"log.cc" "reporter.cc" "sleep.cc" "string_util.cc"
|
||||
"sysinfo.cc" "walltime.cc" "complexity.cc")
|
||||
"console_reporter.cc" "csv_reporter.cc"
|
||||
"json_reporter.cc" "reporter.cc" "sleep.cc"
|
||||
"string_util.cc" "sysinfo.cc" "complexity.cc" "timers.cc")
|
||||
# Add headers to the list of source files. cmake does not require this,
|
||||
# but IDEs such as Visual Studio need this to add the headers
|
||||
# to the generated project.
|
||||
|
@ -19,8 +19,7 @@ list(APPEND SOURCE_FILES "${_d}/benchmark.h" "${_d}/benchmark_api.h"
|
|||
"${_d}/macros.h" "${_d}/reporter.h" "arraysize.h" "check.h"
|
||||
"colorprint.h" "commandlineflags.h" "complexity.h"
|
||||
"cycleclock.h" "internal_macros.h" "log.h" "mutex.h"
|
||||
"re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h"
|
||||
"walltime.h")
|
||||
"re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h" "timers.h")
|
||||
unset(_d)
|
||||
|
||||
# Determine the correct regular expression engine to use
|
||||
|
|
404
src/benchmark.cc
404
src/benchmark.cc
|
@ -41,7 +41,7 @@
|
|||
#include "stat.h"
|
||||
#include "string_util.h"
|
||||
#include "sysinfo.h"
|
||||
#include "walltime.h"
|
||||
#include "timers.h"
|
||||
|
||||
DEFINE_bool(benchmark_list_tests, false,
|
||||
"Print a list of benchmarks. This option overrides all other "
|
||||
|
@ -92,20 +92,7 @@ namespace internal {
|
|||
|
||||
void UseCharPointer(char const volatile*) {}
|
||||
|
||||
// NOTE: This is a dummy "mutex" type used to denote the actual mutex
|
||||
// returned by GetBenchmarkLock(). This is only used to placate the thread
|
||||
// safety warnings by giving the return of GetBenchmarkLock() a name.
|
||||
struct CAPABILITY("mutex") BenchmarkLockType {};
|
||||
BenchmarkLockType BenchmarkLockVar;
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
inline Mutex& RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar)
|
||||
GetBenchmarkLock()
|
||||
{
|
||||
static Mutex lock;
|
||||
return lock;
|
||||
}
|
||||
} // end namespace internal
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -120,148 +107,111 @@ static const int kRangeMultiplier = 8;
|
|||
static const size_t kMaxFamilySize = 100;
|
||||
static const size_t kMaxIterations = 1000000000;
|
||||
|
||||
bool running_benchmark = false;
|
||||
} // end namespace
|
||||
|
||||
// Global variable so that a benchmark can cause a little extra printing
|
||||
std::string* GetReportLabel() {
|
||||
static std::string label GUARDED_BY(GetBenchmarkLock());
|
||||
return &label;
|
||||
}
|
||||
namespace internal {
|
||||
|
||||
// Global variable so that a benchmark can report an error as a human readable
|
||||
// string. If error_message is null no error occurred.
|
||||
#if defined(_MSC_VER) && _MSC_VER <= 1800
|
||||
typedef char* error_message_type;
|
||||
#else
|
||||
typedef const char* error_message_type;
|
||||
#endif
|
||||
// NOTE: This is a dummy "mutex" type used to denote the actual mutex
|
||||
// returned by GetBenchmarkMutex(). This is only used to placate the thread
|
||||
// safety warnings by giving the return of GetBenchmarkLock() a name.
|
||||
struct CAPABILITY("mutex") BenchmarkLockType {};
|
||||
BenchmarkLockType BenchmarkLockVar;
|
||||
|
||||
static std::atomic<error_message_type> error_message = ATOMIC_VAR_INIT(nullptr);
|
||||
class ThreadManager {
|
||||
public:
|
||||
ThreadManager(int num_threads)
|
||||
: alive_threads_(num_threads), start_stop_barrier_(num_threads) {}
|
||||
|
||||
// TODO(ericwf): support MallocCounter.
|
||||
//static benchmark::MallocCounter *benchmark_mc;
|
||||
Mutex& GetBenchmarkMutex() const
|
||||
RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) {
|
||||
return benchmark_mutex_;
|
||||
}
|
||||
|
||||
struct ThreadStats {
|
||||
ThreadStats() : bytes_processed(0), items_processed(0), complexity_n(0) {}
|
||||
int64_t bytes_processed;
|
||||
int64_t items_processed;
|
||||
int complexity_n;
|
||||
bool StartStopBarrier() EXCLUDES(end_cond_mutex_) {
|
||||
return start_stop_barrier_.wait();
|
||||
}
|
||||
|
||||
void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) {
|
||||
start_stop_barrier_.removeThread();
|
||||
if (--alive_threads_ == 0) {
|
||||
MutexLock lock(end_cond_mutex_);
|
||||
end_condition_.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
void WaitForAllThreads() EXCLUDES(end_cond_mutex_) {
|
||||
MutexLock lock(end_cond_mutex_);
|
||||
end_condition_.wait(lock.native_handle(),
|
||||
[this]() { return alive_threads_ == 0; });
|
||||
}
|
||||
|
||||
public:
|
||||
GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0;
|
||||
GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0;
|
||||
GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0;
|
||||
GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0;
|
||||
GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0;
|
||||
GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0;
|
||||
GUARDED_BY(GetBenchmarkMutex()) std::string report_label_;
|
||||
GUARDED_BY(GetBenchmarkMutex()) std::string error_message_;
|
||||
GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false;
|
||||
|
||||
private:
|
||||
mutable Mutex benchmark_mutex_;
|
||||
std::atomic<int> alive_threads_;
|
||||
Barrier start_stop_barrier_;
|
||||
Mutex end_cond_mutex_;
|
||||
Condition end_condition_;
|
||||
};
|
||||
|
||||
// Timer management class
|
||||
class TimerManager {
|
||||
class ThreadTimer {
|
||||
public:
|
||||
TimerManager(int num_threads, Notification* done)
|
||||
: num_threads_(num_threads),
|
||||
running_threads_(num_threads),
|
||||
done_(done),
|
||||
running_(false),
|
||||
ThreadTimer()
|
||||
: running_(false),
|
||||
real_time_used_(0),
|
||||
cpu_time_used_(0),
|
||||
manual_time_used_(0),
|
||||
num_finalized_(0),
|
||||
phase_number_(0),
|
||||
entered_(0)
|
||||
{
|
||||
manual_time_used_(0) {}
|
||||
|
||||
// Called by each thread
|
||||
void StartTimer() {
|
||||
running_ = true;
|
||||
start_real_time_ = ChronoClockNow();
|
||||
start_cpu_time_ = ThreadCPUUsage();
|
||||
}
|
||||
|
||||
// Called by each thread
|
||||
void StartTimer() EXCLUDES(lock_) {
|
||||
bool last_thread = false;
|
||||
{
|
||||
MutexLock ml(lock_);
|
||||
last_thread = Barrier(ml);
|
||||
if (last_thread) {
|
||||
CHECK(!running_) << "Called StartTimer when timer is already running";
|
||||
running_ = true;
|
||||
start_real_time_ = walltime::Now();
|
||||
start_cpu_time_ = MyCPUUsage() + ChildrenCPUUsage();
|
||||
}
|
||||
}
|
||||
if (last_thread) {
|
||||
phase_condition_.notify_all();
|
||||
}
|
||||
void StopTimer() {
|
||||
CHECK(running_);
|
||||
running_ = false;
|
||||
real_time_used_ += ChronoClockNow() - start_real_time_;
|
||||
cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_;
|
||||
}
|
||||
|
||||
// Called by each thread
|
||||
void StopTimer() EXCLUDES(lock_) {
|
||||
bool last_thread = false;
|
||||
{
|
||||
MutexLock ml(lock_);
|
||||
last_thread = Barrier(ml);
|
||||
if (last_thread) {
|
||||
CHECK(running_) << "Called StopTimer when timer is already stopped";
|
||||
InternalStop();
|
||||
}
|
||||
}
|
||||
if (last_thread) {
|
||||
phase_condition_.notify_all();
|
||||
}
|
||||
}
|
||||
void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
|
||||
|
||||
// Called by each thread
|
||||
void SetIterationTime(double seconds) EXCLUDES(lock_) {
|
||||
bool last_thread = false;
|
||||
{
|
||||
MutexLock ml(lock_);
|
||||
last_thread = Barrier(ml);
|
||||
if (last_thread) {
|
||||
manual_time_used_ += seconds;
|
||||
}
|
||||
}
|
||||
if (last_thread) {
|
||||
phase_condition_.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
// Called by each thread
|
||||
void Finalize() EXCLUDES(lock_) {
|
||||
MutexLock l(lock_);
|
||||
num_finalized_++;
|
||||
if (num_finalized_ == num_threads_) {
|
||||
CHECK(!running_) <<
|
||||
"The timer should be stopped before the timer is finalized";
|
||||
done_->Notify();
|
||||
}
|
||||
}
|
||||
|
||||
void RemoveErroredThread() EXCLUDES(lock_) {
|
||||
MutexLock ml(lock_);
|
||||
int last_thread = --running_threads_ == 0;
|
||||
if (last_thread && running_)
|
||||
InternalStop();
|
||||
else if (!last_thread)
|
||||
phase_condition_.notify_all();
|
||||
}
|
||||
bool running() const { return running_; }
|
||||
|
||||
// REQUIRES: timer is not running
|
||||
double real_time_used() EXCLUDES(lock_) {
|
||||
MutexLock l(lock_);
|
||||
double real_time_used() {
|
||||
CHECK(!running_);
|
||||
return real_time_used_;
|
||||
}
|
||||
|
||||
// REQUIRES: timer is not running
|
||||
double cpu_time_used() EXCLUDES(lock_) {
|
||||
MutexLock l(lock_);
|
||||
double cpu_time_used() {
|
||||
CHECK(!running_);
|
||||
return cpu_time_used_;
|
||||
}
|
||||
|
||||
// REQUIRES: timer is not running
|
||||
double manual_time_used() EXCLUDES(lock_) {
|
||||
MutexLock l(lock_);
|
||||
double manual_time_used() {
|
||||
CHECK(!running_);
|
||||
return manual_time_used_;
|
||||
}
|
||||
|
||||
private:
|
||||
Mutex lock_;
|
||||
Condition phase_condition_;
|
||||
int num_threads_;
|
||||
int running_threads_;
|
||||
Notification* done_;
|
||||
|
||||
bool running_; // Is the timer running
|
||||
double start_real_time_; // If running_
|
||||
double start_cpu_time_; // If running_
|
||||
|
@ -272,52 +222,8 @@ class TimerManager {
|
|||
// Manually set iteration time. User sets this with SetIterationTime(seconds).
|
||||
double manual_time_used_;
|
||||
|
||||
// How many threads have called Finalize()
|
||||
int num_finalized_;
|
||||
|
||||
// State for barrier management
|
||||
int phase_number_;
|
||||
int entered_; // Number of threads that have entered this barrier
|
||||
|
||||
void InternalStop() REQUIRES(lock_) {
|
||||
CHECK(running_);
|
||||
running_ = false;
|
||||
real_time_used_ += walltime::Now() - start_real_time_;
|
||||
cpu_time_used_ += ((MyCPUUsage() + ChildrenCPUUsage())
|
||||
- start_cpu_time_);
|
||||
}
|
||||
|
||||
// Enter the barrier and wait until all other threads have also
|
||||
// entered the barrier. Returns iff this is the last thread to
|
||||
// enter the barrier.
|
||||
bool Barrier(MutexLock& ml) REQUIRES(lock_) {
|
||||
CHECK_LT(entered_, running_threads_);
|
||||
entered_++;
|
||||
if (entered_ < running_threads_) {
|
||||
// Wait for all threads to enter
|
||||
int phase_number_cp = phase_number_;
|
||||
auto cb = [this, phase_number_cp]() {
|
||||
return this->phase_number_ > phase_number_cp ||
|
||||
entered_ == running_threads_; // A thread has aborted in error
|
||||
};
|
||||
phase_condition_.wait(ml.native_handle(), cb);
|
||||
if (phase_number_ > phase_number_cp)
|
||||
return false;
|
||||
// else (running_threads_ == entered_) and we are the last thread.
|
||||
}
|
||||
// Last thread has reached the barrier
|
||||
phase_number_++;
|
||||
entered_ = 0;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// TimerManager for current run.
|
||||
static std::unique_ptr<TimerManager> timer_manager = nullptr;
|
||||
|
||||
} // end namespace
|
||||
|
||||
namespace internal {
|
||||
|
||||
enum ReportMode : unsigned {
|
||||
RM_Unspecified, // The mode has not been manually specified
|
||||
|
@ -802,32 +708,33 @@ namespace {
|
|||
// Adds the stats collected for the thread into *total.
|
||||
void RunInThread(const benchmark::internal::Benchmark::Instance* b,
|
||||
size_t iters, int thread_id,
|
||||
ThreadStats* total) EXCLUDES(GetBenchmarkLock()) {
|
||||
State st(iters, b->arg, thread_id, b->threads);
|
||||
internal::ThreadManager* manager) {
|
||||
internal::ThreadTimer timer;
|
||||
State st(iters, b->arg, thread_id, b->threads, &timer, manager);
|
||||
b->benchmark->Run(st);
|
||||
CHECK(st.iterations() == st.max_iterations) <<
|
||||
"Benchmark returned before State::KeepRunning() returned false!";
|
||||
{
|
||||
MutexLock l(GetBenchmarkLock());
|
||||
total->bytes_processed += st.bytes_processed();
|
||||
total->items_processed += st.items_processed();
|
||||
total->complexity_n += st.complexity_length_n();
|
||||
MutexLock l(manager->GetBenchmarkMutex());
|
||||
manager->cpu_time_used += timer.cpu_time_used();
|
||||
manager->real_time_used += timer.real_time_used();
|
||||
manager->manual_time_used += timer.manual_time_used();
|
||||
manager->bytes_processed += st.bytes_processed();
|
||||
manager->items_processed += st.items_processed();
|
||||
manager->complexity_n += st.complexity_length_n();
|
||||
}
|
||||
|
||||
timer_manager->Finalize();
|
||||
manager->NotifyThreadComplete();
|
||||
}
|
||||
|
||||
std::vector<BenchmarkReporter::Run>
|
||||
RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
||||
std::vector<BenchmarkReporter::Run>* complexity_reports)
|
||||
EXCLUDES(GetBenchmarkLock()) {
|
||||
std::vector<BenchmarkReporter::Run> RunBenchmark(
|
||||
const benchmark::internal::Benchmark::Instance& b,
|
||||
std::vector<BenchmarkReporter::Run>* complexity_reports) {
|
||||
std::vector<BenchmarkReporter::Run> reports; // return value
|
||||
|
||||
size_t iters = 1;
|
||||
|
||||
const int num_threads = b.multithreaded ? b.threads : 1;
|
||||
std::vector<std::thread> pool;
|
||||
if (b.multithreaded)
|
||||
pool.resize(b.threads);
|
||||
if (num_threads > 1) pool.resize(num_threads -1);
|
||||
|
||||
const int repeats = b.repetitions != 0 ? b.repetitions
|
||||
: FLAGS_benchmark_repetitions;
|
||||
|
@ -841,17 +748,7 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
|||
// Try benchmark
|
||||
VLOG(2) << "Running " << b.name << " for " << iters << "\n";
|
||||
|
||||
{
|
||||
MutexLock l(GetBenchmarkLock());
|
||||
GetReportLabel()->clear();
|
||||
}
|
||||
error_message = nullptr;
|
||||
|
||||
Notification done;
|
||||
timer_manager = std::unique_ptr<TimerManager>(new TimerManager(b.threads, &done));
|
||||
|
||||
ThreadStats total;
|
||||
running_benchmark = true;
|
||||
internal::ThreadManager manager(num_threads);
|
||||
if (b.multithreaded) {
|
||||
// If this is out first iteration of the while(true) loop then the
|
||||
// threads haven't been started and can't be joined. Otherwise we need
|
||||
|
@ -861,19 +758,17 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
|||
thread.join();
|
||||
}
|
||||
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
|
||||
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti), &total);
|
||||
pool[ti] = std::thread(&RunInThread, &b, iters,
|
||||
static_cast<int>(ti + 1), &manager);
|
||||
}
|
||||
} else {
|
||||
// Run directly in this thread
|
||||
RunInThread(&b, iters, 0, &total);
|
||||
}
|
||||
done.WaitForNotification();
|
||||
running_benchmark = false;
|
||||
RunInThread(&b, iters, 0, &manager);
|
||||
manager.WaitForAllThreads();
|
||||
MutexLock l(manager.GetBenchmarkMutex());
|
||||
|
||||
const double cpu_accumulated_time = timer_manager->cpu_time_used();
|
||||
const double real_accumulated_time = timer_manager->real_time_used();
|
||||
const double manual_accumulated_time = timer_manager->manual_time_used();
|
||||
timer_manager.reset();
|
||||
const double cpu_accumulated_time = manager.cpu_time_used;
|
||||
const double real_accumulated_time = manager.real_time_used / num_threads;
|
||||
const double manual_accumulated_time = manager.manual_time_used / num_threads;
|
||||
|
||||
VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
|
||||
<< real_accumulated_time << "\n";
|
||||
|
@ -886,41 +781,30 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
|||
seconds = real_accumulated_time;
|
||||
}
|
||||
|
||||
std::string label;
|
||||
{
|
||||
MutexLock l(GetBenchmarkLock());
|
||||
label = *GetReportLabel();
|
||||
}
|
||||
error_message_type error_msg = error_message;
|
||||
|
||||
const double min_time = !IsZero(b.min_time) ? b.min_time
|
||||
: FLAGS_benchmark_min_time;
|
||||
|
||||
// If this was the first run, was elapsed time or cpu time large enough?
|
||||
// If this is not the first run, go with the current value of iter.
|
||||
if ((i > 0) || (error_msg != nullptr) ||
|
||||
(iters >= kMaxIterations) ||
|
||||
(seconds >= min_time) ||
|
||||
(real_accumulated_time >= 5*min_time)) {
|
||||
|
||||
if ((i > 0) || manager.has_error_ || (iters >= kMaxIterations) ||
|
||||
(seconds >= min_time) || (real_accumulated_time >= 5 * min_time)) {
|
||||
// Create report about this benchmark run.
|
||||
BenchmarkReporter::Run report;
|
||||
report.benchmark_name = b.name;
|
||||
report.error_occurred = error_msg != nullptr;
|
||||
report.error_message = error_msg != nullptr ? error_msg : "";
|
||||
report.report_label = label;
|
||||
report.error_occurred = manager.has_error_;
|
||||
report.error_message = manager.error_message_;
|
||||
report.report_label = manager.report_label_;
|
||||
// Report the total iterations across all threads.
|
||||
report.iterations = static_cast<int64_t>(iters) * b.threads;
|
||||
report.time_unit = b.time_unit;
|
||||
|
||||
if (!report.error_occurred) {
|
||||
double bytes_per_second = 0;
|
||||
if (total.bytes_processed > 0 && seconds > 0.0) {
|
||||
bytes_per_second = (total.bytes_processed / seconds);
|
||||
if (manager.bytes_processed > 0 && seconds > 0.0) {
|
||||
bytes_per_second = (manager.bytes_processed / seconds);
|
||||
}
|
||||
double items_per_second = 0;
|
||||
if (total.items_processed > 0 && seconds > 0.0) {
|
||||
items_per_second = (total.items_processed / seconds);
|
||||
if (manager.items_processed > 0 && seconds > 0.0) {
|
||||
items_per_second = (manager.items_processed / seconds);
|
||||
}
|
||||
|
||||
if (b.use_manual_time) {
|
||||
|
@ -931,7 +815,7 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
|||
report.cpu_accumulated_time = cpu_accumulated_time;
|
||||
report.bytes_per_second = bytes_per_second;
|
||||
report.items_per_second = items_per_second;
|
||||
report.complexity_n = total.complexity_n;
|
||||
report.complexity_n = manager.complexity_n;
|
||||
report.complexity = b.complexity;
|
||||
report.complexity_lambda = b.complexity_lambda;
|
||||
if(report.complexity != oNone)
|
||||
|
@ -981,55 +865,77 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
|||
|
||||
} // namespace
|
||||
|
||||
State::State(size_t max_iters, const std::vector<int>& ranges,
|
||||
int thread_i, int n_threads)
|
||||
: started_(false), finished_(false), total_iterations_(0),
|
||||
State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
|
||||
int n_threads, internal::ThreadTimer* timer,
|
||||
internal::ThreadManager* manager)
|
||||
: started_(false),
|
||||
finished_(false),
|
||||
total_iterations_(0),
|
||||
range_(ranges),
|
||||
bytes_processed_(0), items_processed_(0),
|
||||
bytes_processed_(0),
|
||||
items_processed_(0),
|
||||
complexity_n_(0),
|
||||
error_occurred_(false),
|
||||
thread_index(thread_i),
|
||||
threads(n_threads),
|
||||
max_iterations(max_iters)
|
||||
{
|
||||
CHECK(max_iterations != 0) << "At least one iteration must be run";
|
||||
CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
|
||||
max_iterations(max_iters),
|
||||
timer_(timer),
|
||||
manager_(manager) {
|
||||
CHECK(max_iterations != 0) << "At least one iteration must be run";
|
||||
CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
|
||||
}
|
||||
|
||||
void State::PauseTiming() {
|
||||
// Add in time accumulated so far
|
||||
CHECK(running_benchmark);
|
||||
CHECK(started_ && !finished_ && !error_occurred_);
|
||||
timer_manager->StopTimer();
|
||||
timer_->StopTimer();
|
||||
}
|
||||
|
||||
void State::ResumeTiming() {
|
||||
CHECK(running_benchmark);
|
||||
CHECK(started_ && !finished_ && !error_occurred_);
|
||||
timer_manager->StartTimer();
|
||||
timer_->StartTimer();
|
||||
}
|
||||
|
||||
void State::SkipWithError(const char* msg) {
|
||||
CHECK(msg);
|
||||
error_occurred_ = true;
|
||||
error_message_type expected_no_error_msg = nullptr;
|
||||
error_message.compare_exchange_weak(expected_no_error_msg,
|
||||
const_cast<error_message_type>(msg));
|
||||
started_ = finished_ = true;
|
||||
{
|
||||
MutexLock l(manager_->GetBenchmarkMutex());
|
||||
if (manager_->has_error_ == false) {
|
||||
manager_->error_message_ = msg;
|
||||
manager_->has_error_ = true;
|
||||
}
|
||||
}
|
||||
total_iterations_ = max_iterations;
|
||||
timer_manager->RemoveErroredThread();
|
||||
if (timer_->running()) timer_->StopTimer();
|
||||
}
|
||||
|
||||
void State::SetIterationTime(double seconds)
|
||||
{
|
||||
CHECK(running_benchmark);
|
||||
timer_manager->SetIterationTime(seconds);
|
||||
timer_->SetIterationTime(seconds);
|
||||
}
|
||||
|
||||
void State::SetLabel(const char* label) {
|
||||
CHECK(running_benchmark);
|
||||
MutexLock l(GetBenchmarkLock());
|
||||
*GetReportLabel() = label;
|
||||
MutexLock l(manager_->GetBenchmarkMutex());
|
||||
manager_->report_label_ = label;
|
||||
}
|
||||
|
||||
void State::StartKeepRunning() {
|
||||
CHECK(!started_ && !finished_);
|
||||
started_ = true;
|
||||
manager_->StartStopBarrier();
|
||||
if (!error_occurred_) ResumeTiming();
|
||||
}
|
||||
|
||||
void State::FinishKeepRunning() {
|
||||
CHECK(started_ && (!finished_ || error_occurred_));
|
||||
if (!error_occurred_) {
|
||||
PauseTiming();
|
||||
}
|
||||
// Total iterations now is one greater than max iterations. Fix this.
|
||||
total_iterations_ = max_iterations;
|
||||
finished_ = true;
|
||||
manager_->StartStopBarrier();
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
@ -1231,13 +1137,7 @@ int InitializeStreams() {
|
|||
|
||||
void Initialize(int* argc, char** argv) {
|
||||
internal::ParseCommandLineFlags(argc, argv);
|
||||
internal::SetLogLevel(FLAGS_v);
|
||||
// TODO remove this. It prints some output the first time it is called.
|
||||
// We don't want to have this ouput printed during benchmarking.
|
||||
MyCPUUsage();
|
||||
// The first call to walltime::Now initialized it. Call it once to
|
||||
// prevent the initialization from happening in a benchmark.
|
||||
walltime::Now();
|
||||
internal::LogLevel() = FLAGS_v;
|
||||
}
|
||||
|
||||
} // end namespace benchmark
|
||||
|
|
|
@ -33,9 +33,7 @@ public:
|
|||
<< check << "' failed. ";
|
||||
}
|
||||
|
||||
std::ostream& GetLog() {
|
||||
return log_;
|
||||
}
|
||||
LogType& GetLog() { return log_; }
|
||||
|
||||
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
|
||||
log_ << std::endl;
|
||||
|
@ -46,7 +44,7 @@ public:
|
|||
CheckHandler(const CheckHandler&) = delete;
|
||||
CheckHandler() = delete;
|
||||
private:
|
||||
std::ostream& log_;
|
||||
LogType& log_;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#include "commandlineflags.h"
|
||||
#include "internal_macros.h"
|
||||
#include "string_util.h"
|
||||
#include "walltime.h"
|
||||
#include "timers.h"
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "string_util.h"
|
||||
#include "walltime.h"
|
||||
#include "timers.h"
|
||||
|
||||
// File format reference: http://edoceo.com/utilitas/csv-file-format.
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "string_util.h"
|
||||
#include "walltime.h"
|
||||
#include "timers.h"
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
|
|
40
src/log.cc
40
src/log.cc
|
@ -1,40 +0,0 @@
|
|||
#include "log.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace benchmark {
|
||||
namespace internal {
|
||||
|
||||
int& LoggingLevelImp() {
|
||||
static int level = 0;
|
||||
return level;
|
||||
}
|
||||
|
||||
void SetLogLevel(int value) {
|
||||
LoggingLevelImp() = value;
|
||||
}
|
||||
|
||||
int GetLogLevel() {
|
||||
return LoggingLevelImp();
|
||||
}
|
||||
|
||||
class NullLogBuffer : public std::streambuf
|
||||
{
|
||||
public:
|
||||
int overflow(int c) {
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
std::ostream& GetNullLogInstance() {
|
||||
static NullLogBuffer log_buff;
|
||||
static std::ostream null_log(&log_buff);
|
||||
return null_log;
|
||||
}
|
||||
|
||||
std::ostream& GetErrorLogInstance() {
|
||||
return std::clog;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace benchmark
|
56
src/log.h
56
src/log.h
|
@ -1,19 +1,63 @@
|
|||
#ifndef BENCHMARK_LOG_H_
|
||||
#define BENCHMARK_LOG_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
|
||||
#include "benchmark/macros.h"
|
||||
|
||||
namespace benchmark {
|
||||
namespace internal {
|
||||
|
||||
int GetLogLevel();
|
||||
void SetLogLevel(int level);
|
||||
typedef std::basic_ostream<char>&(EndLType)(std::basic_ostream<char>&);
|
||||
|
||||
std::ostream& GetNullLogInstance();
|
||||
std::ostream& GetErrorLogInstance();
|
||||
class LogType {
|
||||
friend LogType& GetNullLogInstance();
|
||||
friend LogType& GetErrorLogInstance();
|
||||
|
||||
inline std::ostream& GetLogInstanceForLevel(int level) {
|
||||
if (level <= GetLogLevel()) {
|
||||
// FIXME: Add locking to output.
|
||||
template <class Tp>
|
||||
friend LogType& operator<<(LogType&, Tp const&);
|
||||
friend LogType& operator<<(LogType&, EndLType*);
|
||||
|
||||
private:
|
||||
LogType(std::ostream* out) : out_(out) {}
|
||||
std::ostream* out_;
|
||||
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType);
|
||||
};
|
||||
|
||||
template <class Tp>
|
||||
LogType& operator<<(LogType& log, Tp const& value) {
|
||||
if (log.out_) {
|
||||
*log.out_ << value;
|
||||
}
|
||||
return log;
|
||||
}
|
||||
|
||||
inline LogType& operator<<(LogType& log, EndLType* m) {
|
||||
if (log.out_) {
|
||||
*log.out_ << m;
|
||||
}
|
||||
return log;
|
||||
}
|
||||
|
||||
inline int& LogLevel() {
|
||||
static int log_level = 0;
|
||||
return log_level;
|
||||
}
|
||||
|
||||
inline LogType& GetNullLogInstance() {
|
||||
static LogType log(nullptr);
|
||||
return log;
|
||||
}
|
||||
|
||||
inline LogType& GetErrorLogInstance() {
|
||||
static LogType log(&std::clog);
|
||||
return log;
|
||||
}
|
||||
|
||||
inline LogType& GetLogInstanceForLevel(int level) {
|
||||
if (level <= LogLevel()) {
|
||||
return GetErrorLogInstance();
|
||||
}
|
||||
return GetNullLogInstance();
|
||||
|
|
70
src/mutex.h
70
src/mutex.h
|
@ -4,6 +4,8 @@
|
|||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
|
||||
#include "check.h"
|
||||
|
||||
// Enable thread safety attributes only with clang.
|
||||
// The attributes can be safely erased when compiling with other compilers.
|
||||
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
|
||||
|
@ -105,36 +107,58 @@ private:
|
|||
MutexLockImp ml_;
|
||||
};
|
||||
|
||||
class Barrier {
|
||||
public:
|
||||
Barrier(int num_threads) : running_threads_(num_threads) {}
|
||||
|
||||
class Notification
|
||||
{
|
||||
public:
|
||||
Notification() : notified_yet_(false) { }
|
||||
|
||||
void WaitForNotification() const EXCLUDES(mutex_) {
|
||||
MutexLock m_lock(mutex_);
|
||||
auto notified_fn = [this]() REQUIRES(mutex_) {
|
||||
return this->HasBeenNotified();
|
||||
};
|
||||
cv_.wait(m_lock.native_handle(), notified_fn);
|
||||
}
|
||||
|
||||
void Notify() EXCLUDES(mutex_) {
|
||||
// Called by each thread
|
||||
bool wait() EXCLUDES(lock_) {
|
||||
bool last_thread = false;
|
||||
{
|
||||
MutexLock lock(mutex_);
|
||||
notified_yet_ = 1;
|
||||
MutexLock ml(lock_);
|
||||
last_thread = createBarrier(ml);
|
||||
}
|
||||
cv_.notify_all();
|
||||
if (last_thread) phase_condition_.notify_all();
|
||||
return last_thread;
|
||||
}
|
||||
|
||||
private:
|
||||
bool HasBeenNotified() const REQUIRES(mutex_) {
|
||||
return notified_yet_;
|
||||
void removeThread() EXCLUDES(lock_) {
|
||||
MutexLock ml(lock_);
|
||||
--running_threads_;
|
||||
if (entered_ != 0) phase_condition_.notify_all();
|
||||
}
|
||||
|
||||
mutable Mutex mutex_;
|
||||
mutable std::condition_variable cv_;
|
||||
bool notified_yet_ GUARDED_BY(mutex_);
|
||||
private:
|
||||
Mutex lock_;
|
||||
Condition phase_condition_;
|
||||
int running_threads_;
|
||||
|
||||
// State for barrier management
|
||||
int phase_number_ = 0;
|
||||
int entered_ = 0; // Number of threads that have entered this barrier
|
||||
|
||||
// Enter the barrier and wait until all other threads have also
|
||||
// entered the barrier. Returns iff this is the last thread to
|
||||
// enter the barrier.
|
||||
bool createBarrier(MutexLock& ml) REQUIRES(lock_) {
|
||||
CHECK_LT(entered_, running_threads_);
|
||||
entered_++;
|
||||
if (entered_ < running_threads_) {
|
||||
// Wait for all threads to enter
|
||||
int phase_number_cp = phase_number_;
|
||||
auto cb = [this, phase_number_cp]() {
|
||||
return this->phase_number_ > phase_number_cp ||
|
||||
entered_ == running_threads_; // A thread has aborted in error
|
||||
};
|
||||
phase_condition_.wait(ml.native_handle(), cb);
|
||||
if (phase_number_ > phase_number_cp) return false;
|
||||
// else (running_threads_ == entered_) and we are the last thread.
|
||||
}
|
||||
// Last thread has reached the barrier
|
||||
phase_number_++;
|
||||
entered_ = 0;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace benchmark
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// limitations under the License.
|
||||
|
||||
#include "benchmark/reporter.h"
|
||||
#include "walltime.h"
|
||||
#include "timers.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
|
|
|
@ -52,7 +52,6 @@ namespace {
|
|||
std::once_flag cpuinfo_init;
|
||||
double cpuinfo_cycles_per_second = 1.0;
|
||||
int cpuinfo_num_cpus = 1; // Conservative guess
|
||||
std::mutex cputimens_mutex;
|
||||
|
||||
#if !defined BENCHMARK_OS_MACOSX
|
||||
const int64_t estimate_time_ms = 1000;
|
||||
|
@ -288,102 +287,9 @@ void InitializeSystemInfo() {
|
|||
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
|
||||
#endif
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
// getrusage() based implementation of MyCPUUsage
|
||||
static double MyCPUUsageRUsage() {
|
||||
#ifndef BENCHMARK_OS_WINDOWS
|
||||
struct rusage ru;
|
||||
if (getrusage(RUSAGE_SELF, &ru) == 0) {
|
||||
return (static_cast<double>(ru.ru_utime.tv_sec) +
|
||||
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
|
||||
static_cast<double>(ru.ru_stime.tv_sec) +
|
||||
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
#else
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
FILETIME creation_time;
|
||||
FILETIME exit_time;
|
||||
FILETIME kernel_time;
|
||||
FILETIME user_time;
|
||||
ULARGE_INTEGER kernel;
|
||||
ULARGE_INTEGER user;
|
||||
GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time);
|
||||
kernel.HighPart = kernel_time.dwHighDateTime;
|
||||
kernel.LowPart = kernel_time.dwLowDateTime;
|
||||
user.HighPart = user_time.dwHighDateTime;
|
||||
user.LowPart = user_time.dwLowDateTime;
|
||||
return (static_cast<double>(kernel.QuadPart) +
|
||||
static_cast<double>(user.QuadPart)) * 1e-7;
|
||||
#endif // OS_WINDOWS
|
||||
}
|
||||
|
||||
#ifndef BENCHMARK_OS_WINDOWS
|
||||
static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
|
||||
static int cputime_fd = -1;
|
||||
if (cputime_fd == -1) {
|
||||
cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
|
||||
if (cputime_fd < 0) {
|
||||
cputime_fd = -1;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
char buff[64];
|
||||
memset(buff, 0, sizeof(buff));
|
||||
if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) {
|
||||
close(cputime_fd);
|
||||
cputime_fd = -1;
|
||||
return false;
|
||||
}
|
||||
unsigned long long result = strtoull(buff, nullptr, 0);
|
||||
if (result == (std::numeric_limits<unsigned long long>::max)()) {
|
||||
close(cputime_fd);
|
||||
cputime_fd = -1;
|
||||
return false;
|
||||
}
|
||||
*cputime = static_cast<double>(result) / 1e9;
|
||||
return true;
|
||||
}
|
||||
#endif // OS_WINDOWS
|
||||
|
||||
double MyCPUUsage() {
|
||||
#ifndef BENCHMARK_OS_WINDOWS
|
||||
{
|
||||
std::lock_guard<std::mutex> l(cputimens_mutex);
|
||||
static bool use_cputime_ns = true;
|
||||
if (use_cputime_ns) {
|
||||
double value;
|
||||
if (MyCPUUsageCPUTimeNsLocked(&value)) {
|
||||
return value;
|
||||
}
|
||||
// Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
|
||||
VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
|
||||
use_cputime_ns = false;
|
||||
}
|
||||
}
|
||||
#endif // OS_WINDOWS
|
||||
return MyCPUUsageRUsage();
|
||||
}
|
||||
|
||||
double ChildrenCPUUsage() {
|
||||
#ifndef BENCHMARK_OS_WINDOWS
|
||||
struct rusage ru;
|
||||
if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
|
||||
return (static_cast<double>(ru.ru_utime.tv_sec) +
|
||||
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
|
||||
static_cast<double>(ru.ru_stime.tv_sec) +
|
||||
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
#else
|
||||
// TODO: Not sure what this even means on Windows
|
||||
return 0.0;
|
||||
#endif // OS_WINDOWS
|
||||
}
|
||||
|
||||
double CyclesPerSecond(void) {
|
||||
std::call_once(cpuinfo_init, InitializeSystemInfo);
|
||||
return cpuinfo_cycles_per_second;
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
#define BENCHMARK_SYSINFO_H_
|
||||
|
||||
namespace benchmark {
|
||||
double MyCPUUsage();
|
||||
double ChildrenCPUUsage();
|
||||
int NumCPUs();
|
||||
double CyclesPerSecond();
|
||||
bool CpuScalingEnabled();
|
||||
|
|
|
@ -0,0 +1,195 @@
|
|||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "timers.h"
|
||||
#include "internal_macros.h"
|
||||
|
||||
#ifdef BENCHMARK_OS_WINDOWS
|
||||
#include <Shlwapi.h>
|
||||
#include <VersionHelpers.h>
|
||||
#include <Windows.h>
|
||||
#else
|
||||
#include <fcntl.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
|
||||
#include <unistd.h>
|
||||
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
#if defined(BENCHMARK_OS_MACOSX)
|
||||
#include <mach/mach_init.h>
|
||||
#include <mach/mach_port.h>
|
||||
#include <mach/thread_act.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
|
||||
#include "check.h"
|
||||
#include "log.h"
|
||||
#include "sleep.h"
|
||||
#include "string_util.h"
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
// Suppress unused warnings on helper functions.
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
#if defined(BENCHMARK_OS_WINDOWS)
|
||||
double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) {
|
||||
ULARGE_INTEGER kernel;
|
||||
ULARGE_INTEGER user;
|
||||
kernel.HighPart = kernel_time.dwHighDateTime;
|
||||
kernel.LowPart = kernel_time.dwLowDateTime;
|
||||
user.HighPart = user_time.dwHighDateTime;
|
||||
user.LowPart = user_time.dwLowDateTime;
|
||||
return (static_cast<double>(kernel.QuadPart) +
|
||||
static_cast<double>(user.QuadPart)) *
|
||||
1e-7;
|
||||
}
|
||||
#else
|
||||
double MakeTime(struct timespec const& ts) {
|
||||
return ts.tv_sec + (static_cast<double>(ts.tv_nsec) * 1e-9);
|
||||
}
|
||||
double MakeTime(struct rusage ru) {
|
||||
return (static_cast<double>(ru.ru_utime.tv_sec) +
|
||||
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
|
||||
static_cast<double>(ru.ru_stime.tv_sec) +
|
||||
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
|
||||
}
|
||||
#endif
|
||||
#if defined(BENCHMARK_OS_MACOSX)
|
||||
double MakeTime(thread_basic_info_data_t const& info) {
|
||||
return (static_cast<double>(info.user_time.seconds) +
|
||||
static_cast<double>(info.user_time.microseconds) * 1e-6 +
|
||||
static_cast<double>(info.system_time.seconds) +
|
||||
static_cast<double>(info.user_time.microseconds) * 1e-6);
|
||||
}
|
||||
#endif
|
||||
|
||||
BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) {
|
||||
std::cerr << "ERROR: " << msg << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
double ProcessCPUUsage() {
|
||||
#if defined(BENCHMARK_OS_WINDOWS)
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
FILETIME creation_time;
|
||||
FILETIME exit_time;
|
||||
FILETIME kernel_time;
|
||||
FILETIME user_time;
|
||||
if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time))
|
||||
return MakeTime(kernel_time, user_time);
|
||||
DiagnoseAndExit("GetProccessTimes() failed");
|
||||
#elif defined(CLOCK_PROCESS_CPUTIME_ID)
|
||||
struct timespec spec;
|
||||
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
|
||||
return MakeTime(spec);
|
||||
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
|
||||
#else
|
||||
struct rusage ru;
|
||||
if (getrusage(RUSAGE_SELF, &ru) == 0)
|
||||
return MakeTime(ru);
|
||||
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
|
||||
#endif
|
||||
}
|
||||
|
||||
double ThreadCPUUsage() {
|
||||
#if defined(BENCHMARK_OS_WINDOWS)
|
||||
HANDLE this_thread = GetCurrentThread();
|
||||
FILETIME creation_time;
|
||||
FILETIME exit_time;
|
||||
FILETIME kernel_time;
|
||||
FILETIME user_time;
|
||||
GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time,
|
||||
&user_time);
|
||||
return MakeTime(kernel_time, user_time);
|
||||
#elif defined(CLOCK_THREAD_CPUTIME_ID)
|
||||
struct timespec ts;
|
||||
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0)
|
||||
return MakeTime(ts);
|
||||
DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
|
||||
#elif defined(BENCHMARK_OS_MACOSX)
|
||||
mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
|
||||
thread_basic_info_data_t info;
|
||||
mach_port_t thread = pthread_mach_thread_np(pthread_self());
|
||||
if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t) &info, &count)
|
||||
== KERN_SUCCESS) {
|
||||
return MakeTime(info);
|
||||
}
|
||||
DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info");
|
||||
#else
|
||||
#error Per-thread timing is not available on your system.
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
std::string DateTimeString(bool local) {
|
||||
typedef std::chrono::system_clock Clock;
|
||||
std::time_t now = Clock::to_time_t(Clock::now());
|
||||
const std::size_t kStorageSize = 128;
|
||||
char storage[kStorageSize];
|
||||
std::size_t written;
|
||||
|
||||
if (local) {
|
||||
#if defined(BENCHMARK_OS_WINDOWS)
|
||||
written =
|
||||
std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
|
||||
#else
|
||||
std::tm timeinfo;
|
||||
std::memset(&timeinfo, 0, sizeof(std::tm));
|
||||
::localtime_r(&now, &timeinfo);
|
||||
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
|
||||
#endif
|
||||
} else {
|
||||
#if defined(BENCHMARK_OS_WINDOWS)
|
||||
written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
|
||||
#else
|
||||
std::tm timeinfo;
|
||||
std::memset(&timeinfo, 0, sizeof(std::tm));
|
||||
::gmtime_r(&now, &timeinfo);
|
||||
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
|
||||
#endif
|
||||
}
|
||||
CHECK(written < kStorageSize);
|
||||
((void)written); // prevent unused variable in optimized mode.
|
||||
return std::string(storage);
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
std::string LocalDateTimeString() { return DateTimeString(true); }
|
||||
|
||||
} // end namespace benchmark
|
|
@ -0,0 +1,48 @@
|
|||
#ifndef BENCHMARK_TIMERS_H
|
||||
#define BENCHMARK_TIMERS_H
|
||||
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
// Return the CPU usage of the current process
|
||||
double ProcessCPUUsage();
|
||||
|
||||
// Return the CPU usage of the children of the current process
|
||||
double ChildrenCPUUsage();
|
||||
|
||||
// Return the CPU usage of the current thread
|
||||
double ThreadCPUUsage();
|
||||
|
||||
#if defined(HAVE_STEADY_CLOCK)
|
||||
template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady>
|
||||
struct ChooseSteadyClock {
|
||||
typedef std::chrono::high_resolution_clock type;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ChooseSteadyClock<false> {
|
||||
typedef std::chrono::steady_clock type;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct ChooseClockType {
|
||||
#if defined(HAVE_STEADY_CLOCK)
|
||||
typedef ChooseSteadyClock<>::type type;
|
||||
#else
|
||||
typedef std::chrono::high_resolution_clock type;
|
||||
#endif
|
||||
};
|
||||
|
||||
inline double ChronoClockNow() {
|
||||
typedef ChooseClockType::type ClockType;
|
||||
using FpSeconds = std::chrono::duration<double, std::chrono::seconds::period>;
|
||||
return FpSeconds(ClockType::now().time_since_epoch()).count();
|
||||
}
|
||||
|
||||
std::string LocalDateTimeString();
|
||||
|
||||
} // end namespace benchmark
|
||||
|
||||
#endif // BENCHMARK_TIMERS_H
|
263
src/walltime.cc
263
src/walltime.cc
|
@ -1,263 +0,0 @@
|
|||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "benchmark/macros.h"
|
||||
#include "internal_macros.h"
|
||||
#include "walltime.h"
|
||||
|
||||
#if defined(BENCHMARK_OS_WINDOWS)
|
||||
#include <time.h>
|
||||
#include <winsock.h> // for timeval
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <limits>
|
||||
|
||||
#include "arraysize.h"
|
||||
#include "check.h"
|
||||
#include "cycleclock.h"
|
||||
#include "log.h"
|
||||
#include "sysinfo.h"
|
||||
|
||||
namespace benchmark {
|
||||
namespace walltime {
|
||||
|
||||
namespace {
|
||||
|
||||
#if defined(HAVE_STEADY_CLOCK)
|
||||
template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady>
|
||||
struct ChooseSteadyClock {
|
||||
typedef std::chrono::high_resolution_clock type;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ChooseSteadyClock<false> {
|
||||
typedef std::chrono::steady_clock type;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct ChooseClockType {
|
||||
#if defined(HAVE_STEADY_CLOCK)
|
||||
typedef ChooseSteadyClock<>::type type;
|
||||
#else
|
||||
typedef std::chrono::high_resolution_clock type;
|
||||
#endif
|
||||
};
|
||||
|
||||
class WallTimeImp
|
||||
{
|
||||
public:
|
||||
WallTime Now();
|
||||
|
||||
static WallTimeImp& GetWallTimeImp() {
|
||||
static WallTimeImp* imp = new WallTimeImp();
|
||||
return *imp;
|
||||
}
|
||||
|
||||
private:
|
||||
WallTimeImp();
|
||||
// Helper routines to load/store a float from an AtomicWord. Required because
|
||||
// g++ < 4.7 doesn't support std::atomic<float> correctly. I cannot wait to
|
||||
// get rid of this horror show.
|
||||
void SetDrift(float f) {
|
||||
int32_t w;
|
||||
memcpy(&w, &f, sizeof(f));
|
||||
std::atomic_store(&drift_adjust_, w);
|
||||
}
|
||||
|
||||
float GetDrift() const {
|
||||
float f;
|
||||
int32_t w = std::atomic_load(&drift_adjust_);
|
||||
memcpy(&f, &w, sizeof(f));
|
||||
return f;
|
||||
}
|
||||
|
||||
WallTime Slow() const {
|
||||
struct timeval tv;
|
||||
#if defined(BENCHMARK_OS_WINDOWS)
|
||||
FILETIME file_time;
|
||||
SYSTEMTIME system_time;
|
||||
ULARGE_INTEGER ularge;
|
||||
const unsigned __int64 epoch = 116444736000000000LL;
|
||||
|
||||
GetSystemTime(&system_time);
|
||||
SystemTimeToFileTime(&system_time, &file_time);
|
||||
ularge.LowPart = file_time.dwLowDateTime;
|
||||
ularge.HighPart = file_time.dwHighDateTime;
|
||||
|
||||
tv.tv_sec = (long)((ularge.QuadPart - epoch) / (10L * 1000 * 1000));
|
||||
tv.tv_usec = (long)(system_time.wMilliseconds * 1000);
|
||||
#else
|
||||
gettimeofday(&tv, nullptr);
|
||||
#endif
|
||||
return tv.tv_sec + tv.tv_usec * 1e-6;
|
||||
}
|
||||
|
||||
private:
|
||||
static_assert(sizeof(float) <= sizeof(int32_t),
|
||||
"type sizes don't allow the drift_adjust hack");
|
||||
|
||||
WallTime base_walltime_;
|
||||
int64_t base_cycletime_;
|
||||
int64_t cycles_per_second_;
|
||||
double seconds_per_cycle_;
|
||||
uint32_t last_adjust_time_;
|
||||
std::atomic<int32_t> drift_adjust_;
|
||||
int64_t max_interval_cycles_;
|
||||
|
||||
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp);
|
||||
};
|
||||
|
||||
|
||||
WallTime WallTimeImp::Now() {
|
||||
WallTime now = 0.0;
|
||||
WallTime result = 0.0;
|
||||
int64_t ct = 0;
|
||||
uint32_t top_bits = 0;
|
||||
do {
|
||||
ct = cycleclock::Now();
|
||||
int64_t cycle_delta = ct - base_cycletime_;
|
||||
result = base_walltime_ + cycle_delta * seconds_per_cycle_;
|
||||
|
||||
top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
|
||||
// Recompute drift no more often than every 2^32 cycles.
|
||||
// I.e., @2GHz, ~ every two seconds
|
||||
if (top_bits == last_adjust_time_) { // don't need to recompute drift
|
||||
return result + GetDrift();
|
||||
}
|
||||
|
||||
now = Slow();
|
||||
} while (cycleclock::Now() - ct > max_interval_cycles_);
|
||||
// We are now sure that "now" and "result" were produced within
|
||||
// kMaxErrorInterval of one another.
|
||||
|
||||
SetDrift(static_cast<float>(now - result));
|
||||
last_adjust_time_ = top_bits;
|
||||
return now;
|
||||
}
|
||||
|
||||
|
||||
WallTimeImp::WallTimeImp()
|
||||
: base_walltime_(0.0), base_cycletime_(0),
|
||||
cycles_per_second_(0), seconds_per_cycle_(0.0),
|
||||
last_adjust_time_(0), drift_adjust_(0),
|
||||
max_interval_cycles_(0) {
|
||||
const double kMaxErrorInterval = 100e-6;
|
||||
cycles_per_second_ = static_cast<int64_t>(CyclesPerSecond());
|
||||
CHECK(cycles_per_second_ != 0);
|
||||
seconds_per_cycle_ = 1.0 / cycles_per_second_;
|
||||
max_interval_cycles_ =
|
||||
static_cast<int64_t>(cycles_per_second_ * kMaxErrorInterval);
|
||||
do {
|
||||
base_cycletime_ = cycleclock::Now();
|
||||
base_walltime_ = Slow();
|
||||
} while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_);
|
||||
// We are now sure that "base_walltime" and "base_cycletime" were produced
|
||||
// within kMaxErrorInterval of one another.
|
||||
|
||||
SetDrift(0.0);
|
||||
last_adjust_time_ = static_cast<uint32_t>(uint64_t(base_cycletime_) >> 32);
|
||||
}
|
||||
|
||||
WallTime CPUWalltimeNow() {
|
||||
static WallTimeImp& imp = WallTimeImp::GetWallTimeImp();
|
||||
return imp.Now();
|
||||
}
|
||||
|
||||
WallTime ChronoWalltimeNow() {
|
||||
typedef ChooseClockType::type Clock;
|
||||
typedef std::chrono::duration<WallTime, std::chrono::seconds::period>
|
||||
FPSeconds;
|
||||
static_assert(std::chrono::treat_as_floating_point<WallTime>::value,
|
||||
"This type must be treated as a floating point type.");
|
||||
auto now = Clock::now().time_since_epoch();
|
||||
return std::chrono::duration_cast<FPSeconds>(now).count();
|
||||
}
|
||||
|
||||
bool UseCpuCycleClock() {
|
||||
bool useWallTime = !CpuScalingEnabled();
|
||||
if (useWallTime) {
|
||||
VLOG(1) << "Using the CPU cycle clock to provide walltime::Now().\n";
|
||||
} else {
|
||||
VLOG(1) << "Using std::chrono to provide walltime::Now().\n";
|
||||
}
|
||||
return useWallTime;
|
||||
}
|
||||
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
// WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is
|
||||
// enabled at the start of the program then std::chrono::system_clock is used
|
||||
// instead.
|
||||
WallTime Now()
|
||||
{
|
||||
static bool useCPUClock = UseCpuCycleClock();
|
||||
if (useCPUClock) {
|
||||
return CPUWalltimeNow();
|
||||
} else {
|
||||
return ChronoWalltimeNow();
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace walltime
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
std::string DateTimeString(bool local) {
|
||||
typedef std::chrono::system_clock Clock;
|
||||
std::time_t now = Clock::to_time_t(Clock::now());
|
||||
char storage[128];
|
||||
std::size_t written;
|
||||
|
||||
if (local) {
|
||||
#if defined(BENCHMARK_OS_WINDOWS)
|
||||
written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
|
||||
#else
|
||||
std::tm timeinfo;
|
||||
std::memset(&timeinfo, 0, sizeof(std::tm));
|
||||
::localtime_r(&now, &timeinfo);
|
||||
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
|
||||
#endif
|
||||
} else {
|
||||
#if defined(BENCHMARK_OS_WINDOWS)
|
||||
written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
|
||||
#else
|
||||
std::tm timeinfo;
|
||||
std::memset(&timeinfo, 0, sizeof(std::tm));
|
||||
::gmtime_r(&now, &timeinfo);
|
||||
written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
|
||||
#endif
|
||||
}
|
||||
CHECK(written < arraysize(storage));
|
||||
((void)written); // prevent unused variable in optimized mode.
|
||||
return std::string(storage);
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
std::string LocalDateTimeString() {
|
||||
return DateTimeString(true);
|
||||
}
|
||||
|
||||
} // end namespace benchmark
|
|
@ -1,17 +0,0 @@
|
|||
#ifndef BENCHMARK_WALLTIME_H_
|
||||
#define BENCHMARK_WALLTIME_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace benchmark {
|
||||
typedef double WallTime;
|
||||
|
||||
namespace walltime {
|
||||
WallTime Now();
|
||||
} // end namespace walltime
|
||||
|
||||
std::string LocalDateTimeString();
|
||||
|
||||
} // end namespace benchmark
|
||||
|
||||
#endif // BENCHMARK_WALLTIME_H_
|
|
@ -59,6 +59,10 @@ def calculate_change(old_val, new_val):
|
|||
"""
|
||||
Return a float representing the decimal change between old_val and new_val.
|
||||
"""
|
||||
if old_val == 0 and new_val == 0:
|
||||
return 0.0
|
||||
if old_val == 0:
|
||||
return float(new_val - old_val) / (float(old_val + new_val) / 2)
|
||||
return float(new_val - old_val) / abs(old_val)
|
||||
|
||||
|
||||
|
@ -73,7 +77,7 @@ def generate_difference_report(json1, json2, use_color=True):
|
|||
if b['name'] == name:
|
||||
return b
|
||||
return None
|
||||
first_line = "{:<{}s} Time CPU".format(
|
||||
first_line = "{:<{}s} Time CPU Old New".format(
|
||||
'Benchmark', first_col_width)
|
||||
output_strs = [first_line, '-' * len(first_line)]
|
||||
for bn in json1['benchmarks']:
|
||||
|
@ -88,12 +92,13 @@ def generate_difference_report(json1, json2, use_color=True):
|
|||
return BC_WHITE
|
||||
else:
|
||||
return BC_CYAN
|
||||
fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}"
|
||||
fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc} {:4d} {:4d}"
|
||||
tres = calculate_change(bn['real_time'], other_bench['real_time'])
|
||||
cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
|
||||
output_strs += [color_format(use_color, fmt_str,
|
||||
BC_HEADER, bn['name'], first_col_width,
|
||||
get_color(tres), tres, get_color(cpures), cpures,
|
||||
bn['cpu_time'], other_bench['cpu_time'],
|
||||
endc=BC_ENDC)]
|
||||
return output_strs
|
||||
|
||||
|
|
Loading…
Reference in New Issue