Merging in work from branch

This commit is contained in:
Dominic Hamon 2013-12-20 14:39:09 -08:00
commit d44b01a9cc
8 changed files with 55 additions and 170 deletions

View File

@ -233,7 +233,6 @@ class State {
bool MaybeStop();
void NewInterval();
bool AllStarting();
bool RunAnotherInterval() const;
void Run();

View File

@ -30,11 +30,9 @@ DEFINE_string(benchmark_filter, ".",
"If this flag is the string \"all\", all benchmarks linked "
"into the process are run.");
DEFINE_int32(benchmark_min_iters, 100,
"Minimum number of iterations per benchmark");
DEFINE_int32(benchmark_max_iters, 1000000000,
"Maximum number of iterations per benchmark");
DEFINE_int32(benchmark_iterations, 0,
"Total number of iterations per benchmark. 0 means the benchmarks "
"are time-based.");
DEFINE_double(benchmark_min_time, 0.5,
"Minimum number of seconds we should run benchmark before "
@ -70,15 +68,13 @@ DECLARE_string(heap_check);
: NULL )
namespace benchmark {
namespace {
// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta.
static const char kBigSIUnits[] = "kMGTPEZY";
const char kBigSIUnits[] = "kMGTPEZY";
// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi.
static const char kBigIECUnits[] = "KMGTPEZY";
const char kBigIECUnits[] = "KMGTPEZY";
// milli, micro, nano, pico, femto, atto, zepto, yocto.
static const char kSmallSIUnits[] = "munpfazy";
const char kSmallSIUnits[] = "munpfazy";
// We require that all three arrays have the same size.
static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
@ -274,8 +270,9 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
// Accumulators.
Stat1_d real_accumulated_time_stat;
Stat1_d cpu_accumulated_time_stat;
Stat1_d bytes_per_second_stat;
Stat1_d items_per_second_stat;
Stat1_d bytes_per_second_stat;
Stat1_d iterations_stat;
Stat1MinMax_d max_heapbytes_used_stat;
int total_iters = 0;
@ -283,20 +280,20 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
for (std::vector<BenchmarkRunData>::const_iterator it = reports.begin();
it != reports.end(); ++it) {
CHECK_EQ(reports[0].benchmark_name, it->benchmark_name);
total_iters += it->iterations;
real_accumulated_time_stat +=
Stat1_d(it->real_accumulated_time/it->iterations, it->iterations);
cpu_accumulated_time_stat +=
Stat1_d(it->cpu_accumulated_time/it->iterations, it->iterations);
items_per_second_stat += Stat1_d(it->items_per_second, it->iterations);
bytes_per_second_stat += Stat1_d(it->bytes_per_second, it->iterations);
iterations_stat += Stat1_d(it->iterations, it->iterations);
max_heapbytes_used_stat += Stat1MinMax_d(it->max_heapbytes_used,
it->iterations);
}
// Get the data from the accumulator to BenchmarkRunData's.
mean_data->benchmark_name = reports[0].benchmark_name + "_mean";
mean_data->iterations = total_iters;
mean_data->iterations = iterations_stat.Mean();
mean_data->real_accumulated_time = real_accumulated_time_stat.Sum();
mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Sum();
mean_data->bytes_per_second = bytes_per_second_stat.Mean();
@ -314,7 +311,7 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
stddev_data->benchmark_name = reports[0].benchmark_name + "_stddev";
stddev_data->report_label = mean_data->report_label;
stddev_data->iterations = total_iters;
stddev_data->iterations = iterations_stat.StdDev();
// We multiply by total_iters since PrintRunData expects a total time.
stddev_data->real_accumulated_time =
real_accumulated_time_stat.StdDev() * total_iters;
@ -428,11 +425,10 @@ void UseRealTime() {
void PrintUsageAndExit() {
fprintf(stdout, "benchmark [--benchmark_filter=<regex>]\n"
// TODO " [--benchmark_min_iters=<min_iters>]\n"
// TODO " [--benchmark_max_iters=<max_iters>]\n"
// TODO " [--benchmark_min_time=<min_time>]\n"
" [--benchmark_iterations=<iterations>]\n"
" [--benchmark_min_time=<min_time>]\n"
// " [--benchmark_memory_usage]\n"
// TODO " [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--color_print={true|false}]\n"
" [--v=<verbosity>]\n");
exit(0);
@ -442,11 +438,8 @@ void ParseCommandLineFlags(int* argc, const char** argv) {
for (int i = 1; i < *argc; ++i) {
if (ParseStringFlag(argv[i], "benchmark_filter",
&FLAGS_benchmark_filter) ||
/* TODO(dominic)
ParseInt32Flag(argv[i], "benchmark_min_iters",
&FLAGS_benchmark_min_iters) ||
ParseInt32Flag(argv[i], "benchmark_max_iters",
&FLAGS_benchmark_max_iters) ||
ParseInt32Flag(argv[i], "benchmark_iterations",
&FLAGS_benchmark_iterations) ||
ParseDoubleFlag(argv[i], "benchmark_min_time",
&FLAGS_benchmark_min_time) ||
// TODO(dominic)
@ -454,7 +447,6 @@ void ParseCommandLineFlags(int* argc, const char** argv) {
// &FLAGS_gbenchmark_memory_usage) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
&FLAGS_benchmark_repetitions) ||
*/
ParseBoolFlag(argv[i], "color_print", &FLAGS_color_print) ||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
for (int j = i; j != *argc; ++j)
@ -504,7 +496,7 @@ class State::FastClock {
t = MyCPUUsage() + ChildrenCPUUsage();
break;
}
return static_cast<int64_t>(t * 1e6);
return static_cast<int64_t>(t * kNumMicrosPerSecond);
}
// Reinitialize if necessary (since clock type may be changed once benchmark
@ -912,13 +904,17 @@ State::State(FastClock* clock, SharedState* s, int t)
pause_time_(0.0),
total_iterations_(0),
interval_micros_(
static_cast<int64_t>(1e6 * FLAGS_benchmark_min_time /
static_cast<int64_t>(kNumMicrosPerSecond * FLAGS_benchmark_min_time /
FLAGS_benchmark_repetitions)) {
CHECK(clock != nullptr);
CHECK(s != nullptr);
}
bool State::KeepRunning() {
// Fast path
if (!clock_->HasReached(stop_time_micros_ + pause_time_)) {
if ((FLAGS_benchmark_iterations == 0 &&
!clock_->HasReached(stop_time_micros_ + pause_time_)) ||
iterations_ < FLAGS_benchmark_iterations) {
++iterations_;
return true;
}
@ -1034,12 +1030,12 @@ void State::NewInterval() {
}
bool State::FinishInterval() {
if (iterations_ < FLAGS_benchmark_min_iters / FLAGS_benchmark_repetitions &&
interval_micros_ < 5000000) {
if (FLAGS_benchmark_iterations != 0 &&
iterations_ < FLAGS_benchmark_iterations / FLAGS_benchmark_repetitions) {
interval_micros_ *= 2;
#ifdef DEBUG
std::cout << "Interval was too short; trying again for "
<< interval_micros_ << " useconds.\n";
std::cout << "Not enough iterations in interval; "
<< "Trying again for " << interval_micros_ << " useconds.\n";
#endif
is_continuation_ = false;
NewInterval();
@ -1063,11 +1059,25 @@ bool State::FinishInterval() {
bool keep_going = false;
{
mutex_lock l(&shared_->mu);
// Either replace the last or add a new data point.
if (is_continuation_)
shared_->runs.back() = data;
else
shared_->runs.push_back(data);
keep_going = RunAnotherInterval();
if (FLAGS_benchmark_iterations != 0) {
// If we need more iterations, run another interval as a continuation.
keep_going = total_iterations_ < FLAGS_benchmark_iterations;
is_continuation_ = keep_going;
} else {
// If this is a repetition, run another interval as a new data point.
keep_going =
shared_->runs.size() <
static_cast<size_t>(FLAGS_benchmark_repetitions);
is_continuation_ = !keep_going;
}
if (!keep_going) {
++shared_->stopping;
if (shared_->stopping < shared_->threads) {
@ -1081,23 +1091,11 @@ bool State::FinishInterval() {
}
}
if (state_ == STATE_RUNNING) {
is_continuation_ = true;
if (state_ == STATE_RUNNING)
NewInterval();
}
return keep_going;
}
// Decides whether another measurement interval should be scheduled.
//
// Returns true unconditionally while fewer than FLAGS_benchmark_min_iters
// iterations have been accumulated. Past that point it returns true only if
// the accumulated iterations do not exceed FLAGS_benchmark_max_iters and the
// number of recorded runs is still below FLAGS_benchmark_repetitions.
bool State::RunAnotherInterval() const {
  // Guard: the minimum iteration count takes precedence over every limit.
  if (total_iterations_ < FLAGS_benchmark_min_iters) return true;

  return !(total_iterations_ > FLAGS_benchmark_max_iters) &&
         static_cast<int>(shared_->runs.size()) < FLAGS_benchmark_repetitions;
}
bool State::MaybeStop() {
mutex_lock l(&shared_->mu);
if (shared_->stopping < shared_->threads) {

View File

@ -39,15 +39,17 @@ extern "C" uint64_t __rdtsc();
#endif
#include <sys/time.h>
#include "benchmark/macros.h"
namespace benchmark {
// NOTE: only i386 and x86_64 have been well tested.
// PPC, sparc, alpha, and ia64 are based on
// http://peter.kuscsik.com/wordpress/?p=14
// with modifications by m3b. See also
// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
namespace benchmark {
namespace cycleclock {
// This should return the number of cycles since power-on. Thread-safe.
static inline int64_t Now() {
inline ATTRIBUTE_ALWAYS_INLINE int64_t Now() {
#if defined(OS_MACOSX)
// this goes at the top because we need ALL Macs, regardless of
// architecture, to return the number of "mach time units" that

View File

@ -1,110 +0,0 @@
#ifndef BENCHMARK_MACROS_H_
#define BENCHMARK_MACROS_H_
#include <assert.h>
// Disables copying for TypeName by deleting its copy constructor and copy
// assignment operator. Use it inside the class definition:
//
//   class Foo {
//    public:
//     Foo();
//    private:
//     DISALLOW_COPY_AND_ASSIGN(Foo);
//   };
//
// The members are explicitly deleted rather than merely declared and left
// undefined, so an attempted copy is rejected at compile time from every
// context (including members and friends) instead of surfacing later as a
// link error. The expansion already ends in a semicolon, so a trailing ';'
// at the use site is optional.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;      \
  void operator=(const TypeName&) = delete;
// The arraysize(arr) macro returns the number of elements in an array arr.
// The expression is a compile-time constant, and therefore can be
// used in defining new arrays, for example. If you use arraysize on
// a pointer by mistake, you will get a compile-time error.
//
// One caveat is that, for C++03, arraysize() doesn't accept any array of
// an anonymous type or a type defined inside a function. This is
// due to a limitation in C++03's template system. The limitation has
// been removed in C++11.
// NOTE(review): earlier wording pointed to an "unsafe ARRAYSIZE() macro
// below" as the fallback for those cases, but no such macro is defined in
// this header.
//
// This template function declaration is used in defining arraysize.
// Note that the function doesn't need an implementation, as we only
// use its type: it is declared to return a reference to a char array with
// N elements, so sizeof applied to a call expression yields N.
// NOTE(review): size_t is used here while this header only includes
// <assert.h>; presumably it is picked up transitively. Consider including
// <stddef.h> explicitly.
template <typename T, size_t N>
char (&ArraySizeHelper(T (&array)[N]))[N];
// That gcc wants both of these prototypes seems mysterious. VC, for
// its part, can't decide which to use (another mystery). Matching of
// template overloads: the final frontier.
// The const overload is therefore compiled out when COMPILER_MSVC is defined.
#ifndef COMPILER_MSVC
template <typename T, size_t N>
char (&ArraySizeHelper(const T (&array)[N]))[N];
#endif
// Evaluates to the element count of `array` as a compile-time constant.
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
// The STATIC_ASSERT macro can be used to verify that a compile-time
// expression is true. For example, you could use it to verify the
// size of a static array:
//
//   STATIC_ASSERT(arraysize(content_type_names) == CONTENT_NUM_TYPES,
//                 content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
//   STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
//
// The second argument to the macro is used as the name of the typedef that
// the macro declares. If the expression is false, most compilers will issue
// a warning/error containing that name.
//
// StaticAssert is an empty class template parameterized on a bool; it exists
// only so that the asserted expression has to be usable as a template
// argument.
template <bool>
struct StaticAssert {
};
#define STATIC_ASSERT(expr, msg) \
typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
// Implementation details of STATIC_ASSERT:
//
// - STATIC_ASSERT works by defining an array type that has -1
// elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
// #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
// does not work, as gcc supports variable-length arrays whose sizes
// are determined at run-time (this is gcc's extension and not part
// of the C++ standard). As a result, gcc fails to reject the
// following code with the simple definition:
//
// int foo;
// STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
// // not a compile-time constant.
//
// - By using the type StaticAssert<(bool(expr))>, we ensure that
// expr is a compile-time constant. (Template arguments must be
// determined at compile-time.)
//
// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
//
// StaticAssert<bool(expr)>
//
// instead, these compilers will refuse to compile
//
// STATIC_ASSERT(5 > 0, some_message);
//
// (They seem to think the ">" in "5 > 0" marks the end of the
// template argument list.)
//
// - The array size is (bool(expr) ? 1 : -1), instead of simply
//
// ((expr) ? 1 : -1).
//
// This is to avoid running into a bug in MS VC 7.1, which
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
// Lightweight invariant checks built on assert().
//
// CHECK(b) always evaluates `b` exactly once, so a condition with side
// effects still runs when assert() is compiled out (NDEBUG); in that
// configuration a failed check is silently ignored. When assert() is active,
// a failed check aborts and the stringified condition is included in the
// diagnostic, so the message names the expression that failed instead of
// just "false". The do/while(0) wrapper makes the macro behave as a single
// statement (safe in unbraced if/else).
#define CHECK(b) do { if (!(b)) assert(false && #b); } while(0)
// Comparison helpers: CHECK applied to the parenthesized operands.
#define CHECK_EQ(a, b) CHECK((a) == (b))
#define CHECK_GE(a, b) CHECK((a) >= (b))
#define CHECK_LE(a, b) CHECK((a) <= (b))
#define CHECK_GT(a, b) CHECK((a) > (b))
#define CHECK_LT(a, b) CHECK((a) < (b))
// Marks a declaration as intentionally unused so the compiler does not warn
// about it. __attribute__ is a GNU extension (GCC and compatible compilers
// define __GNUC__); elsewhere the macro expands to nothing so that code
// using it still compiles.
#if defined(__GNUC__)
#define ATTRIBUTE_UNUSED __attribute__ ((unused))
#else
#define ATTRIBUTE_UNUSED
#endif
#endif // BENCHMARK_MACROS_H_

View File

@ -5,22 +5,14 @@
namespace benchmark {
#ifdef OS_WINDOWS
// Windows' _sleep takes its argument in milliseconds, so the value is
// forwarded unchanged.
// NOTE(review): Microsoft's CRT documentation lists _sleep as obsolete in
// favor of Sleep() -- confirm against the toolchains this must build with.
void SleepForMilliseconds(int milliseconds) {
_sleep(milliseconds);
}
void SleepForSeconds(double seconds) {
SleepForMilliseconds(static_cast<int>(seconds * 1000));
SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
}
#else // OS_WINDOWS
static const int64_t kNumMillisPerSecond = 1000LL;
static const int64_t kNumMicrosPerMilli = 1000LL;
static const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
static const int64_t kNumNanosPerMicro = 1000LL;
void SleepForMicroseconds(int64_t microseconds) {
struct timespec sleep_time;
sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
@ -36,7 +28,6 @@ void SleepForMilliseconds(int milliseconds) {
// Sleeps for a (possibly fractional) number of seconds: the duration is
// scaled by kNumMicrosPerSecond, converted to an integer microsecond count
// with static_cast, and handed to SleepForMicroseconds().
void SleepForSeconds(double seconds) {
  const int64_t duration_micros =
      static_cast<int64_t>(seconds * kNumMicrosPerSecond);
  SleepForMicroseconds(duration_micros);
}
#endif // OS_WINDOWS
} // end namespace benchmark

View File

@ -4,6 +4,11 @@
#include <stdint.h>
namespace benchmark {
// Conversion factors between time units. kNumMicrosPerSecond is derived from
// kNumMillisPerSecond (1000 ms/s * 1000 us/ms).
const int64_t kNumMillisPerSecond = 1000LL;
const int64_t kNumMicrosPerMilli = 1000LL;
const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
const int64_t kNumNanosPerMicro = 1000LL;
// Sleep helpers; each takes a duration expressed in the unit named by the
// function. Only the declarations live here.
// NOTE(review): presumably each blocks the calling thread for at least the
// requested duration -- confirm against the platform-specific definitions.
void SleepForMicroseconds(int64_t microseconds);
void SleepForMilliseconds(int milliseconds);
void SleepForSeconds(double seconds);

View File

@ -13,8 +13,8 @@
#include <iostream>
#include <limits>
#include "benchmark/macros.h"
#include "cycleclock.h"
#include "macros.h"
#include "mutex_lock.h"
#include "sleep.h"

View File

@ -8,8 +8,8 @@
#include <atomic>
#include <limits>
#include "benchmark/macros.h"
#include "cycleclock.h"
#include "macros.h"
#include "sysinfo.h"
namespace benchmark {