mirror of https://github.com/google/benchmark.git
Iteration counts should be `uint64_t` globally. (#817)
This is a shameless rip-off of https://github.com/google/benchmark/pull/646
I did promise to look into why that proposed PR was producing
so much worse assembly, and so I finally did.
The reason is that the diff changes `size_t` (unsigned) to `int64_t` (signed).
There is this nice little `assert`:
7a1c370283/include/benchmark/benchmark.h (L744)
It ensures that we didn't magically decide to advance our iterator
when we should have finished benchmarking.
When `cached_` was unsigned, the `assert` was `cached_ UGT 0`.
But we only ever get to that `assert` if `cached_ NE 0`,
and naturally, if `cached_` is not `0`, then it is greater than `0`,
so the `assert` is tautological and gets folded away.
But now that `cached_` is signed, the `assert` becomes `cached_ SGT 0`.
We still only know that `cached_ NE 0`, so the `assert` can't be
optimized out, or at least it currently isn't.
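For illustration, here is a minimal standalone sketch of that effect
(the `FakeIterator` type below is hypothetical, not the real `StateIterator`;
only the signedness of the counter matters):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // The caller only ever advances while the loop condition
    // `cached_ != 0` holds, loosely mirroring the ranged-for protocol.
    template <typename Count>
    struct FakeIterator {
      Count cached_;

      bool not_done() const { return cached_ != 0; }

      void advance() {
        // Count = size_t:  `cached_ != 0` already implies `cached_ UGT 0`,
        //                  so this assert is tautological and folds away.
        // Count = int64_t: `cached_ != 0` does not imply `cached_ SGT 0`
        //                  (the value could be negative), so the check stays.
        assert(cached_ > 0 && "incremented past the end");
        --cached_;
      }
    };

    int main() {
      FakeIterator<std::size_t> u{3};   // assert folds away
      FakeIterator<std::int64_t> s{3};  // assert survives
      while (u.not_done()) u.advance();
      while (s.not_done()) s.advance();
    }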
Regardless of whether that is a bug in itself,
that particular diff would have regressed normal 64-bit systems
by halving the maximal iteration space (we go from an unsigned counter
to a signed one of the same bit width), which seems like a bug.
And as it happens, fixing *this* bug fixes the other bug.
This produces a fully (bit-by-bit) identical state_assembly_test.s.
The FileCheck change is actually needed regardless of this patch;
the test does not pass for me without it, even without this diff.
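For downstream code, the practical upshot is that iteration counts are now
`benchmark::IterationCount` (an alias for `uint64_t`). A sketch of how a
benchmark that tracks its own iteration count looks after this change
(loosely based on the updated basic_test.cc; the `BM_CountIterations` name
and the user counter are just for illustration):

    #include <benchmark/benchmark.h>

    // Counters that used to be size_t / int64_t should now be
    // benchmark::IterationCount so they match state.iterations().
    static void BM_CountIterations(benchmark::State& state) {
      benchmark::IterationCount iter_count = 0;
      for (auto _ : state) {
        ++iter_count;
      }
      // Report the observed count as a user counter.
      state.counters["iters_seen"] = static_cast<double>(iter_count);
    }
    BENCHMARK(BM_CountIterations);

    BENCHMARK_MAIN();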
parent 2e7203aa94
commit f92903cc53
@@ -56,8 +56,7 @@ static void BM_memcpy(benchmark::State& state) {
   memset(src, 'x', state.range(0));
   for (auto _ : state)
     memcpy(dst, src, state.range(0));
-  state.SetBytesProcessed(int64_t(state.iterations()) *
-                          int64_t(state.range(0)));
+  state.SetBytesProcessed(state.iterations() * state.range(0));
   delete[] src; delete[] dst;
 }
 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
@@ -122,8 +121,7 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
       q.Wait(&v);
   }
   // actually messages, not bytes:
-  state.SetBytesProcessed(
-      static_cast<int64_t>(state.iterations())*state.range(0));
+  state.SetBytesProcessed(state.iterations() * state.range(0));
 }
 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
 
@@ -413,9 +411,11 @@ enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond };
 // calculated automatically to the best fit.
 enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
 
+typedef uint64_t IterationCount;
+
 // BigOFunc is passed to a benchmark in order to specify the asymptotic
 // computational complexity for the benchmark.
-typedef double(BigOFunc)(int64_t);
+typedef double(BigOFunc)(IterationCount);
 
 // StatisticsFunc is passed to a benchmark in order to compute some descriptive
 // statistics over all the measurements of some type
@@ -488,7 +488,7 @@ class State {
   //   while (state.KeepRunningBatch(1000)) {
   //     // process 1000 elements
   //   }
-  bool KeepRunningBatch(size_t n);
+  bool KeepRunningBatch(IterationCount n);
 
   // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
   // by the current thread.
@@ -627,7 +627,7 @@ class State {
   int64_t range_y() const { return range(1); }
 
   BENCHMARK_ALWAYS_INLINE
-  size_t iterations() const {
+  IterationCount iterations() const {
     if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
       return 0;
     }
@@ -638,15 +638,15 @@ class State {
     : // items we expect on the first cache line (ie 64 bytes of the struct)
       // When total_iterations_ is 0, KeepRunning() and friends will return false.
      // May be larger than max_iterations.
-      size_t total_iterations_;
+      IterationCount total_iterations_;
 
       // When using KeepRunningBatch(), batch_leftover_ holds the number of
       // iterations beyond max_iters that were run. Used to track
       // completed_iterations_ accurately.
-      size_t batch_leftover_;
+      IterationCount batch_leftover_;
 
  public:
-  const size_t max_iterations;
+  const IterationCount max_iterations;
 
  private:
   bool started_;
@@ -667,14 +667,14 @@ class State {
   const int threads;
 
  private:
-  State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
-        int n_threads, internal::ThreadTimer* timer,
+  State(IterationCount max_iters, const std::vector<int64_t>& ranges,
+        int thread_i, int n_threads, internal::ThreadTimer* timer,
         internal::ThreadManager* manager);
 
   void StartKeepRunning();
   // Implementation of KeepRunning() and KeepRunningBatch().
   // is_batch must be true unless n is 1.
-  bool KeepRunningInternal(size_t n, bool is_batch);
+  bool KeepRunningInternal(IterationCount n, bool is_batch);
   void FinishKeepRunning();
   internal::ThreadTimer* timer_;
   internal::ThreadManager* manager_;
@@ -686,11 +686,11 @@ inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
   return KeepRunningInternal(1, /*is_batch=*/false);
 }
 
-inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(size_t n) {
+inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
   return KeepRunningInternal(n, /*is_batch=*/true);
 }
 
-inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(size_t n,
+inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
                                                                bool is_batch) {
   // total_iterations_ is set to 0 by the constructor, and always set to a
   // nonzero value by StartKepRunning().
@@ -754,7 +754,7 @@ struct State::StateIterator {
   }
 
  private:
-  size_t cached_;
+  IterationCount cached_;
   State* const parent_;
 };
 
@@ -858,7 +858,7 @@ class Benchmark {
   // NOTE: This function should only be used when *exact* iteration control is
   // needed and never to control or limit how long a benchmark runs, where
   // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
-  Benchmark* Iterations(size_t n);
+  Benchmark* Iterations(IterationCount n);
 
   // Specify the amount of times to repeat this benchmark. This option overrides
   // the `benchmark_repetitions` flag.
@@ -957,7 +957,7 @@ class Benchmark {
   TimeUnit time_unit_;
   int range_multiplier_;
   double min_time_;
-  size_t iterations_;
+  IterationCount iterations_;
   int repetitions_;
   bool measure_process_cpu_time_;
   bool use_real_time_;
@@ -1375,7 +1375,7 @@ class BenchmarkReporter {
     bool error_occurred;
     std::string error_message;
 
-    int64_t iterations;
+    IterationCount iterations;
     int64_t threads;
     int64_t repetition_index;
     int64_t repetitions;
@@ -121,8 +121,8 @@ void UseCharPointer(char const volatile*) {}
 
 } // namespace internal
 
-State::State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
-             int n_threads, internal::ThreadTimer* timer,
+State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
+             int thread_i, int n_threads, internal::ThreadTimer* timer,
              internal::ThreadManager* manager)
     : total_iterations_(0),
       batch_leftover_(0),
@@ -3,8 +3,8 @@
 namespace benchmark {
 namespace internal {
 
-State BenchmarkInstance::Run(
-    size_t iters, int thread_id, internal::ThreadTimer* timer,
+State BenchmarkInstance::Run(IterationCount iters, int thread_id,
+                             internal::ThreadTimer* timer,
                              internal::ThreadManager* manager) const {
   State st(iters, arg, thread_id, threads, timer, manager);
   benchmark->Run(st);
@@ -32,10 +32,10 @@ struct BenchmarkInstance {
   bool last_benchmark_instance;
   int repetitions;
   double min_time;
-  size_t iterations;
+  IterationCount iterations;
   int threads; // Number of concurrent threads to us
 
-  State Run(size_t iters, int thread_id, internal::ThreadTimer* timer,
+  State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
             internal::ThreadManager* manager) const;
 };
 
@@ -376,7 +376,7 @@ Benchmark* Benchmark::MinTime(double t) {
   return this;
 }
 
-Benchmark* Benchmark::Iterations(size_t n) {
+Benchmark* Benchmark::Iterations(IterationCount n) {
   CHECK(n > 0);
   CHECK(IsZero(min_time_));
   iterations_ = n;
@@ -59,11 +59,12 @@ MemoryManager* memory_manager = nullptr;
 
 namespace {
 
-static const size_t kMaxIterations = 1000000000;
+static constexpr IterationCount kMaxIterations = 1000000000;
 
 BenchmarkReporter::Run CreateRunReport(
     const benchmark::internal::BenchmarkInstance& b,
-    const internal::ThreadManager::Result& results, size_t memory_iterations,
+    const internal::ThreadManager::Result& results,
+    IterationCount memory_iterations,
     const MemoryManager::Result& memory_result, double seconds,
     int64_t repetition_index) {
   // Create report about this benchmark run.
@@ -109,8 +110,8 @@ BenchmarkReporter::Run CreateRunReport(
 
 // Execute one thread of benchmark b for the specified number of iterations.
 // Adds the stats collected for the thread into *total.
-void RunInThread(const BenchmarkInstance* b, size_t iters, int thread_id,
-                 ThreadManager* manager) {
+void RunInThread(const BenchmarkInstance* b, IterationCount iters,
+                 int thread_id, ThreadManager* manager) {
   internal::ThreadTimer timer(
       b->measure_process_cpu_time
           ? internal::ThreadTimer::CreateProcessCpuTime()
@@ -187,13 +188,13 @@ class BenchmarkRunner {
 
   std::vector<std::thread> pool;
 
-  size_t iters;  // preserved between repetitions!
+  IterationCount iters;  // preserved between repetitions!
   // So only the first repetition has to find/calculate it,
   // the other repetitions will just use that precomputed iteration count.
 
   struct IterationResults {
     internal::ThreadManager::Result results;
-    size_t iters;
+    IterationCount iters;
     double seconds;
   };
   IterationResults DoNIterations() {
@@ -248,7 +249,7 @@ class BenchmarkRunner {
     return i;
   }
 
-  size_t PredictNumItersNeeded(const IterationResults& i) const {
+  IterationCount PredictNumItersNeeded(const IterationResults& i) const {
     // See how much iterations should be increased by.
     // Note: Avoid division by zero with max(seconds, 1ns).
     double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
@@ -262,10 +263,10 @@ class BenchmarkRunner {
     if (multiplier <= 1.0) multiplier = 2.0;
 
     // So what seems to be the sufficiently-large iteration count? Round up.
-    const size_t max_next_iters =
+    const IterationCount max_next_iters =
        0.5 + std::max(multiplier * i.iters, i.iters + 1.0);
     // But we do have *some* sanity limits though..
-    const size_t next_iters = std::min(max_next_iters, kMaxIterations);
+    const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
 
     VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
     return next_iters;  // round up before conversion to integer.
@@ -319,11 +320,11 @@ class BenchmarkRunner {
 
     // Oh, one last thing, we need to also produce the 'memory measurements'..
     MemoryManager::Result memory_result;
-    size_t memory_iterations = 0;
+    IterationCount memory_iterations = 0;
     if (memory_manager != nullptr) {
       // Only run a few iterations to reduce the impact of one-time
       // allocations in benchmarks that are not properly managed.
-      memory_iterations = std::min<size_t>(16, iters);
+      memory_iterations = std::min<IterationCount>(16, iters);
       memory_manager->Start();
       std::unique_ptr<internal::ThreadManager> manager;
       manager.reset(new internal::ThreadManager(1));
@@ -29,20 +29,23 @@ BigOFunc* FittingCurve(BigO complexity) {
   static const double kLog2E = 1.44269504088896340736;
   switch (complexity) {
     case oN:
-      return [](int64_t n) -> double { return static_cast<double>(n); };
+      return [](IterationCount n) -> double { return static_cast<double>(n); };
     case oNSquared:
-      return [](int64_t n) -> double { return std::pow(n, 2); };
+      return [](IterationCount n) -> double { return std::pow(n, 2); };
     case oNCubed:
-      return [](int64_t n) -> double { return std::pow(n, 3); };
+      return [](IterationCount n) -> double { return std::pow(n, 3); };
     case oLogN:
       /* Note: can't use log2 because Android's GNU STL lacks it */
-      return [](int64_t n) { return kLog2E * log(static_cast<double>(n)); };
+      return
+          [](IterationCount n) { return kLog2E * log(static_cast<double>(n)); };
     case oNLogN:
       /* Note: can't use log2 because Android's GNU STL lacks it */
-      return [](int64_t n) { return kLog2E * n * log(static_cast<double>(n)); };
+      return [](IterationCount n) {
+        return kLog2E * n * log(static_cast<double>(n));
+      };
     case o1:
     default:
-      return [](int64_t) { return 1.0; };
+      return [](IterationCount) { return 1.0; };
   }
 }
 
@@ -17,7 +17,7 @@
 namespace benchmark {
 namespace internal {
 
-double Finish(Counter const& c, int64_t iterations, double cpu_time,
+double Finish(Counter const& c, IterationCount iterations, double cpu_time,
               double num_threads) {
   double v = c.value;
   if (c.flags & Counter::kIsRate) {
@@ -35,7 +35,8 @@ double Finish(Counter const& c, int64_t iterations, double cpu_time,
   return v;
 }
 
-void Finish(UserCounters* l, int64_t iterations, double cpu_time, double num_threads) {
+void Finish(UserCounters* l, IterationCount iterations, double cpu_time,
+            double num_threads) {
   for (auto& c : *l) {
     c.second.value = Finish(c.second, iterations, cpu_time, num_threads);
   }
@@ -18,7 +18,8 @@ namespace benchmark {
 
 // these counter-related functions are hidden to reduce API surface.
 namespace internal {
-void Finish(UserCounters* l, int64_t iterations, double time, double num_threads);
+void Finish(UserCounters* l, IterationCount iterations, double time,
+            double num_threads);
 void Increment(UserCounters* l, UserCounters const& r);
 bool SameNames(UserCounters const& l, UserCounters const& r);
 } // end namespace internal
@@ -68,6 +68,12 @@ std::string FormatKV(std::string const& key, int64_t value) {
   return ss.str();
 }
 
+std::string FormatKV(std::string const& key, IterationCount value) {
+  std::stringstream ss;
+  ss << '"' << StrEscape(key) << "\": " << value;
+  return ss.str();
+}
+
 std::string FormatKV(std::string const& key, double value) {
   std::stringstream ss;
   ss << '"' << StrEscape(key) << "\": ";
@@ -38,7 +38,7 @@ class ThreadManager {
 
  public:
   struct Result {
-    int64_t iterations = 0;
+    IterationCount iterations = 0;
     double real_time_used = 0;
     double cpu_time_used = 0;
     double manual_time_used = 0;
@@ -98,7 +98,7 @@ BENCHMARK(BM_empty_stop_start)->ThreadPerCpu();
 
 
 void BM_KeepRunning(benchmark::State& state) {
-  size_t iter_count = 0;
+  benchmark::IterationCount iter_count = 0;
   assert(iter_count == state.iterations());
   while (state.KeepRunning()) {
     ++iter_count;
@@ -109,8 +109,8 @@ BENCHMARK(BM_KeepRunning);
 
 void BM_KeepRunningBatch(benchmark::State& state) {
   // Choose a prime batch size to avoid evenly dividing max_iterations.
-  const size_t batch_size = 101;
-  size_t iter_count = 0;
+  const benchmark::IterationCount batch_size = 101;
+  benchmark::IterationCount iter_count = 0;
   while (state.KeepRunningBatch(batch_size)) {
     iter_count += batch_size;
   }
@@ -119,7 +119,7 @@ void BM_KeepRunningBatch(benchmark::State& state) {
 BENCHMARK(BM_KeepRunningBatch);
 
 void BM_RangedFor(benchmark::State& state) {
-  size_t iter_count = 0;
+  benchmark::IterationCount iter_count = 0;
   for (auto _ : state) {
     ++iter_count;
   }
@@ -66,9 +66,9 @@ void BM_Complexity_O1(benchmark::State& state) {
 }
 BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1);
 BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity();
-BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity([](int64_t) {
-  return 1.0;
-});
+BENCHMARK(BM_Complexity_O1)
+    ->Range(1, 1 << 18)
+    ->Complexity([](benchmark::IterationCount) { return 1.0; });
 
 const char *one_test_name = "BM_Complexity_O1";
 const char *big_o_1_test_name = "BM_Complexity_O1_BigO";
@@ -121,7 +121,9 @@ BENCHMARK(BM_Complexity_O_N)
 BENCHMARK(BM_Complexity_O_N)
     ->RangeMultiplier(2)
     ->Range(1 << 10, 1 << 16)
-    ->Complexity([](int64_t n) -> double { return static_cast<double>(n); });
+    ->Complexity([](benchmark::IterationCount n) -> double {
+      return static_cast<double>(n);
+    });
 BENCHMARK(BM_Complexity_O_N)
     ->RangeMultiplier(2)
     ->Range(1 << 10, 1 << 16)
@@ -160,7 +162,7 @@ BENCHMARK(BM_Complexity_O_N_log_N)
 BENCHMARK(BM_Complexity_O_N_log_N)
     ->RangeMultiplier(2)
     ->Range(1 << 10, 1 << 16)
-    ->Complexity([](int64_t n) {
+    ->Complexity([](benchmark::IterationCount n) {
       return kLog2E * n * log(static_cast<double>(n));
     });
 BENCHMARK(BM_Complexity_O_N_log_N)
@@ -14,7 +14,7 @@
 
 void BM_empty(benchmark::State& state) {
   while (state.KeepRunning()) {
-    volatile std::size_t x = state.iterations();
+    volatile benchmark::IterationCount x = state.iterations();
     ((void)x);
   }
 }
@@ -25,7 +25,7 @@ extern "C" int test_for_auto_loop() {
   for (auto _ : S) {
     // CHECK: .L[[LOOP_HEAD:[a-zA-Z0-9_]+]]:
     // CHECK-GNU-NEXT: subq $1, %rbx
-    // CHECK-CLANG-NEXT: {{(addq \$1,|incq)}} %rax
+    // CHECK-CLANG-NEXT: {{(addq \$1, %rax|incq %rax|addq \$-1, %rbx)}}
     // CHECK-NEXT: jne .L[[LOOP_HEAD]]
     benchmark::DoNotOptimize(x);
   }