mirror of https://github.com/google/benchmark.git
Fix CPU frequency estimation on riscv (#1549)
* Fix CPU frequency estimation on riscv * Cleanup code for CPU frequency estimation * Fix use before definition of the macro * Move the platform definitions back * Fix compilation error on windows * Remove unused sleep.h and sleep.cc
This commit is contained in:
parent
b111d01c1b
commit
3b19d7222d
|
@ -307,6 +307,7 @@ cxx_feature_check(STEADY_CLOCK)
|
|||
# Ensure we have pthreads
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
cxx_feature_check(PTHREAD_AFFINITY)
|
||||
|
||||
if (BENCHMARK_ENABLE_LIBPFM)
|
||||
find_package(PFM)
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
#include <pthread.h>
|
||||
int main() {
|
||||
cpu_set_t set;
|
||||
CPU_ZERO(&set);
|
||||
for (int i = 0; i < CPU_SETSIZE; ++i) {
|
||||
CPU_SET(i, &set);
|
||||
CPU_CLR(i, &set);
|
||||
}
|
||||
pthread_t self = pthread_self();
|
||||
int ret;
|
||||
ret = pthread_getaffinity_np(self, sizeof(set), &set);
|
||||
if (ret != 0) return ret;
|
||||
ret = pthread_setaffinity_np(self, sizeof(set), &set);
|
||||
if (ret != 0) return ret;
|
||||
return 0;
|
||||
}
|
|
@ -34,6 +34,11 @@ if (HAVE_LIBPFM)
|
|||
target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
|
||||
endif()
|
||||
|
||||
# pthread affinity, if available
|
||||
if(HAVE_PTHREAD_AFFINITY)
|
||||
target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||
endif()
|
||||
|
||||
# Link threads.
|
||||
target_link_libraries(benchmark PRIVATE Threads::Threads)
|
||||
|
||||
|
|
|
@ -42,6 +42,10 @@
|
|||
#define BENCHMARK_OS_CYGWIN 1
|
||||
#elif defined(_WIN32)
|
||||
#define BENCHMARK_OS_WINDOWS 1
|
||||
// WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
|
||||
// We include windows.h which implicitly includes winapifamily.h for compatibility.
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#if defined(WINAPI_FAMILY_PARTITION)
|
||||
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
|
||||
#define BENCHMARK_OS_WINDOWS_WIN32 1
|
||||
|
|
66
src/sleep.cc
66
src/sleep.cc
|
@ -1,66 +0,0 @@
|
|||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "sleep.h"
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
|
||||
#include "internal_macros.h"
|
||||
|
||||
#ifdef BENCHMARK_OS_WINDOWS
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef BENCHMARK_OS_ZOS
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
namespace benchmark {
|
||||
#ifdef BENCHMARK_OS_WINDOWS
|
||||
// Window's Sleep takes milliseconds argument.
|
||||
void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
|
||||
void SleepForSeconds(double seconds) {
|
||||
SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
|
||||
}
|
||||
#else // BENCHMARK_OS_WINDOWS
|
||||
void SleepForMicroseconds(int microseconds) {
|
||||
#ifdef BENCHMARK_OS_ZOS
|
||||
// z/OS does not support nanosleep. Instead call sleep() and then usleep() to
|
||||
// sleep for the remaining microseconds because usleep() will fail if its
|
||||
// argument is greater than 1000000.
|
||||
div_t sleepTime = div(microseconds, kNumMicrosPerSecond);
|
||||
int seconds = sleepTime.quot;
|
||||
while (seconds != 0) seconds = sleep(seconds);
|
||||
while (usleep(sleepTime.rem) == -1 && errno == EINTR)
|
||||
;
|
||||
#else
|
||||
struct timespec sleep_time;
|
||||
sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
|
||||
sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
|
||||
while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
|
||||
; // Ignore signals and wait for the full interval to elapse.
|
||||
#endif
|
||||
}
|
||||
|
||||
void SleepForMilliseconds(int milliseconds) {
|
||||
SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
|
||||
}
|
||||
|
||||
void SleepForSeconds(double seconds) {
|
||||
SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
|
||||
}
|
||||
#endif // BENCHMARK_OS_WINDOWS
|
||||
} // end namespace benchmark
|
15
src/sleep.h
15
src/sleep.h
|
@ -1,15 +0,0 @@
|
|||
#ifndef BENCHMARK_SLEEP_H_
|
||||
#define BENCHMARK_SLEEP_H_
|
||||
|
||||
namespace benchmark {
|
||||
const int kNumMillisPerSecond = 1000;
|
||||
const int kNumMicrosPerMilli = 1000;
|
||||
const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
|
||||
const int kNumNanosPerMicro = 1000;
|
||||
const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
|
||||
|
||||
void SleepForMilliseconds(int milliseconds);
|
||||
void SleepForSeconds(double seconds);
|
||||
} // end namespace benchmark
|
||||
|
||||
#endif // BENCHMARK_SLEEP_H_
|
116
src/sysinfo.cc
116
src/sysinfo.cc
|
@ -46,6 +46,9 @@
|
|||
#if defined(BENCHMARK_OS_QURT)
|
||||
#include <qurt.h>
|
||||
#endif
|
||||
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
|
@ -62,15 +65,17 @@
|
|||
#include <limits>
|
||||
#include <locale>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "check.h"
|
||||
#include "cycleclock.h"
|
||||
#include "internal_macros.h"
|
||||
#include "log.h"
|
||||
#include "sleep.h"
|
||||
#include "string_util.h"
|
||||
#include "timers.h"
|
||||
|
||||
namespace benchmark {
|
||||
namespace {
|
||||
|
@ -544,6 +549,80 @@ int GetNumCPUs() {
|
|||
BENCHMARK_UNREACHABLE();
|
||||
}
|
||||
|
||||
class ThreadAffinityGuard final {
|
||||
public:
|
||||
ThreadAffinityGuard() : reset_affinity(SetAffinity()) {
|
||||
if (!reset_affinity)
|
||||
std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU "
|
||||
"frequency may be incorrect."
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
~ThreadAffinityGuard() {
|
||||
if (!reset_affinity) return;
|
||||
|
||||
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||
int ret = pthread_setaffinity_np(self, sizeof(previous_affinity),
|
||||
&previous_affinity);
|
||||
if (ret == 0) return;
|
||||
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
|
||||
DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity);
|
||||
if (ret != 0) return;
|
||||
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
|
||||
PrintErrorAndDie("Failed to reset thread affinity");
|
||||
}
|
||||
|
||||
ThreadAffinityGuard(ThreadAffinityGuard&&) = delete;
|
||||
ThreadAffinityGuard(const ThreadAffinityGuard&) = delete;
|
||||
ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete;
|
||||
ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete;
|
||||
|
||||
private:
|
||||
bool SetAffinity() {
|
||||
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||
int ret;
|
||||
self = pthread_self();
|
||||
ret = pthread_getaffinity_np(self, sizeof(previous_affinity),
|
||||
&previous_affinity);
|
||||
if (ret != 0) return false;
|
||||
|
||||
cpu_set_t affinity;
|
||||
memcpy(&affinity, &previous_affinity, sizeof(affinity));
|
||||
|
||||
bool is_first_cpu = true;
|
||||
|
||||
for (int i = 0; i < CPU_SETSIZE; ++i)
|
||||
if (CPU_ISSET(i, &affinity)) {
|
||||
if (is_first_cpu)
|
||||
is_first_cpu = false;
|
||||
else
|
||||
CPU_CLR(i, &affinity);
|
||||
}
|
||||
|
||||
if (is_first_cpu) return false;
|
||||
|
||||
ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity);
|
||||
return ret == 0;
|
||||
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
|
||||
self = GetCurrentThread();
|
||||
DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber();
|
||||
previous_affinity = SetThreadAffinityMask(self, mask);
|
||||
return previous_affinity != 0;
|
||||
#else
|
||||
return false;
|
||||
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
|
||||
}
|
||||
|
||||
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||
pthread_t self;
|
||||
cpu_set_t previous_affinity;
|
||||
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
|
||||
HANDLE self;
|
||||
DWORD_PTR previous_affinity;
|
||||
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
|
||||
bool reset_affinity;
|
||||
};
|
||||
|
||||
double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
|
||||
// Currently, scaling is only used on linux path here,
|
||||
// suppress diagnostics about it being unused on other paths.
|
||||
|
@ -699,10 +778,39 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
|
|||
return 1000000000;
|
||||
#endif
|
||||
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
|
||||
static constexpr int estimate_time_ms = 1000;
|
||||
|
||||
// Make sure to use the same cycle counter when starting and stopping the
|
||||
// cycle timer. We just pin the current thread to a cpu in the previous
|
||||
// affinity set.
|
||||
ThreadAffinityGuard affinity_guard;
|
||||
|
||||
static constexpr double estimate_time_s = 1.0;
|
||||
const double start_time = ChronoClockNow();
|
||||
const auto start_ticks = cycleclock::Now();
|
||||
SleepForMilliseconds(estimate_time_ms);
|
||||
return static_cast<double>(cycleclock::Now() - start_ticks);
|
||||
|
||||
// Impose load instead of calling sleep() to make sure the cycle counter
|
||||
// works.
|
||||
using PRNG = std::minstd_rand;
|
||||
using Result = PRNG::result_type;
|
||||
PRNG rng(static_cast<Result>(start_ticks));
|
||||
|
||||
Result state = 0;
|
||||
|
||||
do {
|
||||
static constexpr size_t batch_size = 10000;
|
||||
rng.discard(batch_size);
|
||||
state += rng();
|
||||
|
||||
} while (ChronoClockNow() - start_time < estimate_time_s);
|
||||
|
||||
DoNotOptimize(state);
|
||||
|
||||
const auto end_ticks = cycleclock::Now();
|
||||
const double end_time = ChronoClockNow();
|
||||
|
||||
return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time);
|
||||
// Reset the affinity of current thread when the lifetime of affinity_guard
|
||||
// ends.
|
||||
}
|
||||
|
||||
std::vector<double> GetLoadAvg() {
|
||||
|
|
|
@ -59,7 +59,6 @@
|
|||
|
||||
#include "check.h"
|
||||
#include "log.h"
|
||||
#include "sleep.h"
|
||||
#include "string_util.h"
|
||||
|
||||
namespace benchmark {
|
||||
|
|
Loading…
Reference in New Issue