mirror of https://github.com/google/benchmark.git
Fix CPU frequency estimation on riscv (#1549)
* Fix CPU frequency estimation on riscv
* Cleanup code for CPU frequency estimation
* Fix use before definition of the macro
* Move the platform definitions back
* Fix compilation error on windows
* Remove unused sleep.h and sleep.cc
This commit is contained in:
parent
b111d01c1b
commit
3b19d7222d
|
@ -307,6 +307,7 @@ cxx_feature_check(STEADY_CLOCK)
|
||||||
# Ensure we have pthreads
|
# Ensure we have pthreads
|
||||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||||
find_package(Threads REQUIRED)
|
find_package(Threads REQUIRED)
|
||||||
|
cxx_feature_check(PTHREAD_AFFINITY)
|
||||||
|
|
||||||
if (BENCHMARK_ENABLE_LIBPFM)
|
if (BENCHMARK_ENABLE_LIBPFM)
|
||||||
find_package(PFM)
|
find_package(PFM)
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
#include <pthread.h>
|
||||||
|
// Exercises the cpu_set_t macros and the pthread_{get,set}affinity_np calls.
// Exits with 0 when both calls succeed; otherwise exits with the return value
// of the call that failed.
int main() {
  cpu_set_t cpus;
  CPU_ZERO(&cpus);

  // Set and clear every slot so CPU_SETSIZE, CPU_SET and CPU_CLR are all used.
  int cpu = 0;
  while (cpu < CPU_SETSIZE) {
    CPU_SET(cpu, &cpus);
    CPU_CLR(cpu, &cpus);
    ++cpu;
  }

  const pthread_t current = pthread_self();

  // Read the calling thread's affinity mask ...
  const int get_status = pthread_getaffinity_np(current, sizeof(cpus), &cpus);
  if (get_status != 0) return get_status;

  // ... and write the same mask straight back. A zero status is success, so
  // the status doubles as the exit code.
  return pthread_setaffinity_np(current, sizeof(cpus), &cpus);
}
|
|
@ -34,6 +34,11 @@ if (HAVE_LIBPFM)
|
||||||
target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
|
target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# pthread affinity, if available
|
||||||
|
if(HAVE_PTHREAD_AFFINITY)
|
||||||
|
target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||||
|
endif()
|
||||||
|
|
||||||
# Link threads.
|
# Link threads.
|
||||||
target_link_libraries(benchmark PRIVATE Threads::Threads)
|
target_link_libraries(benchmark PRIVATE Threads::Threads)
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,10 @@
|
||||||
#define BENCHMARK_OS_CYGWIN 1
|
#define BENCHMARK_OS_CYGWIN 1
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
#define BENCHMARK_OS_WINDOWS 1
|
#define BENCHMARK_OS_WINDOWS 1
|
||||||
|
// WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
|
||||||
|
// We include windows.h which implicitly includes winapifamily.h for compatibility.
|
||||||
|
#define NOMINMAX
|
||||||
|
#include <windows.h>
|
||||||
#if defined(WINAPI_FAMILY_PARTITION)
|
#if defined(WINAPI_FAMILY_PARTITION)
|
||||||
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
|
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
|
||||||
#define BENCHMARK_OS_WINDOWS_WIN32 1
|
#define BENCHMARK_OS_WINDOWS_WIN32 1
|
||||||
|
|
66
src/sleep.cc
66
src/sleep.cc
|
@ -1,66 +0,0 @@
|
||||||
// Copyright 2015 Google Inc. All rights reserved.
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
// you may not use this file except in compliance with the License.
|
|
||||||
// You may obtain a copy of the License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
|
|
||||||
#include "sleep.h"
|
|
||||||
|
|
||||||
#include <cerrno>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <ctime>
|
|
||||||
|
|
||||||
#include "internal_macros.h"
|
|
||||||
|
|
||||||
#ifdef BENCHMARK_OS_WINDOWS
|
|
||||||
#include <windows.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef BENCHMARK_OS_ZOS
|
|
||||||
#include <unistd.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace benchmark {
|
|
||||||
#ifdef BENCHMARK_OS_WINDOWS
|
|
||||||
// Window's Sleep takes milliseconds argument.
|
|
||||||
void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
|
|
||||||
void SleepForSeconds(double seconds) {
|
|
||||||
SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
|
|
||||||
}
|
|
||||||
#else // BENCHMARK_OS_WINDOWS
|
|
||||||
void SleepForMicroseconds(int microseconds) {
|
|
||||||
#ifdef BENCHMARK_OS_ZOS
|
|
||||||
// z/OS does not support nanosleep. Instead call sleep() and then usleep() to
|
|
||||||
// sleep for the remaining microseconds because usleep() will fail if its
|
|
||||||
// argument is greater than 1000000.
|
|
||||||
div_t sleepTime = div(microseconds, kNumMicrosPerSecond);
|
|
||||||
int seconds = sleepTime.quot;
|
|
||||||
while (seconds != 0) seconds = sleep(seconds);
|
|
||||||
while (usleep(sleepTime.rem) == -1 && errno == EINTR)
|
|
||||||
;
|
|
||||||
#else
|
|
||||||
struct timespec sleep_time;
|
|
||||||
sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
|
|
||||||
sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
|
|
||||||
while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
|
|
||||||
; // Ignore signals and wait for the full interval to elapse.
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void SleepForMilliseconds(int milliseconds) {
|
|
||||||
SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SleepForSeconds(double seconds) {
|
|
||||||
SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
|
|
||||||
}
|
|
||||||
#endif // BENCHMARK_OS_WINDOWS
|
|
||||||
} // end namespace benchmark
|
|
15
src/sleep.h
15
src/sleep.h
|
@ -1,15 +0,0 @@
|
||||||
#ifndef BENCHMARK_SLEEP_H_
|
|
||||||
#define BENCHMARK_SLEEP_H_
|
|
||||||
|
|
||||||
// Declarations for the sleep helpers, plus the unit-conversion constants that
// their definitions in sleep.cc use.
namespace benchmark {
|
|
||||||
// Milliseconds in one second.
const int kNumMillisPerSecond = 1000;
|
|
||||||
// Microseconds in one millisecond.
const int kNumMicrosPerMilli = 1000;
|
|
||||||
// Microseconds in one second (1,000,000).
const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
|
|
||||||
// Nanoseconds in one microsecond.
const int kNumNanosPerMicro = 1000;
|
|
||||||
// Nanoseconds in one second (1,000,000,000).
const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
|
|
||||||
|
|
||||||
// Sleeps for the given number of milliseconds.
void SleepForMilliseconds(int milliseconds);
|
|
||||||
// Sleeps for the given number of seconds; fractional values are accepted.
void SleepForSeconds(double seconds);
|
|
||||||
} // end namespace benchmark
|
|
||||||
|
|
||||||
#endif // BENCHMARK_SLEEP_H_
|
|
116
src/sysinfo.cc
116
src/sysinfo.cc
|
@ -46,6 +46,9 @@
|
||||||
#if defined(BENCHMARK_OS_QURT)
|
#if defined(BENCHMARK_OS_QURT)
|
||||||
#include <qurt.h>
|
#include <qurt.h>
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||||
|
#include <pthread.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
|
@ -62,15 +65,17 @@
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <locale>
|
#include <locale>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <random>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
|
#include "benchmark/benchmark.h"
|
||||||
#include "check.h"
|
#include "check.h"
|
||||||
#include "cycleclock.h"
|
#include "cycleclock.h"
|
||||||
#include "internal_macros.h"
|
#include "internal_macros.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "sleep.h"
|
|
||||||
#include "string_util.h"
|
#include "string_util.h"
|
||||||
|
#include "timers.h"
|
||||||
|
|
||||||
namespace benchmark {
|
namespace benchmark {
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -544,6 +549,80 @@ int GetNumCPUs() {
|
||||||
BENCHMARK_UNREACHABLE();
|
BENCHMARK_UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class ThreadAffinityGuard final {
|
||||||
|
public:
|
||||||
|
ThreadAffinityGuard() : reset_affinity(SetAffinity()) {
|
||||||
|
if (!reset_affinity)
|
||||||
|
std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU "
|
||||||
|
"frequency may be incorrect."
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
~ThreadAffinityGuard() {
|
||||||
|
if (!reset_affinity) return;
|
||||||
|
|
||||||
|
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||||
|
int ret = pthread_setaffinity_np(self, sizeof(previous_affinity),
|
||||||
|
&previous_affinity);
|
||||||
|
if (ret == 0) return;
|
||||||
|
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
|
||||||
|
DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity);
|
||||||
|
if (ret != 0) return;
|
||||||
|
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
|
||||||
|
PrintErrorAndDie("Failed to reset thread affinity");
|
||||||
|
}
|
||||||
|
|
||||||
|
ThreadAffinityGuard(ThreadAffinityGuard&&) = delete;
|
||||||
|
ThreadAffinityGuard(const ThreadAffinityGuard&) = delete;
|
||||||
|
ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete;
|
||||||
|
ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool SetAffinity() {
|
||||||
|
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||||
|
int ret;
|
||||||
|
self = pthread_self();
|
||||||
|
ret = pthread_getaffinity_np(self, sizeof(previous_affinity),
|
||||||
|
&previous_affinity);
|
||||||
|
if (ret != 0) return false;
|
||||||
|
|
||||||
|
cpu_set_t affinity;
|
||||||
|
memcpy(&affinity, &previous_affinity, sizeof(affinity));
|
||||||
|
|
||||||
|
bool is_first_cpu = true;
|
||||||
|
|
||||||
|
for (int i = 0; i < CPU_SETSIZE; ++i)
|
||||||
|
if (CPU_ISSET(i, &affinity)) {
|
||||||
|
if (is_first_cpu)
|
||||||
|
is_first_cpu = false;
|
||||||
|
else
|
||||||
|
CPU_CLR(i, &affinity);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_first_cpu) return false;
|
||||||
|
|
||||||
|
ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity);
|
||||||
|
return ret == 0;
|
||||||
|
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
|
||||||
|
self = GetCurrentThread();
|
||||||
|
DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber();
|
||||||
|
previous_affinity = SetThreadAffinityMask(self, mask);
|
||||||
|
return previous_affinity != 0;
|
||||||
|
#else
|
||||||
|
return false;
|
||||||
|
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||||
|
pthread_t self;
|
||||||
|
cpu_set_t previous_affinity;
|
||||||
|
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
|
||||||
|
HANDLE self;
|
||||||
|
DWORD_PTR previous_affinity;
|
||||||
|
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
|
||||||
|
bool reset_affinity;
|
||||||
|
};
|
||||||
|
|
||||||
double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
|
double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
|
||||||
// Currently, scaling is only used on linux path here,
|
// Currently, scaling is only used on linux path here,
|
||||||
// suppress diagnostics about it being unused on other paths.
|
// suppress diagnostics about it being unused on other paths.
|
||||||
|
@ -699,10 +778,39 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
|
||||||
return 1000000000;
|
return 1000000000;
|
||||||
#endif
|
#endif
|
||||||
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
|
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
|
||||||
static constexpr int estimate_time_ms = 1000;
|
|
||||||
|
// Make sure to use the same cycle counter when starting and stopping the
|
||||||
|
// cycle timer. We just pin the current thread to a cpu in the previous
|
||||||
|
// affinity set.
|
||||||
|
ThreadAffinityGuard affinity_guard;
|
||||||
|
|
||||||
|
static constexpr double estimate_time_s = 1.0;
|
||||||
|
const double start_time = ChronoClockNow();
|
||||||
const auto start_ticks = cycleclock::Now();
|
const auto start_ticks = cycleclock::Now();
|
||||||
SleepForMilliseconds(estimate_time_ms);
|
|
||||||
return static_cast<double>(cycleclock::Now() - start_ticks);
|
// Impose load instead of calling sleep() to make sure the cycle counter
|
||||||
|
// works.
|
||||||
|
using PRNG = std::minstd_rand;
|
||||||
|
using Result = PRNG::result_type;
|
||||||
|
PRNG rng(static_cast<Result>(start_ticks));
|
||||||
|
|
||||||
|
Result state = 0;
|
||||||
|
|
||||||
|
do {
|
||||||
|
static constexpr size_t batch_size = 10000;
|
||||||
|
rng.discard(batch_size);
|
||||||
|
state += rng();
|
||||||
|
|
||||||
|
} while (ChronoClockNow() - start_time < estimate_time_s);
|
||||||
|
|
||||||
|
DoNotOptimize(state);
|
||||||
|
|
||||||
|
const auto end_ticks = cycleclock::Now();
|
||||||
|
const double end_time = ChronoClockNow();
|
||||||
|
|
||||||
|
return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time);
|
||||||
|
// Reset the affinity of current thread when the lifetime of affinity_guard
|
||||||
|
// ends.
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<double> GetLoadAvg() {
|
std::vector<double> GetLoadAvg() {
|
||||||
|
|
|
@ -59,7 +59,6 @@
|
||||||
|
|
||||||
#include "check.h"
|
#include "check.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "sleep.h"
|
|
||||||
#include "string_util.h"
|
#include "string_util.h"
|
||||||
|
|
||||||
namespace benchmark {
|
namespace benchmark {
|
||||||
|
|
Loading…
Reference in New Issue