Support for Web platforms (#340)

* Implement cycleclock::Now for PNaCl

* Make cycleclock::Now compatible with NaCl/ARM

* Support Emscripten (Asm.js, WebAssembly)

* Rearrange #ifs to handle specific cases first

* DoNotOptimize without inline asm for Emscripten & PNaCl
This commit is contained in:
Marat Dukhan 2017-02-11 05:31:40 -05:00 committed by Eric
parent fef203bd02
commit 070c0ca0a9
4 changed files with 62 additions and 18 deletions

View file

@ -230,7 +230,7 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
// expression from being optimized away by the compiler. This function is // expression from being optimized away by the compiler. This function is
// intended to add little to no overhead. // intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441 // See: https://youtu.be/nXaxk27zwlk?t=2441
#if defined(__GNUC__) #if defined(__GNUC__) && !defined(__pnacl__) && !defined(EMSCRIPTEN)
template <class Tp> template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : : "g"(value) : "memory"); asm volatile("" : : "g"(value) : "memory");

View file

@ -43,6 +43,11 @@ extern "C" uint64_t __rdtsc();
#ifndef BENCHMARK_OS_WINDOWS #ifndef BENCHMARK_OS_WINDOWS
#include <sys/time.h> #include <sys/time.h>
#include <time.h>
#endif
#ifdef BENCHMARK_OS_EMSCRIPTEN
#include <emscripten.h>
#endif #endif
namespace benchmark { namespace benchmark {
@ -65,6 +70,10 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// counter pauses; it does not continue counting, nor does it // counter pauses; it does not continue counting, nor does it
// reset to zero. // reset to zero.
return mach_absolute_time(); return mach_absolute_time();
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// this goes above x86-specific code because old versions of Emscripten
// define __x86_64__, although they have nothing to do with it.
return static_cast<int64_t>(emscripten_get_now() * 1e+6);
#elif defined(__i386__) #elif defined(__i386__)
int64_t ret; int64_t ret;
__asm__ volatile("rdtsc" : "=A"(ret)); __asm__ volatile("rdtsc" : "=A"(ret));
@ -99,6 +108,22 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
_asm rdtsc _asm rdtsc
#elif defined(COMPILER_MSVC) #elif defined(COMPILER_MSVC)
return __rdtsc(); return __rdtsc();
#elif defined(BENCHMARK_OS_NACL)
// Native Client validator on x86/x86-64 allows RDTSC instructions,
// and this case is handled above. Native Client validator on ARM
// rejects MRC instructions (used in the ARM-specific sequence below),
// so we handle it here. Portable Native Client compiles to
// architecture-agnostic bytecode, which doesn't provide any
// cycle counter access mnemonics.
// Native Client does not provide any API to access cycle counter.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
// because it provides nanosecond resolution (which is noticeable at
// least for PNaCl modules running on x86 Mac & Linux).
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = { 0, 0 };
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<int64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#elif defined(__aarch64__) #elif defined(__aarch64__)
// System timer of ARMv8 runs at a different frequency than the CPU's. // System timer of ARMv8 runs at a different frequency than the CPU's.
// The frequency is fixed, typically in the range 1-50MHz. It can be // The frequency is fixed, typically in the range 1-50MHz. It can be
@ -108,7 +133,9 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
return virtual_timer_value; return virtual_timer_value;
#elif defined(__ARM_ARCH) #elif defined(__ARM_ARCH)
#if (__ARM_ARCH >= 6) // V6 is the earliest arch that has a standard cyclecount // V6 is the earliest arch that has a standard cyclecount
// Native Client validator doesn't allow MRC instructions.
#if (__ARM_ARCH >= 6)
uint32_t pmccntr; uint32_t pmccntr;
uint32_t pmuseren; uint32_t pmuseren;
uint32_t pmcntenset; uint32_t pmcntenset;

View file

@ -41,6 +41,10 @@
#define BENCHMARK_OS_FREEBSD 1 #define BENCHMARK_OS_FREEBSD 1
#elif defined(__linux__) #elif defined(__linux__)
#define BENCHMARK_OS_LINUX 1 #define BENCHMARK_OS_LINUX 1
#elif defined(__native_client__)
#define BENCHMARK_OS_NACL 1
#elif defined(EMSCRIPTEN)
#define BENCHMARK_OS_EMSCRIPTEN 1
#endif #endif
#if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \ #if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \

View file

@ -35,6 +35,10 @@
#endif #endif
#endif #endif
#ifdef BENCHMARK_OS_EMSCRIPTEN
#include <emscripten.h>
#endif
#include <cerrno> #include <cerrno>
#include <cstdint> #include <cstdint>
#include <cstdio> #include <cstdio>
@ -100,14 +104,7 @@ BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) {
} // end namespace } // end namespace
double ProcessCPUUsage() { double ProcessCPUUsage() {
// FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See #if defined(BENCHMARK_OS_WINDOWS)
// https://github.com/google/benchmark/pull/292
#if defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
struct timespec spec;
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
return MakeTime(spec);
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#elif defined(BENCHMARK_OS_WINDOWS)
HANDLE proc = GetCurrentProcess(); HANDLE proc = GetCurrentProcess();
FILETIME creation_time; FILETIME creation_time;
FILETIME exit_time; FILETIME exit_time;
@ -117,21 +114,28 @@ double ProcessCPUUsage() {
&user_time)) &user_time))
return MakeTime(kernel_time, user_time); return MakeTime(kernel_time, user_time);
DiagnoseAndExit("GetProccessTimes() failed"); DiagnoseAndExit("GetProccessTimes() failed");
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten.
// Use Emscripten-specific API. Reported CPU time would be exactly the
// same as total time, but this is ok because there aren't long-latency
// synchronous system calls in Emscripten.
return emscripten_get_now() * 1e-3;
#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but it's not available in MacOS 10.11. See
// https://github.com/google/benchmark/pull/292
struct timespec spec;
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
return MakeTime(spec);
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#else #else
struct rusage ru; struct rusage ru;
if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru); if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru);
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); DiagnoseAndExit("getrusage(RUSAGE_SELF, ...) failed");
#endif #endif
} }
double ThreadCPUUsage() { double ThreadCPUUsage() {
// FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See #if defined(BENCHMARK_OS_WINDOWS)
// https://github.com/google/benchmark/pull/292
#if defined(CLOCK_THREAD_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
struct timespec ts;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);
DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
#elif defined(BENCHMARK_OS_WINDOWS)
HANDLE this_thread = GetCurrentThread(); HANDLE this_thread = GetCurrentThread();
FILETIME creation_time; FILETIME creation_time;
FILETIME exit_time; FILETIME exit_time;
@ -141,6 +145,8 @@ double ThreadCPUUsage() {
&user_time); &user_time);
return MakeTime(kernel_time, user_time); return MakeTime(kernel_time, user_time);
#elif defined(BENCHMARK_OS_MACOSX) #elif defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but it's not available in MacOS 10.11. See
// https://github.com/google/benchmark/pull/292
mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
thread_basic_info_data_t info; thread_basic_info_data_t info;
mach_port_t thread = pthread_mach_thread_np(pthread_self()); mach_port_t thread = pthread_mach_thread_np(pthread_self());
@ -149,6 +155,13 @@ double ThreadCPUUsage() {
return MakeTime(info); return MakeTime(info);
} }
DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info");
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// Emscripten doesn't support traditional threads
return ProcessCPUUsage();
#elif defined(CLOCK_THREAD_CPUTIME_ID)
struct timespec ts;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);
DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
#else #else
#error Per-thread timing is not available on your system. #error Per-thread timing is not available on your system.
#endif #endif