From 6126d2a2052bb48d3472ac0468ade50397d393c5 Mon Sep 17 00:00:00 2001 From: Chris Kennelly Date: Fri, 16 Aug 2024 11:10:18 -0400 Subject: [PATCH] Align benchmark::State to a cacheline. (#1230) * Align benchmark::State to a cacheline. This can avoid interference with neighboring objects and stabilize benchmark results. * separate cacheline definition from alignment attribute macro Co-authored-by: Roman Lebedev --------- Co-authored-by: dominic <510002+dmah42@users.noreply.github.com> Co-authored-by: Roman Lebedev --- include/benchmark/benchmark.h | 43 +++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 4cdb4515..66f34867 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -290,11 +290,50 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_OVERRIDE #endif +#if defined(__GNUC__) +// Determine the cacheline size based on architecture +#if defined(__i386__) || defined(__x86_64__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64 +#elif defined(__powerpc64__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 128 +#elif defined(__aarch64__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64 +#elif defined(__arm__) +// Cache line sizes for ARM: These values are not strictly correct since +// cache line sizes depend on implementations, not architectures. There +// are even implementations with cache line sizes configurable at boot +// time. +#if defined(__ARM_ARCH_5T__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 32 +#elif defined(__ARM_ARCH_7A__) +#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64 +#endif // ARM_ARCH +#endif // arches +#endif // __GNUC__ + +#ifndef BENCHMARK_INTERNAL_CACHELINE_SIZE +// A reasonable default guess. Note that overestimates tend to waste more +// space, while underestimates tend to waste more time. 
+#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64 +#endif + +#if defined(__GNUC__) +// Indicates that the declared object be cache aligned using +// `BENCHMARK_INTERNAL_CACHELINE_SIZE` (see above). +#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \ + __attribute__((aligned(BENCHMARK_INTERNAL_CACHELINE_SIZE))) +#elif defined(_MSC_VER) +#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \ + __declspec(align(BENCHMARK_INTERNAL_CACHELINE_SIZE)) +#else +#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED +#endif + #if defined(_MSC_VER) #pragma warning(push) // C4251: needs to have dll-interface to be used by clients of class #pragma warning(disable : 4251) -#endif +#endif // _MSC_VER namespace benchmark { class BenchmarkReporter; @@ -759,7 +798,7 @@ enum Skipped // State is passed to a running Benchmark and contains state for the // benchmark to use. -class BENCHMARK_EXPORT State { +class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State { public: struct StateIterator; friend struct StateIterator;