mirror of https://github.com/google/benchmark.git
Improve CPU Cache info reporting -- Add Windows support. (#486)
* Improve CPU Cache info reporting -- Add Windows support. This patch does a couple of things regarding CPU Cache reporting. First, it adds an implementation on Windows. Second, it fixes the JSONReporter to correctly (and actually) output the CPU configuration information. And finally, third, it detects and reports the number of physical CPUs that share the same cache.
This commit is contained in:
parent
27e0b439cf
commit
11dc36822b
|
@ -1159,6 +1159,7 @@ struct CPUInfo {
|
|||
std::string type;
|
||||
int level;
|
||||
int size;
|
||||
int num_sharing;
|
||||
};
|
||||
|
||||
int num_cpus;
|
||||
|
|
|
@ -87,6 +87,27 @@ bool JSONReporter::ReportContext(const Context& context) {
|
|||
out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled)
|
||||
<< ",\n";
|
||||
|
||||
out << indent << "\"caches\": [\n";
|
||||
indent = std::string(6, ' ');
|
||||
std::string cache_indent(8, ' ');
|
||||
for (size_t i = 0; i < info.caches.size(); ++i) {
|
||||
auto& CI = info.caches[i];
|
||||
out << indent << "{\n";
|
||||
out << cache_indent << FormatKV("type", CI.type) << ",\n";
|
||||
out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
|
||||
<< ",\n";
|
||||
out << cache_indent
|
||||
<< FormatKV("size", static_cast<int64_t>(CI.size) * 1000u) << ",\n";
|
||||
out << cache_indent
|
||||
<< FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
|
||||
<< "\n";
|
||||
out << indent << "}";
|
||||
if (i != info.caches.size() - 1) out << ",";
|
||||
out << "\n";
|
||||
}
|
||||
indent = std::string(4, ' ');
|
||||
out << indent << "],\n";
|
||||
|
||||
#if defined(NDEBUG)
|
||||
const char build_type[] = "release";
|
||||
#else
|
||||
|
|
|
@ -45,7 +45,10 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
|
|||
Out << "CPU Caches:\n";
|
||||
for (auto &CInfo : info.caches) {
|
||||
Out << " L" << CInfo.level << " " << CInfo.type << " "
|
||||
<< (CInfo.size / 1000) << "K\n";
|
||||
<< (CInfo.size / 1000) << "K";
|
||||
if (CInfo.num_sharing != 0)
|
||||
Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")";
|
||||
Out << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
107
src/sysinfo.cc
107
src/sysinfo.cc
|
@ -32,7 +32,10 @@
|
|||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
@ -123,6 +126,15 @@ struct ValueUnion {
|
|||
return Buff->uint64_value;
|
||||
BENCHMARK_UNREACHABLE();
|
||||
}
|
||||
|
||||
template <class T, int N>
|
||||
std::array<T, N> GetAsArray() {
|
||||
const int ArrSize = sizeof(T) * N;
|
||||
CHECK_LE(ArrSize, Size);
|
||||
std::array<T, N> Arr;
|
||||
std::memcpy(Arr.data(), data(), ArrSize);
|
||||
return Arr;
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
@ -158,6 +170,14 @@ bool GetSysctl(std::string const& Name, Tp* Out) {
|
|||
*Out = static_cast<Tp>(Buff.GetAsUnsigned());
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Tp, size_t N>
|
||||
bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) {
|
||||
auto Buff = GetSysctlImp(Name);
|
||||
if (!Buff) return false;
|
||||
*Out = Buff.GetAsArray<Tp, N>();
|
||||
return true;
|
||||
};
|
||||
#endif
|
||||
|
||||
template <class ArgT>
|
||||
|
@ -186,6 +206,25 @@ bool CpuScalingEnabled(int num_cpus) {
|
|||
return false;
|
||||
}
|
||||
|
||||
int CountSetBitsInCPUMap(std::string Val) {
|
||||
auto CountBits = [](std::string Part) {
|
||||
using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
|
||||
Part = "0x" + Part;
|
||||
CPUMask Mask(std::stoul(Part, nullptr, 16));
|
||||
return static_cast<int>(Mask.count());
|
||||
};
|
||||
size_t Pos;
|
||||
int total = 0;
|
||||
while ((Pos = Val.find(',')) != std::string::npos) {
|
||||
total += CountBits(Val.substr(0, Pos));
|
||||
Val = Val.substr(Pos + 1);
|
||||
}
|
||||
if (!Val.empty()) {
|
||||
total += CountBits(Val);
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
BENCHMARK_MAYBE_UNUSED
|
||||
std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
|
||||
std::vector<CPUInfo::CacheInfo> res;
|
||||
|
@ -214,6 +253,10 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
|
|||
PrintErrorAndDie("Failed to read from file ", FPath, "type");
|
||||
if (!ReadFromFile(StrCat(FPath, "level"), &info.level))
|
||||
PrintErrorAndDie("Failed to read from file ", FPath, "level");
|
||||
std::string map_str;
|
||||
if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str))
|
||||
PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map");
|
||||
info.num_sharing = CountSetBitsInCPUMap(map_str);
|
||||
res.push_back(info);
|
||||
}
|
||||
|
||||
|
@ -223,14 +266,18 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
|
|||
#ifdef BENCHMARK_OS_MACOSX
|
||||
std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
|
||||
std::vector<CPUInfo::CacheInfo> res;
|
||||
std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}};
|
||||
GetSysctl("hw.cacheconfig", &CacheCounts);
|
||||
|
||||
struct {
|
||||
std::string name;
|
||||
std::string type;
|
||||
int level;
|
||||
} Cases[] = {{"hw.l1dcachesize", "Data", 1},
|
||||
{"hw.l1icachesize", "Instruction", 1},
|
||||
{"hw.l2cachesize", "Unified", 2},
|
||||
{"hw.l3cachesize", "Unified", 3}};
|
||||
size_t num_sharing;
|
||||
} Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
|
||||
{"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
|
||||
{"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
|
||||
{"hw.l3cachesize", "Unified", 3, CacheCounts[3]}};
|
||||
for (auto& C : Cases) {
|
||||
int val;
|
||||
if (!GetSysctl(C.name, &val)) continue;
|
||||
|
@ -238,15 +285,67 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
|
|||
info.type = C.type;
|
||||
info.level = C.level;
|
||||
info.size = val;
|
||||
info.num_sharing = static_cast<int>(C.num_sharing);
|
||||
res.push_back(std::move(info));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#elif defined(BENCHMARK_OS_WINDOWS)
|
||||
std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
|
||||
std::vector<CPUInfo::CacheInfo> res;
|
||||
DWORD buffer_size = 0;
|
||||
using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;
|
||||
using CInfo = CACHE_DESCRIPTOR;
|
||||
|
||||
using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
|
||||
GetLogicalProcessorInformation(nullptr, &buffer_size);
|
||||
UPtr buff((PInfo*)malloc(buffer_size), &std::free);
|
||||
if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
|
||||
PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
|
||||
GetLastError());
|
||||
|
||||
PInfo* it = buff.get();
|
||||
PInfo* end = buff.get() + (buffer_size / sizeof(PInfo));
|
||||
|
||||
for (; it != end; ++it) {
|
||||
if (it->Relationship != RelationCache) continue;
|
||||
using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
|
||||
BitSet B(it->ProcessorMask);
|
||||
// To prevent duplicates, only consider caches where CPU 0 is specified
|
||||
if (!B.test(0)) continue;
|
||||
CInfo* Cache = &it->Cache;
|
||||
CPUInfo::CacheInfo C;
|
||||
C.num_sharing = B.count();
|
||||
C.level = Cache->Level;
|
||||
C.size = Cache->Size;
|
||||
switch (Cache->Type) {
|
||||
case CacheUnified:
|
||||
C.type = "Unified";
|
||||
break;
|
||||
case CacheInstruction:
|
||||
C.type = "Instruction";
|
||||
break;
|
||||
case CacheData:
|
||||
C.type = "Data";
|
||||
break;
|
||||
case CacheTrace:
|
||||
C.type = "Trace";
|
||||
break;
|
||||
default:
|
||||
C.type = "Unknown";
|
||||
break;
|
||||
}
|
||||
res.push_back(C);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
|
||||
#ifdef BENCHMARK_OS_MACOSX
|
||||
return GetCacheSizesMacOSX();
|
||||
#elif defined(BENCHMARK_OS_WINDOWS)
|
||||
return GetCacheSizesWindows();
|
||||
#else
|
||||
return GetCacheSizesFromKVFS();
|
||||
#endif
|
||||
|
|
|
@ -13,6 +13,41 @@ ADD_CASES(TC_ConsoleOut,
|
|||
{{"^[-]+$", MR_Next},
|
||||
{"^Benchmark %s Time %s CPU %s Iterations$", MR_Next},
|
||||
{"^[-]+$", MR_Next}});
|
||||
static int AddContextCases() {
|
||||
AddCases(TC_ConsoleErr,
|
||||
{
|
||||
{"%int[-/]%int[-/]%int %int:%int:%int$", MR_Default},
|
||||
{"Run on \\(%int X %float MHz CPU s\\)", MR_Next},
|
||||
});
|
||||
AddCases(TC_JSONOut, {{"^\\{", MR_Default},
|
||||
{"\"context\":", MR_Next},
|
||||
{"\"date\": \"", MR_Next},
|
||||
{"\"num_cpus\": %int,$", MR_Next},
|
||||
{"\"mhz_per_cpu\": %float,$", MR_Next},
|
||||
{"\"cpu_scaling_enabled\": ", MR_Next},
|
||||
{"\"caches\": \\[$", MR_Next}});
|
||||
auto const& Caches = benchmark::CPUInfo::Get().caches;
|
||||
if (!Caches.empty()) {
|
||||
AddCases(TC_ConsoleErr, {{"CPU Caches:$", MR_Next}});
|
||||
}
|
||||
for (size_t I = 0; I < Caches.size(); ++I) {
|
||||
std::string num_caches_str =
|
||||
Caches[I].num_sharing != 0 ? " \\(x%int\\)$" : "$";
|
||||
AddCases(
|
||||
TC_ConsoleErr,
|
||||
{{"L%int (Data|Instruction|Unified) %intK" + num_caches_str, MR_Next}});
|
||||
AddCases(TC_JSONOut, {{"\\{$", MR_Next},
|
||||
{"\"type\": \"", MR_Next},
|
||||
{"\"level\": %int,$", MR_Next},
|
||||
{"\"size\": %int,$", MR_Next},
|
||||
{"\"num_sharing\": %int$", MR_Next},
|
||||
{"}[,]{0,1}$", MR_Next}});
|
||||
}
|
||||
|
||||
AddCases(TC_JSONOut, {{"],$"}});
|
||||
return 0;
|
||||
}
|
||||
int dummy_register = AddContextCases();
|
||||
ADD_CASES(TC_CSVOut, {{"%csv_header"}});
|
||||
|
||||
// ========================================================================= //
|
||||
|
|
Loading…
Reference in New Issue