open-nomad/client/fingerprint/cpu.go
Seth Hoenig 87f4b71df0
client/fingerprint: correctly fingerprint E/P cores of Apple Silicon chips (#16672)
* client/fingerprint: correctly fingerprint E/P cores of Apple Silicon chips

This PR adds detection of asymetric core types (Power & Efficiency) (P/E)
when running on M1/M2 Apple Silicon CPUs. This functionality is provided
by shoenig/go-m1cpu which makes use of the Apple IOKit framework to read
undocumented registers containing CPU performance data. Currently working
on getting that functionality merged upstream into gopsutil, but gopsutil
would still not support detecting P vs E cores like this PR does.

Also refactors the CPUFingerprinter code to handle the mixed core
types, now setting power vs efficiency cpu attributes.

For now the scheduler is still unaware of mixed core types - on Apple
platforms tasks cannot reserve cores anyway so it doesn't matter, but
at least now the total CPU shares available will be correct.

Future work should include adding support for detecting P/E cores on
the latest and upcoming Intel chips, where computation of total cpu shares
is currently incorrect. For that, we should also include updating the
scheduler to be core-type aware, so that tasks of resources.cores on Linux
platforms can be assigned the correct number of CPU shares for the core
type(s) they have been assigned.

node attributes before

cpu.arch                  = arm64
cpu.modelname             = Apple M2 Pro
cpu.numcores              = 12
cpu.reservablecores       = 0
cpu.totalcompute          = 1000

node attributes after

cpu.arch                  = arm64
cpu.frequency.efficiency  = 2424
cpu.frequency.power       = 3504
cpu.modelname             = Apple M2 Pro
cpu.numcores.efficiency   = 4
cpu.numcores.power        = 8
cpu.reservablecores       = 0
cpu.totalcompute          = 37728

* fingerprint/cpu: follow up cr items
2023-03-28 08:27:58 -05:00

150 lines
4.6 KiB
Go

package fingerprint
import (
"fmt"
"strconv"
"github.com/hashicorp/nomad/lib/cpuset"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// defaultCPUTicks is the default amount of CPU resources assumed to be
// available if the CPU performance data is unable to be detected. This is
// common on EC2 instances, where the env_aws fingerprinter will follow up,
// setting an accurate value.
defaultCPUTicks = 1000 // 1 core * 1 GHz
)
// CPUFingerprint is used to fingerprint the CPU
type CPUFingerprint struct {
StaticFingerprinter
logger hclog.Logger
// accumulates result in these resource structs
resources *structs.Resources
nodeResources *structs.NodeResources
}
// NewCPUFingerprint is used to create a CPU fingerprint
func NewCPUFingerprint(logger hclog.Logger) Fingerprint {
return &CPUFingerprint{
logger: logger.Named("cpu"),
resources: new(structs.Resources), // COMPAT (to be removed after 0.10)
nodeResources: new(structs.NodeResources),
}
}
func (f *CPUFingerprint) Fingerprint(request *FingerprintRequest, response *FingerprintResponse) error {
f.initialize()
f.setModelName(response)
f.setFrequency(response)
f.setCoreCount(response)
f.setReservableCores(request, response)
f.setTotalCompute(request, response)
f.setResponseResources(response)
response.Detected = true
return nil
}
func (f *CPUFingerprint) initialize() {
if err := stats.Init(); err != nil {
f.logger.Warn("failed initializing stats collector", "error", err)
}
}
func (f *CPUFingerprint) setModelName(response *FingerprintResponse) {
if modelName := stats.CPUModelName(); modelName != "" {
response.AddAttribute("cpu.modelname", modelName)
f.logger.Debug("detected CPU model", "name", modelName)
}
}
func (*CPUFingerprint) frequency(mhz uint64) string {
return fmt.Sprintf("%.0f", float64(mhz))
}
func (f *CPUFingerprint) setFrequency(response *FingerprintResponse) {
power, efficiency := stats.CPUMHzPerCore()
switch {
case efficiency > 0:
response.AddAttribute("cpu.frequency.efficiency", f.frequency(efficiency))
response.AddAttribute("cpu.frequency.power", f.frequency(power))
f.logger.Debug("detected CPU efficiency core speed", "mhz", efficiency)
f.logger.Debug("detected CPU power core speed", "mhz", power)
case power > 0:
response.AddAttribute("cpu.frequency", f.frequency(power))
f.logger.Debug("detected CPU frequency", "mhz", power)
}
}
func (*CPUFingerprint) cores(count int) string {
return strconv.Itoa(count)
}
func (f *CPUFingerprint) setCoreCount(response *FingerprintResponse) {
power, efficiency := stats.CPUNumCores()
switch {
case efficiency > 0:
response.AddAttribute("cpu.numcores.efficiency", f.cores(efficiency))
response.AddAttribute("cpu.numcores.power", f.cores(power))
f.logger.Debug("detected CPU efficiency core count", "cores", efficiency)
f.logger.Debug("detected CPU power core count", "cores", power)
case power > 0:
response.AddAttribute("cpu.numcores", f.cores(power))
f.logger.Debug("detected CPU core count", power)
}
f.nodeResources.Cpu.TotalCpuCores = uint16(power + efficiency)
}
func (f *CPUFingerprint) setReservableCores(request *FingerprintRequest, response *FingerprintResponse) {
reservable := request.Config.ReservableCores
if len(reservable) > 0 {
f.logger.Debug("reservable cores set by config", "cpuset", reservable)
} else {
cgroupParent := request.Config.CgroupParent
if reservable = f.deriveReservableCores(cgroupParent); reservable != nil {
if request.Node.ReservedResources != nil {
forNode := request.Node.ReservedResources.Cpu.ReservedCpuCores
reservable = cpuset.New(reservable...).Difference(cpuset.New(forNode...)).ToSlice()
f.logger.Debug("client configuration reserves these cores for node", "cores", forNode)
}
f.logger.Debug("set of reservable cores available for tasks", "cores", reservable)
}
}
response.AddAttribute("cpu.reservablecores", strconv.Itoa(len(reservable)))
f.nodeResources.Cpu.ReservableCpuCores = reservable
}
func (f *CPUFingerprint) setTotalCompute(request *FingerprintRequest, response *FingerprintResponse) {
var ticks uint64
switch {
case request.Config.CpuCompute > 0:
ticks = uint64(request.Config.CpuCompute)
case stats.TotalTicksAvailable() > 0:
ticks = stats.TotalTicksAvailable()
default:
ticks = defaultCPUTicks
}
response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks))
f.resources.CPU = int(ticks)
f.nodeResources.Cpu.CpuShares = int64(ticks)
}
func (f *CPUFingerprint) setResponseResources(response *FingerprintResponse) {
response.Resources = f.resources
response.NodeResources = f.nodeResources
}