Merge pull request #3167 from hashicorp/b-windows-stats
Fix invalid CPU stats on Windows
This commit is contained in:
commit
cf274cad3c
|
@ -1,8 +1,14 @@
|
|||
package stats
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
shelpers "github.com/hashicorp/nomad/helper/stats"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestCpuStatsPercent(t *testing.T) {
|
||||
|
@ -15,3 +21,39 @@ func TestCpuStatsPercent(t *testing.T) {
|
|||
t.Fatalf("expected: %v, actual: %v", expectedPercent, percent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostStats_CPU(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
assert.Nil(shelpers.Init())
|
||||
|
||||
logger := log.New(os.Stderr, "", log.LstdFlags|log.Lmicroseconds)
|
||||
cwd, err := os.Getwd()
|
||||
assert.Nil(err)
|
||||
hs := NewHostStatsCollector(logger, cwd)
|
||||
|
||||
// Collect twice so we can calculate percents we need to generate some work
|
||||
// so that the cpu values change
|
||||
assert.Nil(hs.Collect())
|
||||
total := 0
|
||||
for i := 1; i < 1000000000; i++ {
|
||||
total *= i
|
||||
total = total % i
|
||||
}
|
||||
assert.Nil(hs.Collect())
|
||||
stats := hs.Stats()
|
||||
|
||||
assert.NotZero(stats.CPUTicksConsumed)
|
||||
assert.NotZero(len(stats.CPU))
|
||||
|
||||
for _, cpu := range stats.CPU {
|
||||
assert.False(math.IsNaN(cpu.Idle))
|
||||
assert.False(math.IsNaN(cpu.Total))
|
||||
assert.False(math.IsNaN(cpu.System))
|
||||
assert.False(math.IsNaN(cpu.User))
|
||||
|
||||
assert.False(math.IsInf(cpu.Idle, 0))
|
||||
assert.False(math.IsInf(cpu.Total, 0))
|
||||
assert.False(math.IsInf(cpu.System, 0))
|
||||
assert.False(math.IsInf(cpu.User, 0))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
// +build !windows
|
||||
|
||||
package stats
|
||||
|
||||
import (
|
||||
shelpers "github.com/hashicorp/nomad/helper/stats"
|
||||
"github.com/shirou/gopsutil/cpu"
|
||||
)
|
||||
|
||||
func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks float64, err error) {
|
||||
|
||||
ticksConsumed := 0.0
|
||||
cpuStats, err := cpu.Times(true)
|
||||
if err != nil {
|
||||
return nil, 0.0, err
|
||||
}
|
||||
cs := make([]*CPUStats, len(cpuStats))
|
||||
for idx, cpuStat := range cpuStats {
|
||||
percentCalculator, ok := h.statsCalculator[cpuStat.CPU]
|
||||
if !ok {
|
||||
percentCalculator = NewHostCpuStatsCalculator()
|
||||
h.statsCalculator[cpuStat.CPU] = percentCalculator
|
||||
}
|
||||
idle, user, system, total := percentCalculator.Calculate(cpuStat)
|
||||
cs[idx] = &CPUStats{
|
||||
CPU: cpuStat.CPU,
|
||||
User: user,
|
||||
System: system,
|
||||
Idle: idle,
|
||||
Total: total,
|
||||
}
|
||||
ticksConsumed += (total / 100.0) * (shelpers.TotalTicksAvailable() / float64(len(cpuStats)))
|
||||
}
|
||||
|
||||
return cs, ticksConsumed, nil
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
// +build windows
|
||||
|
||||
package stats
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
shelpers "github.com/hashicorp/nomad/helper/stats"
|
||||
"github.com/shirou/gopsutil/cpu"
|
||||
)
|
||||
|
||||
func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks float64, err error) {
|
||||
// Get the per cpu stats
|
||||
cpuStats, err := cpu.Times(true)
|
||||
if err != nil {
|
||||
return nil, 0.0, err
|
||||
}
|
||||
|
||||
cs := make([]*CPUStats, len(cpuStats))
|
||||
for idx, cpuStat := range cpuStats {
|
||||
|
||||
// On windows they are already in percent
|
||||
cs[idx] = &CPUStats{
|
||||
CPU: cpuStat.CPU,
|
||||
User: cpuStat.User,
|
||||
System: cpuStat.System,
|
||||
Idle: cpuStat.Idle,
|
||||
Total: cpuStat.Total(),
|
||||
}
|
||||
}
|
||||
|
||||
// Get the number of ticks
|
||||
allCpu, err := cpu.Times(false)
|
||||
if err != nil {
|
||||
return nil, 0.0, err
|
||||
}
|
||||
if len(allCpu) != 1 {
|
||||
return nil, 0.0, fmt.Errorf("unexpected number of cpus (%d)", len(allCpu))
|
||||
}
|
||||
|
||||
// We use the calculator because when retrieving against all cpus it is
|
||||
// returned as ticks.
|
||||
all := allCpu[0]
|
||||
percentCalculator, ok := h.statsCalculator[all.CPU]
|
||||
if !ok {
|
||||
percentCalculator = NewHostCpuStatsCalculator()
|
||||
h.statsCalculator[all.CPU] = percentCalculator
|
||||
}
|
||||
_, _, _, total := percentCalculator.Calculate(all)
|
||||
ticks := (total / 100) * shelpers.TotalTicksAvailable()
|
||||
|
||||
return cs, ticks, nil
|
||||
}
|
|
@ -11,8 +11,6 @@ import (
|
|||
"github.com/shirou/gopsutil/disk"
|
||||
"github.com/shirou/gopsutil/host"
|
||||
"github.com/shirou/gopsutil/mem"
|
||||
|
||||
shelpers "github.com/hashicorp/nomad/helper/stats"
|
||||
)
|
||||
|
||||
// HostStats represents resource usage stats of the host running a Nomad client
|
||||
|
@ -95,46 +93,72 @@ func NewHostStatsCollector(logger *log.Logger, allocDir string) *HostStatsCollec
|
|||
// Collect collects stats related to resource usage of a host
|
||||
func (h *HostStatsCollector) Collect() error {
|
||||
hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
|
||||
memStats, err := mem.VirtualMemory()
|
||||
|
||||
// Determine up-time
|
||||
uptime, err := host.Uptime()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hs.Memory = &MemoryStats{
|
||||
hs.Uptime = uptime
|
||||
|
||||
// Collect memory stats
|
||||
mstats, err := h.collectMemoryStats()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hs.Memory = mstats
|
||||
|
||||
// Collect cpu stats
|
||||
cpus, ticks, err := h.collectCPUStats()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hs.CPU = cpus
|
||||
hs.CPUTicksConsumed = ticks
|
||||
|
||||
// Collect disk stats
|
||||
diskStats, err := h.collectDiskStats()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hs.DiskStats = diskStats
|
||||
|
||||
// Getting the disk stats for the allocation directory
|
||||
usage, err := disk.Usage(h.allocDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hs.AllocDirStats = h.toDiskStats(usage, nil)
|
||||
|
||||
// Update the collected status object.
|
||||
h.hostStatsLock.Lock()
|
||||
h.hostStats = hs
|
||||
h.hostStatsLock.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) {
|
||||
memStats, err := mem.VirtualMemory()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mem := &MemoryStats{
|
||||
Total: memStats.Total,
|
||||
Available: memStats.Available,
|
||||
Used: memStats.Used,
|
||||
Free: memStats.Free,
|
||||
}
|
||||
|
||||
ticksConsumed := 0.0
|
||||
cpuStats, err := cpu.Times(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cs := make([]*CPUStats, len(cpuStats))
|
||||
for idx, cpuStat := range cpuStats {
|
||||
percentCalculator, ok := h.statsCalculator[cpuStat.CPU]
|
||||
if !ok {
|
||||
percentCalculator = NewHostCpuStatsCalculator()
|
||||
h.statsCalculator[cpuStat.CPU] = percentCalculator
|
||||
}
|
||||
idle, user, system, total := percentCalculator.Calculate(cpuStat)
|
||||
cs[idx] = &CPUStats{
|
||||
CPU: cpuStat.CPU,
|
||||
User: user,
|
||||
System: system,
|
||||
Idle: idle,
|
||||
Total: total,
|
||||
}
|
||||
ticksConsumed += (total / 100) * (shelpers.TotalTicksAvailable() / float64(len(cpuStats)))
|
||||
}
|
||||
hs.CPU = cs
|
||||
hs.CPUTicksConsumed = ticksConsumed
|
||||
return mem, nil
|
||||
}
|
||||
|
||||
func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) {
|
||||
partitions, err := disk.Partitions(false)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var diskStats []*DiskStats
|
||||
for _, partition := range partitions {
|
||||
usage, err := disk.Usage(partition.Mountpoint)
|
||||
|
@ -153,25 +177,8 @@ func (h *HostStatsCollector) Collect() error {
|
|||
ds := h.toDiskStats(usage, &partition)
|
||||
diskStats = append(diskStats, ds)
|
||||
}
|
||||
hs.DiskStats = diskStats
|
||||
|
||||
// Getting the disk stats for the allocation directory
|
||||
usage, err := disk.Usage(h.allocDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hs.AllocDirStats = h.toDiskStats(usage, nil)
|
||||
|
||||
uptime, err := host.Uptime()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hs.Uptime = uptime
|
||||
|
||||
h.hostStatsLock.Lock()
|
||||
h.hostStats = hs
|
||||
h.hostStatsLock.Unlock()
|
||||
return nil
|
||||
return diskStats, nil
|
||||
}
|
||||
|
||||
// Stats returns the host stats that has been collected
|
||||
|
@ -228,28 +235,26 @@ func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, u
|
|||
currentUser := times.User
|
||||
currentSystem := times.System
|
||||
currentTotal := times.Total()
|
||||
|
||||
deltaTotal := currentTotal - h.prevTotal
|
||||
idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
|
||||
if math.IsNaN(idle) {
|
||||
idle = 100.0
|
||||
}
|
||||
|
||||
user = ((currentUser - h.prevUser) / deltaTotal) * 100
|
||||
if math.IsNaN(user) {
|
||||
user = 0.0
|
||||
}
|
||||
|
||||
system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
|
||||
if math.IsNaN(system) {
|
||||
system = 0.0
|
||||
}
|
||||
|
||||
currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
|
||||
times.Softirq + times.Steal + times.Guest + times.GuestNice + times.Stolen
|
||||
|
||||
deltaTotal := currentTotal - h.prevTotal
|
||||
idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
|
||||
user = ((currentUser - h.prevUser) / deltaTotal) * 100
|
||||
system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
|
||||
total = ((currentBusy - h.prevBusy) / deltaTotal) * 100
|
||||
if math.IsNaN(total) {
|
||||
|
||||
// Protect against any invalid values
|
||||
if math.IsNaN(idle) || math.IsInf(idle, 0) {
|
||||
idle = 100.0
|
||||
}
|
||||
if math.IsNaN(user) || math.IsInf(user, 0) {
|
||||
user = 0.0
|
||||
}
|
||||
if math.IsNaN(system) || math.IsInf(system, 0) {
|
||||
system = 0.0
|
||||
}
|
||||
if math.IsNaN(total) || math.IsInf(total, 0) {
|
||||
total = 0.0
|
||||
}
|
||||
|
||||
|
@ -258,6 +263,5 @@ func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, u
|
|||
h.prevSystem = currentSystem
|
||||
h.prevTotal = currentTotal
|
||||
h.prevBusy = currentBusy
|
||||
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue