Merge pull request #3167 from hashicorp/b-windows-stats

Fix invalid CPU stats on Windows
This commit is contained in:
Alex Dadgar 2017-09-10 15:33:04 -07:00 committed by GitHub
commit cf274cad3c
4 changed files with 201 additions and 66 deletions

View File

@ -1,8 +1,14 @@
package stats
import (
"log"
"math"
"os"
"testing"
"time"
shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/stretchr/testify/assert"
)
func TestCpuStatsPercent(t *testing.T) {
@ -15,3 +21,39 @@ func TestCpuStatsPercent(t *testing.T) {
t.Fatalf("expected: %v, actual: %v", expectedPercent, percent)
}
}
// TestHostStats_CPU collects host stats twice, with CPU-bound work in
// between, and checks that every per-CPU value reported is a finite number.
func TestHostStats_CPU(t *testing.T) {
	a := assert.New(t)
	a.Nil(shelpers.Init())

	workDir, err := os.Getwd()
	a.Nil(err)

	stderrLogger := log.New(os.Stderr, "", log.LstdFlags|log.Lmicroseconds)
	collector := NewHostStatsCollector(stderrLogger, workDir)

	// Percentages are derived from two samples, so collect once, burn some
	// CPU so that the counters move, and then collect again.
	a.Nil(collector.Collect())

	acc := 0
	for n := 1; n < 1000000000; n++ {
		acc = (acc * n) % n
	}

	a.Nil(collector.Collect())

	hostStats := collector.Stats()
	a.NotZero(hostStats.CPUTicksConsumed)
	a.NotZero(len(hostStats.CPU))

	for _, c := range hostStats.CPU {
		values := [...]float64{c.Idle, c.Total, c.System, c.User}

		// None of the reported values may be NaN...
		for _, v := range values {
			a.False(math.IsNaN(v))
		}

		// ...nor positive or negative infinity.
		for _, v := range values {
			a.False(math.IsInf(v, 0))
		}
	}
}

36
client/stats/cpu_unix.go Normal file
View File

@ -0,0 +1,36 @@
// +build !windows
package stats
import (
shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/shirou/gopsutil/cpu"
)
// collectCPUStats samples the per-CPU times and converts each sample into
// idle/user/system/total percentages using the calculator stored on the
// collector for that CPU. It returns one CPUStats entry per sampled CPU and
// the sum of the ticks consumed across all of them. If sampling fails, the
// error is returned with a nil slice and zero ticks.
func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks float64, err error) {
	samples, err := cpu.Times(true)
	if err != nil {
		return nil, 0.0, err
	}

	result := make([]*CPUStats, 0, len(samples))
	consumed := 0.0
	for _, sample := range samples {
		// Calculators are keyed by CPU name and created lazily the first
		// time a CPU is seen.
		calc, found := h.statsCalculator[sample.CPU]
		if !found {
			calc = NewHostCpuStatsCalculator()
			h.statsCalculator[sample.CPU] = calc
		}

		idlePct, userPct, systemPct, totalPct := calc.Calculate(sample)
		result = append(result, &CPUStats{
			CPU:    sample.CPU,
			User:   userPct,
			System: systemPct,
			Idle:   idlePct,
			Total:  totalPct,
		})

		// The available ticks are split evenly between the sampled CPUs;
		// scale this CPU's share by its total percentage.
		consumed += (totalPct / 100.0) * (shelpers.TotalTicksAvailable() / float64(len(samples)))
	}

	return result, consumed, nil
}

View File

@ -0,0 +1,53 @@
// +build windows
package stats
import (
"fmt"
shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/shirou/gopsutil/cpu"
)
// collectCPUStats is the Windows implementation. It reports the per-CPU
// values as returned by the sampler and derives the consumed ticks from the
// aggregate (all-CPU) sample. Any sampling error, or an aggregate sample that
// does not contain exactly one entry, is returned with a nil slice and zero
// ticks.
func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks float64, err error) {
	// Per-CPU values first.
	perCPU, err := cpu.Times(true)
	if err != nil {
		return nil, 0.0, err
	}

	result := make([]*CPUStats, 0, len(perCPU))
	for _, sample := range perCPU {
		// On Windows the per-CPU values are already percentages, so they are
		// copied through without going through a calculator.
		// NOTE(review): Total is taken from Total(), which Calculate treats as
		// the denominator (busy time is summed separately there) — confirm
		// this matches the meaning of Total on other platforms.
		result = append(result, &CPUStats{
			CPU:    sample.CPU,
			User:   sample.User,
			System: sample.System,
			Idle:   sample.Idle,
			Total:  sample.Total(),
		})
	}

	// Then the aggregate sample, used to compute the ticks consumed.
	aggregate, err := cpu.Times(false)
	if err != nil {
		return nil, 0.0, err
	}
	if n := len(aggregate); n != 1 {
		return nil, 0.0, fmt.Errorf("unexpected number of cpus (%d)", n)
	}

	// The aggregate sample is returned as ticks rather than percent, so it
	// is run through a calculator to obtain a percentage.
	combined := aggregate[0]
	calc, found := h.statsCalculator[combined.CPU]
	if !found {
		calc = NewHostCpuStatsCalculator()
		h.statsCalculator[combined.CPU] = calc
	}

	_, _, _, totalPct := calc.Calculate(combined)
	return result, (totalPct / 100) * shelpers.TotalTicksAvailable(), nil
}

View File

@ -11,8 +11,6 @@ import (
"github.com/shirou/gopsutil/disk"
"github.com/shirou/gopsutil/host"
"github.com/shirou/gopsutil/mem"
shelpers "github.com/hashicorp/nomad/helper/stats"
)
// HostStats represents resource usage stats of the host running a Nomad client
@ -95,46 +93,72 @@ func NewHostStatsCollector(logger *log.Logger, allocDir string) *HostStatsCollec
// Collect collects stats related to resource usage of a host
func (h *HostStatsCollector) Collect() error {
hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
memStats, err := mem.VirtualMemory()
// Determine up-time
uptime, err := host.Uptime()
if err != nil {
return err
}
hs.Memory = &MemoryStats{
hs.Uptime = uptime
// Collect memory stats
mstats, err := h.collectMemoryStats()
if err != nil {
return err
}
hs.Memory = mstats
// Collect cpu stats
cpus, ticks, err := h.collectCPUStats()
if err != nil {
return err
}
hs.CPU = cpus
hs.CPUTicksConsumed = ticks
// Collect disk stats
diskStats, err := h.collectDiskStats()
if err != nil {
return err
}
hs.DiskStats = diskStats
// Getting the disk stats for the allocation directory
usage, err := disk.Usage(h.allocDir)
if err != nil {
return err
}
hs.AllocDirStats = h.toDiskStats(usage, nil)
// Update the collected status object.
h.hostStatsLock.Lock()
h.hostStats = hs
h.hostStatsLock.Unlock()
return nil
}
func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) {
memStats, err := mem.VirtualMemory()
if err != nil {
return nil, err
}
mem := &MemoryStats{
Total: memStats.Total,
Available: memStats.Available,
Used: memStats.Used,
Free: memStats.Free,
}
ticksConsumed := 0.0
cpuStats, err := cpu.Times(true)
if err != nil {
return err
}
cs := make([]*CPUStats, len(cpuStats))
for idx, cpuStat := range cpuStats {
percentCalculator, ok := h.statsCalculator[cpuStat.CPU]
if !ok {
percentCalculator = NewHostCpuStatsCalculator()
h.statsCalculator[cpuStat.CPU] = percentCalculator
}
idle, user, system, total := percentCalculator.Calculate(cpuStat)
cs[idx] = &CPUStats{
CPU: cpuStat.CPU,
User: user,
System: system,
Idle: idle,
Total: total,
}
ticksConsumed += (total / 100) * (shelpers.TotalTicksAvailable() / float64(len(cpuStats)))
}
hs.CPU = cs
hs.CPUTicksConsumed = ticksConsumed
return mem, nil
}
func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) {
partitions, err := disk.Partitions(false)
if err != nil {
return err
return nil, err
}
var diskStats []*DiskStats
for _, partition := range partitions {
usage, err := disk.Usage(partition.Mountpoint)
@ -153,25 +177,8 @@ func (h *HostStatsCollector) Collect() error {
ds := h.toDiskStats(usage, &partition)
diskStats = append(diskStats, ds)
}
hs.DiskStats = diskStats
// Getting the disk stats for the allocation directory
usage, err := disk.Usage(h.allocDir)
if err != nil {
return err
}
hs.AllocDirStats = h.toDiskStats(usage, nil)
uptime, err := host.Uptime()
if err != nil {
return err
}
hs.Uptime = uptime
h.hostStatsLock.Lock()
h.hostStats = hs
h.hostStatsLock.Unlock()
return nil
return diskStats, nil
}
// Stats returns the host stats that have been collected
@ -228,28 +235,26 @@ func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, u
currentUser := times.User
currentSystem := times.System
currentTotal := times.Total()
deltaTotal := currentTotal - h.prevTotal
idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
if math.IsNaN(idle) {
idle = 100.0
}
user = ((currentUser - h.prevUser) / deltaTotal) * 100
if math.IsNaN(user) {
user = 0.0
}
system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
if math.IsNaN(system) {
system = 0.0
}
currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
times.Softirq + times.Steal + times.Guest + times.GuestNice + times.Stolen
deltaTotal := currentTotal - h.prevTotal
idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
user = ((currentUser - h.prevUser) / deltaTotal) * 100
system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
total = ((currentBusy - h.prevBusy) / deltaTotal) * 100
if math.IsNaN(total) {
// Protect against any invalid values
if math.IsNaN(idle) || math.IsInf(idle, 0) {
idle = 100.0
}
if math.IsNaN(user) || math.IsInf(user, 0) {
user = 0.0
}
if math.IsNaN(system) || math.IsInf(system, 0) {
system = 0.0
}
if math.IsNaN(total) || math.IsInf(total, 0) {
total = 0.0
}
@ -258,6 +263,5 @@ func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, u
h.prevSystem = currentSystem
h.prevTotal = currentTotal
h.prevBusy = currentBusy
return
}