Merge pull request #6349 from hashicorp/b-host-stats

client: Return empty values when host stats fail
This commit is contained in:
Preetha 2019-11-20 10:13:02 -06:00 committed by GitHub
commit be4a51d5b8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 23 additions and 15 deletions

View file

@ -1335,10 +1335,14 @@ func (tr *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
if ru.ResourceUsage.MemoryStats != nil { if ru.ResourceUsage.MemoryStats != nil {
tr.setGaugeForMemory(ru) tr.setGaugeForMemory(ru)
} else {
tr.logger.Debug("Skipping memory stats for allocation", "reason", "MemoryStats is nil")
} }
if ru.ResourceUsage.CpuStats != nil { if ru.ResourceUsage.CpuStats != nil {
tr.setGaugeForCPU(ru) tr.setGaugeForCPU(ru)
} else {
tr.logger.Debug("Skipping cpu stats for allocation", "reason", "CpuStats is nil")
} }
} }

View file

@ -2592,12 +2592,11 @@ func (c *Client) emitStats() {
next.Reset(c.config.StatsCollectionInterval) next.Reset(c.config.StatsCollectionInterval)
if err != nil { if err != nil {
c.logger.Warn("error fetching host resource usage stats", "error", err) c.logger.Warn("error fetching host resource usage stats", "error", err)
continue } else {
} // Publish Node metrics if operator has opted in
if c.config.PublishNodeMetrics {
// Publish Node metrics if operator has opted in c.emitHostStats()
if c.config.PublishNodeMetrics { }
c.emitHostStats()
} }
c.emitClientMetrics() c.emitClientMetrics()

View file

@ -1,7 +1,6 @@
package stats package stats
import ( import (
"fmt"
"math" "math"
"runtime" "runtime"
"sync" "sync"
@ -117,21 +116,25 @@ func (h *HostStatsCollector) collectLocked() error {
// Determine up-time // Determine up-time
uptime, err := host.Uptime() uptime, err := host.Uptime()
if err != nil { if err != nil {
return err h.logger.Error("failed to collect upstime stats", "error", err)
uptime = 0
} }
hs.Uptime = uptime hs.Uptime = uptime
// Collect memory stats // Collect memory stats
mstats, err := h.collectMemoryStats() mstats, err := h.collectMemoryStats()
if err != nil { if err != nil {
return err h.logger.Error("failed to collect memory stats", "error", err)
mstats = &MemoryStats{}
} }
hs.Memory = mstats hs.Memory = mstats
// Collect cpu stats // Collect cpu stats
cpus, ticks, err := h.collectCPUStats() cpus, ticks, err := h.collectCPUStats()
if err != nil { if err != nil {
return err h.logger.Error("failed to collect cpu stats", "error", err)
cpus = []*CPUStats{}
ticks = 0
} }
hs.CPU = cpus hs.CPU = cpus
hs.CPUTicksConsumed = ticks hs.CPUTicksConsumed = ticks
@ -139,17 +142,19 @@ func (h *HostStatsCollector) collectLocked() error {
// Collect disk stats // Collect disk stats
diskStats, err := h.collectDiskStats() diskStats, err := h.collectDiskStats()
if err != nil { if err != nil {
return err h.logger.Error("failed to collect disk stats", "error", err)
hs.DiskStats = []*DiskStats{}
} }
hs.DiskStats = diskStats hs.DiskStats = diskStats
// Getting the disk stats for the allocation directory // Getting the disk stats for the allocation directory
usage, err := disk.Usage(h.allocDir) usage, err := disk.Usage(h.allocDir)
if err != nil { if err != nil {
return fmt.Errorf("failed to find disk usage of alloc_dir %q: %v", h.allocDir, err) h.logger.Error("failed to find disk usage of alloc", "alloc_dir", h.allocDir, "error", err)
hs.AllocDirStats = &DiskStats{}
} else {
hs.AllocDirStats = h.toDiskStats(usage, nil)
} }
hs.AllocDirStats = h.toDiskStats(usage, nil)
// Collect devices stats // Collect devices stats
deviceStats := h.collectDeviceGroupStats() deviceStats := h.collectDeviceGroupStats()
hs.DeviceStats = deviceStats hs.DeviceStats = deviceStats

View file

@ -121,7 +121,7 @@ func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
terminal == float32(numTasks), nil terminal == float32(numTasks), nil
}, func(err error) { }, func(err error) {
require.Fail("timed out waiting for metrics to converge", require.Fail("timed out waiting for metrics to converge",
"pending: %v, running: %v, terminal: %v", pending, running, terminal) "expected: (pending: 0, running: 0, terminal: %v), got: (pending: %v, running: %v, terminal: %v)", numTasks, pending, running, terminal)
}) })
}) })
} }