Merge pull request #6349 from hashicorp/b-host-stats

client: Return empty values when host stats fail
This commit is contained in:
Preetha 2019-11-20 10:13:02 -06:00 committed by GitHub
commit be4a51d5b8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 23 additions and 15 deletions

View file

@ -1335,10 +1335,14 @@ func (tr *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
if ru.ResourceUsage.MemoryStats != nil {
tr.setGaugeForMemory(ru)
} else {
tr.logger.Debug("Skipping memory stats for allocation", "reason", "MemoryStats is nil")
}
if ru.ResourceUsage.CpuStats != nil {
tr.setGaugeForCPU(ru)
} else {
tr.logger.Debug("Skipping cpu stats for allocation", "reason", "CpuStats is nil")
}
}

View file

@ -2592,12 +2592,11 @@ func (c *Client) emitStats() {
next.Reset(c.config.StatsCollectionInterval)
if err != nil {
c.logger.Warn("error fetching host resource usage stats", "error", err)
continue
}
// Publish Node metrics if operator has opted in
if c.config.PublishNodeMetrics {
c.emitHostStats()
} else {
// Publish Node metrics if operator has opted in
if c.config.PublishNodeMetrics {
c.emitHostStats()
}
}
c.emitClientMetrics()

View file

@ -1,7 +1,6 @@
package stats
import (
"fmt"
"math"
"runtime"
"sync"
@ -117,21 +116,25 @@ func (h *HostStatsCollector) collectLocked() error {
// Determine up-time
uptime, err := host.Uptime()
if err != nil {
return err
h.logger.Error("failed to collect upstime stats", "error", err)
uptime = 0
}
hs.Uptime = uptime
// Collect memory stats
mstats, err := h.collectMemoryStats()
if err != nil {
return err
h.logger.Error("failed to collect memory stats", "error", err)
mstats = &MemoryStats{}
}
hs.Memory = mstats
// Collect cpu stats
cpus, ticks, err := h.collectCPUStats()
if err != nil {
return err
h.logger.Error("failed to collect cpu stats", "error", err)
cpus = []*CPUStats{}
ticks = 0
}
hs.CPU = cpus
hs.CPUTicksConsumed = ticks
@ -139,17 +142,19 @@ func (h *HostStatsCollector) collectLocked() error {
// Collect disk stats
diskStats, err := h.collectDiskStats()
if err != nil {
return err
h.logger.Error("failed to collect disk stats", "error", err)
hs.DiskStats = []*DiskStats{}
}
hs.DiskStats = diskStats
// Getting the disk stats for the allocation directory
usage, err := disk.Usage(h.allocDir)
if err != nil {
return fmt.Errorf("failed to find disk usage of alloc_dir %q: %v", h.allocDir, err)
h.logger.Error("failed to find disk usage of alloc", "alloc_dir", h.allocDir, "error", err)
hs.AllocDirStats = &DiskStats{}
} else {
hs.AllocDirStats = h.toDiskStats(usage, nil)
}
hs.AllocDirStats = h.toDiskStats(usage, nil)
// Collect devices stats
deviceStats := h.collectDeviceGroupStats()
hs.DeviceStats = deviceStats

View file

@ -121,7 +121,7 @@ func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
terminal == float32(numTasks), nil
}, func(err error) {
require.Fail("timed out waiting for metrics to converge",
"pending: %v, running: %v, terminal: %v", pending, running, terminal)
"expected: (pending: 0, running: 0, terminal: %v), got: (pending: %v, running: %v, terminal: %v)", numTasks, pending, running, terminal)
})
})
}