Merge pull request #6349 from hashicorp/b-host-stats
client: Return empty values when host stats fail
This commit is contained in:
commit
be4a51d5b8
|
@ -1335,10 +1335,14 @@ func (tr *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
|
|||
|
||||
if ru.ResourceUsage.MemoryStats != nil {
|
||||
tr.setGaugeForMemory(ru)
|
||||
} else {
|
||||
tr.logger.Debug("Skipping memory stats for allocation", "reason", "MemoryStats is nil")
|
||||
}
|
||||
|
||||
if ru.ResourceUsage.CpuStats != nil {
|
||||
tr.setGaugeForCPU(ru)
|
||||
} else {
|
||||
tr.logger.Debug("Skipping cpu stats for allocation", "reason", "CpuStats is nil")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2592,12 +2592,11 @@ func (c *Client) emitStats() {
|
|||
next.Reset(c.config.StatsCollectionInterval)
|
||||
if err != nil {
|
||||
c.logger.Warn("error fetching host resource usage stats", "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Publish Node metrics if operator has opted in
|
||||
if c.config.PublishNodeMetrics {
|
||||
c.emitHostStats()
|
||||
} else {
|
||||
// Publish Node metrics if operator has opted in
|
||||
if c.config.PublishNodeMetrics {
|
||||
c.emitHostStats()
|
||||
}
|
||||
}
|
||||
|
||||
c.emitClientMetrics()
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package stats
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"runtime"
|
||||
"sync"
|
||||
|
@ -117,21 +116,25 @@ func (h *HostStatsCollector) collectLocked() error {
|
|||
// Determine up-time
|
||||
uptime, err := host.Uptime()
|
||||
if err != nil {
|
||||
return err
|
||||
h.logger.Error("failed to collect upstime stats", "error", err)
|
||||
uptime = 0
|
||||
}
|
||||
hs.Uptime = uptime
|
||||
|
||||
// Collect memory stats
|
||||
mstats, err := h.collectMemoryStats()
|
||||
if err != nil {
|
||||
return err
|
||||
h.logger.Error("failed to collect memory stats", "error", err)
|
||||
mstats = &MemoryStats{}
|
||||
}
|
||||
hs.Memory = mstats
|
||||
|
||||
// Collect cpu stats
|
||||
cpus, ticks, err := h.collectCPUStats()
|
||||
if err != nil {
|
||||
return err
|
||||
h.logger.Error("failed to collect cpu stats", "error", err)
|
||||
cpus = []*CPUStats{}
|
||||
ticks = 0
|
||||
}
|
||||
hs.CPU = cpus
|
||||
hs.CPUTicksConsumed = ticks
|
||||
|
@ -139,17 +142,19 @@ func (h *HostStatsCollector) collectLocked() error {
|
|||
// Collect disk stats
|
||||
diskStats, err := h.collectDiskStats()
|
||||
if err != nil {
|
||||
return err
|
||||
h.logger.Error("failed to collect disk stats", "error", err)
|
||||
hs.DiskStats = []*DiskStats{}
|
||||
}
|
||||
hs.DiskStats = diskStats
|
||||
|
||||
// Getting the disk stats for the allocation directory
|
||||
usage, err := disk.Usage(h.allocDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to find disk usage of alloc_dir %q: %v", h.allocDir, err)
|
||||
h.logger.Error("failed to find disk usage of alloc", "alloc_dir", h.allocDir, "error", err)
|
||||
hs.AllocDirStats = &DiskStats{}
|
||||
} else {
|
||||
hs.AllocDirStats = h.toDiskStats(usage, nil)
|
||||
}
|
||||
hs.AllocDirStats = h.toDiskStats(usage, nil)
|
||||
|
||||
// Collect devices stats
|
||||
deviceStats := h.collectDeviceGroupStats()
|
||||
hs.DeviceStats = deviceStats
|
||||
|
|
|
@ -121,7 +121,7 @@ func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
|
|||
terminal == float32(numTasks), nil
|
||||
}, func(err error) {
|
||||
require.Fail("timed out waiting for metrics to converge",
|
||||
"pending: %v, running: %v, terminal: %v", pending, running, terminal)
|
||||
"expected: (pending: 0, running: 0, terminal: %v), got: (pending: %v, running: %v, terminal: %v)", numTasks, pending, running, terminal)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue