Merge pull request #6349 from hashicorp/b-host-stats
client: Return empty values when host stats fail
This commit is contained in:
commit
be4a51d5b8
|
@ -1335,10 +1335,14 @@ func (tr *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
|
||||||
|
|
||||||
if ru.ResourceUsage.MemoryStats != nil {
|
if ru.ResourceUsage.MemoryStats != nil {
|
||||||
tr.setGaugeForMemory(ru)
|
tr.setGaugeForMemory(ru)
|
||||||
|
} else {
|
||||||
|
tr.logger.Debug("Skipping memory stats for allocation", "reason", "MemoryStats is nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
if ru.ResourceUsage.CpuStats != nil {
|
if ru.ResourceUsage.CpuStats != nil {
|
||||||
tr.setGaugeForCPU(ru)
|
tr.setGaugeForCPU(ru)
|
||||||
|
} else {
|
||||||
|
tr.logger.Debug("Skipping cpu stats for allocation", "reason", "CpuStats is nil")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2592,12 +2592,11 @@ func (c *Client) emitStats() {
|
||||||
next.Reset(c.config.StatsCollectionInterval)
|
next.Reset(c.config.StatsCollectionInterval)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.logger.Warn("error fetching host resource usage stats", "error", err)
|
c.logger.Warn("error fetching host resource usage stats", "error", err)
|
||||||
continue
|
} else {
|
||||||
}
|
// Publish Node metrics if operator has opted in
|
||||||
|
if c.config.PublishNodeMetrics {
|
||||||
// Publish Node metrics if operator has opted in
|
c.emitHostStats()
|
||||||
if c.config.PublishNodeMetrics {
|
}
|
||||||
c.emitHostStats()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
c.emitClientMetrics()
|
c.emitClientMetrics()
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
package stats
|
package stats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"math"
|
"math"
|
||||||
"runtime"
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -117,21 +116,25 @@ func (h *HostStatsCollector) collectLocked() error {
|
||||||
// Determine up-time
|
// Determine up-time
|
||||||
uptime, err := host.Uptime()
|
uptime, err := host.Uptime()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
h.logger.Error("failed to collect upstime stats", "error", err)
|
||||||
|
uptime = 0
|
||||||
}
|
}
|
||||||
hs.Uptime = uptime
|
hs.Uptime = uptime
|
||||||
|
|
||||||
// Collect memory stats
|
// Collect memory stats
|
||||||
mstats, err := h.collectMemoryStats()
|
mstats, err := h.collectMemoryStats()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
h.logger.Error("failed to collect memory stats", "error", err)
|
||||||
|
mstats = &MemoryStats{}
|
||||||
}
|
}
|
||||||
hs.Memory = mstats
|
hs.Memory = mstats
|
||||||
|
|
||||||
// Collect cpu stats
|
// Collect cpu stats
|
||||||
cpus, ticks, err := h.collectCPUStats()
|
cpus, ticks, err := h.collectCPUStats()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
h.logger.Error("failed to collect cpu stats", "error", err)
|
||||||
|
cpus = []*CPUStats{}
|
||||||
|
ticks = 0
|
||||||
}
|
}
|
||||||
hs.CPU = cpus
|
hs.CPU = cpus
|
||||||
hs.CPUTicksConsumed = ticks
|
hs.CPUTicksConsumed = ticks
|
||||||
|
@ -139,17 +142,19 @@ func (h *HostStatsCollector) collectLocked() error {
|
||||||
// Collect disk stats
|
// Collect disk stats
|
||||||
diskStats, err := h.collectDiskStats()
|
diskStats, err := h.collectDiskStats()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
h.logger.Error("failed to collect disk stats", "error", err)
|
||||||
|
hs.DiskStats = []*DiskStats{}
|
||||||
}
|
}
|
||||||
hs.DiskStats = diskStats
|
hs.DiskStats = diskStats
|
||||||
|
|
||||||
// Getting the disk stats for the allocation directory
|
// Getting the disk stats for the allocation directory
|
||||||
usage, err := disk.Usage(h.allocDir)
|
usage, err := disk.Usage(h.allocDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to find disk usage of alloc_dir %q: %v", h.allocDir, err)
|
h.logger.Error("failed to find disk usage of alloc", "alloc_dir", h.allocDir, "error", err)
|
||||||
|
hs.AllocDirStats = &DiskStats{}
|
||||||
|
} else {
|
||||||
|
hs.AllocDirStats = h.toDiskStats(usage, nil)
|
||||||
}
|
}
|
||||||
hs.AllocDirStats = h.toDiskStats(usage, nil)
|
|
||||||
|
|
||||||
// Collect devices stats
|
// Collect devices stats
|
||||||
deviceStats := h.collectDeviceGroupStats()
|
deviceStats := h.collectDeviceGroupStats()
|
||||||
hs.DeviceStats = deviceStats
|
hs.DeviceStats = deviceStats
|
||||||
|
|
|
@ -121,7 +121,7 @@ func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
|
||||||
terminal == float32(numTasks), nil
|
terminal == float32(numTasks), nil
|
||||||
}, func(err error) {
|
}, func(err error) {
|
||||||
require.Fail("timed out waiting for metrics to converge",
|
require.Fail("timed out waiting for metrics to converge",
|
||||||
"pending: %v, running: %v, terminal: %v", pending, running, terminal)
|
"expected: (pending: 0, running: 0, terminal: %v), got: (pending: %v, running: %v, terminal: %v)", numTasks, pending, running, terminal)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue