Add node "status", "scheduling eligibility" to all client metrics (#8925)

- We previously added these to the client host metrics, but it's useful to have them on all client metrics.
- e.g. so you can exclude draining nodes from charts showing your fleet size.
This commit is contained in:
Pete Woods 2020-09-22 18:53:50 +01:00 committed by GitHub
parent f749de8543
commit 81fa2a01fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2842,7 +2842,7 @@ func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, ba
} }
// setGaugeForAllocationStats proxies metrics for allocation specific statistics // setGaugeForAllocationStats proxies metrics for allocation specific statistics
func (c *Client) setGaugeForAllocationStats(nodeID string) { func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics.Label) {
c.configLock.RLock() c.configLock.RLock()
node := c.configCopy.Node node := c.configCopy.Node
c.configLock.RUnlock() c.configLock.RUnlock()
@ -2852,9 +2852,9 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
// Emit allocated // Emit allocated
if !c.config.DisableTaggedMetrics { if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), baseLabels)
} }
if c.config.BackwardsCompatibleMetrics { if c.config.BackwardsCompatibleMetrics {
@ -2865,7 +2865,7 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
for _, n := range allocated.Flattened.Networks { for _, n := range allocated.Flattened.Networks {
if !c.config.DisableTaggedMetrics { if !c.config.DisableTaggedMetrics {
labels := append(c.baseLabels, metrics.Label{ labels := append(baseLabels, metrics.Label{
Name: "device", Name: "device",
Value: n.Device, Value: n.Device,
}) })
@ -2883,9 +2883,9 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
unallocatedCpu := total.Cpu.CpuShares - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares unallocatedCpu := total.Cpu.CpuShares - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares
if !c.config.DisableTaggedMetrics { if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), baseLabels)
} }
if c.config.BackwardsCompatibleMetrics { if c.config.BackwardsCompatibleMetrics {
@ -2905,7 +2905,7 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
unallocatedMbits := n.MBits - usedMbits unallocatedMbits := n.MBits - usedMbits
if !c.config.DisableTaggedMetrics { if !c.config.DisableTaggedMetrics {
labels := append(c.baseLabels, metrics.Label{ labels := append(baseLabels, metrics.Label{
Name: "device", Name: "device",
Value: n.Device, Value: n.Device,
}) })
@ -2932,16 +2932,7 @@ func (c *Client) setGaugeForUptime(hStats *stats.HostStats, baseLabels []metrics
func (c *Client) emitHostStats() { func (c *Client) emitHostStats() {
nodeID := c.NodeID() nodeID := c.NodeID()
hStats := c.hostStatsCollector.Stats() hStats := c.hostStatsCollector.Stats()
labels := c.labels()
c.configLock.RLock()
nodeStatus := c.configCopy.Node.Status
nodeEligibility := c.configCopy.Node.SchedulingEligibility
c.configLock.RUnlock()
labels := append(c.baseLabels,
metrics.Label{Name: "node_status", Value: nodeStatus},
metrics.Label{Name: "node_scheduling_eligibility", Value: nodeEligibility},
)
c.setGaugeForMemoryStats(nodeID, hStats, labels) c.setGaugeForMemoryStats(nodeID, hStats, labels)
c.setGaugeForUptime(hStats, labels) c.setGaugeForUptime(hStats, labels)
@ -2952,8 +2943,9 @@ func (c *Client) emitHostStats() {
// emitClientMetrics emits lower volume client metrics // emitClientMetrics emits lower volume client metrics
func (c *Client) emitClientMetrics() { func (c *Client) emitClientMetrics() {
nodeID := c.NodeID() nodeID := c.NodeID()
labels := c.labels()
c.setGaugeForAllocationStats(nodeID) c.setGaugeForAllocationStats(nodeID, labels)
// Emit allocation metrics // Emit allocation metrics
blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0 blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0
@ -2976,11 +2968,11 @@ func (c *Client) emitClientMetrics() {
} }
if !c.config.DisableTaggedMetrics { if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), c.baseLabels) metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), labels)
} }
if c.config.BackwardsCompatibleMetrics { if c.config.BackwardsCompatibleMetrics {
@ -2992,6 +2984,19 @@ func (c *Client) emitClientMetrics() {
} }
} }
// labels takes the base labels and appends the node state
func (c *Client) labels() []metrics.Label {
c.configLock.RLock()
nodeStatus := c.configCopy.Node.Status
nodeEligibility := c.configCopy.Node.SchedulingEligibility
c.configLock.RUnlock()
return append(c.baseLabels,
metrics.Label{Name: "node_status", Value: nodeStatus},
metrics.Label{Name: "node_scheduling_eligibility", Value: nodeEligibility},
)
}
func (c *Client) getAllocatedResources(selfNode *structs.Node) *structs.ComparableResources { func (c *Client) getAllocatedResources(selfNode *structs.Node) *structs.ComparableResources {
// Unfortunately the allocs only have IP so we need to match them to the // Unfortunately the allocs only have IP so we need to match them to the
// device // device