Add node "status" and "scheduling eligibility" tags to client metrics (#6130)
When summing up the capacity of your Nomad fleet for scaling purposes, it's important to exclude draining nodes, as they won't accept new jobs.
This commit is contained in:
parent
5957030d18
commit
49b7d23cea
|
@ -2593,12 +2593,12 @@ func (c *Client) emitStats() {
|
|||
}
|
||||
|
||||
// setGaugeForMemoryStats proxies metrics for memory specific statistics
|
||||
func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats) {
|
||||
func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) {
|
||||
if !c.config.DisableTaggedMetrics {
|
||||
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), c.baseLabels)
|
||||
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), c.baseLabels)
|
||||
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), c.baseLabels)
|
||||
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "free"}, float32(hStats.Memory.Free), c.baseLabels)
|
||||
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), baseLabels)
|
||||
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), baseLabels)
|
||||
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), baseLabels)
|
||||
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "free"}, float32(hStats.Memory.Free), baseLabels)
|
||||
}
|
||||
|
||||
if c.config.BackwardsCompatibleMetrics {
|
||||
|
@ -2610,10 +2610,10 @@ func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats)
|
|||
}
|
||||
|
||||
// setGaugeForCPUStats proxies metrics for CPU specific statistics
|
||||
func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats) {
|
||||
func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) {
|
||||
for _, cpu := range hStats.CPU {
|
||||
if !c.config.DisableTaggedMetrics {
|
||||
labels := append(c.baseLabels, metrics.Label{
|
||||
labels := append(baseLabels, metrics.Label{
|
||||
Name: "cpu",
|
||||
Value: cpu.CPU,
|
||||
})
|
||||
|
@ -2634,10 +2634,10 @@ func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats) {
|
|||
}
|
||||
|
||||
// setGaugeForDiskStats proxies metrics for disk specific statistics
|
||||
func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats) {
|
||||
func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) {
|
||||
for _, disk := range hStats.DiskStats {
|
||||
if !c.config.DisableTaggedMetrics {
|
||||
labels := append(c.baseLabels, metrics.Label{
|
||||
labels := append(baseLabels, metrics.Label{
|
||||
Name: "disk",
|
||||
Value: disk.Device,
|
||||
})
|
||||
|
@ -2737,9 +2737,9 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
|
|||
}
|
||||
|
||||
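// setGaugeForUptime proxies the host uptime: a labeled gauge unless tagged metrics are disabled, plus a plain key/value gauge when backwards compatible metrics are enabled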
|
||||
func (c *Client) setGaugeForUptime(hStats *stats.HostStats) {
|
||||
func (c *Client) setGaugeForUptime(hStats *stats.HostStats, baseLabels []metrics.Label) {
|
||||
if !c.config.DisableTaggedMetrics {
|
||||
metrics.SetGaugeWithLabels([]string{"client", "uptime"}, float32(hStats.Uptime), c.baseLabels)
|
||||
metrics.SetGaugeWithLabels([]string{"client", "uptime"}, float32(hStats.Uptime), baseLabels)
|
||||
}
|
||||
if c.config.BackwardsCompatibleMetrics {
|
||||
metrics.SetGauge([]string{"client", "uptime"}, float32(hStats.Uptime))
|
||||
|
@ -2750,11 +2750,18 @@ func (c *Client) setGaugeForUptime(hStats *stats.HostStats) {
|
|||
func (c *Client) emitHostStats() {
|
||||
nodeID := c.NodeID()
|
||||
hStats := c.hostStatsCollector.Stats()
|
||||
node := c.Node()
|
||||
|
||||
c.setGaugeForMemoryStats(nodeID, hStats)
|
||||
c.setGaugeForUptime(hStats)
|
||||
c.setGaugeForCPUStats(nodeID, hStats)
|
||||
c.setGaugeForDiskStats(nodeID, hStats)
|
||||
node.Canonicalize()
|
||||
labels := append(c.baseLabels,
|
||||
metrics.Label{Name: "node_status", Value: node.Status},
|
||||
metrics.Label{Name: "node_scheduling_eligibility", Value: node.SchedulingEligibility},
|
||||
)
|
||||
|
||||
c.setGaugeForMemoryStats(nodeID, hStats, labels)
|
||||
c.setGaugeForUptime(hStats, labels)
|
||||
c.setGaugeForCPUStats(nodeID, hStats, labels)
|
||||
c.setGaugeForDiskStats(nodeID, hStats, labels)
|
||||
}
|
||||
|
||||
// emitClientMetrics emits lower volume client metrics
|
||||
|
|
Loading…
Reference in a new issue