removed backwards-compatible/untagged metrics deprecated in 0.7

This commit is contained in:
Chris Baker 2020-10-13 19:56:54 +00:00
parent 517bc3c17b
commit 1d35578bed
16 changed files with 150 additions and 339 deletions

View File

@ -674,7 +674,7 @@ func (tr *TaskRunner) emitExitResultEvent(result *drivers.ExitResult) {
tr.EmitEvent(event)
if result.OOMKilled && !tr.clientConfig.DisableTaggedMetrics {
if result.OOMKilled {
metrics.IncrCounterWithLabels([]string{"client", "allocs", "oom_killed"}, 1, tr.baseLabels)
}
}
@ -1115,12 +1115,7 @@ func (tr *TaskRunner) updateStateImpl(state string) error {
// Capture the start time if it is just starting
if oldState != structs.TaskStateRunning {
taskState.StartedAt = time.Now().UTC()
if !tr.clientConfig.DisableTaggedMetrics {
metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"}, 1, tr.baseLabels)
}
//if r.config.BackwardsCompatibleMetrics {
//metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "running"}, 1)
//}
metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"}, 1, tr.baseLabels)
}
case structs.TaskStateDead:
// Capture the finished time if not already set
@ -1130,19 +1125,9 @@ func (tr *TaskRunner) updateStateImpl(state string) error {
// Emitting metrics to indicate task complete and failures
if taskState.Failed {
if !tr.clientConfig.DisableTaggedMetrics {
metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"}, 1, tr.baseLabels)
}
//if r.config.BackwardsCompatibleMetrics {
//metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "failed"}, 1)
//}
metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"}, 1, tr.baseLabels)
} else {
if !tr.clientConfig.DisableTaggedMetrics {
metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"}, 1, tr.baseLabels)
}
//if r.config.BackwardsCompatibleMetrics {
//metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "complete"}, 1)
//}
metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"}, 1, tr.baseLabels)
}
}
@ -1202,12 +1187,7 @@ func (tr *TaskRunner) appendEvent(event *structs.TaskEvent) error {
// XXX This seems like a super awkward spot for this? Why not shouldRestart?
// Update restart metrics
if event.Type == structs.TaskRestarting {
if !tr.clientConfig.DisableTaggedMetrics {
metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"}, 1, tr.baseLabels)
}
//if r.config.BackwardsCompatibleMetrics {
//metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "restart"}, 1)
//}
metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"}, 1, tr.baseLabels)
tr.state.Restarts++
tr.state.LastRestart = time.Unix(0, event.Time)
}
@ -1322,38 +1302,23 @@ func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) {
allocatedMem = float32(taskRes.Memory.MemoryMB) * 1024 * 1024
}
if !tr.clientConfig.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "rss"},
float32(ru.ResourceUsage.MemoryStats.RSS), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "cache"},
float32(ru.ResourceUsage.MemoryStats.Cache), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "swap"},
float32(ru.ResourceUsage.MemoryStats.Swap), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "usage"},
float32(ru.ResourceUsage.MemoryStats.Usage), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "max_usage"},
float32(ru.ResourceUsage.MemoryStats.MaxUsage), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_usage"},
float32(ru.ResourceUsage.MemoryStats.KernelUsage), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_max_usage"},
float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage), tr.baseLabels)
if allocatedMem > 0 {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"},
allocatedMem, tr.baseLabels)
}
}
if tr.clientConfig.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "usage"}, float32(ru.ResourceUsage.MemoryStats.Usage))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage))
if allocatedMem > 0 {
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "allocated"}, allocatedMem)
}
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "rss"},
float32(ru.ResourceUsage.MemoryStats.RSS), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "cache"},
float32(ru.ResourceUsage.MemoryStats.Cache), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "swap"},
float32(ru.ResourceUsage.MemoryStats.Swap), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "usage"},
float32(ru.ResourceUsage.MemoryStats.Usage), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "max_usage"},
float32(ru.ResourceUsage.MemoryStats.MaxUsage), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_usage"},
float32(ru.ResourceUsage.MemoryStats.KernelUsage), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_max_usage"},
float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage), tr.baseLabels)
if allocatedMem > 0 {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"},
allocatedMem, tr.baseLabels)
}
}
@ -1365,35 +1330,21 @@ func (tr *TaskRunner) setGaugeForCPU(ru *cstructs.TaskResourceUsage) {
allocatedCPU = float32(taskRes.Cpu.CpuShares)
}
if !tr.clientConfig.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_percent"},
float32(ru.ResourceUsage.CpuStats.Percent), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "system"},
float32(ru.ResourceUsage.CpuStats.SystemMode), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "user"},
float32(ru.ResourceUsage.CpuStats.UserMode), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_time"},
float32(ru.ResourceUsage.CpuStats.ThrottledTime), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_periods"},
float32(ru.ResourceUsage.CpuStats.ThrottledPeriods), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_ticks"},
float32(ru.ResourceUsage.CpuStats.TotalTicks), tr.baseLabels)
if allocatedCPU > 0 {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "allocated"},
allocatedCPU, tr.baseLabels)
}
}
if tr.clientConfig.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "total_percent"}, float32(ru.ResourceUsage.CpuStats.Percent))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods))
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "total_ticks"}, float32(ru.ResourceUsage.CpuStats.TotalTicks))
if allocatedCPU > 0 {
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "allocated"}, allocatedCPU)
}
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_percent"},
float32(ru.ResourceUsage.CpuStats.Percent), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "system"},
float32(ru.ResourceUsage.CpuStats.SystemMode), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "user"},
float32(ru.ResourceUsage.CpuStats.UserMode), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_time"},
float32(ru.ResourceUsage.CpuStats.ThrottledTime), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_periods"},
float32(ru.ResourceUsage.CpuStats.ThrottledPeriods), tr.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_ticks"},
float32(ru.ResourceUsage.CpuStats.TotalTicks), tr.baseLabels)
if allocatedCPU > 0 {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "allocated"},
allocatedCPU, tr.baseLabels)
}
}

View File

@ -18,6 +18,10 @@ import (
"github.com/hashicorp/consul/lib"
hclog "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
vaultapi "github.com/hashicorp/vault/api"
"github.com/pkg/errors"
"github.com/shirou/gopsutil/host"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/allocrunner"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
@ -47,9 +51,6 @@ import (
"github.com/hashicorp/nomad/plugins/csi"
"github.com/hashicorp/nomad/plugins/device"
"github.com/hashicorp/nomad/plugins/drivers"
vaultapi "github.com/hashicorp/vault/api"
"github.com/pkg/errors"
"github.com/shirou/gopsutil/host"
)
const (
@ -2784,68 +2785,40 @@ func (c *Client) emitStats() {
// setGaugeForMemoryStats proxies metrics for memory specific statistics
func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) {
if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "free"}, float32(hStats.Memory.Free), baseLabels)
}
if c.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "total"}, float32(hStats.Memory.Total))
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "available"}, float32(hStats.Memory.Available))
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "used"}, float32(hStats.Memory.Used))
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "free"}, float32(hStats.Memory.Free))
}
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "free"}, float32(hStats.Memory.Free), baseLabels)
}
// setGaugeForCPUStats proxies metrics for CPU specific statistics
func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) {
for _, cpu := range hStats.CPU {
if !c.config.DisableTaggedMetrics {
labels := append(baseLabels, metrics.Label{
Name: "cpu",
Value: cpu.CPU,
})
labels := append(baseLabels, metrics.Label{
Name: "cpu",
Value: cpu.CPU,
})
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "total"}, float32(cpu.Total), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "user"}, float32(cpu.User), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "idle"}, float32(cpu.Idle), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "system"}, float32(cpu.System), labels)
}
if c.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "total"}, float32(cpu.Total))
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "user"}, float32(cpu.User))
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "idle"}, float32(cpu.Idle))
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "system"}, float32(cpu.System))
}
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "total"}, float32(cpu.Total), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "user"}, float32(cpu.User), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "idle"}, float32(cpu.Idle), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "system"}, float32(cpu.System), labels)
}
}
// setGaugeForDiskStats proxies metrics for disk specific statistics
func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) {
for _, disk := range hStats.DiskStats {
if !c.config.DisableTaggedMetrics {
labels := append(baseLabels, metrics.Label{
Name: "disk",
Value: disk.Device,
})
labels := append(baseLabels, metrics.Label{
Name: "disk",
Value: disk.Device,
})
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "size"}, float32(disk.Size), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used"}, float32(disk.Used), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "available"}, float32(disk.Available), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used_percent"}, float32(disk.UsedPercent), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "inodes_percent"}, float32(disk.InodesUsedPercent), labels)
}
if c.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "size"}, float32(disk.Size))
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used"}, float32(disk.Used))
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "available"}, float32(disk.Available))
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used_percent"}, float32(disk.UsedPercent))
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "inodes_percent"}, float32(disk.InodesUsedPercent))
}
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "size"}, float32(disk.Size), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used"}, float32(disk.Used), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "available"}, float32(disk.Available), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used_percent"}, float32(disk.UsedPercent), labels)
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "inodes_percent"}, float32(disk.InodesUsedPercent), labels)
}
}
@ -2859,30 +2832,16 @@ func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics.
allocated := c.getAllocatedResources(node)
// Emit allocated
if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), baseLabels)
}
if c.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "allocated", "memory", nodeID}, float32(allocated.Flattened.Memory.MemoryMB))
metrics.SetGauge([]string{"client", "allocated", "disk", nodeID}, float32(allocated.Shared.DiskMB))
metrics.SetGauge([]string{"client", "allocated", "cpu", nodeID}, float32(allocated.Flattened.Cpu.CpuShares))
}
metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), baseLabels)
for _, n := range allocated.Flattened.Networks {
if !c.config.DisableTaggedMetrics {
labels := append(baseLabels, metrics.Label{
Name: "device",
Value: n.Device,
})
metrics.SetGaugeWithLabels([]string{"client", "allocated", "network"}, float32(n.MBits), labels)
}
if c.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "allocated", "network", n.Device, nodeID}, float32(n.MBits))
}
labels := append(baseLabels, metrics.Label{
Name: "device",
Value: n.Device,
})
metrics.SetGaugeWithLabels([]string{"client", "allocated", "network"}, float32(n.MBits), labels)
}
// Emit unallocated
@ -2890,17 +2849,9 @@ func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics.
unallocatedDisk := total.Disk.DiskMB - res.Disk.DiskMB - allocated.Shared.DiskMB
unallocatedCpu := total.Cpu.CpuShares - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares
if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), baseLabels)
}
if c.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "unallocated", "memory", nodeID}, float32(unallocatedMem))
metrics.SetGauge([]string{"client", "unallocated", "disk", nodeID}, float32(unallocatedDisk))
metrics.SetGauge([]string{"client", "unallocated", "cpu", nodeID}, float32(unallocatedCpu))
}
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), baseLabels)
totalComparable := total.Comparable()
for _, n := range totalComparable.Flattened.Networks {
@ -2912,28 +2863,17 @@ func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics.
}
unallocatedMbits := n.MBits - usedMbits
if !c.config.DisableTaggedMetrics {
labels := append(baseLabels, metrics.Label{
Name: "device",
Value: n.Device,
})
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "network"}, float32(unallocatedMbits), labels)
}
if c.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "unallocated", "network", n.Device, nodeID}, float32(unallocatedMbits))
}
labels := append(baseLabels, metrics.Label{
Name: "device",
Value: n.Device,
})
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "network"}, float32(unallocatedMbits), labels)
}
}
// No labels are required so we emit with only a key/value syntax
func (c *Client) setGaugeForUptime(hStats *stats.HostStats, baseLabels []metrics.Label) {
if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "uptime"}, float32(hStats.Uptime), baseLabels)
}
if c.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "uptime"}, float32(hStats.Uptime))
}
metrics.SetGaugeWithLabels([]string{"client", "uptime"}, float32(hStats.Uptime), baseLabels)
}
// emitHostStats pushes host resource usage stats to remote metrics collection sinks
@ -2975,21 +2915,11 @@ func (c *Client) emitClientMetrics() {
}
}
if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), labels)
}
if c.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "allocations", "migrating", nodeID}, float32(migrating))
metrics.SetGauge([]string{"client", "allocations", "blocked", nodeID}, float32(blocked))
metrics.SetGauge([]string{"client", "allocations", "pending", nodeID}, float32(pending))
metrics.SetGauge([]string{"client", "allocations", "running", nodeID}, float32(running))
metrics.SetGauge([]string{"client", "allocations", "terminal", nodeID}, float32(terminal))
}
metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), labels)
}
// labels takes the base labels and appends the node state

View File

@ -205,20 +205,12 @@ type Config struct {
// ACLPolicyTTL is how long we cache policy values for
ACLPolicyTTL time.Duration
// DisableTaggedMetrics determines whether metrics will be displayed via a
// key/value/tag format, or simply a key/value format
DisableTaggedMetrics bool
// DisableRemoteExec disables remote exec targeting tasks on this client
DisableRemoteExec bool
// TemplateConfig includes configuration for template rendering
TemplateConfig *ClientTemplateConfig
// BackwardsCompatibleMetrics determines whether to show methods of
// displaying metrics for older versions, or to only show the new format
BackwardsCompatibleMetrics bool
// RPCHoldTimeout is how long an RPC can be "held" before it is errored.
// This is used to paper over a loss of leadership by instead holding RPCs,
// so that the caller experiences a slow response rather than an error.
@ -321,18 +313,16 @@ func DefaultConfig() *Config {
GCInodeUsageThreshold: 70,
GCMaxAllocs: 50,
NoHostUUID: true,
DisableTaggedMetrics: false,
DisableRemoteExec: false,
TemplateConfig: &ClientTemplateConfig{
FunctionDenylist: []string{"plugin"},
DisableSandbox: false,
},
BackwardsCompatibleMetrics: false,
RPCHoldTimeout: 5 * time.Second,
CNIPath: "/opt/cni/bin",
CNIConfigDir: "/opt/cni/config",
CNIInterfacePrefix: "eth",
HostNetworks: map[string]*structs.ClientHostNetworkConfig{},
RPCHoldTimeout: 5 * time.Second,
CNIPath: "/opt/cni/bin",
CNIConfigDir: "/opt/cni/config",
CNIInterfacePrefix: "eth",
HostNetworks: map[string]*structs.ClientHostNetworkConfig{},
}
}

View File

@ -387,9 +387,7 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
// Setup telemetry related config
conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics
conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics
// Parse Limits timeout from a string into durations
if d, err := time.ParseDuration(agentConfig.Limits.RPCHandshakeTimeout); err != nil {
@ -616,8 +614,6 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics
// Set the TLS related configs
conf.TLSConfig = agentConfig.TLSConfig

View File

@ -521,8 +521,6 @@ func TestAgent_Client_TelemetryConfiguration(t *testing.T) {
conf := DefaultConfig()
conf.DevMode = true
conf.Telemetry.DisableTaggedMetrics = true
conf.Telemetry.BackwardsCompatibleMetrics = true
a := &Agent{config: conf}
@ -534,8 +532,6 @@ func TestAgent_Client_TelemetryConfiguration(t *testing.T) {
assert.Equal(c.StatsCollectionInterval, telemetry.collectionInterval)
assert.Equal(c.PublishNodeMetrics, telemetry.PublishNodeMetrics)
assert.Equal(c.PublishAllocationMetrics, telemetry.PublishAllocationMetrics)
assert.Equal(c.DisableTaggedMetrics, telemetry.DisableTaggedMetrics)
assert.Equal(c.BackwardsCompatibleMetrics, telemetry.BackwardsCompatibleMetrics)
}
// TestAgent_HTTPCheck asserts Agent.agentHTTPCheck properly alters the HTTP

View File

@ -980,8 +980,7 @@ func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) {
metricsConf.EnableHostname = !telConfig.DisableHostname
// Prefer the hostname as a label.
metricsConf.EnableHostnameLabel = !telConfig.DisableHostname &&
!telConfig.DisableTaggedMetrics && !telConfig.BackwardsCompatibleMetrics
metricsConf.EnableHostnameLabel = !telConfig.DisableHostname
if telConfig.UseNodeName {
metricsConf.HostName = config.NodeName

View File

@ -561,14 +561,6 @@ type Telemetry struct {
PublishAllocationMetrics bool `hcl:"publish_allocation_metrics"`
PublishNodeMetrics bool `hcl:"publish_node_metrics"`
// DisableTaggedMetrics disables a new version of generating metrics which
// uses tags
DisableTaggedMetrics bool `hcl:"disable_tagged_metrics"`
// BackwardsCompatibleMetrics allows for generating metrics in a simple
// key/value structure as done in older versions of Nomad
BackwardsCompatibleMetrics bool `hcl:"backwards_compatible_metrics"`
// PrefixFilter allows for filtering out metrics from being collected
PrefixFilter []string `hcl:"prefix_filter"`
@ -1642,14 +1634,6 @@ func (a *Telemetry) Merge(b *Telemetry) *Telemetry {
result.CirconusBrokerSelectTag = b.CirconusBrokerSelectTag
}
if b.DisableTaggedMetrics {
result.DisableTaggedMetrics = b.DisableTaggedMetrics
}
if b.BackwardsCompatibleMetrics {
result.BackwardsCompatibleMetrics = b.BackwardsCompatibleMetrics
}
if b.PrefixFilter != nil {
result.PrefixFilter = b.PrefixFilter
}

View File

@ -180,8 +180,6 @@ var basicConfig = &Config{
collectionInterval: 3 * time.Second,
PublishAllocationMetrics: true,
PublishNodeMetrics: true,
DisableTaggedMetrics: true,
BackwardsCompatibleMetrics: true,
},
LeaveOnInt: true,
LeaveOnTerm: true,

View File

@ -67,8 +67,6 @@ func TestConfig_Merge(t *testing.T) {
DataDogTags: []string{"cat1:tag1", "cat2:tag2"},
PrometheusMetrics: true,
DisableHostname: false,
DisableTaggedMetrics: true,
BackwardsCompatibleMetrics: true,
CirconusAPIToken: "0",
CirconusAPIApp: "nomadic",
CirconusAPIURL: "http://api.circonus.com/v2",
@ -256,8 +254,6 @@ func TestConfig_Merge(t *testing.T) {
DisableHostname: true,
PublishNodeMetrics: true,
PublishAllocationMetrics: true,
DisableTaggedMetrics: true,
BackwardsCompatibleMetrics: true,
CirconusAPIToken: "1",
CirconusAPIApp: "nomad",
CirconusAPIURL: "https://api.circonus.com/v2",

View File

@ -73,8 +73,6 @@ func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
httpTest(t, func(c *Config) {
c.Telemetry.PublishAllocationMetrics = true
c.Telemetry.PublishNodeMetrics = true
c.Telemetry.BackwardsCompatibleMetrics = false
c.Telemetry.DisableTaggedMetrics = false
}, func(s *TestAgent) {
// Create the job, wait for it to finish
job := mock.BatchJob()

View File

@ -177,15 +177,13 @@ audit {
}
telemetry {
statsite_address = "127.0.0.1:1234"
statsd_address = "127.0.0.1:2345"
prometheus_metrics = true
disable_hostname = true
collection_interval = "3s"
publish_allocation_metrics = true
publish_node_metrics = true
disable_tagged_metrics = true
backwards_compatible_metrics = true
statsite_address = "127.0.0.1:1234"
statsd_address = "127.0.0.1:2345"
prometheus_metrics = true
disable_hostname = true
collection_interval = "3s"
publish_allocation_metrics = true
publish_node_metrics = true
}
leave_on_interrupt = true

View File

@ -313,10 +313,8 @@
"syslog_facility": "LOCAL1",
"telemetry": [
{
"backwards_compatible_metrics": true,
"collection_interval": "3s",
"disable_hostname": true,
"disable_tagged_metrics": true,
"prometheus_metrics": true,
"publish_allocation_metrics": true,
"publish_node_metrics": true,

View File

@ -298,18 +298,10 @@ type Config struct {
// publishes metrics which are periodic in nature like updating gauges
StatsCollectionInterval time.Duration
// DisableTaggedMetrics determines whether metrics will be displayed via a
// key/value/tag format, or simply a key/value format
DisableTaggedMetrics bool
// DisableDispatchedJobSummaryMetrics allows for ignore dispatched jobs when
// publishing Job summary metrics
DisableDispatchedJobSummaryMetrics bool
// BackwardsCompatibleMetrics determines whether to show methods of
// displaying metrics for older versions, or to only show the new format
BackwardsCompatibleMetrics bool
// AutopilotConfig is used to apply the initial autopilot config when
// bootstrapping.
AutopilotConfig *structs.AutopilotConfig

View File

@ -769,65 +769,55 @@ func (s *Server) publishJobSummaryMetrics(stopCh chan struct{}) {
func (s *Server) iterateJobSummaryMetrics(summary *structs.JobSummary) {
for name, tgSummary := range summary.Summary {
if !s.config.DisableTaggedMetrics {
labels := []metrics.Label{
{
Name: "job",
Value: summary.JobID,
},
{
Name: "task_group",
Value: name,
},
{
Name: "namespace",
Value: summary.Namespace,
},
}
if strings.Contains(summary.JobID, "/dispatch-") {
jobInfo := strings.Split(summary.JobID, "/dispatch-")
labels = append(labels, metrics.Label{
Name: "parent_id",
Value: jobInfo[0],
}, metrics.Label{
Name: "dispatch_id",
Value: jobInfo[1],
})
}
if strings.Contains(summary.JobID, "/periodic-") {
jobInfo := strings.Split(summary.JobID, "/periodic-")
labels = append(labels, metrics.Label{
Name: "parent_id",
Value: jobInfo[0],
}, metrics.Label{
Name: "periodic_id",
Value: jobInfo[1],
})
}
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "queued"},
float32(tgSummary.Queued), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "complete"},
float32(tgSummary.Complete), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "failed"},
float32(tgSummary.Failed), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "running"},
float32(tgSummary.Running), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "starting"},
float32(tgSummary.Starting), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "lost"},
float32(tgSummary.Lost), labels)
labels := []metrics.Label{
{
Name: "job",
Value: summary.JobID,
},
{
Name: "task_group",
Value: name,
},
{
Name: "namespace",
Value: summary.Namespace,
},
}
if s.config.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "queued"}, float32(tgSummary.Queued))
metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "complete"}, float32(tgSummary.Complete))
metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "failed"}, float32(tgSummary.Failed))
metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "running"}, float32(tgSummary.Running))
metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "starting"}, float32(tgSummary.Starting))
metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "lost"}, float32(tgSummary.Lost))
if strings.Contains(summary.JobID, "/dispatch-") {
jobInfo := strings.Split(summary.JobID, "/dispatch-")
labels = append(labels, metrics.Label{
Name: "parent_id",
Value: jobInfo[0],
}, metrics.Label{
Name: "dispatch_id",
Value: jobInfo[1],
})
}
if strings.Contains(summary.JobID, "/periodic-") {
jobInfo := strings.Split(summary.JobID, "/periodic-")
labels = append(labels, metrics.Label{
Name: "parent_id",
Value: jobInfo[0],
}, metrics.Label{
Name: "periodic_id",
Value: jobInfo[1],
})
}
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "queued"},
float32(tgSummary.Queued), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "complete"},
float32(tgSummary.Complete), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "failed"},
float32(tgSummary.Failed), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "running"},
float32(tgSummary.Running), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "starting"},
float32(tgSummary.Starting), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "lost"},
float32(tgSummary.Lost), labels)
}
}

View File

@ -50,17 +50,6 @@ The following options are available on all telemetry configurations.
- `publish_node_metrics` `(bool: false)` - Specifies if Nomad should publish
runtime metrics of nodes.
- `backwards_compatible_metrics` `(bool: false)` - Specifies if Nomad should
publish metrics that are backwards compatible with versions below 0.7, as
post version 0.7, Nomad emits tagged metrics. All new metrics will
only be added to tagged metrics. Note that this option is used to transition
monitoring to tagged metrics and will eventually be deprecated.
- `disable_tagged_metrics` `(bool: false)` - Specifies if Nomad should not emit
tagged metrics and only emit metrics compatible with versions below Nomad
0.7. Note that this option is used to transition monitoring to tagged
metrics and will eventually be deprecated.
- `filter_default` `(bool: true)` - This controls whether to allow metrics that
have not been specified by the filter. Defaults to true, which will allow all
metrics when no filters are provided. When set to false with no filters, no

View File

@ -24,6 +24,12 @@ When stopping tasks running with the Docker task driver, Nomad documents that a
versions of Nomad would issue `SIGINT` instead. Starting again with Nomad v0.13.0
`SIGTERM` will be sent by default when stopping Docker tasks.
### Deprecated metrics have been removed
Nomad v0.7.0 added supported for tagged metrics and deprecated untagged metrics.
There was support for configuring backwards-compatible metrics. This support has
been removed with v0.13.0, and all metrics will be emitted with tags.
### Null characters in region, datacenter, job name/ID, task group name, and task names
Starting with Nomad v0.13.0, jobs will fail validation if any of the following