metrics: even classless blocked evals get metrics
This PR fixes a bug where blocked evaluations with no class set would not have metrics exported at the dc:class scope. Fixes #13759
This commit is contained in:
parent
05cd91155d
commit
c23da281a1
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
metrics: Fixed a bug where blocked evals with no class produced no dc:class scope metrics
|
||||
```
|
|
@ -4,8 +4,8 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
log "github.com/hashicorp/go-hclog"
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
"github.com/hashicorp/nomad/helper"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
@ -31,7 +31,7 @@ const (
|
|||
// failed allocation becomes available.
|
||||
type BlockedEvals struct {
|
||||
// logger is the logger to use by the blocked eval tracker.
|
||||
logger log.Logger
|
||||
logger hclog.Logger
|
||||
|
||||
evalBroker *EvalBroker
|
||||
enabled bool
|
||||
|
@ -96,7 +96,7 @@ type wrappedEval struct {
|
|||
|
||||
// NewBlockedEvals creates a new blocked eval tracker that will enqueue
|
||||
// unblocked evals into the passed broker.
|
||||
func NewBlockedEvals(evalBroker *EvalBroker, logger log.Logger) *BlockedEvals {
|
||||
func NewBlockedEvals(evalBroker *EvalBroker, logger hclog.Logger) *BlockedEvals {
|
||||
return &BlockedEvals{
|
||||
logger: logger.Named("blocked_evals"),
|
||||
evalBroker: evalBroker,
|
||||
|
@ -727,7 +727,7 @@ func (b *BlockedEvals) EmitStats(period time.Duration, stopCh <-chan struct{}) {
|
|||
metrics.SetGaugeWithLabels([]string{"nomad", "blocked_evals", "job", "memory"}, float32(v.MemoryMB), labels)
|
||||
}
|
||||
|
||||
for k, v := range stats.BlockedResources.ByNode {
|
||||
for k, v := range stats.BlockedResources.ByClassInDC {
|
||||
labels := []metrics.Label{
|
||||
{Name: "datacenter", Value: k.dc},
|
||||
{Name: "node_class", Value: k.class},
|
||||
|
|
|
@ -24,8 +24,8 @@ type BlockedStats struct {
|
|||
BlockedResources *BlockedResourcesStats
|
||||
}
|
||||
|
||||
// node stores information related to nodes.
|
||||
type node struct {
|
||||
// classInDC is a coordinate of a specific class in a specific datacenter
|
||||
type classInDC struct {
|
||||
dc string
|
||||
class string
|
||||
}
|
||||
|
@ -65,9 +65,9 @@ func (b *BlockedStats) prune(cutoff time.Time) {
|
|||
}
|
||||
}
|
||||
|
||||
for k, v := range b.BlockedResources.ByNode {
|
||||
for k, v := range b.BlockedResources.ByClassInDC {
|
||||
if shouldPrune(v) {
|
||||
delete(b.BlockedResources.ByNode, k)
|
||||
delete(b.BlockedResources.ByClassInDC, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -89,6 +89,10 @@ func generateResourceStats(eval *structs.Evaluation) *BlockedResourcesStats {
|
|||
for class := range allocMetrics.ClassExhausted {
|
||||
classes[class] = struct{}{}
|
||||
}
|
||||
if len(allocMetrics.ClassExhausted) == 0 {
|
||||
// some evaluations have no class
|
||||
classes[""] = struct{}{}
|
||||
}
|
||||
for _, r := range allocMetrics.ResourcesExhausted {
|
||||
resources.CPU += r.CPU
|
||||
resources.MemoryMB += r.MemoryMB
|
||||
|
@ -99,17 +103,17 @@ func generateResourceStats(eval *structs.Evaluation) *BlockedResourcesStats {
|
|||
nsID := structs.NewNamespacedID(eval.JobID, eval.Namespace)
|
||||
byJob[nsID] = resources
|
||||
|
||||
byNodeInfo := make(map[node]BlockedResourcesSummary)
|
||||
byClassInDC := make(map[classInDC]BlockedResourcesSummary)
|
||||
for dc := range dcs {
|
||||
for class := range classes {
|
||||
k := node{dc: dc, class: class}
|
||||
byNodeInfo[k] = resources
|
||||
k := classInDC{dc: dc, class: class}
|
||||
byClassInDC[k] = resources
|
||||
}
|
||||
}
|
||||
|
||||
return &BlockedResourcesStats{
|
||||
ByJob: byJob,
|
||||
ByNode: byNodeInfo,
|
||||
ByClassInDC: byClassInDC,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,14 +121,14 @@ func generateResourceStats(eval *structs.Evaluation) *BlockedResourcesStats {
|
|||
// tracked both by job and by node.
|
||||
type BlockedResourcesStats struct {
|
||||
ByJob map[structs.NamespacedID]BlockedResourcesSummary
|
||||
ByNode map[node]BlockedResourcesSummary
|
||||
ByClassInDC map[classInDC]BlockedResourcesSummary
|
||||
}
|
||||
|
||||
// NewBlockedResourcesStats returns a new BlockedResourcesStats.
|
||||
func NewBlockedResourcesStats() *BlockedResourcesStats {
|
||||
return &BlockedResourcesStats{
|
||||
ByJob: make(map[structs.NamespacedID]BlockedResourcesSummary),
|
||||
ByNode: make(map[node]BlockedResourcesSummary),
|
||||
ByClassInDC: make(map[classInDC]BlockedResourcesSummary),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -136,8 +140,8 @@ func (b *BlockedResourcesStats) Copy() *BlockedResourcesStats {
|
|||
result.ByJob[k] = v // value copy
|
||||
}
|
||||
|
||||
for k, v := range b.ByNode {
|
||||
result.ByNode[k] = v // value copy
|
||||
for k, v := range b.ByClassInDC {
|
||||
result.ByClassInDC[k] = v // value copy
|
||||
}
|
||||
|
||||
return result
|
||||
|
@ -152,8 +156,8 @@ func (b *BlockedResourcesStats) Add(a *BlockedResourcesStats) *BlockedResourcesS
|
|||
result.ByJob[k] = b.ByJob[k].Add(v)
|
||||
}
|
||||
|
||||
for k, v := range a.ByNode {
|
||||
result.ByNode[k] = b.ByNode[k].Add(v)
|
||||
for k, v := range a.ByClassInDC {
|
||||
result.ByClassInDC[k] = b.ByClassInDC[k].Add(v)
|
||||
}
|
||||
|
||||
return result
|
||||
|
@ -168,8 +172,8 @@ func (b *BlockedResourcesStats) Subtract(a *BlockedResourcesStats) *BlockedResou
|
|||
result.ByJob[k] = b.ByJob[k].Subtract(v)
|
||||
}
|
||||
|
||||
for k, v := range a.ByNode {
|
||||
result.ByNode[k] = b.ByNode[k].Subtract(v)
|
||||
for k, v := range a.ByClassInDC {
|
||||
result.ByClassInDC[k] = b.ByClassInDC[k].Subtract(v)
|
||||
}
|
||||
|
||||
return result
|
||||
|
|
|
@ -128,8 +128,8 @@ func TestBlockedResourceStats_New(t *testing.T) {
|
|||
a := NewBlockedResourcesStats()
|
||||
require.NotNil(t, a.ByJob)
|
||||
require.Empty(t, a.ByJob)
|
||||
require.NotNil(t, a.ByNode)
|
||||
require.Empty(t, a.ByNode)
|
||||
require.NotNil(t, a.ByClassInDC)
|
||||
require.Empty(t, a.ByClassInDC)
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -143,15 +143,20 @@ var (
|
|||
Namespace: "two",
|
||||
}
|
||||
|
||||
node1 = node{
|
||||
node1 = classInDC{
|
||||
dc: "dc1",
|
||||
class: "alpha",
|
||||
}
|
||||
|
||||
node2 = node{
|
||||
node2 = classInDC{
|
||||
dc: "dc1",
|
||||
class: "beta",
|
||||
}
|
||||
|
||||
node3 = classInDC{
|
||||
dc: "dc1",
|
||||
class: "", // not set
|
||||
}
|
||||
)
|
||||
|
||||
func TestBlockedResourceStats_Copy(t *testing.T) {
|
||||
|
@ -166,7 +171,7 @@ func TestBlockedResourceStats_Copy(t *testing.T) {
|
|||
MemoryMB: 256,
|
||||
},
|
||||
}
|
||||
a.ByNode = map[node]BlockedResourcesSummary{
|
||||
a.ByClassInDC = map[classInDC]BlockedResourcesSummary{
|
||||
node1: {
|
||||
Timestamp: now1,
|
||||
CPU: 300,
|
||||
|
@ -180,7 +185,7 @@ func TestBlockedResourceStats_Copy(t *testing.T) {
|
|||
CPU: 888,
|
||||
MemoryMB: 888,
|
||||
}
|
||||
c.ByNode[node1] = BlockedResourcesSummary{
|
||||
c.ByClassInDC[node1] = BlockedResourcesSummary{
|
||||
Timestamp: now2,
|
||||
CPU: 999,
|
||||
MemoryMB: 999,
|
||||
|
@ -188,7 +193,7 @@ func TestBlockedResourceStats_Copy(t *testing.T) {
|
|||
|
||||
// underlying data should have been deep copied
|
||||
require.Equal(t, 100, a.ByJob[id1].CPU)
|
||||
require.Equal(t, 300, a.ByNode[node1].CPU)
|
||||
require.Equal(t, 300, a.ByClassInDC[node1].CPU)
|
||||
}
|
||||
|
||||
func TestBlockedResourcesStats_Add(t *testing.T) {
|
||||
|
@ -196,7 +201,7 @@ func TestBlockedResourcesStats_Add(t *testing.T) {
|
|||
a.ByJob = map[structs.NamespacedID]BlockedResourcesSummary{
|
||||
id1: {Timestamp: now(1), CPU: 111, MemoryMB: 222},
|
||||
}
|
||||
a.ByNode = map[node]BlockedResourcesSummary{
|
||||
a.ByClassInDC = map[classInDC]BlockedResourcesSummary{
|
||||
node1: {Timestamp: now(2), CPU: 333, MemoryMB: 444},
|
||||
}
|
||||
|
||||
|
@ -205,7 +210,7 @@ func TestBlockedResourcesStats_Add(t *testing.T) {
|
|||
id1: {Timestamp: now(3), CPU: 200, MemoryMB: 300},
|
||||
id2: {Timestamp: now(4), CPU: 400, MemoryMB: 500},
|
||||
}
|
||||
b.ByNode = map[node]BlockedResourcesSummary{
|
||||
b.ByClassInDC = map[classInDC]BlockedResourcesSummary{
|
||||
node1: {Timestamp: now(5), CPU: 600, MemoryMB: 700},
|
||||
node2: {Timestamp: now(6), CPU: 800, MemoryMB: 900},
|
||||
}
|
||||
|
@ -218,10 +223,10 @@ func TestBlockedResourcesStats_Add(t *testing.T) {
|
|||
id2: {Timestamp: now(4), CPU: 400, MemoryMB: 500},
|
||||
}, result.ByJob)
|
||||
|
||||
require.Equal(t, map[node]BlockedResourcesSummary{
|
||||
require.Equal(t, map[classInDC]BlockedResourcesSummary{
|
||||
node1: {Timestamp: now(5), CPU: 933, MemoryMB: 1144},
|
||||
node2: {Timestamp: now(6), CPU: 800, MemoryMB: 900},
|
||||
}, result.ByNode)
|
||||
}, result.ByClassInDC)
|
||||
})
|
||||
|
||||
// make sure we handle zeros in both directions
|
||||
|
@ -233,20 +238,31 @@ func TestBlockedResourcesStats_Add(t *testing.T) {
|
|||
id2: {Timestamp: now(4), CPU: 400, MemoryMB: 500},
|
||||
}, result.ByJob)
|
||||
|
||||
require.Equal(t, map[node]BlockedResourcesSummary{
|
||||
require.Equal(t, map[classInDC]BlockedResourcesSummary{
|
||||
node1: {Timestamp: now(2), CPU: 933, MemoryMB: 1144},
|
||||
node2: {Timestamp: now(6), CPU: 800, MemoryMB: 900},
|
||||
}, result.ByNode)
|
||||
}, result.ByClassInDC)
|
||||
})
|
||||
}
|
||||
|
||||
func TestBlockedResourcesStats_Add_NoClass(t *testing.T) {
|
||||
a := NewBlockedResourcesStats()
|
||||
a.ByClassInDC = map[classInDC]BlockedResourcesSummary{
|
||||
node3: {Timestamp: now(1), CPU: 111, MemoryMB: 1111},
|
||||
}
|
||||
result := a.Add(a)
|
||||
require.Equal(t, map[classInDC]BlockedResourcesSummary{
|
||||
node3: {Timestamp: now(1), CPU: 222, MemoryMB: 2222},
|
||||
}, result.ByClassInDC)
|
||||
}
|
||||
|
||||
func TestBlockedResourcesStats_Subtract(t *testing.T) {
|
||||
a := NewBlockedResourcesStats()
|
||||
a.ByJob = map[structs.NamespacedID]BlockedResourcesSummary{
|
||||
id1: {Timestamp: now(1), CPU: 100, MemoryMB: 100},
|
||||
id2: {Timestamp: now(2), CPU: 200, MemoryMB: 200},
|
||||
}
|
||||
a.ByNode = map[node]BlockedResourcesSummary{
|
||||
a.ByClassInDC = map[classInDC]BlockedResourcesSummary{
|
||||
node1: {Timestamp: now(3), CPU: 300, MemoryMB: 300},
|
||||
node2: {Timestamp: now(4), CPU: 400, MemoryMB: 400},
|
||||
}
|
||||
|
@ -256,7 +272,7 @@ func TestBlockedResourcesStats_Subtract(t *testing.T) {
|
|||
id1: {Timestamp: now(5), CPU: 10, MemoryMB: 11},
|
||||
id2: {Timestamp: now(6), CPU: 12, MemoryMB: 13},
|
||||
}
|
||||
b.ByNode = map[node]BlockedResourcesSummary{
|
||||
b.ByClassInDC = map[classInDC]BlockedResourcesSummary{
|
||||
node1: {Timestamp: now(7), CPU: 14, MemoryMB: 15},
|
||||
node2: {Timestamp: now(8), CPU: 16, MemoryMB: 17},
|
||||
}
|
||||
|
@ -274,14 +290,14 @@ func TestBlockedResourcesStats_Subtract(t *testing.T) {
|
|||
require.Equal(t, 187, result.ByJob[id2].MemoryMB)
|
||||
|
||||
// node1
|
||||
require.Equal(t, now(7), result.ByNode[node1].Timestamp)
|
||||
require.Equal(t, 286, result.ByNode[node1].CPU)
|
||||
require.Equal(t, 285, result.ByNode[node1].MemoryMB)
|
||||
require.Equal(t, now(7), result.ByClassInDC[node1].Timestamp)
|
||||
require.Equal(t, 286, result.ByClassInDC[node1].CPU)
|
||||
require.Equal(t, 285, result.ByClassInDC[node1].MemoryMB)
|
||||
|
||||
// node2
|
||||
require.Equal(t, now(8), result.ByNode[node2].Timestamp)
|
||||
require.Equal(t, 384, result.ByNode[node2].CPU)
|
||||
require.Equal(t, 383, result.ByNode[node2].MemoryMB)
|
||||
require.Equal(t, now(8), result.ByClassInDC[node2].Timestamp)
|
||||
require.Equal(t, 384, result.ByClassInDC[node2].CPU)
|
||||
require.Equal(t, 383, result.ByClassInDC[node2].MemoryMB)
|
||||
}
|
||||
|
||||
// testBlockedEvalsRandomBlockedEval wraps an eval that is randomly generated.
|
||||
|
@ -363,9 +379,9 @@ func clearTimestampFromBlockedResourceStats(b *BlockedResourcesStats) {
|
|||
v.Timestamp = time.Time{}
|
||||
b.ByJob[k] = v
|
||||
}
|
||||
for k, v := range b.ByNode {
|
||||
for k, v := range b.ByClassInDC {
|
||||
v.Timestamp = time.Time{}
|
||||
b.ByNode[k] = v
|
||||
b.ByClassInDC[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue