2021-04-29 19:03:45 +00:00
|
|
|
package nomad
|
|
|
|
|
|
|
|
import (
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
|
|
)
|
|
|
|
|
|
|
|
// BlockedStats returns all the stats about the blocked eval tracker.
|
|
|
|
type BlockedStats struct {
|
|
|
|
// TotalEscaped is the total number of blocked evaluations that have escaped
|
|
|
|
// computed node classes.
|
|
|
|
TotalEscaped int
|
|
|
|
|
|
|
|
// TotalBlocked is the total number of blocked evaluations.
|
|
|
|
TotalBlocked int
|
|
|
|
|
|
|
|
// TotalQuotaLimit is the total number of blocked evaluations that are due
|
|
|
|
// to the quota limit being reached.
|
|
|
|
TotalQuotaLimit int
|
|
|
|
|
|
|
|
// BlockedResources stores the amount of resources requested by blocked
|
|
|
|
// evaluations.
|
2022-05-23 20:19:30 +00:00
|
|
|
BlockedResources *BlockedResourcesStats
|
|
|
|
}
|
|
|
|
|
|
|
|
// node identifies a group of nodes by datacenter and node class. It is a
// comparable value and is used as the key of BlockedResourcesStats.ByNode.
type node struct {
	dc    string
	class string
}
|
|
|
|
|
|
|
|
// NewBlockedStats returns a new BlockedStats.
|
|
|
|
func NewBlockedStats() *BlockedStats {
|
|
|
|
return &BlockedStats{
|
|
|
|
BlockedResources: NewBlockedResourcesStats(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Block updates the stats for the blocked eval tracker with the details of the
|
|
|
|
// evaluation being blocked.
|
|
|
|
func (b *BlockedStats) Block(eval *structs.Evaluation) {
|
|
|
|
b.TotalBlocked++
|
|
|
|
resourceStats := generateResourceStats(eval)
|
|
|
|
b.BlockedResources = b.BlockedResources.Add(resourceStats)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unblock updates the stats for the blocked eval tracker with the details of the
|
|
|
|
// evaluation being unblocked.
|
|
|
|
func (b *BlockedStats) Unblock(eval *structs.Evaluation) {
|
|
|
|
b.TotalBlocked--
|
|
|
|
resourceStats := generateResourceStats(eval)
|
|
|
|
b.BlockedResources = b.BlockedResources.Subtract(resourceStats)
|
|
|
|
}
|
|
|
|
|
|
|
|
// prune deletes any key zero metric values older than the cutoff.
|
|
|
|
func (b *BlockedStats) prune(cutoff time.Time) {
|
|
|
|
shouldPrune := func(s BlockedResourcesSummary) bool {
|
|
|
|
return s.Timestamp.Before(cutoff) && s.IsZero()
|
|
|
|
}
|
|
|
|
|
|
|
|
for k, v := range b.BlockedResources.ByJob {
|
|
|
|
if shouldPrune(v) {
|
|
|
|
delete(b.BlockedResources.ByJob, k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-05-23 20:19:30 +00:00
|
|
|
for k, v := range b.BlockedResources.ByNode {
|
2021-04-29 19:03:45 +00:00
|
|
|
if shouldPrune(v) {
|
2022-05-23 20:19:30 +00:00
|
|
|
delete(b.BlockedResources.ByNode, k)
|
2021-04-29 19:03:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// generateResourceStats returns a summary of the resources requested by the
|
|
|
|
// input evaluation.
|
2022-05-23 20:19:30 +00:00
|
|
|
func generateResourceStats(eval *structs.Evaluation) *BlockedResourcesStats {
|
2021-04-29 19:03:45 +00:00
|
|
|
dcs := make(map[string]struct{})
|
|
|
|
classes := make(map[string]struct{})
|
|
|
|
|
|
|
|
resources := BlockedResourcesSummary{
|
|
|
|
Timestamp: time.Now().UTC(),
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, allocMetrics := range eval.FailedTGAllocs {
|
|
|
|
for dc := range allocMetrics.NodesAvailable {
|
|
|
|
dcs[dc] = struct{}{}
|
|
|
|
}
|
|
|
|
for class := range allocMetrics.ClassExhausted {
|
|
|
|
classes[class] = struct{}{}
|
|
|
|
}
|
|
|
|
for _, r := range allocMetrics.ResourcesExhausted {
|
|
|
|
resources.CPU += r.CPU
|
|
|
|
resources.MemoryMB += r.MemoryMB
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
byJob := make(map[structs.NamespacedID]BlockedResourcesSummary)
|
2022-05-23 20:19:30 +00:00
|
|
|
nsID := structs.NewNamespacedID(eval.JobID, eval.Namespace)
|
|
|
|
byJob[nsID] = resources
|
2021-04-29 19:03:45 +00:00
|
|
|
|
2022-05-23 20:19:30 +00:00
|
|
|
byNodeInfo := make(map[node]BlockedResourcesSummary)
|
2021-04-29 19:03:45 +00:00
|
|
|
for dc := range dcs {
|
|
|
|
for class := range classes {
|
2022-05-23 20:19:30 +00:00
|
|
|
k := node{dc: dc, class: class}
|
2021-04-29 19:03:45 +00:00
|
|
|
byNodeInfo[k] = resources
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-05-23 20:19:30 +00:00
|
|
|
return &BlockedResourcesStats{
|
|
|
|
ByJob: byJob,
|
|
|
|
ByNode: byNodeInfo,
|
2021-04-29 19:03:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-05-23 20:19:30 +00:00
|
|
|
// BlockedResourcesStats stores resources requested by blocked evaluations,
|
|
|
|
// tracked both by job and by node.
|
2021-04-29 19:03:45 +00:00
|
|
|
type BlockedResourcesStats struct {
|
2022-05-23 20:19:30 +00:00
|
|
|
ByJob map[structs.NamespacedID]BlockedResourcesSummary
|
|
|
|
ByNode map[node]BlockedResourcesSummary
|
2021-04-29 19:03:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewBlockedResourcesStats returns a new BlockedResourcesStats.
|
2022-05-23 20:19:30 +00:00
|
|
|
func NewBlockedResourcesStats() *BlockedResourcesStats {
|
|
|
|
return &BlockedResourcesStats{
|
|
|
|
ByJob: make(map[structs.NamespacedID]BlockedResourcesSummary),
|
|
|
|
ByNode: make(map[node]BlockedResourcesSummary),
|
2021-04-29 19:03:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy returns a deep copy of the blocked resource stats.
|
2022-05-23 20:19:30 +00:00
|
|
|
func (b *BlockedResourcesStats) Copy() *BlockedResourcesStats {
|
2021-04-29 19:03:45 +00:00
|
|
|
result := NewBlockedResourcesStats()
|
|
|
|
|
|
|
|
for k, v := range b.ByJob {
|
2022-05-23 20:19:30 +00:00
|
|
|
result.ByJob[k] = v // value copy
|
2021-04-29 19:03:45 +00:00
|
|
|
}
|
|
|
|
|
2022-05-23 20:19:30 +00:00
|
|
|
for k, v := range b.ByNode {
|
|
|
|
result.ByNode[k] = v // value copy
|
2021-04-29 19:03:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add returns a new BlockedResourcesStats with the values set to the current
|
|
|
|
// resource values plus the input.
|
2022-05-23 20:19:30 +00:00
|
|
|
func (b *BlockedResourcesStats) Add(a *BlockedResourcesStats) *BlockedResourcesStats {
|
2021-04-29 19:03:45 +00:00
|
|
|
result := b.Copy()
|
|
|
|
|
|
|
|
for k, v := range a.ByJob {
|
|
|
|
result.ByJob[k] = b.ByJob[k].Add(v)
|
|
|
|
}
|
|
|
|
|
2022-05-23 20:19:30 +00:00
|
|
|
for k, v := range a.ByNode {
|
|
|
|
result.ByNode[k] = b.ByNode[k].Add(v)
|
2021-04-29 19:03:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
|
|
|
// Subtract returns a new BlockedResourcesStats with the values set to the
|
|
|
|
// current resource values minus the input.
|
2022-05-23 20:19:30 +00:00
|
|
|
func (b *BlockedResourcesStats) Subtract(a *BlockedResourcesStats) *BlockedResourcesStats {
|
2021-04-29 19:03:45 +00:00
|
|
|
result := b.Copy()
|
|
|
|
|
|
|
|
for k, v := range a.ByJob {
|
|
|
|
result.ByJob[k] = b.ByJob[k].Subtract(v)
|
|
|
|
}
|
|
|
|
|
2022-05-23 20:19:30 +00:00
|
|
|
for k, v := range a.ByNode {
|
|
|
|
result.ByNode[k] = b.ByNode[k].Subtract(v)
|
2021-04-29 19:03:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
|
|
|
// BlockedResourcesSummary holds the resource totals tracked for blocked
// evaluations, together with a timestamp.
type BlockedResourcesSummary struct {
	// Timestamp is carried over from the most recent input applied through
	// Add or Subtract; prune compares it against its cutoff.
	Timestamp time.Time

	// CPU is the CPU total.
	CPU int

	// MemoryMB is the memory total.
	MemoryMB int
}
|
|
|
|
|
|
|
|
// Add returns a new BlockedResourcesSummary with each resource set to the
|
|
|
|
// current value plus the input.
|
|
|
|
func (b BlockedResourcesSummary) Add(a BlockedResourcesSummary) BlockedResourcesSummary {
|
|
|
|
return BlockedResourcesSummary{
|
|
|
|
Timestamp: a.Timestamp,
|
|
|
|
CPU: b.CPU + a.CPU,
|
|
|
|
MemoryMB: b.MemoryMB + a.MemoryMB,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Subtract returns a new BlockedResourcesSummary with each resource set to the
|
|
|
|
// current value minus the input.
|
|
|
|
func (b BlockedResourcesSummary) Subtract(a BlockedResourcesSummary) BlockedResourcesSummary {
|
|
|
|
return BlockedResourcesSummary{
|
|
|
|
Timestamp: a.Timestamp,
|
|
|
|
CPU: b.CPU - a.CPU,
|
|
|
|
MemoryMB: b.MemoryMB - a.MemoryMB,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsZero returns true if all resource values are zero.
|
|
|
|
func (b BlockedResourcesSummary) IsZero() bool {
|
|
|
|
return b.CPU == 0 && b.MemoryMB == 0
|
|
|
|
}
|