open-nomad/scheduler/context.go
Mahmood Ali a9d5e4c510
scheduler: stopped-yet-running allocs are still running (#10446)
* scheduler: stopped-yet-running allocs are still running

* scheduler: test new stopped-but-running logic

* test: assert nonoverlapping alloc behavior

Also add a simpler Wait test helper to improve line numbers and save few
lines of code.

* docs: tried my best to describe #10446

it's not concise... feedback welcome

* scheduler: fix test that allowed overlapping allocs

* devices: only free devices when ClientStatus is terminal

* test: output nicer failure message if err==nil

Co-authored-by: Mahmood Ali <mahmood@hashicorp.com>
Co-authored-by: Michael Schurter <mschurter@hashicorp.com>
2022-09-13 12:52:47 -07:00

425 lines
11 KiB
Go

package scheduler
import (
"regexp"
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/nomad/structs"
)
// Context is used to track contextual information used for placement
type Context interface {
	// State is used to inspect the current global state
	State() State

	// Plan returns the current plan
	Plan() *structs.Plan

	// Logger provides a way to log
	Logger() log.Logger

	// Metrics returns the current metrics
	Metrics() *structs.AllocMetric

	// Reset is invoked after making a placement
	Reset()

	// ProposedAllocs returns the proposed allocations for a node which are
	// the existing allocations, removing evictions, and adding any planned
	// placements.
	ProposedAllocs(nodeID string) ([]*structs.Allocation, error)

	// RegexpCache is a cache of regular expressions
	RegexpCache() map[string]*regexp.Regexp

	// VersionConstraintCache is a cache of version constraints
	VersionConstraintCache() map[string]VerConstraints

	// SemverConstraintCache is a cache of semver constraints
	SemverConstraintCache() map[string]VerConstraints

	// Eligibility returns a tracker for node eligibility in the context of the
	// eval.
	Eligibility() *EvalEligibility

	// SendEvent provides best-effort delivery of scheduling and placement
	// events.
	SendEvent(event interface{})
}
// EvalCache is used to cache certain things during an evaluation. The caches
// are lazily initialized by their accessor methods, so the zero value is
// ready to use.
type EvalCache struct {
	// reCache caches compiled regular expressions keyed by pattern.
	reCache map[string]*regexp.Regexp

	// versionCache caches parsed version constraints keyed by constraint string.
	versionCache map[string]VerConstraints

	// semverCache caches parsed semver constraints keyed by constraint string.
	semverCache map[string]VerConstraints
}
// RegexpCache returns the regular expression cache, creating it on first use.
func (e *EvalCache) RegexpCache() map[string]*regexp.Regexp {
	if e.reCache != nil {
		return e.reCache
	}
	e.reCache = make(map[string]*regexp.Regexp)
	return e.reCache
}
// VersionConstraintCache returns the version constraint cache, creating it on
// first use.
func (e *EvalCache) VersionConstraintCache() map[string]VerConstraints {
	if e.versionCache != nil {
		return e.versionCache
	}
	e.versionCache = make(map[string]VerConstraints)
	return e.versionCache
}
// SemverConstraintCache returns the semver constraint cache, creating it on
// first use.
func (e *EvalCache) SemverConstraintCache() map[string]VerConstraints {
	if e.semverCache != nil {
		return e.semverCache
	}
	e.semverCache = make(map[string]VerConstraints)
	return e.semverCache
}
// PortCollisionEvent is an event that can happen during scheduling when
// an unexpected port collision is detected.
type PortCollisionEvent struct {
	// Reason is a human-readable description of why the collision occurred.
	Reason string

	// Node is the node on which the collision was detected.
	Node *structs.Node

	// Allocations are the allocations involved in the collision.
	Allocations []*structs.Allocation

	// TODO: this is a large struct, but may be required to debug unexpected
	// port collisions. Re-evaluate its need in the future if the bug is fixed
	// or not caused by this field.
	NetIndex *structs.NetworkIndex
}
// Copy returns a deep copy of the event, or nil if the receiver is nil. The
// copy shares no mutable state with the original: Node, Allocations, and
// NetIndex are all copied.
func (ev *PortCollisionEvent) Copy() *PortCollisionEvent {
	if ev == nil {
		return nil
	}
	c := new(PortCollisionEvent)
	*c = *ev
	c.Node = ev.Node.Copy()
	if len(ev.Allocations) > 0 {
		// Allocate a fresh slice before filling it. After the shallow struct
		// copy above, c.Allocations still aliases ev.Allocations' backing
		// array, so writing copies into it would overwrite the source
		// event's allocation pointers in place.
		c.Allocations = make([]*structs.Allocation, len(ev.Allocations))
		for i, a := range ev.Allocations {
			c.Allocations[i] = a.Copy()
		}
	}
	c.NetIndex = ev.NetIndex.Copy()
	return c
}
// Sanitize returns a copy of the event trimmed of data that is sensitive or
// too large to emit: the node's Meta map is emptied and each allocation's Job
// is dropped. Returns nil if the receiver is nil.
func (ev *PortCollisionEvent) Sanitize() *PortCollisionEvent {
	if ev == nil {
		return nil
	}
	clean := ev.Copy()
	clean.Node = ev.Node.Sanitize()
	clean.Node.Meta = make(map[string]string)

	// Build a fresh slice rather than assigning into clean.Allocations so the
	// sanitized entries can never be written through a slice that aliases
	// ev.Allocations' backing array.
	clean.Allocations = make([]*structs.Allocation, len(ev.Allocations))
	for i, alloc := range ev.Allocations {
		a := alloc.CopySkipJob()
		a.Job = nil
		clean.Allocations[i] = a
	}
	return clean
}
// EvalContext is a Context used during an Evaluation
type EvalContext struct {
	// EvalCache is embedded to provide the per-evaluation regexp and
	// constraint caches.
	EvalCache

	// eventsCh receives best-effort scheduling/placement events; may be nil.
	eventsCh chan<- interface{}

	// state provides read access to the global cluster state.
	state State

	// plan is the in-progress plan being built for this evaluation.
	plan *structs.Plan

	logger log.Logger

	// metrics accumulates placement metrics; replaced on Reset.
	metrics *structs.AllocMetric

	// eligibility lazily tracks node eligibility by computed class.
	eligibility *EvalEligibility
}
// NewEvalContext constructs a new EvalContext
func NewEvalContext(eventsCh chan<- interface{}, s State, p *structs.Plan, log log.Logger) *EvalContext {
	return &EvalContext{
		eventsCh: eventsCh,
		state:    s,
		plan:     p,
		logger:   log,
		metrics:  new(structs.AllocMetric),
	}
}
// State returns the global state snapshot for this evaluation.
func (e *EvalContext) State() State {
	return e.state
}
// Plan returns the plan being built by this evaluation.
func (e *EvalContext) Plan() *structs.Plan {
	return e.plan
}
// Logger returns the logger for this evaluation.
func (e *EvalContext) Logger() log.Logger {
	return e.logger
}
// Metrics returns the allocation metrics accumulated since the last Reset.
func (e *EvalContext) Metrics() *structs.AllocMetric {
	return e.metrics
}
// SetState replaces the state snapshot used by this evaluation.
func (e *EvalContext) SetState(s State) {
	e.state = s
}
// Reset is invoked after making a placement; it discards the accumulated
// metrics by installing a fresh AllocMetric.
func (e *EvalContext) Reset() {
	e.metrics = new(structs.AllocMetric)
}
// ProposedAllocs returns the allocations that would exist on the node if the
// current plan were applied: the node's current allocations, minus planned
// evictions and preemptions, plus planned placements.
func (e *EvalContext) ProposedAllocs(nodeID string) ([]*structs.Allocation, error) {
	ws := memdb.NewWatchSet()
	existing, err := e.state.AllocsByNode(ws, nodeID)
	if err != nil {
		return nil, err
	}

	// Drop allocations the plan is evicting or preempting on this node.
	if evictions := e.plan.NodeUpdate[nodeID]; len(evictions) > 0 {
		existing = structs.RemoveAllocs(existing, evictions)
	}
	if preemptions := e.plan.NodePreemptions[nodeID]; len(preemptions) > 0 {
		existing = structs.RemoveAllocs(existing, preemptions)
	}

	// Index the remaining allocations by ID so an in-place update from the
	// plan overrides the old version instead of being counted twice.
	// Client-terminal allocations no longer hold resources and are skipped.
	byID := make(map[string]*structs.Allocation, len(existing))
	for _, alloc := range existing {
		if !alloc.ClientTerminalStatus() {
			byID[alloc.ID] = alloc
		}
	}
	for _, alloc := range e.plan.NodeAllocation[nodeID] {
		byID[alloc.ID] = alloc
	}

	// Materialize the proposed slice from the index.
	proposed := make([]*structs.Allocation, 0, len(byID))
	for _, alloc := range byID {
		proposed = append(proposed, alloc)
	}
	return proposed, nil
}
// Eligibility returns the node eligibility tracker for this evaluation,
// creating it on first use.
func (e *EvalContext) Eligibility() *EvalEligibility {
	if e.eligibility != nil {
		return e.eligibility
	}
	e.eligibility = NewEvalEligibility()
	return e.eligibility
}
// SendEvent provides best-effort delivery of scheduling and placement events.
// If the receiver or its channel is nil, or the channel is not immediately
// ready, the event is dropped (the empty default case) so the scheduler is
// never blocked on event delivery.
func (e *EvalContext) SendEvent(event interface{}) {
	if e == nil || e.eventsCh == nil {
		return
	}
	select {
	case e.eventsCh <- event:
	default:
	}
}
// ComputedClassFeasibility is the eligibility state of a computed node class.
type ComputedClassFeasibility byte

const (
	// EvalComputedClassUnknown is the initial state until the eligibility has
	// been explicitly marked to eligible/ineligible or escaped.
	EvalComputedClassUnknown ComputedClassFeasibility = iota

	// EvalComputedClassIneligible is used to mark the computed class as
	// ineligible for the evaluation.
	EvalComputedClassIneligible

	// EvalComputedClassEligible is used to mark the computed class as
	// eligible for the evaluation.
	EvalComputedClassEligible

	// EvalComputedClassEscaped signals that computed class can not determine
	// eligibility because a constraint exists that is not captured by computed
	// node classes.
	EvalComputedClassEscaped
)
// EvalEligibility tracks eligibility of nodes by computed node class over the
// course of an evaluation.
type EvalEligibility struct {
	// job tracks the eligibility at the job level per computed node class.
	job map[string]ComputedClassFeasibility

	// jobEscaped marks whether constraints have escaped at the job level.
	jobEscaped bool

	// taskGroups tracks the eligibility at the task group level per computed
	// node class.
	taskGroups map[string]map[string]ComputedClassFeasibility

	// tgEscapedConstraints is a map of task groups to whether constraints have
	// escaped.
	tgEscapedConstraints map[string]bool

	// quotaReached marks that the quota limit has been reached for the given
	// quota
	quotaReached string
}
// NewEvalEligibility returns an eligibility tracker for the context of an evaluation.
func NewEvalEligibility() *EvalEligibility {
	e := new(EvalEligibility)
	e.job = make(map[string]ComputedClassFeasibility)
	e.taskGroups = make(map[string]map[string]ComputedClassFeasibility)
	e.tgEscapedConstraints = make(map[string]bool)
	return e
}
// SetJob takes the job being evaluated and calculates the escaped constraints
// at the job and task group level.
func (e *EvalEligibility) SetJob(job *structs.Job) {
	// Determine whether the job has escaped constraints.
	e.jobEscaped = len(structs.EscapedConstraints(job.Constraints)) != 0

	// Determine the escaped constraints per task group.
	for _, tg := range job.TaskGroups {
		// Cap the slice at its length with a full slice expression so the
		// appends below allocate a new backing array instead of potentially
		// writing task constraints into tg.Constraints' spare capacity,
		// which would mutate the job in place.
		constraints := tg.Constraints[:len(tg.Constraints):len(tg.Constraints)]
		for _, task := range tg.Tasks {
			constraints = append(constraints, task.Constraints...)
		}
		e.tgEscapedConstraints[tg.Name] = len(structs.EscapedConstraints(constraints)) != 0
	}
}
// HasEscaped reports whether any constraint in the job, or in any of its task
// groups, has escaped computed node classes.
func (e *EvalEligibility) HasEscaped() bool {
	escaped := e.jobEscaped
	for _, tgEscaped := range e.tgEscapedConstraints {
		escaped = escaped || tgEscaped
	}
	return escaped
}
// GetClasses returns the tracked classes mapped to their eligibility,
// combining the job-level and task-group-level trackers. A class eligible on
// any task group stays eligible; job-level ineligibility always wins, while
// job-level eligibility only applies to classes the task groups left unset.
func (e *EvalEligibility) GetClasses() map[string]bool {
	elig := make(map[string]bool)

	// Task-group pass: eligibility on any task group wins over ineligibility
	// on another, so a false is only recorded for classes not yet seen.
	for _, classes := range e.taskGroups {
		for class, status := range classes {
			if status == EvalComputedClassEligible {
				elig[class] = true
			} else if status == EvalComputedClassIneligible {
				if _, seen := elig[class]; !seen {
					elig[class] = false
				}
			}
		}
	}

	// Job pass: ineligibility overrides unconditionally, while eligibility
	// only fills in classes the task groups did not decide.
	for class, status := range e.job {
		if status == EvalComputedClassIneligible {
			elig[class] = false
		} else if status == EvalComputedClassEligible {
			if _, seen := elig[class]; !seen {
				elig[class] = true
			}
		}
	}
	return elig
}
// JobStatus returns the eligibility status of the job for the given computed
// node class. Escaped constraints take precedence over any tracked status.
func (e *EvalEligibility) JobStatus(class string) ComputedClassFeasibility {
	if e.jobEscaped {
		return EvalComputedClassEscaped
	}
	status, ok := e.job[class]
	if !ok {
		return EvalComputedClassUnknown
	}
	return status
}
// SetJobEligibility sets the eligibility status of the job for the computed
// node class.
func (e *EvalEligibility) SetJobEligibility(eligible bool, class string) {
	status := EvalComputedClassIneligible
	if eligible {
		status = EvalComputedClassEligible
	}
	e.job[class] = status
}
// TaskGroupStatus returns the eligibility status of the task group for the
// given computed node class. Escaped constraints take precedence over any
// tracked status.
func (e *EvalEligibility) TaskGroupStatus(tg, class string) ComputedClassFeasibility {
	// A missing key yields false, matching the original two-step lookup.
	if e.tgEscapedConstraints[tg] {
		return EvalComputedClassEscaped
	}
	// Indexing a nil inner map is safe and yields ok == false.
	status, ok := e.taskGroups[tg][class]
	if !ok {
		return EvalComputedClassUnknown
	}
	return status
}
// SetTaskGroupEligibility sets the eligibility status of the task group for the
// computed node class.
func (e *EvalEligibility) SetTaskGroupEligibility(eligible bool, tg, class string) {
	status := EvalComputedClassIneligible
	if eligible {
		status = EvalComputedClassEligible
	}

	// Create the per-task-group map on first use.
	classes, ok := e.taskGroups[tg]
	if !ok {
		classes = make(map[string]ComputedClassFeasibility)
		e.taskGroups[tg] = classes
	}
	classes[class] = status
}
// SetQuotaLimitReached marks that the quota limit has been reached for the
// given quota
func (e *EvalEligibility) SetQuotaLimitReached(quota string) {
	e.quotaReached = quota
}
// QuotaLimitReached returns the quota name if the quota limit has been reached,
// or the empty string if no limit has been recorded.
func (e *EvalEligibility) QuotaLimitReached() string {
	return e.quotaReached
}