From a2ceab3d8c9b3f9537ea719d00d2780197823eac Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 7 Mar 2023 10:05:59 -0500 Subject: [PATCH] scheduler: correctly detect inplace update with wildcard datacenters (#16362) Wildcard datacenters introduced a bug where a job with any wildcard datacenters will always be treated as a destructive update when we check whether a datacenter has been removed from the jobspec. Includes updating the helper so that callers don't have to loop over the job's datacenters. --- .changelog/16362.txt | 3 +++ nomad/node_endpoint.go | 7 ++----- nomad/structs/structs.go | 9 +++++++-- scheduler/util.go | 14 +++++--------- scheduler/util_test.go | 30 ++++++++++++++++++++++++++++++ 5 files changed, 47 insertions(+), 16 deletions(-) create mode 100644 .changelog/16362.txt diff --git a/.changelog/16362.txt b/.changelog/16362.txt new file mode 100644 index 000000000..3e9cc508e --- /dev/null +++ b/.changelog/16362.txt @@ -0,0 +1,3 @@ +```release-note:bug +scheduler: Fixed a bug where allocs of system jobs with wildcard datacenters would be destructively updated +``` diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 732b90289..a13e80a75 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -1620,11 +1620,8 @@ func (n *Node) createNodeEvals(node *structs.Node, nodeIndex uint64) ([]string, // here, but datacenter is a good optimization to start with as // datacenter cardinality tends to be low so the check // shouldn't add much work. - for _, dc := range job.Datacenters { - if node.IsInDC(dc) { - sysJobs = append(sysJobs, job) - break - } + if node.IsInAnyDC(job.Datacenters) { + sysJobs = append(sysJobs, job) } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 52d1eb0cc..efa4d0fc6 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2311,8 +2311,13 @@ func (n *Node) ComparableResources() *ComparableResources { } } -func (n *Node) IsInDC(dc string) bool { - return glob.Glob(dc, n.Datacenter) +func (n *Node) IsInAnyDC(datacenters []string) bool { + for _, dc := range datacenters { + if glob.Glob(dc, n.Datacenter) { + return true + } + } + return false } // Stub returns a summarized version of the node diff --git a/scheduler/util.go b/scheduler/util.go index 0a4572a0b..a9499a5e2 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -9,7 +9,6 @@ import ( log "github.com/hashicorp/go-hclog" memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/nomad/structs" - "golang.org/x/exp/slices" ) // allocTuple is a tuple of the allocation name and potential alloc ID @@ -66,12 +65,9 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]str notReady[node.ID] = struct{}{} continue } - for _, dc := range dcs { - if node.IsInDC(dc) { - out = append(out, node) - dcMap[node.Datacenter]++ - break - } + if node.IsInAnyDC(dcs) { + out = append(out, node) + dcMap[node.Datacenter]++ } } return out, notReady, dcMap, nil @@ -558,7 +554,7 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job, } // The alloc is on a node that's now in an ineligible DC - if !slices.Contains(job.Datacenters, node.Datacenter) { + if !node.IsInAnyDC(job.Datacenters) { continue } @@ -802,7 +798,7 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy } // The alloc is on a node that's now in an ineligible DC - if !slices.Contains(newJob.Datacenters, node.Datacenter) { + if !node.IsInAnyDC(newJob.Datacenters) { return false, true, nil } diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 9936f47a0..8d853f5bc 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -320,6 +320,7 @@ func TestTaskUpdatedSpread(t *testing.T) { require.False(t, tasksUpdated(j5, j6, name)) } + func TestTasksUpdated(t *testing.T) { ci.Parallel(t) @@ -972,6 +973,35 @@ func TestInplaceUpdate_Success(t *testing.T) { } } +func TestInplaceUpdate_WildcardDatacenters(t *testing.T) { + ci.Parallel(t) + + store, ctx := testContext(t) + eval := mock.Eval() + job := mock.Job() + job.Datacenters = []string{"*"} + + node := mock.Node() + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 900, node)) + + // Register an alloc + alloc := mock.AllocForNode(node) + alloc.Job = job + alloc.JobID = job.ID + must.NoError(t, store.UpsertJobSummary(1000, mock.JobSummary(alloc.JobID))) + must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 1001, []*structs.Allocation{alloc})) + + updates := []allocTuple{{Alloc: alloc, TaskGroup: job.TaskGroups[0]}} + stack := NewGenericStack(false, ctx) + unplaced, inplace := inplaceUpdate(ctx, eval, job, stack, updates) + + must.Len(t, 1, inplace, + must.Sprintf("inplaceUpdate should have an inplace update")) + must.Len(t, 0, unplaced) + must.MapNotEmpty(t, ctx.plan.NodeAllocation, + must.Sprintf("inplaceUpdate should have an inplace update")) +} + func TestUtil_connectUpdated(t *testing.T) { ci.Parallel(t)