diff --git a/.changelog/11007.txt b/.changelog/11007.txt new file mode 100644 index 000000000..b30fb63a6 --- /dev/null +++ b/.changelog/11007.txt @@ -0,0 +1,3 @@ +```release-note:improvement +scheduler: Re-evaluate nodes for system jobs after attributes changes +``` diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 1431a1ceb..e85a29afa 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -3,6 +3,7 @@ package nomad import ( "context" "fmt" + "reflect" "strings" "sync" "time" @@ -169,12 +170,7 @@ func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUp reply.NodeModifyIndex = index // Check if we should trigger evaluations - originalStatus := structs.NodeStatusInit - if originalNode != nil { - originalStatus = originalNode.Status - } - transitionToReady := transitionedToReady(args.Node.Status, originalStatus) - if structs.ShouldDrainNode(args.Node.Status) || transitionToReady { + if shouldCreateNodeEval(originalNode, args.Node) { evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index) if err != nil { n.logger.Error("eval creation failed", "error", err) @@ -211,6 +207,56 @@ func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUp return nil } +// shouldCreateNodeEval returns true if the node update may result into +// allocation updates, so the node should be re-evaluating. +// +// Such cases might be: +// * node health/drain status changes that may result into alloc rescheduling +// * node drivers or attributes changing that may cause system job placement changes +func shouldCreateNodeEval(original, updated *structs.Node) bool { + if structs.ShouldDrainNode(updated.Status) { + return true + } + + if original == nil { + return transitionedToReady(updated.Status, structs.NodeStatusInit) + } + + if transitionedToReady(updated.Status, original.Status) { + return true + } + + // check fields used by the feasibility checks in ../scheduler/feasible.go, + // whether through a Constraint explicitly added by user or an implicit constraint + // added through a driver/volume check. + // + // Node Resources (e.g. CPU/Memory) are handled differently, using blocked evals, + // and not relevant in this check. + return !(original.ID == updated.ID && + original.Datacenter == updated.Datacenter && + original.Name == updated.Name && + original.NodeClass == updated.NodeClass && + reflect.DeepEqual(original.Attributes, updated.Attributes) && + reflect.DeepEqual(original.Meta, updated.Meta) && + reflect.DeepEqual(original.Drivers, updated.Drivers) && + reflect.DeepEqual(original.HostVolumes, updated.HostVolumes) && + equalDevices(original, updated)) +} + +func equalDevices(n1, n2 *structs.Node) bool { + // ignore super old nodes, mostly to avoid nil dereferencing + if n1.NodeResources == nil || n2.NodeResources == nil { + return n1.NodeResources == n2.NodeResources + } + + // treat nil and empty value as equal + if len(n1.NodeResources.Devices) == 0 { + return len(n1.NodeResources.Devices) == len(n2.NodeResources.Devices) + } + + return reflect.DeepEqual(n1.NodeResources.Devices, n2.NodeResources.Devices) +} + // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading. func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { reply.LeaderRPCAddr = string(n.srv.raft.Leader()) diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 188fc50f5..2fc40f636 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -3602,3 +3602,56 @@ func TestClientEndpoint_EmitEvents(t *testing.T) { require.Nil(err) require.False(len(out.Events) < 2) } + +func TestClientEndpoint_ShouldCreateNodeEval(t *testing.T) { + t.Run("spurious changes don't require eval", func(t *testing.T) { + n1 := mock.Node() + n2 := n1.Copy() + n2.SecretID = uuid.Generate() + n2.Links["vault"] = "links don't get interpolated" + n2.ModifyIndex++ + + require.False(t, shouldCreateNodeEval(n1, n2)) + }) + + positiveCases := []struct { + name string + updateFn func(n *structs.Node) + }{ + { + "data center changes", + func(n *structs.Node) { n.Datacenter += "u" }, + }, + { + "attribute change", + func(n *structs.Node) { n.Attributes["test.attribute"] = "something" }, + }, + { + "meta change", + func(n *structs.Node) { n.Meta["test.meta"] = "something" }, + }, + { + "drivers health changed", + func(n *structs.Node) { n.Drivers["exec"].Detected = false }, + }, + { + "new drivers", + func(n *structs.Node) { + n.Drivers["newdriver"] = &structs.DriverInfo{ + Detected: true, + Healthy: true, + } + }, + }, + } + + for _, c := range positiveCases { + t.Run(c.name, func(t *testing.T) { + n1 := mock.Node() + n2 := n1.Copy() + c.updateFn(n2) + + require.Truef(t, shouldCreateNodeEval(n1, n2), "node changed but without node eval: %v", pretty.Diff(n1, n2)) + }) + } +}