system: re-evaluate node on feasibility changes (#11007)

Fix a bug where system jobs could fail to be placed on a node that was
initially ineligible for system job placement.

This change causes the node to be re-evaluated if any attribute used in
feasibility checks changes.

Fixes https://github.com/hashicorp/nomad/issues/8448
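
To make the idea concrete, here is a minimal, self-contained Go sketch of the check this commit introduces. It is not Nomad's code: `nodeSnapshot` and `needsEval` are hypothetical stand-ins for the feasibility-relevant node fields and for the `shouldCreateNodeEval` helper added below.

```go
package main

import (
	"fmt"
	"reflect"
)

// nodeSnapshot is a hypothetical stand-in for the subset of node fields that
// feasibility checks consult: datacenter, fingerprinted attributes, drivers.
type nodeSnapshot struct {
	Datacenter string
	Attributes map[string]string
	Drivers    map[string]bool // driver name -> detected
}

// needsEval reports whether any feasibility-relevant field changed between
// two snapshots of the same node, in which case a new evaluation is needed.
func needsEval(before, after nodeSnapshot) bool {
	return !(before.Datacenter == after.Datacenter &&
		reflect.DeepEqual(before.Attributes, after.Attributes) &&
		reflect.DeepEqual(before.Drivers, after.Drivers))
}

func main() {
	before := nodeSnapshot{
		Datacenter: "dc1",
		Attributes: map[string]string{"kernel.name": "linux"},
		Drivers:    map[string]bool{"exec": true},
	}

	// A later fingerprint detects a new driver; a system job constrained to
	// that driver is now placeable, so the node must be re-evaluated.
	after := before
	after.Drivers = map[string]bool{"exec": true, "docker": true}

	fmt.Println(needsEval(before, before)) // false: nothing relevant changed
	fmt.Println(needsEval(before, after))  // true: driver set changed
}
```

Before this commit, only status transitions (a node becoming ready, or draining) triggered such a re-evaluation, so a driver or attribute that appeared later never gave waiting system jobs another chance at placement.
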
Mahmood Ali 2021-08-10 17:17:44 -04:00 committed by GitHub
parent bfc766357e
commit ea003188fa
3 changed files with 108 additions and 6 deletions

.changelog/11007.txt (new file)

@@ -0,0 +1,3 @@
```release-note:improvement
scheduler: Re-evaluate nodes for system jobs after attribute changes
```


@@ -3,6 +3,7 @@ package nomad
 import (
 	"context"
 	"fmt"
+	"reflect"
 	"strings"
 	"sync"
 	"time"
@@ -169,12 +170,7 @@ func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
 	reply.NodeModifyIndex = index

 	// Check if we should trigger evaluations
-	originalStatus := structs.NodeStatusInit
-	if originalNode != nil {
-		originalStatus = originalNode.Status
-	}
-	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
-	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
+	if shouldCreateNodeEval(originalNode, args.Node) {
 		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
 		if err != nil {
 			n.logger.Error("eval creation failed", "error", err)
@@ -211,6 +207,56 @@ func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
 	return nil
 }
+
+// shouldCreateNodeEval returns true if the node update may result in
+// allocation updates, so the node should be re-evaluated.
+//
+// Such cases might be:
+// * node health/drain status changes that may result in alloc rescheduling
+// * node drivers or attributes changing that may cause system job placement changes
+func shouldCreateNodeEval(original, updated *structs.Node) bool {
+	if structs.ShouldDrainNode(updated.Status) {
+		return true
+	}
+
+	if original == nil {
+		return transitionedToReady(updated.Status, structs.NodeStatusInit)
+	}
+
+	if transitionedToReady(updated.Status, original.Status) {
+		return true
+	}
+
+	// check fields used by the feasibility checks in ../scheduler/feasible.go,
+	// whether through a Constraint explicitly added by the user or an implicit
+	// constraint added through a driver/volume check.
+	//
+	// Node Resources (e.g. CPU/Memory) are handled differently, using blocked evals,
+	// and are not relevant in this check.
+	return !(original.ID == updated.ID &&
+		original.Datacenter == updated.Datacenter &&
+		original.Name == updated.Name &&
+		original.NodeClass == updated.NodeClass &&
+		reflect.DeepEqual(original.Attributes, updated.Attributes) &&
+		reflect.DeepEqual(original.Meta, updated.Meta) &&
+		reflect.DeepEqual(original.Drivers, updated.Drivers) &&
+		reflect.DeepEqual(original.HostVolumes, updated.HostVolumes) &&
+		equalDevices(original, updated))
+}
+
+func equalDevices(n1, n2 *structs.Node) bool {
+	// ignore super old nodes, mostly to avoid nil dereferencing
+	if n1.NodeResources == nil || n2.NodeResources == nil {
+		return n1.NodeResources == n2.NodeResources
+	}
+
+	// treat nil and empty value as equal
+	if len(n1.NodeResources.Devices) == 0 {
+		return len(n1.NodeResources.Devices) == len(n2.NodeResources.Devices)
+	}
+
+	return reflect.DeepEqual(n1.NodeResources.Devices, n2.NodeResources.Devices)
+}

 // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading.
 func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
 	reply.LeaderRPCAddr = string(n.srv.raft.Leader())
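
One detail worth calling out from `equalDevices` above: it compares lengths before falling back to `reflect.DeepEqual`, because DeepEqual distinguishes a nil slice from an empty one. A tiny standalone sketch (the `device` type here is a hypothetical stand-in for a node's device resources) shows why that matters:

```go
package main

import (
	"fmt"
	"reflect"
)

// device is a hypothetical stand-in for a node's device resource entry.
type device struct{ Vendor, Type string }

func main() {
	var nilDevices []device    // node that never reported a devices field
	emptyDevices := []device{} // node that reported an empty devices list

	// DeepEqual treats a nil slice and an empty slice as different, so using
	// it alone could flag a device "change" on a node that has no devices.
	fmt.Println(reflect.DeepEqual(nilDevices, emptyDevices)) // false

	// Comparing lengths first, as equalDevices does, treats the two as equal
	// and avoids creating evaluations for a change that is not there.
	fmt.Println(len(nilDevices) == len(emptyDevices)) // true
}
```
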


@@ -3602,3 +3602,56 @@ func TestClientEndpoint_EmitEvents(t *testing.T) {
 	require.Nil(err)
 	require.False(len(out.Events) < 2)
 }
+
+func TestClientEndpoint_ShouldCreateNodeEval(t *testing.T) {
+	t.Run("spurious changes don't require eval", func(t *testing.T) {
+		n1 := mock.Node()
+		n2 := n1.Copy()
+		n2.SecretID = uuid.Generate()
+		n2.Links["vault"] = "links don't get interpolated"
+		n2.ModifyIndex++
+
+		require.False(t, shouldCreateNodeEval(n1, n2))
+	})
+
+	positiveCases := []struct {
+		name     string
+		updateFn func(n *structs.Node)
+	}{
+		{
+			"data center changes",
+			func(n *structs.Node) { n.Datacenter += "u" },
+		},
+		{
+			"attribute change",
+			func(n *structs.Node) { n.Attributes["test.attribute"] = "something" },
+		},
+		{
+			"meta change",
+			func(n *structs.Node) { n.Meta["test.meta"] = "something" },
+		},
+		{
+			"drivers health changed",
+			func(n *structs.Node) { n.Drivers["exec"].Detected = false },
+		},
+		{
+			"new drivers",
+			func(n *structs.Node) {
+				n.Drivers["newdriver"] = &structs.DriverInfo{
+					Detected: true,
+					Healthy:  true,
+				}
+			},
+		},
+	}
+
+	for _, c := range positiveCases {
+		t.Run(c.name, func(t *testing.T) {
+			n1 := mock.Node()
+			n2 := n1.Copy()
+			c.updateFn(n2)
+
+			require.Truef(t, shouldCreateNodeEval(n1, n2), "node changed but without node eval: %v", pretty.Diff(n1, n2))
+		})
+	}
+}