From e58998e2187886f371de07afc7a059a8c504d50e Mon Sep 17 00:00:00 2001 From: Charlie Voiselle <464492+angrycub@users.noreply.github.com> Date: Thu, 8 Sep 2022 14:31:36 -0400 Subject: [PATCH] Add client scheduling eligibility to heartbeat (#14483) --- .changelog/14483.txt | 3 +++ client/client.go | 8 ++++++++ nomad/node_endpoint.go | 12 ++++++++---- nomad/structs/structs.go | 4 ++++ 4 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 .changelog/14483.txt diff --git a/.changelog/14483.txt b/.changelog/14483.txt new file mode 100644 index 000000000..07a3e141d --- /dev/null +++ b/.changelog/14483.txt @@ -0,0 +1,3 @@ +```release-note:bug +metrics: Update client `node_scheduling_eligibility` value with server heartbeats. +``` diff --git a/client/client.go b/client/client.go index 3f4042b33..1fa2cc512 100644 --- a/client/client.go +++ b/client/client.go @@ -1955,6 +1955,14 @@ func (c *Client) updateNodeStatus() error { } } + // Check heartbeat response for information about the server-side scheduling + // state of this node + c.UpdateConfig(func(c *config.Config) { + if resp.SchedulingEligibility != "" { + c.Node.SchedulingEligibility = resp.SchedulingEligibility + } + }) + // Update the number of nodes in the cluster so we can adjust our server // rebalance rate. 
c.servers.SetNumNodes(resp.NumNodes) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index c13ec70cc..689b1082e 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -199,7 +199,7 @@ func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUp n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() - if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { + if err := n.constructNodeServerInfoResponse(args.Node.ID, snap, reply); err != nil { n.logger.Error("failed to populate NodeUpdateResponse", "error", err) return err } @@ -258,7 +258,7 @@ func equalDevices(n1, n2 *structs.Node) bool { } // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading. -func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { +func (n *Node) constructNodeServerInfoResponse(nodeID string, snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { reply.LeaderRPCAddr = string(n.srv.raft.Leader()) // Reply with config information required for future RPC requests @@ -271,6 +271,10 @@ func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply }) } + // Add SchedulingEligibility information to heartbeat response. 
+ node, _ := snap.NodeByID(nil, nodeID) + reply.SchedulingEligibility = node.SchedulingEligibility + // TODO(sean@): Use an indexed node count instead // // Snapshot is used only to iterate over all nodes to create a node @@ -564,7 +568,7 @@ func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *struct reply.Index = index n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() - if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { + if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil { n.logger.Error("failed to populate NodeUpdateResponse", "error", err) return err } @@ -821,7 +825,7 @@ func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUp n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() - if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { + if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil { n.logger.Error("failed to populate NodeUpdateResponse", "error", err) return err } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index b5e5a25bf..819b378e7 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1361,6 +1361,10 @@ type NodeUpdateResponse struct { // region. Servers []*NodeServerInfo + // SchedulingEligibility is used to inform clients what the server-side + // has for their scheduling status during heartbeats. + SchedulingEligibility string + QueryMeta }