8a6b1acaa6
The whole approach to monitoring drains has ordering issues and lacks state to output useful error messages. AFAICT to get the tests passing reliably I needed to change the behavior of monitoring. Parts of these tests are skipped in CI, and they should be rewritten as e2e tests.
336 lines
9.6 KiB
Go
336 lines
9.6 KiB
Go
package command
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/hashicorp/nomad/api"
|
||
"github.com/hashicorp/nomad/api/contexts"
|
||
"github.com/posener/complete"
|
||
)
|
||
|
||
var (
|
||
// defaultDrainDuration is the default drain duration if it is not specified
|
||
// explicitly
|
||
defaultDrainDuration = 1 * time.Hour
|
||
)
|
||
|
||
type NodeDrainCommand struct {
|
||
Meta
|
||
}
|
||
|
||
func (c *NodeDrainCommand) Help() string {
|
||
helpText := `
|
||
Usage: nomad node drain [options] <node>
|
||
|
||
Toggles node draining on a specified node. It is required
|
||
that either -enable or -disable is specified, but not both.
|
||
The -self flag is useful to drain the local node.
|
||
|
||
General Options:
|
||
|
||
` + generalOptionsUsage() + `
|
||
|
||
Node Drain Options:
|
||
|
||
-disable
|
||
Disable draining for the specified node.
|
||
|
||
-enable
|
||
Enable draining for the specified node.
|
||
|
||
-deadline <duration>
|
||
Set the deadline by which all allocations must be moved off the node.
|
||
Remaining allocations after the deadline are forced removed from the node.
|
||
If unspecified, a default deadline of one hour is applied.
|
||
|
||
-detach
|
||
Return immediately instead of entering monitor mode.
|
||
|
||
-monitor
|
||
Enter monitor mode directly without modifying the drain status.
|
||
|
||
-force
|
||
Force remove allocations off the node immediately.
|
||
|
||
-no-deadline
|
||
No deadline allows the allocations to drain off the node without being force
|
||
stopped after a certain deadline.
|
||
|
||
-ignore-system
|
||
Ignore system allows the drain to complete without stopping system job
|
||
allocations. By default system jobs are stopped last.
|
||
|
||
-keep-ineligible
|
||
Keep ineligible will maintain the node's scheduling ineligibility even if
|
||
the drain is being disabled. This is useful when an existing drain is being
|
||
cancelled but additional scheduling on the node is not desired.
|
||
|
||
-self
|
||
Set the drain status of the local node.
|
||
|
||
-yes
|
||
Automatic yes to prompts.
|
||
`
|
||
return strings.TrimSpace(helpText)
|
||
}
|
||
|
||
func (c *NodeDrainCommand) Synopsis() string {
|
||
return "Toggle drain mode on a given node"
|
||
}
|
||
|
||
func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags {
|
||
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
|
||
complete.Flags{
|
||
"-disable": complete.PredictNothing,
|
||
"-enable": complete.PredictNothing,
|
||
"-deadline": complete.PredictAnything,
|
||
"-detach": complete.PredictNothing,
|
||
"-force": complete.PredictNothing,
|
||
"-no-deadline": complete.PredictNothing,
|
||
"-ignore-system": complete.PredictNothing,
|
||
"-keep-ineligible": complete.PredictNothing,
|
||
"-self": complete.PredictNothing,
|
||
"-yes": complete.PredictNothing,
|
||
})
|
||
}
|
||
|
||
func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor {
|
||
return complete.PredictFunc(func(a complete.Args) []string {
|
||
client, err := c.Meta.Client()
|
||
if err != nil {
|
||
return nil
|
||
}
|
||
|
||
resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Nodes, nil)
|
||
if err != nil {
|
||
return []string{}
|
||
}
|
||
return resp.Matches[contexts.Nodes]
|
||
})
|
||
}
|
||
|
||
// Name returns the internal name of the command; Run uses it to label the
// command's flag set.
func (c *NodeDrainCommand) Name() string {
	return "node-drain"
}
|
||
|
||
func (c *NodeDrainCommand) Run(args []string) int {
|
||
var enable, disable, detach, force,
|
||
noDeadline, ignoreSystem, keepIneligible,
|
||
self, autoYes, monitor bool
|
||
var deadline string
|
||
|
||
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
|
||
flags.Usage = func() { c.Ui.Output(c.Help()) }
|
||
flags.BoolVar(&enable, "enable", false, "Enable drain mode")
|
||
flags.BoolVar(&disable, "disable", false, "Disable drain mode")
|
||
flags.StringVar(&deadline, "deadline", "", "Deadline after which allocations are force stopped")
|
||
flags.BoolVar(&detach, "detach", false, "")
|
||
flags.BoolVar(&force, "force", false, "Force immediate drain")
|
||
flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline")
|
||
flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node")
|
||
flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility")
|
||
flags.BoolVar(&self, "self", false, "")
|
||
flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.")
|
||
flags.BoolVar(&monitor, "monitor", false, "Monitor drain status.")
|
||
|
||
if err := flags.Parse(args); err != nil {
|
||
return 1
|
||
}
|
||
|
||
// Check that enable or disable is not set with monitor
|
||
if monitor && (enable || disable) {
|
||
c.Ui.Error("The -monitor flag cannot be used with the '-enable' or '-disable' flags")
|
||
c.Ui.Error(commandErrorText(c))
|
||
return 1
|
||
}
|
||
|
||
// Check that we got either enable or disable, but not both.
|
||
if (enable && disable) || (!monitor && !enable && !disable) {
|
||
c.Ui.Error("Ethier the '-enable' or '-disable' flag must be set, unless using '-monitor'")
|
||
c.Ui.Error(commandErrorText(c))
|
||
return 1
|
||
}
|
||
|
||
// Check that we got a node ID
|
||
args = flags.Args()
|
||
if l := len(args); self && l != 0 || !self && l != 1 {
|
||
c.Ui.Error("Node ID must be specified if -self isn't being used")
|
||
c.Ui.Error(commandErrorText(c))
|
||
return 1
|
||
}
|
||
|
||
// Validate a compatible set of flags were set
|
||
if disable && (deadline != "" || force || noDeadline || ignoreSystem) {
|
||
c.Ui.Error("-disable can't be combined with flags configuring drain strategy")
|
||
c.Ui.Error(commandErrorText(c))
|
||
return 1
|
||
}
|
||
if deadline != "" && (force || noDeadline) {
|
||
c.Ui.Error("-deadline can't be combined with -force or -no-deadline")
|
||
c.Ui.Error(commandErrorText(c))
|
||
return 1
|
||
}
|
||
if force && noDeadline {
|
||
c.Ui.Error("-force and -no-deadline are mutually exclusive")
|
||
c.Ui.Error(commandErrorText(c))
|
||
return 1
|
||
}
|
||
|
||
// Parse the duration
|
||
var d time.Duration
|
||
if force {
|
||
d = -1 * time.Second
|
||
} else if noDeadline {
|
||
d = 0
|
||
} else if deadline != "" {
|
||
dur, err := time.ParseDuration(deadline)
|
||
if err != nil {
|
||
c.Ui.Error(fmt.Sprintf("Failed to parse deadline %q: %v", deadline, err))
|
||
return 1
|
||
}
|
||
if dur <= 0 {
|
||
c.Ui.Error("A positive drain duration must be given")
|
||
return 1
|
||
}
|
||
|
||
d = dur
|
||
} else {
|
||
d = defaultDrainDuration
|
||
}
|
||
|
||
// Get the HTTP client
|
||
client, err := c.Meta.Client()
|
||
if err != nil {
|
||
c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
|
||
return 1
|
||
}
|
||
|
||
// If -self flag is set then determine the current node.
|
||
var nodeID string
|
||
if !self {
|
||
nodeID = args[0]
|
||
} else {
|
||
var err error
|
||
if nodeID, err = getLocalNodeID(client); err != nil {
|
||
c.Ui.Error(err.Error())
|
||
return 1
|
||
}
|
||
}
|
||
|
||
// Check if node exists
|
||
if len(nodeID) == 1 {
|
||
c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
|
||
return 1
|
||
}
|
||
|
||
nodeID = sanitizeUUIDPrefix(nodeID)
|
||
nodes, _, err := client.Nodes().PrefixList(nodeID)
|
||
if err != nil {
|
||
c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
|
||
return 1
|
||
}
|
||
// Return error if no nodes are found
|
||
if len(nodes) == 0 {
|
||
c.Ui.Error(fmt.Sprintf("No node(s) with prefix or id %q found", nodeID))
|
||
return 1
|
||
}
|
||
if len(nodes) > 1 {
|
||
c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s",
|
||
formatNodeStubList(nodes, true)))
|
||
return 1
|
||
}
|
||
|
||
// Prefix lookup matched a single node
|
||
node, meta, err := client.Nodes().Info(nodes[0].ID, nil)
|
||
if err != nil {
|
||
c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
|
||
return 1
|
||
}
|
||
|
||
// If monitoring the drain start the montior and return when done
|
||
if monitor {
|
||
if node.DrainStrategy == nil {
|
||
c.Ui.Warn("No drain strategy set")
|
||
return 0
|
||
}
|
||
c.Ui.Info(fmt.Sprintf("%s: Monitoring node %q: Ctrl-C to detach monitoring", formatTime(time.Now()), node.ID))
|
||
c.monitorDrain(client, context.Background(), node, meta.LastIndex, ignoreSystem)
|
||
return 0
|
||
}
|
||
|
||
// Confirm drain if the node was a prefix match.
|
||
if nodeID != node.ID && !autoYes {
|
||
verb := "enable"
|
||
if disable {
|
||
verb = "disable"
|
||
}
|
||
question := fmt.Sprintf("Are you sure you want to %s drain mode for node %q? [y/N]", verb, node.ID)
|
||
answer, err := c.Ui.Ask(question)
|
||
if err != nil {
|
||
c.Ui.Error(fmt.Sprintf("Failed to parse answer: %v", err))
|
||
return 1
|
||
}
|
||
|
||
if answer == "" || strings.ToLower(answer)[0] == 'n' {
|
||
// No case
|
||
c.Ui.Output("Canceling drain toggle")
|
||
return 0
|
||
} else if strings.ToLower(answer)[0] == 'y' && len(answer) > 1 {
|
||
// Non exact match yes
|
||
c.Ui.Output("For confirmation, an exact ‘y’ is required.")
|
||
return 0
|
||
} else if answer != "y" {
|
||
c.Ui.Output("No confirmation detected. For confirmation, an exact 'y' is required.")
|
||
return 1
|
||
}
|
||
}
|
||
|
||
var spec *api.DrainSpec
|
||
if enable {
|
||
spec = &api.DrainSpec{
|
||
Deadline: d,
|
||
IgnoreSystemJobs: ignoreSystem,
|
||
}
|
||
}
|
||
|
||
// Toggle node draining
|
||
updateMeta, err := client.Nodes().UpdateDrain(node.ID, spec, !keepIneligible, nil)
|
||
if err != nil {
|
||
c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err))
|
||
return 1
|
||
}
|
||
|
||
if !enable || detach {
|
||
if enable {
|
||
c.Ui.Output(fmt.Sprintf("Node %q drain strategy set", node.ID))
|
||
} else {
|
||
c.Ui.Output(fmt.Sprintf("Node %q drain strategy unset", node.ID))
|
||
}
|
||
}
|
||
|
||
if enable && !detach {
|
||
now := time.Now()
|
||
c.Ui.Info(fmt.Sprintf("%s: Ctrl-C to stop monitoring: will not cancel the node drain", formatTime(now)))
|
||
c.Ui.Output(fmt.Sprintf("%s: Node %q drain strategy set", formatTime(now), node.ID))
|
||
c.monitorDrain(client, context.Background(), node, updateMeta.LastIndex, ignoreSystem)
|
||
}
|
||
return 0
|
||
}
|
||
|
||
func (c *NodeDrainCommand) monitorDrain(client *api.Client, ctx context.Context, node *api.Node, index uint64, ignoreSystem bool) {
|
||
outCh := client.Nodes().MonitorDrain(ctx, node.ID, index, ignoreSystem)
|
||
for msg := range outCh {
|
||
switch msg.Level {
|
||
case api.MonitorMsgLevelInfo:
|
||
c.Ui.Info(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
|
||
case api.MonitorMsgLevelWarn:
|
||
c.Ui.Warn(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
|
||
case api.MonitorMsgLevelError:
|
||
c.Ui.Error(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
|
||
default:
|
||
c.Ui.Output(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
|
||
}
|
||
}
|
||
}
|