diff --git a/.changelog/16827.txt b/.changelog/16827.txt
new file mode 100644
index 000000000..8b44857cd
--- /dev/null
+++ b/.changelog/16827.txt
@@ -0,0 +1,3 @@
+```release-note:improvement
+client: Added `drain_on_shutdown` configuration
+```
diff --git a/client/client.go b/client/client.go
index bd1e8f37d..c2864d75c 100644
--- a/client/client.go
+++ b/client/client.go
@@ -761,8 +761,12 @@ func (c *Client) Reload(newConfig *config.Config) error {
// Leave is used to prepare the client to leave the cluster
func (c *Client) Leave() error {
- // TODO
- return nil
+ if c.GetConfig().DevMode {
+ return nil
+ }
+
+ // In normal mode optionally drain the node
+ return c.DrainSelf()
}
// GetConfig returns the config of the client. Do *not* mutate without first
diff --git a/client/config/config.go b/client/config/config.go
index 0e32db3fa..0f4b3ea0a 100644
--- a/client/config/config.go
+++ b/client/config/config.go
@@ -316,6 +316,9 @@ type Config struct {
// Artifact configuration from the agent's config file.
Artifact *ArtifactConfig
+
+ // Drain configuration from the agent's config file.
+ Drain *DrainConfig
}
type APIListenerRegistrar interface {
diff --git a/client/config/drain.go b/client/config/drain.go
new file mode 100644
index 000000000..539f5f2dc
--- /dev/null
+++ b/client/config/drain.go
@@ -0,0 +1,55 @@
+package config
+
+import (
+ "fmt"
+ "time"
+
+ "github.com/hashicorp/nomad/nomad/structs/config"
+)
+
+// DrainConfig describes a Node's drain behavior on graceful shutdown.
+type DrainConfig struct {
+ // Deadline is the duration after the drain starts when the client will stop
+ // waiting for allocations to stop.
+ Deadline time.Duration
+
+ // IgnoreSystemJobs allows system jobs to remain on the node even though it
+ // has been marked for draining.
+ IgnoreSystemJobs bool
+
+ // Force causes the drain to stop all the allocations immediately, ignoring
+ // their jobs' migrate blocks.
+ Force bool
+}
+
+// DrainConfigFromAgent creates the internal read-only copy of the client
+// agent's DrainConfig.
+func DrainConfigFromAgent(c *config.DrainConfig) (*DrainConfig, error) {
+ if c == nil {
+ return nil, nil
+ }
+
+ deadline := time.Hour
+ ignoreSystemJobs := false
+ force := false
+
+ if c.Deadline != nil {
+ var err error
+ deadline, err = time.ParseDuration(*c.Deadline)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing Deadline: %w", err)
+ }
+ }
+ if c.IgnoreSystemJobs != nil {
+ ignoreSystemJobs = *c.IgnoreSystemJobs
+ }
+ if c.Force != nil {
+ force = *c.Force
+ }
+
+ return &DrainConfig{
+ Deadline: deadline,
+ IgnoreSystemJobs: ignoreSystemJobs,
+ Force: force,
+ }, nil
+}
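For context, a minimal sketch of how this conversion behaves, assuming the Nomad module is importable as in the diff; the values below are illustrative, not taken from the changeset:

```go
package main

import (
	"fmt"

	clientconfig "github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/nomad/structs/config"
)

func main() {
	// Agent-level config as parsed from HCL; unset fields stay nil and fall
	// back to the defaults above (1h deadline, false, false).
	agentDrain := &config.DrainConfig{
		Deadline:         pointer.Of("30m"),
		IgnoreSystemJobs: pointer.Of(true),
		// Force is left nil and defaults to false.
	}

	drain, err := clientconfig.DrainConfigFromAgent(agentDrain)
	if err != nil {
		panic(err) // e.g. an unparseable deadline string
	}
	fmt.Println(drain.Deadline, drain.IgnoreSystemJobs, drain.Force) // 30m0s true false

	// A nil agent config yields a nil client config, which in turn disables
	// self-drain entirely in Client.Leave.
	none, _ := clientconfig.DrainConfigFromAgent(nil)
	fmt.Println(none == nil) // true
}
```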
diff --git a/client/drain.go b/client/drain.go
new file mode 100644
index 000000000..22bc80ceb
--- /dev/null
+++ b/client/drain.go
@@ -0,0 +1,163 @@
+package client
+
+import (
+ "context"
+ "fmt"
+ "time"
+
+ "github.com/hashicorp/nomad/client/config"
+ "github.com/hashicorp/nomad/helper"
+ "github.com/hashicorp/nomad/nomad/structs"
+)
+
+func (c *Client) DrainSelf() error {
+ drainSpec := c.GetConfig().Drain
+ if drainSpec == nil {
+ return nil
+ }
+
+ logger := c.logger.Named("drain")
+
+ now := time.Now()
+ drainReq := &structs.NodeUpdateDrainRequest{
+ NodeID: c.NodeID(),
+ DrainStrategy: &structs.DrainStrategy{
+ DrainSpec: structs.DrainSpec{
+ Deadline: drainSpec.Deadline,
+ IgnoreSystemJobs: drainSpec.IgnoreSystemJobs,
+ },
+ StartedAt: now,
+ },
+ MarkEligible: false,
+ Meta: map[string]string{"message": "shutting down"},
+ WriteRequest: structs.WriteRequest{
+ Region: c.Region(), AuthToken: c.secretNodeID()},
+ }
+ if drainSpec.Deadline > 0 {
+ drainReq.DrainStrategy.ForceDeadline = now.Add(drainSpec.Deadline)
+ }
+
+ var drainResp structs.NodeDrainUpdateResponse
+ err := c.RPC("Node.UpdateDrain", drainReq, &drainResp)
+ if err != nil {
+ return err
+ }
+
+ // note: the default deadline is 1h, but it may be set to 0 for no deadline.
+ // Letting this run forever seems wrong, but the init system (e.g. systemd)
+ // will almost always force-kill the client eventually.
+ ctx := context.Background()
+ var cancel context.CancelFunc
+ if drainSpec.Deadline > 0 {
+ // if we set this context to the deadline, the server will reach the
+ // deadline but not get a chance to record it before this context
+ // expires, resulting in spurious errors. So extend the deadline here by
+ // a few seconds
+ ctx, cancel = context.WithTimeout(context.Background(), drainSpec.Deadline+(5*time.Second))
+ defer cancel()
+ }
+ statusCheckInterval := time.Second
+
+ logger.Info("monitoring self-drain")
+ err = c.pollServerForDrainStatus(ctx, statusCheckInterval)
+ switch err {
+ case nil:
+ logger.Debug("self-drain complete")
+ return nil
+ case context.DeadlineExceeded, context.Canceled:
+ logger.Error("self-drain exceeded deadline")
+ return fmt.Errorf("self-drain exceeded deadline")
+ default:
+ logger.Error("could not check node status, falling back to local status checks", "error", err)
+ }
+
+ err = c.pollLocalStatusForDrainStatus(ctx, statusCheckInterval, drainSpec)
+ if err != nil {
+ return fmt.Errorf("self-drain exceeded deadline")
+ }
+
+ logger.Debug("self-drain complete")
+ return nil
+}
+
+// pollServerForDrainStatus will poll the server periodically for the client's
+// drain status, returning an error if the context expires or if the RPC call
+// returns an error. If this function returns nil, the drain was successful.
+func (c *Client) pollServerForDrainStatus(ctx context.Context, interval time.Duration) error {
+ timer, stop := helper.NewSafeTimer(0)
+ defer stop()
+
+ statusReq := &structs.NodeSpecificRequest{
+ NodeID: c.NodeID(),
+ SecretID: c.secretNodeID(),
+ QueryOptions: structs.QueryOptions{
+ Region: c.Region(), AuthToken: c.secretNodeID()},
+ }
+ var statusResp structs.SingleNodeResponse
+
+ for {
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-timer.C:
+ err := c.RPC("Node.GetNode", statusReq, &statusResp)
+ if err != nil {
+ return err
+ }
+ if statusResp.Node != nil && statusResp.Node.DrainStrategy == nil {
+ return nil
+ }
+ timer.Reset(interval)
+ }
+ }
+}
+
+// pollLocalStatusForDrainStatus polls the local allocrunner state periodically
+// for the client status of all allocation runners, returning an error if the
+// context expires. If this function returns nil, the drain was successful. This
+// is a fallback function in case polling the server fails.
+func (c *Client) pollLocalStatusForDrainStatus(ctx context.Context,
+ interval time.Duration, drainSpec *config.DrainConfig) error {
+
+ // drainIsDone gets its own function scope so we can release the allocLock
+ // between poll attempts
+ drainIsDone := func() bool {
+ c.allocLock.RLock()
+ defer c.allocLock.RUnlock()
+ for _, runner := range c.allocs {
+
+ // note: allocs in runners should never be nil or have a nil Job but
+ // if they do we can safely assume the runner is done with it
+ alloc := runner.Alloc()
+ if alloc != nil && !alloc.ClientTerminalStatus() {
+ if !drainSpec.IgnoreSystemJobs {
+ return false
+ }
+ if alloc.Job == nil {
+ continue
+ }
+ if alloc.Job.Type != structs.JobTypeSystem {
+ return false
+ }
+ }
+ }
+ return true
+ }
+
+ timer, stop := helper.NewSafeTimer(0)
+ defer stop()
+
+ for {
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-timer.C:
+ if drainIsDone() {
+ return nil
+ }
+ timer.Reset(interval)
+ }
+
+ }
+}
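As a companion to the server-side poll above, the same completion condition can be observed over the HTTP API. A rough operator-side sketch using the official `api` package (the node ID is a placeholder, not taken from this changeset):

```go
package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}

	nodeID := "REPLACE-WITH-NODE-ID" // placeholder

	for {
		node, _, err := client.Nodes().Info(nodeID, nil)
		if err != nil {
			panic(err)
		}
		// Mirror pollServerForDrainStatus: the drain is finished once the
		// server clears the node's DrainStrategy.
		if node.DrainStrategy == nil {
			fmt.Println("drain complete")
			return
		}
		time.Sleep(time.Second)
	}
}
```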
diff --git a/client/drain_test.go b/client/drain_test.go
new file mode 100644
index 000000000..2fc81685f
--- /dev/null
+++ b/client/drain_test.go
@@ -0,0 +1,201 @@
+package client
+
+import (
+ "context"
+ "testing"
+ "time"
+
+ "github.com/shoenig/test/must"
+ "github.com/shoenig/test/wait"
+
+ "github.com/hashicorp/nomad/ci"
+ "github.com/hashicorp/nomad/client/config"
+ "github.com/hashicorp/nomad/helper/uuid"
+ "github.com/hashicorp/nomad/nomad"
+ "github.com/hashicorp/nomad/nomad/mock"
+ "github.com/hashicorp/nomad/nomad/structs"
+ nstructs "github.com/hashicorp/nomad/nomad/structs"
+ "github.com/hashicorp/nomad/testutil"
+)
+
+// TestClient_SelfDrainConfig is an integration test of the client's Leave
+// method that exercises the behavior of the drain_on_shutdown configuration
+func TestClient_SelfDrainConfig(t *testing.T) {
+ ci.Parallel(t)
+
+ srv, _, cleanupSRV := testServer(t, nil)
+ defer cleanupSRV()
+ testutil.WaitForLeader(t, srv.RPC)
+
+ c1, cleanupC1 := TestClient(t, func(c *config.Config) {
+ c.RPCHandler = srv
+ c.DevMode = false
+ c.Drain = &config.DrainConfig{
+ Deadline: 10 * time.Second,
+ IgnoreSystemJobs: true,
+ }
+ })
+ defer cleanupC1()
+
+ jobID := "service-job-" + uuid.Short()
+ sysJobID := "system-job-" + uuid.Short()
+ testSelfDrainSetup(t, srv, c1.Node().ID, jobID, sysJobID)
+ t.Log("setup complete successful, self-draining node")
+
+ testCtx, cancel := context.WithTimeout(context.Background(), time.Second*10)
+ defer cancel()
+
+ errCh := make(chan error)
+ go func() {
+ errCh <- c1.Leave()
+ }()
+
+ select {
+ case err := <-errCh:
+ must.NoError(t, err)
+ case <-testCtx.Done():
+ t.Fatal("expected drain complete before deadline")
+ }
+
+ c1.allocLock.RLock()
+ defer c1.allocLock.RUnlock()
+ for _, runner := range c1.allocs {
+ if runner.Alloc().JobID == sysJobID {
+ must.Eq(t, structs.AllocClientStatusRunning, runner.AllocState().ClientStatus)
+ } else {
+ must.Eq(t, structs.AllocClientStatusComplete, runner.AllocState().ClientStatus)
+ }
+ }
+
+}
+
+// TestClient_SelfDrain_FailLocal is an integration test of the client's Leave
+// method that exercises the behavior when the client loses connection with the
+// server
+func TestClient_SelfDrain_FailLocal(t *testing.T) {
+ ci.Parallel(t)
+
+ srv, _, cleanupSRV := testServer(t, nil)
+ defer cleanupSRV()
+ testutil.WaitForLeader(t, srv.RPC)
+
+ c1, cleanupC1 := TestClient(t, func(c *config.Config) {
+ c.RPCHandler = srv
+ c.DevMode = false
+ c.Drain = &config.DrainConfig{Deadline: 5 * time.Second}
+ })
+ defer cleanupC1()
+
+ jobID := "service-job-" + uuid.Short()
+ sysJobID := "system-job-" + uuid.Short()
+ testSelfDrainSetup(t, srv, c1.Node().ID, jobID, sysJobID)
+
+ t.Log("setup complete successful, self-draining node and disconnecting node from server")
+
+ // note: this timeout has to cover the drain deadline plus the RPC timeout
+ // when we fail to make the RPC to the leader
+ testCtx, cancel := context.WithTimeout(context.Background(), time.Second*20)
+ defer cancel()
+
+ errCh := make(chan error)
+ go func() {
+ errCh <- c1.Leave()
+ }()
+
+ // We want to disconnect the server so that self-drain is forced to fall back
+ // to local drain behavior. But if we disconnect the server before we start
+ // the self-drain, the drain won't happen at all. So this attempts to
+ // interleave disconnecting the server between when the drain starts and the
+ // server marks the drain successful.
+ go func() {
+ req := structs.NodeSpecificRequest{
+ NodeID: c1.Node().ID,
+ QueryOptions: structs.QueryOptions{Region: "global"},
+ }
+ var out structs.SingleNodeResponse
+ for {
+ select {
+ case <-testCtx.Done():
+ return
+ default:
+ }
+ err := srv.RPC("Node.GetNode", &req, &out)
+ must.NoError(t, err)
+ if out.Node.DrainStrategy != nil {
+ cleanupSRV()
+ return
+ } else if out.Node.LastDrain != nil {
+ return // the drain is already complete
+ }
+ }
+ }()
+
+ select {
+ case err := <-errCh:
+ if err != nil {
+ // we might not be able to interleave the disconnection, so it's
+ // possible the Leave works just fine
+ must.EqError(t, err, "self-drain exceeded deadline")
+ }
+ case <-testCtx.Done():
+ t.Fatal("expected drain complete before test timeout")
+ }
+}
+
+func testSelfDrainSetup(t *testing.T, srv *nomad.Server, nodeID, jobID, sysJobID string) {
+ req := structs.NodeSpecificRequest{
+ NodeID: nodeID,
+ QueryOptions: structs.QueryOptions{Region: "global"},
+ }
+ var out structs.SingleNodeResponse
+
+ // Wait for the node to register before we drain
+ must.Wait(t, wait.InitialSuccess(
+ wait.BoolFunc(func() bool {
+ err := srv.RPC("Node.GetNode", &req, &out)
+ must.NoError(t, err)
+ return out.Node != nil
+ }),
+ wait.Timeout(5*time.Second),
+ wait.Gap(10*time.Millisecond),
+ ))
+
+ // Run a job that starts quickly
+ job := mock.Job()
+ job.ID = jobID
+ job.Constraints = nil
+ job.TaskGroups[0].Constraints = nil
+ job.TaskGroups[0].Count = 1
+ job.TaskGroups[0].Migrate = nstructs.DefaultMigrateStrategy()
+ job.TaskGroups[0].Migrate.MinHealthyTime = 100 * time.Millisecond
+ job.TaskGroups[0].Networks = []*structs.NetworkResource{}
+ job.TaskGroups[0].Tasks[0] = &structs.Task{
+ Name: "mock",
+ Driver: "mock_driver",
+ Config: map[string]interface{}{"run_for": "1m"},
+ LogConfig: structs.DefaultLogConfig(),
+ Resources: &structs.Resources{
+ CPU: 50,
+ MemoryMB: 25,
+ },
+ }
+ testutil.WaitForRunning(t, srv.RPC, job.Copy())
+
+ sysJob := mock.SystemJob()
+ sysJob.ID = sysJobID
+ sysJob.Constraints = nil
+ sysJob.TaskGroups[0].Constraints = nil
+ sysJob.TaskGroups[0].Networks = []*structs.NetworkResource{}
+ sysJob.TaskGroups[0].Tasks[0] = &structs.Task{
+ Name: "mock",
+ Driver: "mock_driver",
+ Config: map[string]interface{}{"run_for": "1m"},
+ LogConfig: structs.DefaultLogConfig(),
+ Resources: &structs.Resources{
+ CPU: 50,
+ MemoryMB: 25,
+ },
+ }
+ testutil.WaitForRunning(t, srv.RPC, sysJob.Copy())
+
+}
diff --git a/command/agent/agent.go b/command/agent/agent.go
index 9019672bc..2d2d0c4fc 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -853,6 +853,12 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
}
conf.Artifact = artifactConfig
+ drainConfig, err := clientconfig.DrainConfigFromAgent(agentConfig.Client.Drain)
+ if err != nil {
+ return nil, fmt.Errorf("invalid drain_on_shutdown config: %v", err)
+ }
+ conf.Drain = drainConfig
+
return conf, nil
}
diff --git a/command/agent/config.go b/command/agent/config.go
index b97eab841..e02a7ca0f 100644
--- a/command/agent/config.go
+++ b/command/agent/config.go
@@ -344,6 +344,9 @@ type ClientConfig struct {
// Artifact contains the configuration for artifacts.
Artifact *config.ArtifactConfig `hcl:"artifact"`
+ // Drain specifies whether to drain the client on shutdown; ignored in dev mode.
+ Drain *config.DrainConfig `hcl:"drain_on_shutdown"`
+
// ExtraKeysHCL is used by hcl to surface unexpected keys
ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"`
}
@@ -366,6 +369,7 @@ func (c *ClientConfig) Copy() *ClientConfig {
nc.HostNetworks = helper.CopySlice(c.HostNetworks)
nc.NomadServiceDiscovery = pointer.Copy(c.NomadServiceDiscovery)
nc.Artifact = c.Artifact.Copy()
+ nc.Drain = c.Drain.Copy()
nc.ExtraKeysHCL = slices.Clone(c.ExtraKeysHCL)
return &nc
}
@@ -1292,6 +1296,7 @@ func DefaultConfig() *Config {
CNIConfigDir: "/opt/cni/config",
NomadServiceDiscovery: pointer.Of(true),
Artifact: config.DefaultArtifactConfig(),
+ Drain: nil,
},
Server: &ServerConfig{
Enabled: false,
@@ -2199,6 +2204,7 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
}
result.Artifact = a.Artifact.Merge(b.Artifact)
+ result.Drain = a.Drain.Merge(b.Drain)
return &result
}
diff --git a/nomad/fsm.go b/nomad/fsm.go
index b44293181..ab5d6c495 100644
--- a/nomad/fsm.go
+++ b/nomad/fsm.go
@@ -473,10 +473,19 @@ func (n *nomadFSM) applyDrainUpdate(reqType structs.MessageType, buf []byte, ind
return fmt.Errorf("error looking up ACL token: %v", err)
}
if token == nil {
- n.logger.Error("token did not exist during node drain update")
- return fmt.Errorf("token did not exist during node drain update")
+ node, err := n.state.NodeBySecretID(nil, req.AuthToken)
+ if err != nil {
+ n.logger.Error("error looking up node for drain update", "error", err)
+ return fmt.Errorf("error looking up node for drain update: %v", err)
+ }
+ if node == nil {
+ n.logger.Error("token did not exist during node drain update")
+ return fmt.Errorf("token did not exist during node drain update")
+ }
+ accessorId = node.ID
+ } else {
+ accessorId = token.AccessorID
}
- accessorId = token.AccessorID
}
if err := n.state.UpdateNodeDrain(reqType, index, req.NodeID, req.DrainStrategy, req.MarkEligible, req.UpdatedAt,
diff --git a/nomad/structs/config/drain.go b/nomad/structs/config/drain.go
new file mode 100644
index 000000000..0df9812ce
--- /dev/null
+++ b/nomad/structs/config/drain.go
@@ -0,0 +1,49 @@
+package config
+
+import "github.com/hashicorp/nomad/helper/pointer"
+
+// DrainConfig describes a Node's drain behavior on graceful shutdown.
+type DrainConfig struct {
+ // Deadline is the duration after the drain starts when the client will stop
+ // waiting for allocations to stop.
+ Deadline *string `hcl:"deadline"`
+
+ // IgnoreSystemJobs allows system jobs to remain on the node even though it
+ // has been marked for draining.
+ IgnoreSystemJobs *bool `hcl:"ignore_system_jobs"`
+
+ // Force causes the drain to stop all the allocations immediately, ignoring
+ // their jobs' migrate blocks.
+ Force *bool `hcl:"force"`
+}
+
+func (d *DrainConfig) Copy() *DrainConfig {
+ if d == nil {
+ return nil
+ }
+
+ nd := new(DrainConfig)
+ *nd = *d
+ return nd
+}
+
+func (d *DrainConfig) Merge(o *DrainConfig) *DrainConfig {
+ switch {
+ case d == nil:
+ return o.Copy()
+ case o == nil:
+ return d.Copy()
+ default:
+ nd := d.Copy()
+ if o.Deadline != nil {
+ nd.Deadline = pointer.Copy(o.Deadline)
+ }
+ if o.IgnoreSystemJobs != nil && *o.IgnoreSystemJobs {
+ nd.IgnoreSystemJobs = pointer.Of(true)
+ }
+ if o.Force != nil && *o.Force {
+ nd.Force = pointer.Of(true)
+ }
+ return nd
+ }
+}
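The merge semantics are worth calling out: `Deadline` follows the usual last-writer-wins pattern, while the two booleans only merge when the overriding value is `true`. A small sketch under that reading of the code above (values are illustrative):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/nomad/structs/config"
)

func main() {
	// Base config, e.g. from the first config file the agent loads.
	base := &config.DrainConfig{
		Deadline: pointer.Of("1h"),
		Force:    pointer.Of(true),
	}
	// Override, e.g. from a later -config file.
	override := &config.DrainConfig{
		Deadline: pointer.Of("30m"),
		Force:    pointer.Of(false),
	}

	merged := base.Merge(override)
	fmt.Println(*merged.Deadline)               // "30m": a non-nil Deadline always wins
	fmt.Println(*merged.Force)                  // true: booleans only merge when set to true
	fmt.Println(merged.IgnoreSystemJobs == nil) // true: never set on either side
}
```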
diff --git a/website/content/docs/configuration/client.mdx b/website/content/docs/configuration/client.mdx
index e02c02b8b..eb6d2a391 100644
--- a/website/content/docs/configuration/client.mdx
+++ b/website/content/docs/configuration/client.mdx
@@ -172,6 +172,11 @@ client {
- `host_network` ([host_network](#host_network-block): nil) - Registers
additional host networks with the node that can be selected when port mapping.
+- `drain_on_shutdown` ([drain_on_shutdown](#drain_on_shutdown-block):
+ nil) - Controls the behavior of the client when
+ [`leave_on_interrupt`][] or [`leave_on_terminate`][] are set and the client
+ receives the appropriate signal.
+
- `cgroup_parent` `(string: "/nomad")` - Specifies the cgroup parent for which cgroup
subsystems managed by Nomad will be mounted under. Currently this only applies to the
`cpuset` subsystems. This field is ignored on non Linux platforms.
@@ -602,6 +607,50 @@ client {
[`reserved.reserved_ports`](#reserved_ports) are also reserved on each host
network.
+### `drain_on_shutdown` Block
+
+The `drain_on_shutdown` block controls the behavior of the client when
+[`leave_on_interrupt`][] or [`leave_on_terminate`][] are set. By default
+`drain_on_shutdown` is not configured and clients will not drain on any
+signal.
+
+If `drain_on_shutdown` is configured, the node will drain itself when receiving
+the appropriate signal (`SIGINT` for `leave_on_interrupt` or `SIGTERM` for
+`leave_on_terminate`). By default this acts similarly to running [`nomad node
+drain -self -no-deadline`][].
+
+Note that even if no deadline is set, your init system may send `SIGKILL` to
+Nomad if the drain takes longer than allowed by the service shutdown. For
+example, when running under Linux with `systemd`, you should adjust the
+[`TimeoutStopSec`][] value in the `nomad.service` unit file to account for
+enough time for the client to drain.
+
+```hcl
+client {
+ # Either leave_on_interrupt or leave_on_terminate must be set
+ # for this to take effect.
+ drain_on_shutdown {
+ deadline = "1h"
+ force = false
+ ignore_system_jobs = false
+ }
+}
+```
+
+- `deadline` `(string: "1h")` - Set the deadline by which all allocations must
+ be moved off the client. Remaining allocations after the deadline are removed
+ from the client, regardless of their [`migrate`][] block. Defaults to 1 hour.
+
+- `force` `(bool: false)` - Setting to `true` drains all the allocations on the
+ client immediately, ignoring the [`migrate`][] block. Note if you have
+ multiple allocations for the same job on the draining client without
+ additional allocations on other clients, this will result in an outage for
+ that job until the drain is complete.
+
+- `ignore_system_jobs` `(bool: false)` - Setting to `true` allows the drain to
+ complete without stopping system job allocations. By default system jobs (and
+ CSI plugins) are stopped last.
+
## `client` Examples
### Common Setup
@@ -666,3 +715,8 @@ client {
[task working directory]: /nomad/docs/runtime/environment#task-directories 'Task directories'
[go-sockaddr/template]: https://godoc.org/github.com/hashicorp/go-sockaddr/template
[landlock]: https://docs.kernel.org/userspace-api/landlock.html
+[`leave_on_interrupt`]: /nomad/docs/configuration#leave_on_interrupt
+[`leave_on_terminate`]: /nomad/docs/configuration#leave_on_terminate
[`migrate`]: /nomad/docs/job-specification/migrate
+[`nomad node drain -self -no-deadline`]: /nomad/docs/commands/node/drain
+[`TimeoutStopSec`]: https://www.freedesktop.org/software/systemd/man/systemd.service.html#TimeoutStopSec=
diff --git a/website/content/docs/configuration/index.mdx b/website/content/docs/configuration/index.mdx
index 4366ff2ec..6a2cd0eae 100644
--- a/website/content/docs/configuration/index.mdx
+++ b/website/content/docs/configuration/index.mdx
@@ -171,17 +171,27 @@ testing.
- `http_api_response_headers` `(map: nil)` - Specifies
user-defined headers to add to the HTTP API responses.
-- `leave_on_interrupt` `(bool: false)` - Specifies if the agent should
- gracefully leave when receiving the interrupt signal. By default, the agent
- will exit forcefully on any signal. This value should only be set to true on
- server agents if it is expected that a terminated server instance will never
- join the cluster again.
+- `leave_on_interrupt` `(bool: false)` - Specifies if the agent should leave
+ when receiving the interrupt signal. By default, any stop signal to an agent
+ (interrupt or terminate) will cause the agent to exit after ensuring its
+ internal state is committed to disk as needed. If this value is set to true on
+ a server agent, the server will notify other servers of its intention to
+ leave the peer set. You should only set this value to true on server agents if
+ the terminated server will never join the cluster again. If this value is set
+ to true on a client agent and the client is configured with
+ [`drain_on_shutdown`][], the client will drain its workloads before shutting
+ down.
-- `leave_on_terminate` `(bool: false)` - Specifies if the agent should
- gracefully leave when receiving the terminate signal. By default, the agent
- will exit forcefully on any signal. This value should only be set to true on
- server agents if it is expected that a terminated server instance will never
- join the cluster again.
+- `leave_on_terminate` `(bool: false)` - Specifies if the agent should leave
+ when receiving the terminate signal. By default, any stop signal to an agent
+ (interrupt or terminate) will cause the agent to exit after ensuring its
+ internal state is committed to disk as needed. If this value is set to true on
+ a server agent, the server will notify other servers of its intention to
+ leave the peer set. You should only set this value to true on server agents if
+ the terminated server will never join the cluster again. If this value is set
+ to true on a client agent and the client is configured with
+ [`drain_on_shutdown`][], the client will drain its workloads before shutting
+ down.
- `limits` - Available in Nomad 0.10.3 and later, this is a nested object that
configures limits that are enforced by the agent. The following parameters
@@ -384,3 +394,4 @@ http_api_response_headers {
[tls-reload]: /nomad/docs/configuration/tls#tls-configuration-reloads
[vault-reload]: /nomad/docs/configuration/vault#vault-configuration-reloads
[gh-3885]: https://github.com/hashicorp/nomad/issues/3885
+[`drain_on_shutdown`]: /nomad/docs/configuration/client#drain_on_shutdown
diff --git a/website/content/docs/operations/nomad-agent.mdx b/website/content/docs/operations/nomad-agent.mdx
index eaa18aad8..70095397b 100644
--- a/website/content/docs/operations/nomad-agent.mdx
+++ b/website/content/docs/operations/nomad-agent.mdx
@@ -82,20 +82,23 @@ There are several important messages that `nomad agent` outputs:
## Stopping an Agent
-An agent can be stopped in two ways: gracefully or forcefully. By default, any
-stop signal to an agent (interrupt, terminate, kill) will cause the agent to
-forcefully stop. Graceful termination can be configured by either setting
+By default, any stop signal to an agent (interrupt or terminate) will cause the
+agent to exit after ensuring its internal state is committed to disk as
+needed. You can configure additional shutdown behaviors by setting
[`leave_on_interrupt`][] or [`leave_on_terminate`][] to respond to the
respective signals.
-When gracefully exiting, servers will notify their intention to leave the
-cluster which allows them to leave the [consensus][] peer set.
+For servers, when `leave_on_interrupt` or `leave_on_terminate` are set, the
+server will notify other servers of its intention to leave the cluster, which
+allows it to leave the [consensus][] peer set. It is especially important that
+a server node be allowed to leave gracefully so that there will be a minimal
+impact on availability as the server leaves the consensus peer set. If a server
+does not leave gracefully and will not return to service, the [`server
+force-leave` command][] should be used to eject it from the consensus peer set.
-It is especially important that a server node be allowed to leave gracefully so
-that there will be a minimal impact on availability as the server leaves the
-consensus peer set. If a server does not gracefully leave, and will not return
-into service, the [`server force-leave` command][] should be used to eject it
-from the consensus peer set.
+For clients, when `leave_on_interrupt` or `leave_on_terminate` are set and the
+client is configured with [`drain_on_shutdown`][], the client will drain its
+workloads before shutting down.
## Signal Handling
@@ -180,5 +183,6 @@ owned by `root` with filesystem permissions set to `0700`.
[`leave_on_terminate`]: /nomad/docs/configuration#leave_on_terminate
[`server force-leave` command]: /nomad/docs/commands/server/force-leave
[consensus]: /nomad/docs/concepts/consensus
+[`drain_on_shutdown`]: /nomad/docs/configuration/client#drain_on_shutdown
[reload its configuration]: /nomad/docs/configuration#configuration-reload
[metrics]: /nomad/docs/operations/metrics-reference
diff --git a/website/content/docs/upgrade/index.mdx b/website/content/docs/upgrade/index.mdx
index 0a9a2c498..d59f8c7fa 100644
--- a/website/content/docs/upgrade/index.mdx
+++ b/website/content/docs/upgrade/index.mdx
@@ -73,6 +73,12 @@ On a single server, install the new version of Nomad. You can do this by
joining a new server to the cluster or by replacing or upgrading the binary
locally and restarting the Nomad service.
+Note that if you have [`leave_on_terminate`][] or [`leave_on_interrupt`][] set,
+you should ensure you're using the expected signal for your upgrade process. For
+example, if you have `leave_on_terminate` set and you intend to update a
+server in place, you should send `SIGINT` and not `SIGTERM` when shutting down the
+server before restarting it.
+
### 2. Check cluster health
[Monitor the Nomad logs][monitor] on the remaining servers to check that the
@@ -244,3 +250,5 @@ configuration to `3` and restart the server.
[peers-json]: /nomad/tutorials/manage-clusters/outage-recovery#manual-recovery-using-peersjson
[`raft_protocol`]: /nomad/docs/configuration/server#raft_protocol
+[`leave_on_interrupt`]: /nomad/docs/configuration#leave_on_interrupt
+[`leave_on_terminate`]: /nomad/docs/configuration#leave_on_terminate