provide `-no-shutdown-delay` flag for job/alloc stop (#11596)

Some operators use very long group/task `shutdown_delay` settings to
safely drain network connections to their workloads after service
deregistration. But during incident response, they may want to skip
that drain so they can shed load quickly.

Provide a `-no-shutdown-delay` flag on the `nomad alloc stop` and
`nomad job stop` commands that bypasses the delay. This sets a new
desired transition state on the affected allocations that the
allocation/task runner will identify during pre-kill on the client.

Note (as documented here) that using this flag will almost always
result in failed inbound network connections for workloads, because
the tasks exit before clients receive updated service discovery
information and the connections are never gracefully drained.
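
To illustrate the operator-facing surface this adds, here is a minimal sketch using the Go `api` package from this repository; the `DeregisterOptions.NoShutdownDelay` field comes from this change, while `NewClient`, `DefaultConfig`, and the placeholder job ID are ordinary client plumbing and not part of the diff:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// DefaultConfig honors NOMAD_ADDR and falls back to the local agent.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Deregister the job and skip any group/task shutdown_delay on the
	// allocations being stopped ("example" is a placeholder job ID).
	opts := &api.DeregisterOptions{NoShutdownDelay: true}
	evalID, _, err := client.Jobs().DeregisterOpts("example", opts, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("deregister eval:", evalID)
}
```

The CLI flags below are thin wrappers over this option and the matching `no_shutdown_delay` query parameter on the alloc stop endpoint.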
Tim Gross 2021-12-13 14:54:53 -05:00 committed by GitHub
parent 64e909f6a6
commit a0cf5db797
18 changed files with 372 additions and 47 deletions

.changelog/11596.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
cli: provide `-no-shutdown-delay` option to `job stop` and `alloc stop` commands to ignore `shutdown_delay`
```

View File

@@ -299,6 +299,11 @@ type DeregisterOptions struct {
 // is useful when an operator wishes to push through a job deregistration
 // in busy clusters with a large evaluation backlog.
 EvalPriority int
+// NoShutdownDelay, if set to true, will override the group and
+// task shutdown_delay configuration and ignore the delay for any
+// allocations stopped as a result of this Deregister call.
+NoShutdownDelay bool
 }
 // DeregisterOpts is used to remove an existing job. See DeregisterOptions
@@ -312,8 +317,8 @@ func (j *Jobs) DeregisterOpts(jobID string, opts *DeregisterOptions, q *WriteOpt
 // Protect against nil opts. url.Values expects a string, and so using
 // fmt.Sprintf is the best way to do this.
 if opts != nil {
-endpoint += fmt.Sprintf("?purge=%t&global=%t&eval_priority=%v",
-opts.Purge, opts.Global, opts.EvalPriority)
+endpoint += fmt.Sprintf("?purge=%t&global=%t&eval_priority=%v&no_shutdown_delay=%t",
+opts.Purge, opts.Global, opts.EvalPriority, opts.NoShutdownDelay)
 }
 wm, err := j.client.delete(endpoint, &resp, q)

View File

@@ -171,6 +171,9 @@ type allocRunner struct {
 taskHookCoordinator *taskHookCoordinator
+shutdownDelayCtx context.Context
+shutdownDelayCancelFn context.CancelFunc
 // rpcClient is the RPC Client that should be used by the allocrunner and its
 // hooks to communicate with Nomad Servers.
 rpcClient RPCer
@@ -230,6 +233,10 @@ func NewAllocRunner(config *Config) (*allocRunner, error) {
 ar.taskHookCoordinator = newTaskHookCoordinator(ar.logger, tg.Tasks)
+shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background())
+ar.shutdownDelayCtx = shutdownDelayCtx
+ar.shutdownDelayCancelFn = shutdownDelayCancel
 // Initialize the runners hooks.
 if err := ar.initRunnerHooks(config.ClientConfig); err != nil {
 return nil, err
@@ -265,6 +272,7 @@ func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error {
 DriverManager: ar.driverManager,
 ServersContactedCh: ar.serversContactedCh,
 StartConditionMetCtx: ar.taskHookCoordinator.startConditionForTask(task),
+ShutdownDelayCtx: ar.shutdownDelayCtx,
 }
 if ar.cpusetManager != nil {
@@ -824,6 +832,10 @@ func (ar *allocRunner) Update(update *structs.Allocation) {
 default:
 }
+if update.DesiredTransition.ShouldIgnoreShutdownDelay() {
+ar.shutdownDelayCancelFn()
+}
 // Queue the new update
 ar.allocUpdatedCh <- update
 }

View File

@@ -159,6 +159,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error {
 taskEnvBuilder: envBuilder,
 networkStatusGetter: ar,
 logger: hookLogger,
+shutdownDelayCtx: ar.shutdownDelayCtx,
 }),
 newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig),
 newConsulHTTPSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig),

View File

@@ -1,6 +1,7 @@
 package allocrunner
 import (
+"context"
 "sync"
 "time"
@@ -29,9 +30,9 @@ type groupServiceHook struct {
 consulClient consul.ConsulServiceAPI
 consulNamespace string
 prerun bool
-delay time.Duration
 deregistered bool
 networkStatusGetter networkStatusGetter
+shutdownDelayCtx context.Context
 logger log.Logger
@@ -41,6 +42,7 @@ type groupServiceHook struct {
 networks structs.Networks
 ports structs.AllocatedPorts
 taskEnvBuilder *taskenv.Builder
+delay time.Duration
 // Since Update() may be called concurrently with any other hook all
 // hook methods must be fully serialized
@@ -54,6 +56,7 @@ type groupServiceHookConfig struct {
 restarter agentconsul.WorkloadRestarter
 taskEnvBuilder *taskenv.Builder
 networkStatusGetter networkStatusGetter
+shutdownDelayCtx context.Context
 logger log.Logger
 }
@@ -76,6 +79,7 @@ func newGroupServiceHook(cfg groupServiceHookConfig) *groupServiceHook {
 networkStatusGetter: cfg.networkStatusGetter,
 logger: cfg.logger.Named(groupServiceHookName),
 services: cfg.alloc.Job.LookupTaskGroup(cfg.alloc.TaskGroup).Services,
+shutdownDelayCtx: cfg.shutdownDelayCtx,
 }
 if cfg.alloc.AllocatedResources != nil {
@@ -187,9 +191,12 @@ func (h *groupServiceHook) preKillLocked() {
 h.logger.Debug("delay before killing tasks", "group", h.group, "shutdown_delay", h.delay)
-// Wait for specified shutdown_delay
+select {
+// Wait for specified shutdown_delay unless ignored
 // This will block an agent from shutting down.
-<-time.After(h.delay)
+case <-time.After(h.delay):
+case <-h.shutdownDelayCtx.Done():
+}
 }
 func (h *groupServiceHook) Postrun() error {

View File

@@ -112,6 +112,11 @@ type TaskRunner struct {
 killErr error
 killErrLock sync.Mutex
+// shutdownDelayCtx is a context from the alloc runner which will
+// tell us to exit early from shutdown_delay
+shutdownDelayCtx context.Context
+shutdownDelayCancelFn context.CancelFunc
 // Logger is the logger for the task runner.
 logger log.Logger
@@ -287,6 +292,13 @@ type Config struct {
 // startConditionMetCtx is done when TR should start the task
 StartConditionMetCtx <-chan struct{}
+// ShutdownDelayCtx is a context from the alloc runner which will
+// tell us to exit early from shutdown_delay
+ShutdownDelayCtx context.Context
+// ShutdownDelayCancelFn should only be used in testing.
+ShutdownDelayCancelFn context.CancelFunc
 }
 func NewTaskRunner(config *Config) (*TaskRunner, error) {
@@ -342,6 +354,8 @@ func NewTaskRunner(config *Config) (*TaskRunner, error) {
 maxEvents: defaultMaxEvents,
 serversContactedCh: config.ServersContactedCh,
 startConditionMetCtx: config.StartConditionMetCtx,
+shutdownDelayCtx: config.ShutdownDelayCtx,
+shutdownDelayCancelFn: config.ShutdownDelayCancelFn,
 }
 // Create the logger based on the allocation ID
@@ -895,6 +909,8 @@ func (tr *TaskRunner) handleKill(resultCh <-chan *drivers.ExitResult) *drivers.E
 select {
 case result := <-resultCh:
 return result
+case <-tr.shutdownDelayCtx.Done():
+break
 case <-time.After(delay):
 }
 }
@@ -1478,3 +1494,9 @@ func (tr *TaskRunner) DriverCapabilities() (*drivers.Capabilities, error) {
 func (tr *TaskRunner) SetAllocHookResources(res *cstructs.AllocHookResources) {
 tr.allocHookResources = res
 }
+// shutdownDelayCancel is used for testing only and cancels the
+// shutdownDelayCtx
+func (tr *TaskRunner) shutdownDelayCancel() {
+tr.shutdownDelayCancelFn()
+}

View File

@@ -14,6 +14,10 @@ import (
 "time"
 "github.com/golang/snappy"
+"github.com/kr/pretty"
+"github.com/stretchr/testify/assert"
+"github.com/stretchr/testify/require"
 "github.com/hashicorp/nomad/client/allocdir"
 "github.com/hashicorp/nomad/client/allocrunner/interfaces"
 "github.com/hashicorp/nomad/client/config"
@@ -26,6 +30,7 @@ import (
 agentconsul "github.com/hashicorp/nomad/command/agent/consul"
 mockdriver "github.com/hashicorp/nomad/drivers/mock"
 "github.com/hashicorp/nomad/drivers/rawexec"
+"github.com/hashicorp/nomad/helper"
 "github.com/hashicorp/nomad/helper/testlog"
 "github.com/hashicorp/nomad/helper/uuid"
 "github.com/hashicorp/nomad/nomad/mock"
@@ -33,9 +38,6 @@ import (
 "github.com/hashicorp/nomad/plugins/device"
 "github.com/hashicorp/nomad/plugins/drivers"
 "github.com/hashicorp/nomad/testutil"
-"github.com/kr/pretty"
-"github.com/stretchr/testify/assert"
-"github.com/stretchr/testify/require"
 )
 type MockTaskStateUpdater struct {
@@ -94,26 +96,30 @@ func testTaskRunnerConfig(t *testing.T, alloc *structs.Allocation, taskName stri
 cleanup()
 }
+shutdownDelayCtx, shutdownDelayCancelFn := context.WithCancel(context.Background())
 // Create a closed channel to mock TaskHookCoordinator.startConditionForTask.
 // Closed channel indicates this task is not blocked on prestart hooks.
 closedCh := make(chan struct{})
 close(closedCh)
 conf := &Config{
 Alloc: alloc,
 ClientConfig: clientConf,
 Task: thisTask,
 TaskDir: taskDir,
 Logger: clientConf.Logger,
 Consul: consulapi.NewMockConsulServiceClient(t, logger),
 ConsulSI: consulapi.NewMockServiceIdentitiesClient(),
 Vault: vaultclient.NewMockVaultClient(),
 StateDB: cstate.NoopDB{},
 StateUpdater: NewMockTaskStateUpdater(),
 DeviceManager: devicemanager.NoopMockManager(),
 DriverManager: drivermanager.TestDriverManager(t),
 ServersContactedCh: make(chan struct{}),
 StartConditionMetCtx: closedCh,
+ShutdownDelayCtx: shutdownDelayCtx,
+ShutdownDelayCancelFn: shutdownDelayCancelFn,
 }
 return conf, trCleanup
 }
@@ -996,6 +1002,82 @@ WAIT:
 }
 }
+// TestTaskRunner_NoShutdownDelay asserts services are removed from
+// Consul and tasks are killed without waiting for ${shutdown_delay}
+// when the alloc has the NoShutdownDelay transition flag set.
+func TestTaskRunner_NoShutdownDelay(t *testing.T) {
+t.Parallel()
+// don't set this too high so that we don't block the test runner
+// on shutting down the agent if the test fails
+maxTestDuration := time.Duration(testutil.TestMultiplier()*10) * time.Second
+maxTimeToFailDuration := time.Duration(testutil.TestMultiplier()) * time.Second
+alloc := mock.Alloc()
+alloc.DesiredTransition = structs.DesiredTransition{NoShutdownDelay: helper.BoolToPtr(true)}
+task := alloc.Job.TaskGroups[0].Tasks[0]
+task.Services[0].Tags = []string{"tag1"}
+task.Services = task.Services[:1] // only need 1 for this test
+task.Driver = "mock_driver"
+task.Config = map[string]interface{}{
+"run_for": "1000s",
+}
+task.ShutdownDelay = maxTestDuration
+tr, conf, cleanup := runTestTaskRunner(t, alloc, task.Name)
+defer cleanup()
+mockConsul := conf.Consul.(*consulapi.MockConsulServiceClient)
+testWaitForTaskToStart(t, tr)
+testutil.WaitForResult(func() (bool, error) {
+ops := mockConsul.GetOps()
+if n := len(ops); n != 1 {
+return false, fmt.Errorf("expected 1 consul operation. Found %d", n)
+}
+return ops[0].Op == "add", fmt.Errorf("consul operation was not a registration: %#v", ops[0])
+}, func(err error) {
+t.Fatalf("err: %v", err)
+})
+testCtx, cancel := context.WithTimeout(context.Background(), maxTimeToFailDuration)
+defer cancel()
+killed := make(chan error)
+go func() {
+tr.shutdownDelayCancel()
+err := tr.Kill(testCtx, structs.NewTaskEvent("test"))
+killed <- err
+}()
+// Wait for first de-registration call. Note that unlike
+// TestTaskRunner_ShutdownDelay, we're racing with task exit
+// and can't assert that we only get the first deregistration op
+// (from serviceHook.PreKill).
+testutil.WaitForResult(func() (bool, error) {
+ops := mockConsul.GetOps()
+if n := len(ops); n < 2 {
+return false, fmt.Errorf("expected at least 2 consul operations.")
+}
+return ops[1].Op == "remove", fmt.Errorf(
+"consul operation was not a deregistration: %#v", ops[1])
+}, func(err error) {
+t.Fatalf("err: %v", err)
+})
+// Wait for the task to exit
+select {
+case <-tr.WaitCh():
+case <-time.After(maxTimeToFailDuration):
+t.Fatalf("task kill did not ignore shutdown delay")
+return
+}
+err := <-killed
+require.NoError(t, err, "killing task returned unexpected error")
+}
 // TestTaskRunner_Dispatch_Payload asserts that a dispatch job runs and the
 // payload was written to disk.
 func TestTaskRunner_Dispatch_Payload(t *testing.T) {

View File

@@ -138,8 +138,18 @@ func (s *HTTPServer) allocStop(allocID string, resp http.ResponseWriter, req *ht
 return nil, CodedError(405, ErrInvalidMethod)
 }
+noShutdownDelay := false
+if noShutdownDelayQS := req.URL.Query().Get("no_shutdown_delay"); noShutdownDelayQS != "" {
+var err error
+noShutdownDelay, err = strconv.ParseBool(noShutdownDelayQS)
+if err != nil {
+return nil, fmt.Errorf("no_shutdown_delay value is not a boolean: %v", err)
+}
+}
 sr := &structs.AllocStopRequest{
 AllocID: allocID,
+NoShutdownDelay: noShutdownDelay,
 }
 s.parseWriteRequest(req, &sr.WriteRequest)

View File

@@ -453,6 +453,18 @@ func (s *HTTPServer) jobDelete(resp http.ResponseWriter, req *http.Request,
 return nil, err
 }
+// Identify the no_shutdown_delay query param and parse.
+noShutdownDelayStr := req.URL.Query().Get("no_shutdown_delay")
+var noShutdownDelay bool
+if noShutdownDelayStr != "" {
+var err error
+noShutdownDelay, err = strconv.ParseBool(noShutdownDelayStr)
+if err != nil {
+return nil, fmt.Errorf("Failed to parse value of %q (%v) as a bool: %v", "no_shutdown_delay", noShutdownDelayStr, err)
+}
+}
+args.NoShutdownDelay = noShutdownDelay
 // Validate the evaluation priority if the user supplied a non-default
 // value. It's more efficient to do it here, within the agent rather than
 // sending a bad request for the server to reject.
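
Over HTTP this surfaces as a `no_shutdown_delay` query parameter on the job delete endpoint. A rough sketch of the raw call; the `/v1/job/<id>` route follows Nomad's existing HTTP API layout rather than anything shown in this hunk, and the address and job ID are placeholders:

```go
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Roughly what `nomad job stop -no-shutdown-delay example` sends,
	// assuming an agent on the default address (placeholder values).
	url := "http://127.0.0.1:4646/v1/job/example?no_shutdown_delay=true"
	req, err := http.NewRequest(http.MethodDelete, url, nil)
	if err != nil {
		log.Fatal(err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body))
}
```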

View File

@@ -38,6 +38,12 @@ Stop Specific Options:
 screen, which can be used to examine the rescheduling evaluation using the
 eval-status command.
+-no-shutdown-delay
+Ignore the group and task shutdown_delay configuration so there is no
+delay between service deregistration and task shutdown. Note that using
+this flag will result in failed network connections to the allocation
+being stopped.
 -verbose
 Show full information.
 `
@@ -47,12 +53,13 @@ Stop Specific Options:
 func (c *AllocStopCommand) Name() string { return "alloc stop" }
 func (c *AllocStopCommand) Run(args []string) int {
-var detach, verbose bool
+var detach, verbose, noShutdownDelay bool
 flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
 flags.Usage = func() { c.Ui.Output(c.Help()) }
 flags.BoolVar(&detach, "detach", false, "")
 flags.BoolVar(&verbose, "verbose", false, "")
+flags.BoolVar(&noShutdownDelay, "no-shutdown-delay", false, "")
 if err := flags.Parse(args); err != nil {
 return 1
@@ -115,7 +122,12 @@ func (c *AllocStopCommand) Run(args []string) int {
 return 1
 }
-resp, err := client.Allocations().Stop(alloc, nil)
+var opts *api.QueryOptions
+if noShutdownDelay {
+opts = &api.QueryOptions{Params: map[string]string{"no_shutdown_delay": "true"}}
+}
+resp, err := client.Allocations().Stop(alloc, opts)
 if err != nil {
 c.Ui.Error(fmt.Sprintf("Error stopping allocation: %s", err))
 return 1

View File

@@ -43,14 +43,20 @@ Stop Options:
 Override the priority of the evaluations produced as a result of this job
 deregistration. By default, this is set to the priority of the job.
--purge
-Purge is used to stop the job and purge it from the system. If not set, the
-job will still be queryable and will be purged by the garbage collector.
 -global
 Stop a multi-region job in all its regions. By default job stop will stop
 only a single region at a time. Ignored for single-region jobs.
+-no-shutdown-delay
+Ignore the group and task shutdown_delay configuration so that there is no
+delay between service deregistration and task shutdown. Note that using
+this flag will result in failed network connections to the allocations
+being stopped.
+-purge
+Purge is used to stop the job and purge it from the system. If not set, the
+job will still be queryable and will be purged by the garbage collector.
 -yes
 Automatic yes to prompts.
@@ -67,12 +73,13 @@ func (c *JobStopCommand) Synopsis() string {
 func (c *JobStopCommand) AutocompleteFlags() complete.Flags {
 return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
 complete.Flags{
 "-detach": complete.PredictNothing,
 "-eval-priority": complete.PredictNothing,
 "-purge": complete.PredictNothing,
 "-global": complete.PredictNothing,
+"-no-shutdown-delay": complete.PredictNothing,
 "-yes": complete.PredictNothing,
 "-verbose": complete.PredictNothing,
 })
 }
@@ -94,7 +101,7 @@ func (c *JobStopCommand) AutocompleteArgs() complete.Predictor {
 func (c *JobStopCommand) Name() string { return "job stop" }
 func (c *JobStopCommand) Run(args []string) int {
-var detach, purge, verbose, global, autoYes bool
+var detach, purge, verbose, global, autoYes, noShutdownDelay bool
 var evalPriority int
 flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
@@ -102,6 +109,7 @@ func (c *JobStopCommand) Run(args []string) int {
 flags.BoolVar(&detach, "detach", false, "")
 flags.BoolVar(&verbose, "verbose", false, "")
 flags.BoolVar(&global, "global", false, "")
+flags.BoolVar(&noShutdownDelay, "no-shutdown-delay", false, "")
 flags.BoolVar(&autoYes, "yes", false, "")
 flags.BoolVar(&purge, "purge", false, "")
 flags.IntVar(&evalPriority, "eval-priority", 0, "")
@@ -199,7 +207,7 @@ func (c *JobStopCommand) Run(args []string) int {
 }
 // Invoke the stop
-opts := &api.DeregisterOptions{Purge: purge, Global: global, EvalPriority: evalPriority}
+opts := &api.DeregisterOptions{Purge: purge, Global: global, EvalPriority: evalPriority, NoShutdownDelay: noShutdownDelay}
 wq := &api.WriteOptions{Namespace: jobs[0].JobSummary.Namespace}
 evalID, _, err := client.Jobs().DeregisterOpts(*job.ID, opts, wq)
 if err != nil {

View File

@@ -320,7 +320,8 @@ func (a *Alloc) Stop(args *structs.AllocStopRequest, reply *structs.AllocStopRes
 Evals: []*structs.Evaluation{eval},
 Allocs: map[string]*structs.DesiredTransition{
 args.AllocID: {
 Migrate: helper.BoolToPtr(true),
+NoShutdownDelay: helper.BoolToPtr(args.NoShutdownDelay),
 },
 },
 }

View File

@@ -11,6 +11,7 @@ import (
 log "github.com/hashicorp/go-hclog"
 memdb "github.com/hashicorp/go-memdb"
 "github.com/hashicorp/go-msgpack/codec"
+"github.com/hashicorp/nomad/helper"
 "github.com/hashicorp/nomad/helper/uuid"
 "github.com/hashicorp/nomad/nomad/state"
 "github.com/hashicorp/nomad/nomad/structs"
@@ -605,7 +606,7 @@ func (n *nomadFSM) applyDeregisterJob(msgType structs.MessageType, buf []byte, i
 }
 err := n.state.WithWriteTransaction(msgType, index, func(tx state.Txn) error {
-err := n.handleJobDeregister(index, req.JobID, req.Namespace, req.Purge, tx)
+err := n.handleJobDeregister(index, req.JobID, req.Namespace, req.Purge, req.NoShutdownDelay, tx)
 if err != nil {
 n.logger.Error("deregistering job failed",
@@ -645,7 +646,7 @@ func (n *nomadFSM) applyBatchDeregisterJob(msgType structs.MessageType, buf []by
 // evals for jobs whose deregistering didn't get committed yet.
 err := n.state.WithWriteTransaction(msgType, index, func(tx state.Txn) error {
 for jobNS, options := range req.Jobs {
-if err := n.handleJobDeregister(index, jobNS.ID, jobNS.Namespace, options.Purge, tx); err != nil {
+if err := n.handleJobDeregister(index, jobNS.ID, jobNS.Namespace, options.Purge, false, tx); err != nil {
 n.logger.Error("deregistering job failed", "job", jobNS.ID, "error", err)
 return err
 }
@@ -670,12 +671,31 @@ func (n *nomadFSM) applyBatchDeregisterJob(msgType structs.MessageType, buf []by
 // handleJobDeregister is used to deregister a job. Leaves error logging up to
 // caller.
-func (n *nomadFSM) handleJobDeregister(index uint64, jobID, namespace string, purge bool, tx state.Txn) error {
+func (n *nomadFSM) handleJobDeregister(index uint64, jobID, namespace string, purge bool, noShutdownDelay bool, tx state.Txn) error {
 // If it is periodic remove it from the dispatcher
 if err := n.periodicDispatcher.Remove(namespace, jobID); err != nil {
 return fmt.Errorf("periodicDispatcher.Remove failed: %w", err)
 }
+if noShutdownDelay {
+ws := memdb.NewWatchSet()
+allocs, err := n.state.AllocsByJob(ws, namespace, jobID, false)
+if err != nil {
+return err
+}
+transition := &structs.DesiredTransition{NoShutdownDelay: helper.BoolToPtr(true)}
+for _, alloc := range allocs {
+err := n.state.UpdateAllocDesiredTransitionTxn(tx, index, alloc.ID, transition)
+if err != nil {
+return err
+}
+err = tx.Insert("index", &state.IndexEntry{Key: "allocs", Value: index})
+if err != nil {
+return fmt.Errorf("index update failed: %v", err)
+}
+}
+}
 if purge {
 if err := n.state.DeleteJobTxn(index, namespace, jobID, tx); err != nil {
 return fmt.Errorf("DeleteJob failed: %w", err)

View File

@@ -3831,6 +3831,97 @@ func TestJobEndpoint_Deregister_EvalCreation_Legacy(t *testing.T) {
 })
 }
+func TestJobEndpoint_Deregister_NoShutdownDelay(t *testing.T) {
+t.Parallel()
+require := require.New(t)
+s1, cleanupS1 := TestServer(t, func(c *Config) {
+c.NumSchedulers = 0 // Prevent automatic dequeue
+})
+defer cleanupS1()
+codec := rpcClient(t, s1)
+testutil.WaitForLeader(t, s1.RPC)
+// Create the register requests
+job := mock.Job()
+reg := &structs.JobRegisterRequest{
+Job: job,
+WriteRequest: structs.WriteRequest{
+Region: "global",
+Namespace: job.Namespace,
+},
+}
+// Fetch the response
+var resp0 structs.JobRegisterResponse
+require.Nil(msgpackrpc.CallWithCodec(codec, "Job.Register", reg, &resp0))
+// Deregister but don't purge
+dereg1 := &structs.JobDeregisterRequest{
+JobID: job.ID,
+WriteRequest: structs.WriteRequest{
+Region: "global",
+Namespace: job.Namespace,
+},
+}
+var resp1 structs.JobDeregisterResponse
+require.Nil(msgpackrpc.CallWithCodec(codec, "Job.Deregister", dereg1, &resp1))
+require.NotZero(resp1.Index)
+// Check for the job in the FSM
+state := s1.fsm.State()
+out, err := state.JobByID(nil, job.Namespace, job.ID)
+require.NoError(err)
+require.NotNil(out)
+require.True(out.Stop)
+// Lookup the evaluation
+eval, err := state.EvalByID(nil, resp1.EvalID)
+require.NoError(err)
+require.NotNil(eval)
+require.EqualValues(resp1.EvalCreateIndex, eval.CreateIndex)
+require.Equal(structs.EvalTriggerJobDeregister, eval.TriggeredBy)
+// Lookup allocation transitions
+var ws memdb.WatchSet
+allocs, err := state.AllocsByJob(ws, job.Namespace, job.ID, true)
+require.NoError(err)
+for _, alloc := range allocs {
+require.Nil(alloc.DesiredTransition)
+}
+// Deregister with no shutdown delay
+dereg2 := &structs.JobDeregisterRequest{
+JobID: job.ID,
+NoShutdownDelay: true,
+WriteRequest: structs.WriteRequest{
+Region: "global",
+Namespace: job.Namespace,
+},
+}
+var resp2 structs.JobDeregisterResponse
+require.Nil(msgpackrpc.CallWithCodec(codec, "Job.Deregister", dereg2, &resp2))
+require.NotZero(resp2.Index)
+// Lookup the evaluation
+eval, err = state.EvalByID(nil, resp2.EvalID)
+require.NoError(err)
+require.NotNil(eval)
+require.EqualValues(resp2.EvalCreateIndex, eval.CreateIndex)
+require.Equal(structs.EvalTriggerJobDeregister, eval.TriggeredBy)
+// Lookup allocation transitions
+allocs, err = state.AllocsByJob(ws, job.Namespace, job.ID, true)
+require.NoError(err)
+for _, alloc := range allocs {
+require.NotNil(alloc.DesiredTransition)
+require.True(*(alloc.DesiredTransition.NoShutdownDelay))
+}
+}
 func TestJobEndpoint_BatchDeregister(t *testing.T) {
 t.Parallel()
 require := require.New(t)

View File

@@ -1600,7 +1600,7 @@ func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion b
 }
 if err := s.updateJobCSIPlugins(index, job, existingJob, txn); err != nil {
-return fmt.Errorf("unable to update job scaling policies: %v", err)
+return fmt.Errorf("unable to update job csi plugins: %v", err)
 }
 // Insert the job
@@ -3371,7 +3371,7 @@ func (s *StateStore) UpdateAllocsDesiredTransitions(msgType structs.MessageType,
 // Handle each of the updated allocations
 for id, transition := range allocs {
-if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transition); err != nil {
+if err := s.UpdateAllocDesiredTransitionTxn(txn, index, id, transition); err != nil {
 return err
 }
 }
@@ -3390,9 +3390,9 @@ func (s *StateStore) UpdateAllocsDesiredTransitions(msgType structs.MessageType,
 return txn.Commit()
 }
-// nestedUpdateAllocDesiredTransition is used to nest an update of an
+// UpdateAllocDesiredTransitionTxn is used to nest an update of an
 // allocations desired transition
-func (s *StateStore) nestedUpdateAllocDesiredTransition(
+func (s *StateStore) UpdateAllocDesiredTransitionTxn(
 txn *txn, index uint64, allocID string,
 transition *structs.DesiredTransition) error {
@@ -3414,8 +3414,9 @@ func (s *StateStore) nestedUpdateAllocDesiredTransition(
 // Merge the desired transitions
 copyAlloc.DesiredTransition.Merge(transition)
-// Update the modify index
+// Update the modify indexes
 copyAlloc.ModifyIndex = index
+copyAlloc.AllocModifyIndex = index
 // Update the allocation
 if err := txn.Insert("allocs", copyAlloc); err != nil {

View File

@@ -628,6 +628,11 @@ type JobDeregisterRequest struct {
 // in busy clusters with a large evaluation backlog.
 EvalPriority int
+// NoShutdownDelay, if set to true, will override the group and
+// task shutdown_delay configuration and ignore the delay for any
+// allocations stopped as a result of this Deregister call.
+NoShutdownDelay bool
 // Eval is the evaluation to create that's associated with job deregister
 Eval *Evaluation
@@ -955,7 +960,8 @@ type AllocUpdateDesiredTransitionRequest struct {
 // AllocStopRequest is used to stop and reschedule a running Allocation.
 type AllocStopRequest struct {
 AllocID string
+NoShutdownDelay bool
 WriteRequest
 }
@@ -9140,6 +9146,11 @@ type DesiredTransition struct {
 // This field is only used when operators want to force a placement even if
 // a failed allocation is not eligible to be rescheduled
 ForceReschedule *bool
+// NoShutdownDelay, if set to true, will override the group and
+// task shutdown_delay configuration and ignore the delay for any
+// allocations stopped as a result of this Deregister call.
+NoShutdownDelay *bool
 }
 // Merge merges the two desired transitions, preferring the values from the
@@ -9156,6 +9167,10 @@ func (d *DesiredTransition) Merge(o *DesiredTransition) {
 if o.ForceReschedule != nil {
 d.ForceReschedule = o.ForceReschedule
 }
+if o.NoShutdownDelay != nil {
+d.NoShutdownDelay = o.NoShutdownDelay
+}
 }
 // ShouldMigrate returns whether the transition object dictates a migration.
@@ -9178,6 +9193,15 @@ func (d *DesiredTransition) ShouldForceReschedule() bool {
 return d.ForceReschedule != nil && *d.ForceReschedule
 }
+// ShouldIgnoreShutdownDelay returns whether the transition object dictates
+// that shutdown skip any shutdown delays.
+func (d *DesiredTransition) ShouldIgnoreShutdownDelay() bool {
+if d == nil {
+return false
+}
+return d.NoShutdownDelay != nil && *d.NoShutdownDelay
+}
 const (
 AllocDesiredStatusRun = "run" // Allocation should run
 AllocDesiredStatusStop = "stop" // Allocation should stop

View File

@@ -42,6 +42,12 @@ allocation's namespace.
 - `-verbose`: Display verbose output.
+- `-no-shutdown-delay`
+Ignore the group and task [`shutdown_delay`] configuration so that
+there is no delay between service deregistration and task
+shutdown. Note that using this flag will result in failed network
+connections to the allocation being stopped.
 ## Examples
 ```shell-session
@@ -58,3 +64,4 @@ $ nomad alloc stop -detach eb17e557
 ```
 [eval status]: /docs/commands/eval-status
+[`shutdown_delay`]: /docs/job-specification/group#shutdown_delay

View File

@@ -55,6 +55,12 @@ When ACLs are enabled, this command requires a token with the `submit-job`,
 Stop a [multi-region] job in all its regions. By default, `job stop` will
 stop only a single region at a time. Ignored for single-region jobs.
+- `-no-shutdown-delay`
+Ignore the group and task [`shutdown_delay`] configuration so that
+there is no delay between service deregistration and task
+shutdown. Note that using this flag will result in failed network
+connections to the allocations being stopped.
 ## Examples
 Stop the job with ID "job1":
@@ -75,3 +81,4 @@ $ nomad job stop -detach job1
 [eval status]: /docs/commands/eval-status
 [multi-region]: /docs/job-specification/multiregion
+[`shutdown_delay`]: /docs/job-specification/group#shutdown_delay