client: enforce max_kill_timeout client configuration
This PR fixes a bug where client configuration max_kill_timeout was not being enforced. The feature was introduced in 9f44780 but seems to have been removed during the major drivers refactoring. We can make sure the value is enforced by pluming it through the DriverHandler, which now uses the lesser of the task.killTimeout or client.maxKillTimeout. Also updates Event.SetKillTimeout to require both the task.killTimeout and client.maxKillTimeout so that we don't make the mistake of using the wrong value - as it was being given only the task.killTimeout before.
This commit is contained in:
parent
29c6b9dfdf
commit
5dd8aa3e27
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:bug
|
||||||
|
client: Fixed a bug where max_kill_timeout client config was ignored
|
||||||
|
```
|
|
@ -620,7 +620,7 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
|
||||||
}
|
}
|
||||||
|
|
||||||
taskEvent := structs.NewTaskEvent(structs.TaskKilling)
|
taskEvent := structs.NewTaskEvent(structs.TaskKilling)
|
||||||
taskEvent.SetKillTimeout(tr.Task().KillTimeout)
|
taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout)
|
||||||
err := tr.Kill(context.TODO(), taskEvent)
|
err := tr.Kill(context.TODO(), taskEvent)
|
||||||
if err != nil && err != taskrunner.ErrTaskNotRunning {
|
if err != nil && err != taskrunner.ErrTaskNotRunning {
|
||||||
ar.logger.Warn("error stopping leader task", "error", err, "task_name", name)
|
ar.logger.Warn("error stopping leader task", "error", err, "task_name", name)
|
||||||
|
@ -643,7 +643,7 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
|
||||||
go func(name string, tr *taskrunner.TaskRunner) {
|
go func(name string, tr *taskrunner.TaskRunner) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
taskEvent := structs.NewTaskEvent(structs.TaskKilling)
|
taskEvent := structs.NewTaskEvent(structs.TaskKilling)
|
||||||
taskEvent.SetKillTimeout(tr.Task().KillTimeout)
|
taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout)
|
||||||
err := tr.Kill(context.TODO(), taskEvent)
|
err := tr.Kill(context.TODO(), taskEvent)
|
||||||
if err != nil && err != taskrunner.ErrTaskNotRunning {
|
if err != nil && err != taskrunner.ErrTaskNotRunning {
|
||||||
ar.logger.Warn("error stopping task", "error", err, "task_name", name)
|
ar.logger.Warn("error stopping task", "error", err, "task_name", name)
|
||||||
|
@ -667,7 +667,7 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
|
||||||
go func(name string, tr *taskrunner.TaskRunner) {
|
go func(name string, tr *taskrunner.TaskRunner) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
taskEvent := structs.NewTaskEvent(structs.TaskKilling)
|
taskEvent := structs.NewTaskEvent(structs.TaskKilling)
|
||||||
taskEvent.SetKillTimeout(tr.Task().KillTimeout)
|
taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout)
|
||||||
err := tr.Kill(context.TODO(), taskEvent)
|
err := tr.Kill(context.TODO(), taskEvent)
|
||||||
if err != nil && err != taskrunner.ErrTaskNotRunning {
|
if err != nil && err != taskrunner.ErrTaskNotRunning {
|
||||||
ar.logger.Warn("error stopping sidecar task", "error", err, "task_name", name)
|
ar.logger.Warn("error stopping sidecar task", "error", err, "task_name", name)
|
||||||
|
|
|
@ -126,7 +126,7 @@ func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
|
||||||
|
|
||||||
expectedKillingMsg := "Sent interrupt. Waiting 10ms before force killing"
|
expectedKillingMsg := "Sent interrupt. Waiting 10ms before force killing"
|
||||||
if killingMsg != expectedKillingMsg {
|
if killingMsg != expectedKillingMsg {
|
||||||
return false, fmt.Errorf("Unexpected task event message - wanted %q. got %q", killingMsg, expectedKillingMsg)
|
return false, fmt.Errorf("Unexpected task event message - wanted %q. got %q", expectedKillingMsg, killingMsg)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Task Two should be dead
|
// Task Two should be dead
|
||||||
|
|
|
@ -6,18 +6,24 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||||
|
"github.com/hashicorp/nomad/helper"
|
||||||
"github.com/hashicorp/nomad/nomad/structs"
|
"github.com/hashicorp/nomad/nomad/structs"
|
||||||
"github.com/hashicorp/nomad/plugins/drivers"
|
"github.com/hashicorp/nomad/plugins/drivers"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewDriverHandle returns a handle for task operations on a specific task
|
// NewDriverHandle returns a handle for task operations on a specific task
|
||||||
func NewDriverHandle(driver drivers.DriverPlugin, taskID string, task *structs.Task, net *drivers.DriverNetwork) *DriverHandle {
|
func NewDriverHandle(
|
||||||
|
driver drivers.DriverPlugin,
|
||||||
|
taskID string,
|
||||||
|
task *structs.Task,
|
||||||
|
maxKillTimeout time.Duration,
|
||||||
|
net *drivers.DriverNetwork) *DriverHandle {
|
||||||
return &DriverHandle{
|
return &DriverHandle{
|
||||||
driver: driver,
|
driver: driver,
|
||||||
net: net,
|
net: net,
|
||||||
taskID: taskID,
|
taskID: taskID,
|
||||||
killSignal: task.KillSignal,
|
killSignal: task.KillSignal,
|
||||||
killTimeout: task.KillTimeout,
|
killTimeout: helper.Min(task.KillTimeout, maxKillTimeout),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -72,7 +72,7 @@ func (h *remoteTaskHook) Prestart(ctx context.Context, req *interfaces.TaskPrest
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
h.tr.setDriverHandle(NewDriverHandle(h.tr.driver, th.Config.ID, h.tr.Task(), taskInfo.NetworkOverride))
|
h.tr.setDriverHandle(NewDriverHandle(h.tr.driver, th.Config.ID, h.tr.Task(), h.tr.clientConfig.MaxKillTimeout, taskInfo.NetworkOverride))
|
||||||
|
|
||||||
h.tr.stateLock.Lock()
|
h.tr.stateLock.Lock()
|
||||||
h.tr.localState.TaskHandle = th
|
h.tr.localState.TaskHandle = th
|
||||||
|
|
|
@ -883,7 +883,7 @@ func (tr *TaskRunner) runDriver() error {
|
||||||
}
|
}
|
||||||
tr.stateLock.Unlock()
|
tr.stateLock.Unlock()
|
||||||
|
|
||||||
tr.setDriverHandle(NewDriverHandle(tr.driver, taskConfig.ID, tr.Task(), net))
|
tr.setDriverHandle(NewDriverHandle(tr.driver, taskConfig.ID, tr.Task(), tr.clientConfig.MaxKillTimeout, net))
|
||||||
|
|
||||||
// Emit an event that we started
|
// Emit an event that we started
|
||||||
tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
|
tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
|
||||||
|
@ -1182,7 +1182,7 @@ func (tr *TaskRunner) restoreHandle(taskHandle *drivers.TaskHandle, net *drivers
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update driver handle on task runner
|
// Update driver handle on task runner
|
||||||
tr.setDriverHandle(NewDriverHandle(tr.driver, taskHandle.Config.ID, tr.Task(), net))
|
tr.setDriverHandle(NewDriverHandle(tr.driver, taskHandle.Config.ID, tr.Task(), tr.clientConfig.MaxKillTimeout, net))
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ import (
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/hashicorp/nomad/ci"
|
"github.com/hashicorp/nomad/ci"
|
||||||
"github.com/hashicorp/nomad/helper"
|
"github.com/hashicorp/nomad/helper"
|
||||||
|
@ -64,5 +65,9 @@ func TestClientConfig(t testing.T) (*Config, func()) {
|
||||||
// Loosen GC threshold
|
// Loosen GC threshold
|
||||||
conf.GCDiskUsageThreshold = 98.0
|
conf.GCDiskUsageThreshold = 98.0
|
||||||
conf.GCInodeUsageThreshold = 98.0
|
conf.GCInodeUsageThreshold = 98.0
|
||||||
|
|
||||||
|
// Same as default; necessary for task Event messages
|
||||||
|
conf.MaxKillTimeout = 30 * time.Second
|
||||||
|
|
||||||
return conf, cleanup
|
return conf, cleanup
|
||||||
}
|
}
|
||||||
|
|
|
@ -6976,7 +6976,7 @@ type Task struct {
|
||||||
// task exits, other tasks will be gracefully terminated.
|
// task exits, other tasks will be gracefully terminated.
|
||||||
Leader bool
|
Leader bool
|
||||||
|
|
||||||
// ShutdownDelay is the duration of the delay between deregistering a
|
// ShutdownDelay is the duration of the delay between de-registering a
|
||||||
// task from Consul and sending it a signal to shutdown. See #2441
|
// task from Consul and sending it a signal to shutdown. See #2441
|
||||||
ShutdownDelay time.Duration
|
ShutdownDelay time.Duration
|
||||||
|
|
||||||
|
@ -8379,9 +8379,10 @@ func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
|
||||||
return e
|
return e
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent {
|
func (e *TaskEvent) SetKillTimeout(timeout, maxTimeout time.Duration) *TaskEvent {
|
||||||
e.KillTimeout = timeout
|
actual := helper.Min(timeout, maxTimeout)
|
||||||
e.Details["kill_timeout"] = timeout.String()
|
e.KillTimeout = actual
|
||||||
|
e.Details["kill_timeout"] = actual.String()
|
||||||
return e
|
return e
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6137,7 +6137,8 @@ func TestTaskEventPopulate(t *testing.T) {
|
||||||
{NewTaskEvent(TaskRestarting).SetRestartReason("Chaos Monkey did it"), "Chaos Monkey did it - Task restarting in 0s"},
|
{NewTaskEvent(TaskRestarting).SetRestartReason("Chaos Monkey did it"), "Chaos Monkey did it - Task restarting in 0s"},
|
||||||
{NewTaskEvent(TaskKilling), "Sent interrupt"},
|
{NewTaskEvent(TaskKilling), "Sent interrupt"},
|
||||||
{NewTaskEvent(TaskKilling).SetKillReason("Its time for you to die"), "Its time for you to die"},
|
{NewTaskEvent(TaskKilling).SetKillReason("Its time for you to die"), "Its time for you to die"},
|
||||||
{NewTaskEvent(TaskKilling).SetKillTimeout(1 * time.Second), "Sent interrupt. Waiting 1s before force killing"},
|
{NewTaskEvent(TaskKilling).SetKillTimeout(1*time.Second, 5*time.Second), "Sent interrupt. Waiting 1s before force killing"},
|
||||||
|
{NewTaskEvent(TaskKilling).SetKillTimeout(10*time.Second, 5*time.Second), "Sent interrupt. Waiting 5s before force killing"},
|
||||||
{NewTaskEvent(TaskTerminated).SetExitCode(-1).SetSignal(3), "Exit Code: -1, Signal: 3"},
|
{NewTaskEvent(TaskTerminated).SetExitCode(-1).SetSignal(3), "Exit Code: -1, Signal: 3"},
|
||||||
{NewTaskEvent(TaskTerminated).SetMessage("Goodbye"), "Exit Code: 0, Exit Message: \"Goodbye\""},
|
{NewTaskEvent(TaskTerminated).SetMessage("Goodbye"), "Exit Code: 0, Exit Message: \"Goodbye\""},
|
||||||
{NewTaskEvent(TaskKilled), "Task successfully killed"},
|
{NewTaskEvent(TaskKilled), "Task successfully killed"},
|
||||||
|
|
Loading…
Reference in New Issue