Merge pull request #624 from hashicorp/f-adjustable-timeout

User-specifiable kill timeout and operator-configurable max
Alex Dadgar 2016-01-04 11:44:22 -08:00
commit 1e5c776e40
22 changed files with 307 additions and 89 deletions

View file

@ -85,6 +85,7 @@ type Task struct {
Services []Service
Resources *Resources
Meta map[string]string
KillTimeout time.Duration
}
// NewTask creates and initializes a new Task.
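
A minimal sketch of exercising the new field through the api package. The NewTask(name, driver) constructor signature is an assumption here; the diff above only confirms that NewTask exists and that Task gained a KillTimeout of type time.Duration:

package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Ask for a 20s grace period between the kill signal and a hard kill.
	// Assumes NewTask(name, driver) as the constructor.
	task := api.NewTask("web", "docker")
	task.KillTimeout = 20 * time.Second
	fmt.Println(task.KillTimeout) // 20s
}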

View file

@ -5,6 +5,7 @@ import (
"io"
"strconv"
"strings"
"time"
"github.com/hashicorp/nomad/nomad/structs"
)
@ -41,6 +42,11 @@ type Config struct {
// be determined dynamically.
NetworkSpeed int
// MaxKillTimeout allows capping the user-specifiable KillTimeout. If the
// task's KillTimeout is greater than the MaxKillTimeout, MaxKillTimeout is
// used.
MaxKillTimeout time.Duration
// Servers is a list of known server addresses. These are given as "host:port".
Servers []string

View file

@ -9,6 +9,7 @@ import (
"strconv"
"strings"
"sync"
"time"
docker "github.com/fsouza/go-dockerclient"
@ -67,6 +68,7 @@ func (c *DockerDriverConfig) Validate() error {
type dockerPID struct {
ImageID string
ContainerID string
KillTimeout time.Duration
}
type DockerHandle struct {
@ -76,6 +78,7 @@ type DockerHandle struct {
cleanupImage bool
imageID string
containerID string
killTimeout time.Duration
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
}
@ -502,6 +505,7 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
logger: d.logger,
imageID: dockerImage.ID,
containerID: container.ID,
killTimeout: d.DriverContext.KillTimeout(task),
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
@ -555,6 +559,7 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
logger: d.logger,
imageID: pid.ImageID,
containerID: pid.ContainerID,
killTimeout: pid.KillTimeout,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
@ -567,6 +572,7 @@ func (h *DockerHandle) ID() string {
pid := dockerPID{
ImageID: h.imageID,
ContainerID: h.containerID,
KillTimeout: h.killTimeout,
}
data, err := json.Marshal(pid)
if err != nil {
@ -588,10 +594,10 @@ func (h *DockerHandle) Update(task *structs.Task) error {
return nil
}
// Kill is used to terminate the task. This uses docker stop -t 5
// Kill is used to terminate the task. This uses `docker stop -t killTimeout`
func (h *DockerHandle) Kill() error {
// Stop the container
err := h.client.StopContainer(h.containerID, 5)
err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds()))
if err != nil {
h.logger.Printf("[ERR] driver.docker: failed to stop container %s", h.containerID)
return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)

View file

@ -123,12 +123,13 @@ func TestDockerDriver_Handle(t *testing.T) {
h := &DockerHandle{
imageID: "imageid",
containerID: "containerid",
killTimeout: 5 * time.Nanosecond,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
actual := h.ID()
expected := `DOCKER:{"ImageID":"imageid","ContainerID":"containerid"}`
expected := `DOCKER:{"ImageID":"imageid","ContainerID":"containerid","KillTimeout":5}`
if actual != expected {
t.Errorf("Expected `%s`, found `%s`", expected, actual)
}
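
The expected string changes because time.Duration is an int64 nanosecond count and encoding/json has no special case for it, so 5 * time.Nanosecond marshals as the bare integer 5. A quick demonstration:

package main

import (
	"encoding/json"
	"fmt"
	"time"
)

func main() {
	// Durations marshal as their raw nanosecond count.
	b, err := json.Marshal(struct{ KillTimeout time.Duration }{5 * time.Nanosecond})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b)) // {"KillTimeout":5}
}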

View file

@ -5,6 +5,7 @@ import (
"log"
"path/filepath"
"sync"
"time"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
@ -80,6 +81,18 @@ func NewDriverContext(taskName string, config *config.Config, node *structs.Node
}
}
// KillTimeout returns the timeout that should be used for the task between
// signaling and killing the task.
func (d *DriverContext) KillTimeout(task *structs.Task) time.Duration {
max := d.config.MaxKillTimeout.Nanoseconds()
desired := task.KillTimeout.Nanoseconds()
if desired < max {
return task.KillTimeout
}
return d.config.MaxKillTimeout
}
// DriverHandle is an opaque handle into a driver used for task
// manipulation
type DriverHandle interface {

View file

@ -8,6 +8,7 @@ import (
"path/filepath"
"reflect"
"testing"
"time"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
@ -60,6 +61,24 @@ func testDriverExecContext(task *structs.Task, driverCtx *DriverContext) *ExecCo
return ctx
}
func TestDriver_KillTimeout(t *testing.T) {
ctx := testDriverContext("foo")
ctx.config.MaxKillTimeout = 10 * time.Second
expected := 1 * time.Second
task := &structs.Task{KillTimeout: expected}
if actual := ctx.KillTimeout(task); expected != actual {
t.Fatalf("KillTimeout(%v) returned %v; want %v", task, actual, expected)
}
expected = 10 * time.Second
task = &structs.Task{KillTimeout: 11 * time.Second}
if actual := ctx.KillTimeout(task); expected != actual {
t.Fatalf("KillTimeout(%v) returned %v; want %v", task, actual, expected)
}
}
func TestDriver_TaskEnvironmentVariables(t *testing.T) {
t.Parallel()
ctx := &ExecContext{}

View file

@ -1,7 +1,9 @@
package driver
import (
"encoding/json"
"fmt"
"log"
"path/filepath"
"runtime"
"syscall"
@ -32,9 +34,11 @@ type ExecDriverConfig struct {
// execHandle is returned from Start/Open as a handle to the PID
type execHandle struct {
cmd executor.Executor
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
cmd executor.Executor
killTimeout time.Duration
logger *log.Logger
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
}
// NewExecDriver is used to create a new exec driver
@ -110,34 +114,57 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
// Return a driver handle
h := &execHandle{
cmd: cmd,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
cmd: cmd,
killTimeout: d.DriverContext.KillTimeout(task),
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
return h, nil
}
type execId struct {
ExecutorId string
KillTimeout time.Duration
}
func (d *ExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
id := &execId{}
if err := json.Unmarshal([]byte(handleID), id); err != nil {
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
}
// Find the process
cmd, err := executor.OpenId(handleID)
cmd, err := executor.OpenId(id.ExecutorId)
if err != nil {
return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err)
return nil, fmt.Errorf("failed to open ID %v: %v", id.ExecutorId, err)
}
// Return a driver handle
h := &execHandle{
cmd: cmd,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
cmd: cmd,
logger: d.logger,
killTimeout: id.KillTimeout,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
return h, nil
}
func (h *execHandle) ID() string {
id, _ := h.cmd.ID()
return id
executorId, _ := h.cmd.ID()
id := execId{
ExecutorId: executorId,
KillTimeout: h.killTimeout,
}
data, err := json.Marshal(id)
if err != nil {
h.logger.Printf("[ERR] driver.exec: failed to marshal ID to JSON: %s", err)
}
return string(data)
}
func (h *execHandle) WaitCh() chan *cstructs.WaitResult {
@ -154,7 +181,7 @@ func (h *execHandle) Kill() error {
select {
case <-h.doneCh:
return nil
case <-time.After(5 * time.Second):
case <-time.After(h.killTimeout):
return h.cmd.ForceStop()
}
}
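
The handle ID is now a JSON document rather than a bare executor ID, which is how the kill timeout survives a client restart: ID() marshals it and Open() unmarshals it. The java, qemu, and raw_exec drivers below apply the same round trip. A self-contained sketch in the same shape, with execId mirrored from the diff:

package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// Mirrors the execId payload written by ID() and parsed back by Open().
type execId struct {
	ExecutorId  string
	KillTimeout time.Duration
}

func main() {
	handleID, err := json.Marshal(execId{ExecutorId: "4242", KillTimeout: 5 * time.Second})
	if err != nil {
		panic(err)
	}

	// The Open() side recovers both fields from the stored handle.
	parsed := &execId{}
	if err := json.Unmarshal(handleID, parsed); err != nil {
		panic(err)
	}
	fmt.Println(parsed.ExecutorId, parsed.KillTimeout) // 4242 5s
}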

View file

@ -2,7 +2,9 @@ package driver
import (
"bytes"
"encoding/json"
"fmt"
"log"
"os/exec"
"path/filepath"
"runtime"
@ -36,9 +38,11 @@ type JavaDriverConfig struct {
// javaHandle is returned from Start/Open as a handle to the PID
type javaHandle struct {
cmd executor.Executor
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
cmd executor.Executor
killTimeout time.Duration
logger *log.Logger
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
}
// NewJavaDriver is used to create a new exec driver
@ -158,27 +162,41 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
// Return a driver handle
h := &javaHandle{
cmd: cmd,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
cmd: cmd,
killTimeout: d.DriverContext.KillTimeout(task),
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
return h, nil
}
type javaId struct {
ExecutorId string
KillTimeout time.Duration
}
func (d *JavaDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
id := &javaId{}
if err := json.Unmarshal([]byte(handleID), id); err != nil {
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
}
// Find the process
cmd, err := executor.OpenId(handleID)
cmd, err := executor.OpenId(id.ExecutorId)
if err != nil {
return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err)
return nil, fmt.Errorf("failed to open ID %v: %v", id.ExecutorId, err)
}
// Return a driver handle
h := &javaHandle{
cmd: cmd,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
cmd: cmd,
logger: d.logger,
killTimeout: id.KillTimeout,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
@ -186,8 +204,17 @@ func (d *JavaDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, erro
}
func (h *javaHandle) ID() string {
id, _ := h.cmd.ID()
return id
executorId, _ := h.cmd.ID()
id := javaId{
ExecutorId: executorId,
KillTimeout: h.killTimeout,
}
data, err := json.Marshal(id)
if err != nil {
h.logger.Printf("[ERR] driver.java: failed to marshal ID to JSON: %s", err)
}
return string(data)
}
func (h *javaHandle) WaitCh() chan *cstructs.WaitResult {
@ -204,7 +231,7 @@ func (h *javaHandle) Kill() error {
select {
case <-h.doneCh:
return nil
case <-time.After(5 * time.Second):
case <-time.After(h.killTimeout):
return h.cmd.ForceStop()
}
}

View file

@ -1,7 +1,9 @@
package driver
import (
"encoding/json"
"fmt"
"log"
"os/exec"
"path/filepath"
"regexp"
@ -40,9 +42,11 @@ type QemuDriverConfig struct {
// qemuHandle is returned from Start/Open as a handle to the PID
type qemuHandle struct {
cmd executor.Executor
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
cmd executor.Executor
killTimeout time.Duration
logger *log.Logger
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
}
// NewQemuDriver is used to create a new exec driver
@ -197,35 +201,58 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
// Create and Return Handle
h := &qemuHandle{
cmd: cmd,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
cmd: cmd,
killTimeout: d.DriverContext.KillTimeout(task),
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
return h, nil
}
type qemuId struct {
ExecutorId string
KillTimeout time.Duration
}
func (d *QemuDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
id := &qemuId{}
if err := json.Unmarshal([]byte(handleID), id); err != nil {
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
}
// Find the process
cmd, err := executor.OpenId(handleID)
cmd, err := executor.OpenId(id.ExecutorId)
if err != nil {
return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err)
return nil, fmt.Errorf("failed to open ID %v: %v", id.ExecutorId, err)
}
// Return a driver handle
h := &qemuHandle{
cmd: cmd,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
cmd: cmd,
logger: d.logger,
killTimeout: id.KillTimeout,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
return h, nil
}
func (h *qemuHandle) ID() string {
id, _ := h.cmd.ID()
return id
executorId, _ := h.cmd.ID()
id := qemuId{
ExecutorId: executorId,
KillTimeout: h.killTimeout,
}
data, err := json.Marshal(id)
if err != nil {
h.logger.Printf("[ERR] driver.qemu: failed to marshal ID to JSON: %s", err)
}
return string(data)
}
func (h *qemuHandle) WaitCh() chan *cstructs.WaitResult {
@ -244,7 +271,7 @@ func (h *qemuHandle) Kill() error {
select {
case <-h.doneCh:
return nil
case <-time.After(5 * time.Second):
case <-time.After(h.killTimeout):
return h.cmd.ForceStop()
}
}

View file

@ -1,7 +1,9 @@
package driver
import (
"encoding/json"
"fmt"
"log"
"path/filepath"
"time"
@ -30,9 +32,11 @@ type RawExecDriver struct {
// rawExecHandle is returned from Start/Open as a handle to the PID
type rawExecHandle struct {
cmd executor.Executor
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
cmd executor.Executor
killTimeout time.Duration
logger *log.Logger
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
}
// NewRawExecDriver is used to create a new raw exec driver
@ -109,34 +113,57 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl
// Return a driver handle
h := &rawExecHandle{
cmd: cmd,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
cmd: cmd,
killTimeout: d.DriverContext.KillTimeout(task),
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
return h, nil
}
type rawExecId struct {
ExecutorId string
KillTimeout time.Duration
}
func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
id := &rawExecId{}
if err := json.Unmarshal([]byte(handleID), id); err != nil {
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
}
// Find the process
cmd := executor.NewBasicExecutor()
if err := cmd.Open(handleID); err != nil {
return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err)
if err := cmd.Open(id.ExecutorId); err != nil {
return nil, fmt.Errorf("failed to open ID %v: %v", id.ExecutorId, err)
}
// Return a driver handle
h := &rawExecHandle{
cmd: cmd,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
cmd: cmd,
logger: d.logger,
killTimeout: id.KillTimeout,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
return h, nil
}
func (h *rawExecHandle) ID() string {
id, _ := h.cmd.ID()
return id
executorId, _ := h.cmd.ID()
id := rawExecId{
ExecutorId: executorId,
KillTimeout: h.killTimeout,
}
data, err := json.Marshal(id)
if err != nil {
h.logger.Printf("[ERR] driver.raw_exec: failed to marshal ID to JSON: %s", err)
}
return string(data)
}
func (h *rawExecHandle) WaitCh() chan *cstructs.WaitResult {
@ -153,7 +180,7 @@ func (h *rawExecHandle) Kill() error {
select {
case <-h.doneCh:
return nil
case <-time.After(5 * time.Second):
case <-time.After(h.killTimeout):
return h.cmd.ForceStop()
}
}

View file

@ -54,18 +54,20 @@ type RktDriverConfig struct {
// rktHandle is returned from Start/Open as a handle to the PID
type rktHandle struct {
proc *os.Process
image string
logger *log.Logger
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
proc *os.Process
image string
logger *log.Logger
killTimeout time.Duration
waitCh chan *cstructs.WaitResult
doneCh chan struct{}
}
// rktPID is a struct to map the pid running the process to the vm image on
// disk
type rktPID struct {
Pid int
Image string
Pid int
Image string
KillTimeout time.Duration
}
// NewRktDriver is used to create a new exec driver
@ -216,11 +218,12 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e
d.logger.Printf("[DEBUG] driver.rkt: started ACI %q with: %v", img, cmd.Args)
h := &rktHandle{
proc: cmd.Process,
image: img,
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
proc: cmd.Process,
image: img,
logger: d.logger,
killTimeout: d.DriverContext.KillTimeout(task),
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
return h, nil
@ -242,11 +245,12 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error
// Return a driver handle
h := &rktHandle{
proc: proc,
image: qpid.Image,
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
proc: proc,
image: qpid.Image,
logger: d.logger,
killTimeout: qpid.KillTimeout,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
go h.run()
@ -256,8 +260,9 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error
func (h *rktHandle) ID() string {
// Return a handle to the PID
pid := &rktPID{
Pid: h.proc.Pid,
Image: h.image,
Pid: h.proc.Pid,
Image: h.image,
KillTimeout: h.killTimeout,
}
data, err := json.Marshal(pid)
if err != nil {
@ -282,7 +287,7 @@ func (h *rktHandle) Kill() error {
select {
case <-h.doneCh:
return nil
case <-time.After(5 * time.Second):
case <-time.After(h.killTimeout):
return h.proc.Kill()
}
}
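
Unlike the executor-based drivers, rkt reattaches by raw PID. On Unix, os.FindProcess always succeeds even for a dead PID, so a stale handle only surfaces when the process is signaled. A minimal sketch of probing a reattached PID:

package main

import (
	"fmt"
	"os"
	"syscall"
)

func main() {
	// FindProcess never fails on Unix, even if the PID is long gone.
	proc, _ := os.FindProcess(123456)

	// Signal 0 checks for existence without delivering a signal.
	if err := proc.Signal(syscall.Signal(0)); err != nil {
		fmt.Println("process not running:", err)
	}
}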

View file

@ -33,14 +33,15 @@ func TestRktVersionRegex(t *testing.T) {
func TestRktDriver_Handle(t *testing.T) {
h := &rktHandle{
proc: &os.Process{Pid: 123},
image: "foo",
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
proc: &os.Process{Pid: 123},
image: "foo",
killTimeout: 5 * time.Nanosecond,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
actual := h.ID()
expected := `Rkt:{"Pid":123,"Image":"foo"}`
expected := `Rkt:{"Pid":123,"Image":"foo","KillTimeout":5}`
if actual != expected {
t.Errorf("Expected `%s`, found `%s`", expected, actual)
}

View file

@ -208,6 +208,13 @@ func (a *Agent) setupClient() error {
if a.config.Client.NetworkSpeed != 0 {
conf.NetworkSpeed = a.config.Client.NetworkSpeed
}
if a.config.Client.MaxKillTimeout != "" {
dur, err := time.ParseDuration(a.config.Client.MaxKillTimeout)
if err != nil {
return fmt.Errorf("Error parsing retry interval: %s", err)
}
conf.MaxKillTimeout = dur
}
// Setup the node
conf.Node = new(structs.Node)
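
The agent keeps max_kill_timeout as a string so it reads naturally in HCL, deferring the conversion to time.ParseDuration here. The parser accepts the documented suffixes:

package main

import (
	"fmt"
	"time"
)

func main() {
	// The same parsing the agent performs on max_kill_timeout.
	for _, s := range []string{"30s", "5m", "1h"} {
		dur, err := time.ParseDuration(s)
		if err != nil {
			panic(err)
		}
		fmt.Println(dur)
	}
}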

View file

@ -156,6 +156,9 @@ type ClientConfig struct {
// The network link speed to use if it can not be determined dynamically.
NetworkSpeed int `hcl:"network_speed"`
// MaxKillTimeout allows capping the user-specifiable KillTimeout.
MaxKillTimeout string `hcl:"max_kill_timeout"`
}
// ServerConfig is configuration specific to the server mode
@ -281,8 +284,9 @@ func DefaultConfig() *Config {
AdvertiseAddrs: &AdvertiseAddrs{},
Atlas: &AtlasConfig{},
Client: &ClientConfig{
Enabled: false,
NetworkSpeed: 100,
Enabled: false,
NetworkSpeed: 100,
MaxKillTimeout: "30s",
},
Server: &ServerConfig{
Enabled: false,
@ -500,6 +504,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
if b.NetworkSpeed != 0 {
result.NetworkSpeed = b.NetworkSpeed
}
if b.MaxKillTimeout != "" {
result.MaxKillTimeout = b.MaxKillTimeout
}
// Add the servers
result.Servers = append(result.Servers, b.Servers...)
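
The merge rule for the new field follows the file's convention: the override wins only when it was explicitly set. Distilled into a hypothetical standalone helper:

package main

import "fmt"

// mergeMaxKillTimeout mirrors the merge rule above: the override (b)
// applies only when the second config actually set it.
func mergeMaxKillTimeout(a, b string) string {
	if b != "" {
		return b
	}
	return a
}

func main() {
	fmt.Println(mergeMaxKillTimeout("30s", ""))    // 30s
	fmt.Println(mergeMaxKillTimeout("30s", "50s")) // 50s
}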

View file

@ -40,7 +40,8 @@ func TestConfig_Merge(t *testing.T) {
Options: map[string]string{
"foo": "bar",
},
NetworkSpeed: 100,
NetworkSpeed: 100,
MaxKillTimeout: "20s",
},
Server: &ServerConfig{
Enabled: false,
@ -105,7 +106,8 @@ func TestConfig_Merge(t *testing.T) {
"foo": "bar",
"baz": "zip",
},
NetworkSpeed: 100,
NetworkSpeed: 105,
MaxKillTimeout: "50s",
},
Server: &ServerConfig{
Enabled: true,

View file

@ -161,6 +161,10 @@ job "example" {
}
}
}
# Controls the timeout between signaling a task that it will be killed
# and killing it. If not set, a default is used.
# kill_timeout = "20s"
}
}
}

View file

@ -408,7 +408,15 @@ func parseTasks(jobName string, taskGroupName string, result *[]*structs.Task, l
if taskGroupName == "" {
taskGroupName = n
}
if err := mapstructure.WeakDecode(m, &t); err != nil {
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
WeaklyTypedInput: true,
Result: &t,
})
if err != nil {
return err
}
if err := dec.Decode(m); err != nil {
return err
}
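
A plain mapstructure.WeakDecode cannot turn the HCL string "22s" into a time.Duration; wiring StringToTimeDurationHookFunc into an explicit decoder is what makes the new kill_timeout field decodable. A minimal sketch with a hypothetical task struct:

package main

import (
	"fmt"
	"time"

	"github.com/mitchellh/mapstructure"
)

type task struct {
	KillTimeout time.Duration `mapstructure:"kill_timeout"`
}

func main() {
	var t task
	dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
		// Turns "22s" into 22 * time.Second while decoding.
		DecodeHook:       mapstructure.StringToTimeDurationHookFunc(),
		WeaklyTypedInput: true,
		Result:           &t,
	})
	if err != nil {
		panic(err)
	}
	if err := dec.Decode(map[string]interface{}{"kill_timeout": "22s"}); err != nil {
		panic(err)
	}
	fmt.Println(t.KillTimeout) // 22s
}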

View file

@ -121,6 +121,7 @@ func TestParse(t *testing.T) {
},
},
},
KillTimeout: 22 * time.Second,
},
&structs.Task{
Name: "storagelocker",

View file

@ -77,6 +77,8 @@ job "binstore-storagelocker" {
port "admin" {}
}
}
kill_timeout = "22s"
}
task "storagelocker" {

View file

@ -1303,6 +1303,12 @@ func (s *Service) Hash() string {
return fmt.Sprintf("%x", h.Sum(nil))
}
const (
// DefaultKillTimeout is the default timeout between signaling a task that
// it will be killed and killing it.
DefaultKillTimeout = 5 * time.Second
)
// Task is a single process typically that is executed as part of a task group.
type Task struct {
// Name of the task
@ -1330,11 +1336,20 @@ type Task struct {
// Meta is used to associate arbitrary metadata with this
// task. This is opaque to Nomad.
Meta map[string]string
// KillTimeout is the time between signaling a task that it will be
// killed and killing it.
KillTimeout time.Duration `mapstructure:"kill_timeout"`
}
// InitFields initializes fields in the task.
func (t *Task) InitFields(job *Job, tg *TaskGroup) {
t.InitServiceFields(job.Name, tg.Name)
// Set the default timeout if it is not specified.
if t.KillTimeout == 0 {
t.KillTimeout = DefaultKillTimeout
}
}
// InitServiceFields interpolates values of Job, Task Group
@ -1460,6 +1475,9 @@ func (t *Task) Validate() error {
if t.Resources == nil {
mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
}
if t.KillTimeout.Nanoseconds() < 0 {
mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value"))
}
for idx, constr := range t.Constraints {
if err := constr.Validate(); err != nil {
outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
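
Default and cap combine to give the effective timeout: InitFields fills in DefaultKillTimeout when a job omits kill_timeout, and the client-side KillTimeout() shown earlier caps the result at MaxKillTimeout. A standalone sketch of the combined rule, using the constants from this diff:

package main

import (
	"fmt"
	"time"
)

// effectiveKillTimeout combines the two rules from the diff: default when
// unset (InitFields), then cap at the client maximum (DriverContext.KillTimeout).
func effectiveKillTimeout(requested, max time.Duration) time.Duration {
	if requested == 0 {
		requested = 5 * time.Second // structs.DefaultKillTimeout
	}
	if requested > max {
		return max
	}
	return requested
}

func main() {
	fmt.Println(effectiveKillTimeout(0, 30*time.Second))              // 5s (default)
	fmt.Println(effectiveKillTimeout(11*time.Second, 10*time.Second)) // 10s (capped)
}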

View file

@ -281,6 +281,11 @@ configured on server nodes.
* <a id="network_speed">`network_speed`</a>: This is an int that sets the
default link speed of network interfaces, in megabits, if their speed can
not be determined dynamically.
* `max_kill_timeout`: A time duration that can be specified using the `s`,
`m`, and `h` suffixes, such as `30s`. If a job's task specifies a
`kill_timeout` greater than `max_kill_timeout`, `max_kill_timeout` is used
instead. This prevents a user from setting an unreasonably long timeout. If
unset, a default is used.
### Client Options Map <a id="options_map"></a>

View file

@ -222,6 +222,10 @@ The `task` object supports the following keys:
* `meta` - Annotates the task group with opaque metadata.
* `kill_timeout` - A time duration that can be specified using the `s`, `m`,
and `h` suffixes, such as `30s`. It configures the time between signaling a
task that it will be killed and actually killing it. The value is capped by
the client's `max_kill_timeout`.
### Resources
The `resources` object supports the following keys: