Merge pull request #624 from hashicorp/f-adjustable-timeout

User specifiable kill timeout and operator configurable max

commit 1e5c776e40
```diff
@@ -85,6 +85,7 @@ type Task struct {
 	Services    []Service
 	Resources   *Resources
 	Meta        map[string]string
+	KillTimeout time.Duration
 }

 // NewTask creates and initializes a new Task.
```
```diff
@@ -5,6 +5,7 @@ import (
 	"io"
 	"strconv"
 	"strings"
+	"time"

 	"github.com/hashicorp/nomad/nomad/structs"
 )
@@ -41,6 +42,11 @@ type Config struct {
 	// be determined dynamically.
 	NetworkSpeed int

+	// MaxKillTimeout allows capping the user-specifiable KillTimeout. If the
+	// task's KillTimeout is greater than the MaxKillTimeout, MaxKillTimeout is
+	// used.
+	MaxKillTimeout time.Duration
+
 	// Servers is a list of known server addresses. These are as "host:port"
 	Servers []string
```
```diff
@@ -9,6 +9,7 @@ import (
 	"strconv"
 	"strings"
 	"sync"
+	"time"

 	docker "github.com/fsouza/go-dockerclient"

@@ -67,6 +68,7 @@ func (c *DockerDriverConfig) Validate() error {
 type dockerPID struct {
 	ImageID     string
 	ContainerID string
+	KillTimeout time.Duration
 }

 type DockerHandle struct {
@@ -76,6 +78,7 @@ type DockerHandle struct {
 	cleanupImage bool
 	imageID      string
 	containerID  string
+	killTimeout  time.Duration
 	waitCh       chan *cstructs.WaitResult
 	doneCh       chan struct{}
 }
@@ -502,6 +505,7 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
 		logger:       d.logger,
 		imageID:      dockerImage.ID,
 		containerID:  container.ID,
+		killTimeout:  d.DriverContext.KillTimeout(task),
 		doneCh:       make(chan struct{}),
 		waitCh:       make(chan *cstructs.WaitResult, 1),
 	}
@@ -555,6 +559,7 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
 		logger:       d.logger,
 		imageID:      pid.ImageID,
 		containerID:  pid.ContainerID,
+		killTimeout:  pid.KillTimeout,
 		doneCh:       make(chan struct{}),
 		waitCh:       make(chan *cstructs.WaitResult, 1),
 	}
@@ -567,6 +572,7 @@ func (h *DockerHandle) ID() string {
 	pid := dockerPID{
 		ImageID:     h.imageID,
 		ContainerID: h.containerID,
+		KillTimeout: h.killTimeout,
 	}
 	data, err := json.Marshal(pid)
 	if err != nil {
@@ -588,10 +594,10 @@ func (h *DockerHandle) Update(task *structs.Task) error {
 	return nil
 }

-// Kill is used to terminate the task. This uses docker stop -t 5
+// Kill is used to terminate the task. This uses `docker stop -t killTimeout`
 func (h *DockerHandle) Kill() error {
 	// Stop the container
-	err := h.client.StopContainer(h.containerID, 5)
+	err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds()))
 	if err != nil {
 		h.logger.Printf("[ERR] driver.docker: failed to stop container %s", h.containerID)
 		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
```
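A note on the `StopContainer` change above: go-dockerclient takes the stop timeout in whole seconds, so `uint(h.killTimeout.Seconds())` truncates sub-second durations to zero (the driver tests use `5 * time.Nanosecond`, which becomes an immediate stop). A standalone sketch of the conversion, with illustrative values:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Durations are converted to whole seconds before being handed to
	// `docker stop -t`; anything under a second truncates to zero.
	for _, d := range []time.Duration{5 * time.Nanosecond, 1500 * time.Millisecond, 30 * time.Second} {
		fmt.Printf("%v -> %d seconds\n", d, uint(d.Seconds()))
	}
	// Output:
	// 5ns -> 0 seconds
	// 1.5s -> 1 seconds
	// 30s -> 30 seconds
}
```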
```diff
@@ -123,12 +123,13 @@ func TestDockerDriver_Handle(t *testing.T) {
 	h := &DockerHandle{
 		imageID:     "imageid",
 		containerID: "containerid",
+		killTimeout: 5 * time.Nanosecond,
 		doneCh:      make(chan struct{}),
 		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}

 	actual := h.ID()
-	expected := `DOCKER:{"ImageID":"imageid","ContainerID":"containerid"}`
+	expected := `DOCKER:{"ImageID":"imageid","ContainerID":"containerid","KillTimeout":5}`
 	if actual != expected {
 		t.Errorf("Expected `%s`, found `%s`", expected, actual)
 	}
```
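The expected handle string ends in `"KillTimeout":5` because `time.Duration` is an `int64` count of nanoseconds and `encoding/json` marshals it as a bare number. A minimal sketch with a hypothetical stand-in struct (not the driver's `dockerPID`):

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// handleState stands in for the per-driver PID structs.
type handleState struct {
	KillTimeout time.Duration
}

func main() {
	// time.Duration marshals as its underlying int64 nanosecond count,
	// so 5 * time.Nanosecond serializes as the number 5.
	data, err := json.Marshal(handleState{KillTimeout: 5 * time.Nanosecond})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data)) // {"KillTimeout":5}
}
```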
```diff
@@ -5,6 +5,7 @@ import (
 	"log"
 	"path/filepath"
 	"sync"
+	"time"

 	"github.com/hashicorp/nomad/client/allocdir"
 	"github.com/hashicorp/nomad/client/config"
@@ -80,6 +81,18 @@ func NewDriverContext(taskName string, config *config.Config, node *structs.Node
 	}
 }

+// KillTimeout returns the timeout that should be used for the task between
+// signaling and killing the task.
+func (d *DriverContext) KillTimeout(task *structs.Task) time.Duration {
+	max := d.config.MaxKillTimeout.Nanoseconds()
+	desired := task.KillTimeout.Nanoseconds()
+	if desired < max {
+		return task.KillTimeout
+	}
+
+	return d.config.MaxKillTimeout
+}
+
 // DriverHandle is an opaque handle into a driver used for task
 // manipulation
 type DriverHandle interface {
```
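The clamp in `KillTimeout` is one-sided: a task's requested timeout is honored only while it stays below the operator's ceiling; otherwise the ceiling wins. A standalone sketch of the same comparison, with stand-in values:

```go
package main

import (
	"fmt"
	"time"
)

// killTimeout mirrors DriverContext.KillTimeout: honor the task's request
// up to the operator-configured maximum, otherwise fall back to the max.
func killTimeout(desired, max time.Duration) time.Duration {
	if desired < max {
		return desired
	}
	return max
}

func main() {
	max := 30 * time.Second
	fmt.Println(killTimeout(20*time.Second, max)) // 20s: under the cap, honored
	fmt.Println(killTimeout(5*time.Minute, max))  // 30s: clamped to the cap
}
```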
```diff
@@ -8,6 +8,7 @@ import (
 	"path/filepath"
 	"reflect"
 	"testing"
+	"time"

 	"github.com/hashicorp/nomad/client/allocdir"
 	"github.com/hashicorp/nomad/client/config"
@@ -60,6 +61,24 @@ func testDriverExecContext(task *structs.Task, driverCtx *DriverContext) *ExecCo
 	return ctx
 }

+func TestDriver_KillTimeout(t *testing.T) {
+	ctx := testDriverContext("foo")
+	ctx.config.MaxKillTimeout = 10 * time.Second
+	expected := 1 * time.Second
+	task := &structs.Task{KillTimeout: expected}
+
+	if actual := ctx.KillTimeout(task); expected != actual {
+		t.Fatalf("KillTimeout(%v) returned %v; want %v", task, actual, expected)
+	}
+
+	expected = 10 * time.Second
+	task = &structs.Task{KillTimeout: 11 * time.Second}
+
+	if actual := ctx.KillTimeout(task); expected != actual {
+		t.Fatalf("KillTimeout(%v) returned %v; want %v", task, actual, expected)
+	}
+}
+
 func TestDriver_TaskEnvironmentVariables(t *testing.T) {
 	t.Parallel()
 	ctx := &ExecContext{}
```
```diff
@@ -1,7 +1,9 @@
 package driver

 import (
+	"encoding/json"
 	"fmt"
+	"log"
 	"path/filepath"
 	"runtime"
 	"syscall"
@@ -32,9 +34,11 @@ type ExecDriverConfig struct {

 // execHandle is returned from Start/Open as a handle to the PID
 type execHandle struct {
-	cmd    executor.Executor
-	waitCh chan *cstructs.WaitResult
-	doneCh chan struct{}
+	cmd         executor.Executor
+	killTimeout time.Duration
+	logger      *log.Logger
+	waitCh      chan *cstructs.WaitResult
+	doneCh      chan struct{}
 }

 // NewExecDriver is used to create a new exec driver
@@ -110,34 +114,57 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,

 	// Return a driver handle
 	h := &execHandle{
-		cmd:    cmd,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		cmd:         cmd,
+		killTimeout: d.DriverContext.KillTimeout(task),
+		logger:      d.logger,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}
 	go h.run()
 	return h, nil
 }

+type execId struct {
+	ExecutorId  string
+	KillTimeout time.Duration
+}
+
 func (d *ExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
+	id := &execId{}
+	if err := json.Unmarshal([]byte(handleID), id); err != nil {
+		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
+	}
+
 	// Find the process
-	cmd, err := executor.OpenId(handleID)
+	cmd, err := executor.OpenId(id.ExecutorId)
 	if err != nil {
-		return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err)
+		return nil, fmt.Errorf("failed to open ID %v: %v", id.ExecutorId, err)
 	}

 	// Return a driver handle
 	h := &execHandle{
-		cmd:    cmd,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		cmd:         cmd,
+		logger:      d.logger,
+		killTimeout: id.KillTimeout,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}
 	go h.run()
 	return h, nil
 }

 func (h *execHandle) ID() string {
-	id, _ := h.cmd.ID()
-	return id
+	executorId, _ := h.cmd.ID()
+	id := execId{
+		ExecutorId:  executorId,
+		KillTimeout: h.killTimeout,
+	}
+
+	data, err := json.Marshal(id)
+	if err != nil {
+		h.logger.Printf("[ERR] driver.exec: failed to marshal ID to JSON: %s", err)
+	}
+	return string(data)
 }

 func (h *execHandle) WaitCh() chan *cstructs.WaitResult {
@@ -154,7 +181,7 @@ func (h *execHandle) Kill() error {
 	select {
 	case <-h.doneCh:
 		return nil
-	case <-time.After(5 * time.Second):
+	case <-time.After(h.killTimeout):
 		return h.cmd.ForceStop()
 	}
 }
```
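This handle-ID pattern repeats across the exec, java, qemu, and raw_exec drivers: `ID()` marshals a small struct (executor ID plus kill timeout) to JSON, and `Open()` unmarshals it, so the timeout survives a client restart. A minimal round-trip sketch, using a hypothetical stand-in for the per-driver id structs:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// handleId is a stand-in for the per-driver id structs (execId, javaId, ...).
type handleId struct {
	ExecutorId  string
	KillTimeout time.Duration
}

func main() {
	// ID(): persist the executor id together with the kill timeout.
	orig := handleId{ExecutorId: "abc123", KillTimeout: 22 * time.Second}
	data, err := json.Marshal(orig)
	if err != nil {
		panic(err)
	}

	// Open(): recover both fields from the serialized handle.
	restored := handleId{}
	if err := json.Unmarshal(data, &restored); err != nil {
		panic(err)
	}
	fmt.Printf("%s -> %+v\n", data, restored)
	// {"ExecutorId":"abc123","KillTimeout":22000000000} -> {ExecutorId:abc123 KillTimeout:22s}
}
```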
```diff
@@ -2,7 +2,9 @@ package driver

 import (
 	"bytes"
+	"encoding/json"
 	"fmt"
+	"log"
 	"os/exec"
 	"path/filepath"
 	"runtime"
@@ -36,9 +38,11 @@ type JavaDriverConfig struct {

 // javaHandle is returned from Start/Open as a handle to the PID
 type javaHandle struct {
-	cmd    executor.Executor
-	waitCh chan *cstructs.WaitResult
-	doneCh chan struct{}
+	cmd         executor.Executor
+	killTimeout time.Duration
+	logger      *log.Logger
+	waitCh      chan *cstructs.WaitResult
+	doneCh      chan struct{}
 }

 // NewJavaDriver is used to create a new exec driver
@@ -158,27 +162,41 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,

 	// Return a driver handle
 	h := &javaHandle{
-		cmd:    cmd,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		cmd:         cmd,
+		killTimeout: d.DriverContext.KillTimeout(task),
+		logger:      d.logger,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}

 	go h.run()
 	return h, nil
 }

+type javaId struct {
+	ExecutorId  string
+	KillTimeout time.Duration
+}
+
 func (d *JavaDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
+	id := &javaId{}
+	if err := json.Unmarshal([]byte(handleID), id); err != nil {
+		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
+	}
+
 	// Find the process
-	cmd, err := executor.OpenId(handleID)
+	cmd, err := executor.OpenId(id.ExecutorId)
 	if err != nil {
-		return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err)
+		return nil, fmt.Errorf("failed to open ID %v: %v", id.ExecutorId, err)
 	}

 	// Return a driver handle
 	h := &javaHandle{
-		cmd:    cmd,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		cmd:         cmd,
+		logger:      d.logger,
+		killTimeout: id.KillTimeout,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}

 	go h.run()
@@ -186,8 +204,17 @@ func (d *JavaDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, erro
 }

 func (h *javaHandle) ID() string {
-	id, _ := h.cmd.ID()
-	return id
+	executorId, _ := h.cmd.ID()
+	id := javaId{
+		ExecutorId:  executorId,
+		KillTimeout: h.killTimeout,
+	}
+
+	data, err := json.Marshal(id)
+	if err != nil {
+		h.logger.Printf("[ERR] driver.java: failed to marshal ID to JSON: %s", err)
+	}
+	return string(data)
 }

 func (h *javaHandle) WaitCh() chan *cstructs.WaitResult {
@@ -204,7 +231,7 @@ func (h *javaHandle) Kill() error {
 	select {
 	case <-h.doneCh:
 		return nil
-	case <-time.After(5 * time.Second):
+	case <-time.After(h.killTimeout):
 		return h.cmd.ForceStop()
 	}
 }
```
```diff
@@ -1,7 +1,9 @@
 package driver

 import (
+	"encoding/json"
 	"fmt"
+	"log"
 	"os/exec"
 	"path/filepath"
 	"regexp"
@@ -40,9 +42,11 @@ type QemuDriverConfig struct {

 // qemuHandle is returned from Start/Open as a handle to the PID
 type qemuHandle struct {
-	cmd    executor.Executor
-	waitCh chan *cstructs.WaitResult
-	doneCh chan struct{}
+	cmd         executor.Executor
+	killTimeout time.Duration
+	logger      *log.Logger
+	waitCh      chan *cstructs.WaitResult
+	doneCh      chan struct{}
 }

 // NewQemuDriver is used to create a new exec driver
@@ -197,35 +201,58 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,

 	// Create and Return Handle
 	h := &qemuHandle{
-		cmd:    cmd,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		cmd:         cmd,
+		killTimeout: d.DriverContext.KillTimeout(task),
+		logger:      d.logger,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}

 	go h.run()
 	return h, nil
 }

+type qemuId struct {
+	ExecutorId  string
+	KillTimeout time.Duration
+}
+
 func (d *QemuDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
+	id := &qemuId{}
+	if err := json.Unmarshal([]byte(handleID), id); err != nil {
+		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
+	}
+
 	// Find the process
-	cmd, err := executor.OpenId(handleID)
+	cmd, err := executor.OpenId(id.ExecutorId)
 	if err != nil {
-		return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err)
+		return nil, fmt.Errorf("failed to open ID %v: %v", id.ExecutorId, err)
 	}

 	// Return a driver handle
 	h := &execHandle{
-		cmd:    cmd,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		cmd:         cmd,
+		logger:      d.logger,
+		killTimeout: id.KillTimeout,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}
 	go h.run()
 	return h, nil
 }

 func (h *qemuHandle) ID() string {
-	id, _ := h.cmd.ID()
-	return id
+	executorId, _ := h.cmd.ID()
+	id := qemuId{
+		ExecutorId:  executorId,
+		KillTimeout: h.killTimeout,
+	}
+
+	data, err := json.Marshal(id)
+	if err != nil {
+		h.logger.Printf("[ERR] driver.qemu: failed to marshal ID to JSON: %s", err)
+	}
+	return string(data)
 }

 func (h *qemuHandle) WaitCh() chan *cstructs.WaitResult {
@@ -244,7 +271,7 @@ func (h *qemuHandle) Kill() error {
 	select {
 	case <-h.doneCh:
 		return nil
-	case <-time.After(5 * time.Second):
+	case <-time.After(h.killTimeout):
 		return h.cmd.ForceStop()
 	}
 }
```
```diff
@@ -1,7 +1,9 @@
 package driver

 import (
+	"encoding/json"
 	"fmt"
+	"log"
 	"path/filepath"
 	"time"

@@ -30,9 +32,11 @@ type RawExecDriver struct {

 // rawExecHandle is returned from Start/Open as a handle to the PID
 type rawExecHandle struct {
-	cmd    executor.Executor
-	waitCh chan *cstructs.WaitResult
-	doneCh chan struct{}
+	cmd         executor.Executor
+	killTimeout time.Duration
+	logger      *log.Logger
+	waitCh      chan *cstructs.WaitResult
+	doneCh      chan struct{}
 }

 // NewRawExecDriver is used to create a new raw exec driver
@@ -109,34 +113,57 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl

 	// Return a driver handle
 	h := &execHandle{
-		cmd:    cmd,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		cmd:         cmd,
+		killTimeout: d.DriverContext.KillTimeout(task),
+		logger:      d.logger,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}
 	go h.run()
 	return h, nil
 }

+type rawExecId struct {
+	ExecutorId  string
+	KillTimeout time.Duration
+}
+
 func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
+	id := &rawExecId{}
+	if err := json.Unmarshal([]byte(handleID), id); err != nil {
+		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
+	}
+
 	// Find the process
 	cmd := executor.NewBasicExecutor()
-	if err := cmd.Open(handleID); err != nil {
-		return nil, fmt.Errorf("failed to open ID %v: %v", handleID, err)
+	if err := cmd.Open(id.ExecutorId); err != nil {
+		return nil, fmt.Errorf("failed to open ID %v: %v", id.ExecutorId, err)
 	}

 	// Return a driver handle
 	h := &execHandle{
-		cmd:    cmd,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		cmd:         cmd,
+		logger:      d.logger,
+		killTimeout: id.KillTimeout,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}
 	go h.run()
 	return h, nil
 }

 func (h *rawExecHandle) ID() string {
-	id, _ := h.cmd.ID()
-	return id
+	executorId, _ := h.cmd.ID()
+	id := rawExecId{
+		ExecutorId:  executorId,
+		KillTimeout: h.killTimeout,
+	}
+
+	data, err := json.Marshal(id)
+	if err != nil {
+		h.logger.Printf("[ERR] driver.raw_exec: failed to marshal ID to JSON: %s", err)
+	}
+	return string(data)
 }

 func (h *rawExecHandle) WaitCh() chan *cstructs.WaitResult {
@@ -153,7 +180,7 @@ func (h *rawExecHandle) Kill() error {
 	select {
 	case <-h.doneCh:
 		return nil
-	case <-time.After(5 * time.Second):
+	case <-time.After(h.killTimeout):
 		return h.cmd.ForceStop()
 	}
 }
```
```diff
@@ -54,18 +54,20 @@ type RktDriverConfig struct {

 // rktHandle is returned from Start/Open as a handle to the PID
 type rktHandle struct {
-	proc   *os.Process
-	image  string
-	logger *log.Logger
-	waitCh chan *cstructs.WaitResult
-	doneCh chan struct{}
+	proc        *os.Process
+	image       string
+	logger      *log.Logger
+	killTimeout time.Duration
+	waitCh      chan *cstructs.WaitResult
+	doneCh      chan struct{}
 }

 // rktPID is a struct to map the pid running the process to the vm image on
 // disk
 type rktPID struct {
-	Pid   int
-	Image string
+	Pid         int
+	Image       string
+	KillTimeout time.Duration
 }

 // NewRktDriver is used to create a new exec driver
@@ -216,11 +218,12 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e

 	d.logger.Printf("[DEBUG] driver.rkt: started ACI %q with: %v", img, cmd.Args)
 	h := &rktHandle{
-		proc:   cmd.Process,
-		image:  img,
-		logger: d.logger,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		proc:        cmd.Process,
+		image:       img,
+		logger:      d.logger,
+		killTimeout: d.DriverContext.KillTimeout(task),
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}
 	go h.run()
 	return h, nil
@@ -242,11 +245,12 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error

 	// Return a driver handle
 	h := &rktHandle{
-		proc:   proc,
-		image:  qpid.Image,
-		logger: d.logger,
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		proc:        proc,
+		image:       qpid.Image,
+		logger:      d.logger,
+		killTimeout: qpid.KillTimeout,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}

 	go h.run()
@@ -256,8 +260,9 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error
 func (h *rktHandle) ID() string {
 	// Return a handle to the PID
 	pid := &rktPID{
-		Pid:   h.proc.Pid,
-		Image: h.image,
+		Pid:         h.proc.Pid,
+		Image:       h.image,
+		KillTimeout: h.killTimeout,
 	}
 	data, err := json.Marshal(pid)
 	if err != nil {
@@ -282,7 +287,7 @@ func (h *rktHandle) Kill() error {
 	select {
 	case <-h.doneCh:
 		return nil
-	case <-time.After(5 * time.Second):
+	case <-time.After(h.killTimeout):
 		return h.proc.Kill()
 	}
 }
```
```diff
@@ -33,14 +33,15 @@ func TestRktVersionRegex(t *testing.T) {

 func TestRktDriver_Handle(t *testing.T) {
 	h := &rktHandle{
-		proc:   &os.Process{Pid: 123},
-		image:  "foo",
-		doneCh: make(chan struct{}),
-		waitCh: make(chan *cstructs.WaitResult, 1),
+		proc:        &os.Process{Pid: 123},
+		image:       "foo",
+		killTimeout: 5 * time.Nanosecond,
+		doneCh:      make(chan struct{}),
+		waitCh:      make(chan *cstructs.WaitResult, 1),
 	}

 	actual := h.ID()
-	expected := `Rkt:{"Pid":123,"Image":"foo"}`
+	expected := `Rkt:{"Pid":123,"Image":"foo","KillTimeout":5}`
 	if actual != expected {
 		t.Errorf("Expected `%s`, found `%s`", expected, actual)
 	}
```
```diff
@@ -208,6 +208,13 @@ func (a *Agent) setupClient() error {
 	if a.config.Client.NetworkSpeed != 0 {
 		conf.NetworkSpeed = a.config.Client.NetworkSpeed
 	}
+	if a.config.Client.MaxKillTimeout != "" {
+		dur, err := time.ParseDuration(a.config.Client.MaxKillTimeout)
+		if err != nil {
+			return fmt.Errorf("Error parsing max kill timeout: %s", err)
+		}
+		conf.MaxKillTimeout = dur
+	}

 	// Setup the node
 	conf.Node = new(structs.Node)
```
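`max_kill_timeout` arrives as a string and is converted with `time.ParseDuration`, so any duration syntax Go accepts will parse, and a malformed value aborts client setup. A standalone sketch of the parsing behavior:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Valid Go duration strings parse to a time.Duration.
	for _, s := range []string{"30s", "1m30s", "2h"} {
		d, err := time.ParseDuration(s)
		fmt.Println(s, d, err)
	}

	// Malformed values surface as an error, which setupClient returns
	// to abort agent startup.
	if _, err := time.ParseDuration("thirty seconds"); err != nil {
		fmt.Println("rejected:", err)
	}
}
```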
```diff
@@ -156,6 +156,9 @@ type ClientConfig struct {

 	// The network link speed to use if it can not be determined dynamically.
 	NetworkSpeed int `hcl:"network_speed"`
+
+	// MaxKillTimeout allows capping the user-specifiable KillTimeout.
+	MaxKillTimeout string `hcl:"max_kill_timeout"`
 }

 // ServerConfig is configuration specific to the server mode
@@ -281,8 +284,9 @@ func DefaultConfig() *Config {
 		AdvertiseAddrs: &AdvertiseAddrs{},
 		Atlas:          &AtlasConfig{},
 		Client: &ClientConfig{
-			Enabled:      false,
-			NetworkSpeed: 100,
+			Enabled:        false,
+			NetworkSpeed:   100,
+			MaxKillTimeout: "30s",
 		},
 		Server: &ServerConfig{
 			Enabled: false,
@@ -500,6 +504,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
 	if b.NetworkSpeed != 0 {
 		result.NetworkSpeed = b.NetworkSpeed
 	}
+	if b.MaxKillTimeout != "" {
+		result.MaxKillTimeout = b.MaxKillTimeout
+	}

 	// Add the servers
 	result.Servers = append(result.Servers, b.Servers...)
```
```diff
@@ -40,7 +40,8 @@ func TestConfig_Merge(t *testing.T) {
 			Options: map[string]string{
 				"foo": "bar",
 			},
-			NetworkSpeed: 100,
+			NetworkSpeed:   100,
+			MaxKillTimeout: "20s",
 		},
 		Server: &ServerConfig{
 			Enabled: false,
@@ -105,7 +106,8 @@ func TestConfig_Merge(t *testing.T) {
 				"foo": "bar",
 				"baz": "zip",
 			},
-			NetworkSpeed: 100,
+			NetworkSpeed:   105,
+			MaxKillTimeout: "50s",
 		},
 		Server: &ServerConfig{
 			Enabled: true,
```
```diff
@@ -161,6 +161,10 @@ job "example" {
         }
       }
     }
+
+    # Controls the timeout between signalling a task it will be killed
+    # and killing the task. If not set a default is used.
+    # kill_timeout = "20s"
   }
 }
}
```
```diff
@@ -408,7 +408,15 @@ func parseTasks(jobName string, taskGroupName string, result *[]*structs.Task, l
 		if taskGroupName == "" {
 			taskGroupName = n
 		}
-		if err := mapstructure.WeakDecode(m, &t); err != nil {
+		dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
+			DecodeHook:       mapstructure.StringToTimeDurationHookFunc(),
+			WeaklyTypedInput: true,
+			Result:           &t,
+		})
+		if err != nil {
+			return err
+		}
+		if err := dec.Decode(m); err != nil {
 			return err
 		}
```
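Plain `WeakDecode` cannot turn the HCL string `"22s"` into a `time.Duration`; the explicit decoder adds `StringToTimeDurationHookFunc` for that conversion while keeping weak typing for everything else. A standalone sketch of the same decoder setup, using a hypothetical stand-in for `structs.Task`:

```go
package main

import (
	"fmt"
	"time"

	"github.com/mitchellh/mapstructure"
)

// task is a hypothetical stand-in for structs.Task.
type task struct {
	Name        string
	KillTimeout time.Duration `mapstructure:"kill_timeout"`
}

func main() {
	var t task
	dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
		// The hook converts duration strings like "22s" into time.Duration.
		DecodeHook:       mapstructure.StringToTimeDurationHookFunc(),
		WeaklyTypedInput: true,
		Result:           &t,
	})
	if err != nil {
		panic(err)
	}

	// Input shaped like a parsed HCL task block.
	if err := dec.Decode(map[string]interface{}{
		"name":         "binstore",
		"kill_timeout": "22s",
	}); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", t) // {Name:binstore KillTimeout:22s}
}
```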
```diff
@@ -121,6 +121,7 @@ func TestParse(t *testing.T) {
 					},
 				},
 			},
+			KillTimeout: 22 * time.Second,
 		},
 		&structs.Task{
 			Name: "storagelocker",
```
```diff
@@ -77,6 +77,8 @@ job "binstore-storagelocker" {
         port "admin" {}
       }
     }
+
+    kill_timeout = "22s"
   }

   task "storagelocker" {
```
```diff
@@ -1303,6 +1303,12 @@ func (s *Service) Hash() string {
 	return fmt.Sprintf("%x", h.Sum(nil))
 }

+const (
+	// DefaultKillTimeout is the default timeout between signaling a task it
+	// will be killed and killing it.
+	DefaultKillTimeout = 5 * time.Second
+)
+
 // Task is a single process typically that is executed as part of a task group.
 type Task struct {
 	// Name of the task
@@ -1330,11 +1336,20 @@ type Task struct {
 	// Meta is used to associate arbitrary metadata with this
 	// task. This is opaque to Nomad.
 	Meta map[string]string
+
+	// KillTimeout is the time between signaling a task that it will be
+	// killed and killing it.
+	KillTimeout time.Duration `mapstructure:"kill_timeout"`
 }

 // InitFields initializes fields in the task.
 func (t *Task) InitFields(job *Job, tg *TaskGroup) {
 	t.InitServiceFields(job.Name, tg.Name)
+
+	// Set the default timeout if it is not specified.
+	if t.KillTimeout == 0 {
+		t.KillTimeout = DefaultKillTimeout
+	}
 }

 // InitServiceFields interpolates values of Job, Task Group
@@ -1460,6 +1475,9 @@ func (t *Task) Validate() error {
 	if t.Resources == nil {
 		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
 	}
+	if t.KillTimeout.Nanoseconds() < 0 {
+		mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value"))
+	}
 	for idx, constr := range t.Constraints {
 		if err := constr.Validate(); err != nil {
 			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
```
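End to end: a task that omits `kill_timeout` picks up `DefaultKillTimeout` in `InitFields`, `Validate` rejects negative values, and the driver later clamps the result against the client's `MaxKillTimeout`. A standalone sketch of the defaulting and validation steps, with stand-in types:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

const defaultKillTimeout = 5 * time.Second // mirrors structs.DefaultKillTimeout

// task is a stand-in for structs.Task.
type task struct {
	KillTimeout time.Duration
}

// initFields mirrors Task.InitFields: fill in the default when unset.
func (t *task) initFields() {
	if t.KillTimeout == 0 {
		t.KillTimeout = defaultKillTimeout
	}
}

// validate mirrors the new Task.Validate check: negative timeouts are invalid.
func (t *task) validate() error {
	if t.KillTimeout < 0 {
		return errors.New("KillTimeout must be a positive value")
	}
	return nil
}

func main() {
	t := task{} // kill_timeout omitted in the job file
	t.initFields()
	fmt.Println(t.KillTimeout, t.validate() == nil) // 5s true
}
```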
```diff
@@ -281,6 +281,11 @@ configured on server nodes.
 * <a id="network_speed">`network_speed`</a>: This is an int that sets the
   default link speed of network interfaces, in megabits, if their speed can
   not be determined dynamically.
+* `max_kill_timeout`: `max_kill_timeout` is a time duration that can be
+  specified using the `s`, `m`, and `h` suffixes, such as `30s`. If a job's
+  task specifies a `kill_timeout` greater than `max_kill_timeout`,
+  `max_kill_timeout` is used. This prevents a user from setting an
+  unreasonably long timeout. If unset, a default is used.

 ### Client Options Map <a id="options_map"></a>
```
```diff
@@ -222,6 +222,10 @@ The `task` object supports the following keys:

 * `meta` - Annotates the task group with opaque metadata.

+* `kill_timeout` - `kill_timeout` is a time duration that can be specified using
+  the `s`, `m`, and `h` suffixes, such as `30s`. It configures the time between
+  signaling a task that it will be killed and actually killing it.
+
 ### Resources

 The `resources` object supports the following keys:
```