Merge pull request #8390 from hashicorp/lifecycle-poststart-hook
task lifecycle poststart hook
This commit is contained in:
commit
71a694f39c
|
@ -632,6 +632,7 @@ type DispatchPayloadConfig struct {
|
||||||
|
|
||||||
const (
|
const (
|
||||||
TaskLifecycleHookPrestart = "prestart"
|
TaskLifecycleHookPrestart = "prestart"
|
||||||
|
TaskLifecycleHookPoststart = "poststart"
|
||||||
)
|
)
|
||||||
|
|
||||||
type TaskLifecycle struct {
|
type TaskLifecycle struct {
|
||||||
|
|
|
@ -142,6 +142,100 @@ func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestAllocRunner_Lifecycle_Poststart asserts that a service job with 2
|
||||||
|
// poststart lifecycle hooks (1 sidecar, 1 ephemeral) starts all 3 tasks, only
|
||||||
|
// the ephemeral one finishes, and the other 2 exit when the alloc is stopped.
|
||||||
|
func TestAllocRunner_Lifecycle_Poststart(t *testing.T) {
|
||||||
|
alloc := mock.LifecycleAlloc()
|
||||||
|
|
||||||
|
alloc.Job.Type = structs.JobTypeService
|
||||||
|
mainTask := alloc.Job.TaskGroups[0].Tasks[0]
|
||||||
|
mainTask.Config["run_for"] = "100s"
|
||||||
|
|
||||||
|
sidecarTask := alloc.Job.TaskGroups[0].Tasks[1]
|
||||||
|
sidecarTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart
|
||||||
|
sidecarTask.Config["run_for"] = "100s"
|
||||||
|
|
||||||
|
ephemeralTask := alloc.Job.TaskGroups[0].Tasks[2]
|
||||||
|
ephemeralTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart
|
||||||
|
|
||||||
|
conf, cleanup := testAllocRunnerConfig(t, alloc)
|
||||||
|
defer cleanup()
|
||||||
|
ar, err := NewAllocRunner(conf)
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer destroy(ar)
|
||||||
|
go ar.Run()
|
||||||
|
|
||||||
|
upd := conf.StateUpdater.(*MockStateUpdater)
|
||||||
|
|
||||||
|
// Wait for main and sidecar tasks to be running, and that the
|
||||||
|
// ephemeral task ran and exited.
|
||||||
|
testutil.WaitForResult(func() (bool, error) {
|
||||||
|
last := upd.Last()
|
||||||
|
if last == nil {
|
||||||
|
return false, fmt.Errorf("No updates")
|
||||||
|
}
|
||||||
|
|
||||||
|
if last.ClientStatus != structs.AllocClientStatusRunning {
|
||||||
|
return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
|
||||||
|
}
|
||||||
|
|
||||||
|
if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning {
|
||||||
|
return false, fmt.Errorf("expected main task to be running not %s", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateRunning {
|
||||||
|
return false, fmt.Errorf("expected sidecar task to be running not %s", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead {
|
||||||
|
return false, fmt.Errorf("expected ephemeral task to be dead not %s", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
if last.TaskStates[ephemeralTask.Name].Failed {
|
||||||
|
return false, fmt.Errorf("expected ephemeral task to be successful not failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
}, func(err error) {
|
||||||
|
t.Fatalf("error waiting for initial state:\n%v", err)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Tell the alloc to stop
|
||||||
|
stopAlloc := alloc.Copy()
|
||||||
|
stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop
|
||||||
|
ar.Update(stopAlloc)
|
||||||
|
|
||||||
|
// Wait for main and sidecar tasks to stop.
|
||||||
|
testutil.WaitForResult(func() (bool, error) {
|
||||||
|
last := upd.Last()
|
||||||
|
|
||||||
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
||||||
|
return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
|
||||||
|
}
|
||||||
|
|
||||||
|
if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead {
|
||||||
|
return false, fmt.Errorf("expected main task to be dead not %s", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
if last.TaskStates[mainTask.Name].Failed {
|
||||||
|
return false, fmt.Errorf("expected main task to be successful not failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateDead {
|
||||||
|
return false, fmt.Errorf("expected sidecar task to be dead not %s", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
if last.TaskStates[sidecarTask.Name].Failed {
|
||||||
|
return false, fmt.Errorf("expected sidecar task to be successful not failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
}, func(err error) {
|
||||||
|
t.Fatalf("error waiting for initial state:\n%v", err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// TestAllocRunner_TaskMain_KillTG asserts that when main tasks die the
|
// TestAllocRunner_TaskMain_KillTG asserts that when main tasks die the
|
||||||
// entire task group is killed.
|
// entire task group is killed.
|
||||||
func TestAllocRunner_TaskMain_KillTG(t *testing.T) {
|
func TestAllocRunner_TaskMain_KillTG(t *testing.T) {
|
||||||
|
@ -152,20 +246,34 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) {
|
||||||
alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
|
alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
|
||||||
alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0
|
alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0
|
||||||
|
|
||||||
// Create three tasks in the task group
|
// Create four tasks in the task group
|
||||||
sidecar := alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
prestart := alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
||||||
sidecar.Name = "sidecar"
|
prestart.Name = "prestart-sidecar"
|
||||||
sidecar.Driver = "mock_driver"
|
prestart.Driver = "mock_driver"
|
||||||
sidecar.KillTimeout = 10 * time.Millisecond
|
prestart.KillTimeout = 10 * time.Millisecond
|
||||||
sidecar.Lifecycle = &structs.TaskLifecycleConfig{
|
prestart.Lifecycle = &structs.TaskLifecycleConfig{
|
||||||
Hook: structs.TaskLifecycleHookPrestart,
|
Hook: structs.TaskLifecycleHookPrestart,
|
||||||
Sidecar: true,
|
Sidecar: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
sidecar.Config = map[string]interface{}{
|
prestart.Config = map[string]interface{}{
|
||||||
"run_for": "100s",
|
"run_for": "100s",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
poststart := alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
||||||
|
poststart.Name = "poststart-sidecar"
|
||||||
|
poststart.Driver = "mock_driver"
|
||||||
|
poststart.KillTimeout = 10 * time.Millisecond
|
||||||
|
poststart.Lifecycle = &structs.TaskLifecycleConfig{
|
||||||
|
Hook: structs.TaskLifecycleHookPoststart,
|
||||||
|
Sidecar: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
poststart.Config = map[string]interface{}{
|
||||||
|
"run_for": "100s",
|
||||||
|
}
|
||||||
|
|
||||||
|
// these two main tasks have the same name, is that ok?
|
||||||
main1 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
main1 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
||||||
main1.Name = "task2"
|
main1.Name = "task2"
|
||||||
main1.Driver = "mock_driver"
|
main1.Driver = "mock_driver"
|
||||||
|
@ -180,11 +288,12 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) {
|
||||||
"run_for": "2s",
|
"run_for": "2s",
|
||||||
}
|
}
|
||||||
|
|
||||||
alloc.Job.TaskGroups[0].Tasks = []*structs.Task{sidecar, main1, main2}
|
alloc.Job.TaskGroups[0].Tasks = []*structs.Task{prestart, poststart, main1, main2}
|
||||||
alloc.AllocatedResources.Tasks = map[string]*structs.AllocatedTaskResources{
|
alloc.AllocatedResources.Tasks = map[string]*structs.AllocatedTaskResources{
|
||||||
sidecar.Name: tr,
|
prestart.Name: tr,
|
||||||
main1.Name: tr,
|
poststart.Name: tr,
|
||||||
main2.Name: tr,
|
main1.Name: tr,
|
||||||
|
main2.Name: tr,
|
||||||
}
|
}
|
||||||
|
|
||||||
conf, cleanup := testAllocRunnerConfig(t, alloc)
|
conf, cleanup := testAllocRunnerConfig(t, alloc)
|
||||||
|
@ -217,8 +326,30 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) {
|
||||||
|
|
||||||
var state *structs.TaskState
|
var state *structs.TaskState
|
||||||
|
|
||||||
// Task1 should be killed because Task2 exited
|
// both sidecars should be killed because Task2 exited
|
||||||
state = last.TaskStates[sidecar.Name]
|
state = last.TaskStates[prestart.Name]
|
||||||
|
if state == nil {
|
||||||
|
return false, fmt.Errorf("could not find state for task %s", prestart.Name)
|
||||||
|
}
|
||||||
|
if state.State != structs.TaskStateDead {
|
||||||
|
return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead)
|
||||||
|
}
|
||||||
|
if state.FinishedAt.IsZero() || state.StartedAt.IsZero() {
|
||||||
|
return false, fmt.Errorf("expected to have a start and finish time")
|
||||||
|
}
|
||||||
|
if len(state.Events) < 2 {
|
||||||
|
// At least have a received and destroyed
|
||||||
|
return false, fmt.Errorf("Unexpected number of events")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !hasTaskMainEvent(state) {
|
||||||
|
return false, fmt.Errorf("Did not find event %v: %#+v", structs.TaskMainDead, state.Events)
|
||||||
|
}
|
||||||
|
|
||||||
|
state = last.TaskStates[poststart.Name]
|
||||||
|
if state == nil {
|
||||||
|
return false, fmt.Errorf("could not find state for task %s", poststart.Name)
|
||||||
|
}
|
||||||
if state.State != structs.TaskStateDead {
|
if state.State != structs.TaskStateDead {
|
||||||
return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead)
|
return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead)
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,23 +18,31 @@ type taskHookCoordinator struct {
|
||||||
mainTaskCtx context.Context
|
mainTaskCtx context.Context
|
||||||
mainTaskCtxCancel func()
|
mainTaskCtxCancel func()
|
||||||
|
|
||||||
|
poststartTaskCtx context.Context
|
||||||
|
poststartTaskCtxCancel func()
|
||||||
|
|
||||||
prestartSidecar map[string]struct{}
|
prestartSidecar map[string]struct{}
|
||||||
prestartEphemeral map[string]struct{}
|
prestartEphemeral map[string]struct{}
|
||||||
|
mainTasksPending map[string]struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTaskHookCoordinator(logger hclog.Logger, tasks []*structs.Task) *taskHookCoordinator {
|
func newTaskHookCoordinator(logger hclog.Logger, tasks []*structs.Task) *taskHookCoordinator {
|
||||||
closedCh := make(chan struct{})
|
closedCh := make(chan struct{})
|
||||||
close(closedCh)
|
close(closedCh)
|
||||||
|
|
||||||
mainTaskCtx, cancelFn := context.WithCancel(context.Background())
|
mainTaskCtx, mainCancelFn := context.WithCancel(context.Background())
|
||||||
|
poststartTaskCtx, poststartCancelFn := context.WithCancel(context.Background())
|
||||||
|
|
||||||
c := &taskHookCoordinator{
|
c := &taskHookCoordinator{
|
||||||
logger: logger,
|
logger: logger,
|
||||||
closedCh: closedCh,
|
closedCh: closedCh,
|
||||||
mainTaskCtx: mainTaskCtx,
|
mainTaskCtx: mainTaskCtx,
|
||||||
mainTaskCtxCancel: cancelFn,
|
mainTaskCtxCancel: mainCancelFn,
|
||||||
prestartSidecar: map[string]struct{}{},
|
prestartSidecar: map[string]struct{}{},
|
||||||
prestartEphemeral: map[string]struct{}{},
|
prestartEphemeral: map[string]struct{}{},
|
||||||
|
mainTasksPending: map[string]struct{}{},
|
||||||
|
poststartTaskCtx: poststartTaskCtx,
|
||||||
|
poststartTaskCtxCancel: poststartCancelFn,
|
||||||
}
|
}
|
||||||
c.setTasks(tasks)
|
c.setTasks(tasks)
|
||||||
return c
|
return c
|
||||||
|
@ -44,7 +52,7 @@ func (c *taskHookCoordinator) setTasks(tasks []*structs.Task) {
|
||||||
for _, task := range tasks {
|
for _, task := range tasks {
|
||||||
|
|
||||||
if task.Lifecycle == nil {
|
if task.Lifecycle == nil {
|
||||||
// move nothing
|
c.mainTasksPending[task.Name] = struct{}{}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,9 +63,10 @@ func (c *taskHookCoordinator) setTasks(tasks []*structs.Task) {
|
||||||
} else {
|
} else {
|
||||||
c.prestartEphemeral[task.Name] = struct{}{}
|
c.prestartEphemeral[task.Name] = struct{}{}
|
||||||
}
|
}
|
||||||
|
case structs.TaskLifecycleHookPoststart:
|
||||||
|
// Poststart hooks don't need to be tracked.
|
||||||
default:
|
default:
|
||||||
c.logger.Error("invalid lifecycle hook", "hook", task.Lifecycle.Hook)
|
c.logger.Error("invalid lifecycle hook", "task", task.Name, "hook", task.Lifecycle.Hook)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,22 +79,28 @@ func (c *taskHookCoordinator) hasPrestartTasks() bool {
|
||||||
return len(c.prestartSidecar)+len(c.prestartEphemeral) > 0
|
return len(c.prestartSidecar)+len(c.prestartEphemeral) > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *taskHookCoordinator) hasPendingMainTasks() bool {
|
||||||
|
return len(c.mainTasksPending) > 0
|
||||||
|
}
|
||||||
|
|
||||||
func (c *taskHookCoordinator) startConditionForTask(task *structs.Task) <-chan struct{} {
|
func (c *taskHookCoordinator) startConditionForTask(task *structs.Task) <-chan struct{} {
|
||||||
if task.Lifecycle != nil && task.Lifecycle.Hook == structs.TaskLifecycleHookPrestart {
|
if task.Lifecycle == nil {
|
||||||
return c.closedCh
|
return c.mainTaskCtx.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
return c.mainTaskCtx.Done()
|
switch task.Lifecycle.Hook {
|
||||||
|
case structs.TaskLifecycleHookPrestart:
|
||||||
|
// Prestart tasks start without checking status of other tasks
|
||||||
|
return c.closedCh
|
||||||
|
case structs.TaskLifecycleHookPoststart:
|
||||||
|
return c.poststartTaskCtx.Done()
|
||||||
|
default:
|
||||||
|
return c.mainTaskCtx.Done()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is not thread safe! This must only be called from one thread per alloc runner.
|
// This is not thread safe! This must only be called from one thread per alloc runner.
|
||||||
func (c *taskHookCoordinator) taskStateUpdated(states map[string]*structs.TaskState) {
|
func (c *taskHookCoordinator) taskStateUpdated(states map[string]*structs.TaskState) {
|
||||||
if c.mainTaskCtx.Err() != nil {
|
|
||||||
// nothing to do here
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
for task := range c.prestartSidecar {
|
for task := range c.prestartSidecar {
|
||||||
st := states[task]
|
st := states[task]
|
||||||
if st == nil || st.StartedAt.IsZero() {
|
if st == nil || st.StartedAt.IsZero() {
|
||||||
|
@ -104,10 +119,23 @@ func (c *taskHookCoordinator) taskStateUpdated(states map[string]*structs.TaskSt
|
||||||
delete(c.prestartEphemeral, task)
|
delete(c.prestartEphemeral, task)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for task := range c.mainTasksPending {
|
||||||
|
st := states[task]
|
||||||
|
if st == nil || st.StartedAt.IsZero() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
delete(c.mainTasksPending, task)
|
||||||
|
}
|
||||||
|
|
||||||
// everything well
|
// everything well
|
||||||
if !c.hasPrestartTasks() {
|
if !c.hasPrestartTasks() {
|
||||||
c.mainTaskCtxCancel()
|
c.mainTaskCtxCancel()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !c.hasPendingMainTasks() {
|
||||||
|
c.poststartTaskCtxCancel()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// hasNonSidecarTasks returns false if all the passed tasks are sidecar tasks
|
// hasNonSidecarTasks returns false if all the passed tasks are sidecar tasks
|
||||||
|
|
|
@ -224,6 +224,52 @@ func TestTaskHookCoordinator_SidecarNeverStarts(t *testing.T) {
|
||||||
require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name)
|
require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTaskHookCoordinator_PoststartStartsAfterMain(t *testing.T) {
|
||||||
|
logger := testlog.HCLogger(t)
|
||||||
|
|
||||||
|
alloc := mock.LifecycleAlloc()
|
||||||
|
tasks := alloc.Job.TaskGroups[0].Tasks
|
||||||
|
|
||||||
|
mainTask := tasks[0]
|
||||||
|
sideTask := tasks[1]
|
||||||
|
postTask := tasks[2]
|
||||||
|
|
||||||
|
// Make the the third task a poststart hook
|
||||||
|
postTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart
|
||||||
|
|
||||||
|
coord := newTaskHookCoordinator(logger, tasks)
|
||||||
|
postCh := coord.startConditionForTask(postTask)
|
||||||
|
sideCh := coord.startConditionForTask(sideTask)
|
||||||
|
mainCh := coord.startConditionForTask(mainTask)
|
||||||
|
|
||||||
|
require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name)
|
||||||
|
require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name)
|
||||||
|
require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", postTask.Name)
|
||||||
|
|
||||||
|
states := map[string]*structs.TaskState{
|
||||||
|
postTask.Name: {
|
||||||
|
State: structs.TaskStatePending,
|
||||||
|
Failed: false,
|
||||||
|
},
|
||||||
|
mainTask.Name: {
|
||||||
|
State: structs.TaskStateRunning,
|
||||||
|
Failed: false,
|
||||||
|
StartedAt: time.Now(),
|
||||||
|
},
|
||||||
|
sideTask.Name: {
|
||||||
|
State: structs.TaskStateRunning,
|
||||||
|
Failed: false,
|
||||||
|
StartedAt: time.Now(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
coord.taskStateUpdated(states)
|
||||||
|
|
||||||
|
require.Truef(t, isChannelClosed(postCh), "%s channel was open, should be closed", postTask.Name)
|
||||||
|
require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name)
|
||||||
|
require.Truef(t, isChannelClosed(mainCh), "%s channel was open, should be closed", mainTask.Name)
|
||||||
|
}
|
||||||
|
|
||||||
func isChannelClosed(ch <-chan struct{}) bool {
|
func isChannelClosed(ch <-chan struct{}) bool {
|
||||||
select {
|
select {
|
||||||
case <-ch:
|
case <-ch:
|
||||||
|
|
|
@ -21,11 +21,19 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewRestartTracker(policy *structs.RestartPolicy, jobType string, tlc *structs.TaskLifecycleConfig) *RestartTracker {
|
func NewRestartTracker(policy *structs.RestartPolicy, jobType string, tlc *structs.TaskLifecycleConfig) *RestartTracker {
|
||||||
|
// Batch jobs should not restart if they exit successfully
|
||||||
onSuccess := jobType != structs.JobTypeBatch
|
onSuccess := jobType != structs.JobTypeBatch
|
||||||
|
|
||||||
|
// Prestart sidecars should get restarted on success
|
||||||
if tlc != nil && tlc.Hook == structs.TaskLifecycleHookPrestart {
|
if tlc != nil && tlc.Hook == structs.TaskLifecycleHookPrestart {
|
||||||
onSuccess = tlc.Sidecar
|
onSuccess = tlc.Sidecar
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Poststart sidecars should get restarted on success
|
||||||
|
if tlc != nil && tlc.Hook == structs.TaskLifecycleHookPoststart {
|
||||||
|
onSuccess = tlc.Sidecar
|
||||||
|
}
|
||||||
|
|
||||||
return &RestartTracker{
|
return &RestartTracker{
|
||||||
startTime: time.Now(),
|
startTime: time.Now(),
|
||||||
onSuccess: onSuccess,
|
onSuccess: onSuccess,
|
||||||
|
|
|
@ -17,6 +17,7 @@ import (
|
||||||
_ "github.com/hashicorp/nomad/e2e/deployment"
|
_ "github.com/hashicorp/nomad/e2e/deployment"
|
||||||
_ "github.com/hashicorp/nomad/e2e/example"
|
_ "github.com/hashicorp/nomad/e2e/example"
|
||||||
_ "github.com/hashicorp/nomad/e2e/hostvolumes"
|
_ "github.com/hashicorp/nomad/e2e/hostvolumes"
|
||||||
|
_ "github.com/hashicorp/nomad/e2e/lifecycle"
|
||||||
_ "github.com/hashicorp/nomad/e2e/metrics"
|
_ "github.com/hashicorp/nomad/e2e/metrics"
|
||||||
_ "github.com/hashicorp/nomad/e2e/nomad09upgrade"
|
_ "github.com/hashicorp/nomad/e2e/nomad09upgrade"
|
||||||
_ "github.com/hashicorp/nomad/e2e/nomadexec"
|
_ "github.com/hashicorp/nomad/e2e/nomadexec"
|
||||||
|
|
127
e2e/lifecycle/inputs/batch.nomad
Normal file
127
e2e/lifecycle/inputs/batch.nomad
Normal file
|
@ -0,0 +1,127 @@
|
||||||
|
# lifecycle hook test job for batch jobs. touches, removes, and tests
|
||||||
|
# for the existence of files to assert the order of running tasks.
|
||||||
|
# all tasks should exit 0 and the alloc dir should contain the following
|
||||||
|
# files: ./init-ran, ./main-ran, ./poststart-run
|
||||||
|
|
||||||
|
job "batch-lifecycle" {
|
||||||
|
|
||||||
|
datacenters = ["dc1"]
|
||||||
|
|
||||||
|
type = "batch"
|
||||||
|
|
||||||
|
group "test" {
|
||||||
|
|
||||||
|
task "init" {
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
hook = "prestart"
|
||||||
|
}
|
||||||
|
|
||||||
|
driver = "docker"
|
||||||
|
|
||||||
|
config {
|
||||||
|
image = "busybox:1"
|
||||||
|
command = "/bin/sh"
|
||||||
|
args = ["local/prestart.sh"]
|
||||||
|
}
|
||||||
|
|
||||||
|
template {
|
||||||
|
data = <<EOT
|
||||||
|
#!/bin/sh
|
||||||
|
sleep 1
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/init-ran
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/init-running
|
||||||
|
sleep 5
|
||||||
|
if [ -f ${NOMAD_ALLOC_DIR}/main ]; then exit 7; fi
|
||||||
|
if [ -f ${NOMAD_ALLOC_DIR}/poststart-running ]; then exit 8; fi
|
||||||
|
rm ${NOMAD_ALLOC_DIR}/init-running
|
||||||
|
EOT
|
||||||
|
|
||||||
|
destination = "local/prestart.sh"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
resources {
|
||||||
|
cpu = 64
|
||||||
|
memory = 64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
task "main" {
|
||||||
|
|
||||||
|
driver = "docker"
|
||||||
|
|
||||||
|
config {
|
||||||
|
image = "busybox:1"
|
||||||
|
command = "/bin/sh"
|
||||||
|
args = ["local/main.sh"]
|
||||||
|
}
|
||||||
|
|
||||||
|
template {
|
||||||
|
data = <<EOT
|
||||||
|
#!/bin/sh
|
||||||
|
sleep 1
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/main-running
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/main-started
|
||||||
|
# NEED TO HANG AROUND TO GIVE POSTSTART TIME TO GET STARTED
|
||||||
|
sleep 10
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/init-ran ]; then exit 9; fi
|
||||||
|
if [ -f ${NOMAD_ALLOC_DIR}/init-running ]; then exit 10; fi
|
||||||
|
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/poststart-started ]; then exit 11; fi
|
||||||
|
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/main-ran
|
||||||
|
rm ${NOMAD_ALLOC_DIR}/main-running
|
||||||
|
EOT
|
||||||
|
|
||||||
|
destination = "local/main.sh"
|
||||||
|
}
|
||||||
|
|
||||||
|
resources {
|
||||||
|
cpu = 64
|
||||||
|
memory = 64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
task "poststart" {
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
hook = "poststart"
|
||||||
|
}
|
||||||
|
|
||||||
|
driver = "docker"
|
||||||
|
|
||||||
|
config {
|
||||||
|
image = "busybox:1"
|
||||||
|
command = "/bin/sh"
|
||||||
|
args = ["local/poststart.sh"]
|
||||||
|
}
|
||||||
|
|
||||||
|
template {
|
||||||
|
data = <<EOT
|
||||||
|
#!/bin/sh
|
||||||
|
sleep 1
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/poststart-ran
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/poststart-running
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/poststart-started
|
||||||
|
sleep 10
|
||||||
|
# THIS IS WHERE THE ACTUAL TESTING HAPPENS
|
||||||
|
# IF init-ran doesn't exist, then the init task hasn't run yet, so fail
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/init-ran ]; then exit 12; fi
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/main-started ]; then exit 15; fi
|
||||||
|
if [ -f ${NOMAD_ALLOC_DIR}/init-running ]; then exit 14; fi
|
||||||
|
rm ${NOMAD_ALLOC_DIR}/poststart-running
|
||||||
|
EOT
|
||||||
|
|
||||||
|
destination = "local/poststart.sh"
|
||||||
|
}
|
||||||
|
|
||||||
|
resources {
|
||||||
|
cpu = 64
|
||||||
|
memory = 64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
160
e2e/lifecycle/inputs/service.nomad
Normal file
160
e2e/lifecycle/inputs/service.nomad
Normal file
|
@ -0,0 +1,160 @@
|
||||||
|
# lifecycle hook test job for service jobs. touches, removes, and tests
|
||||||
|
# for the existence of files to assert the order of running tasks.
|
||||||
|
# after stopping, the alloc dir should contain the following files:
|
||||||
|
# files: ./init-ran, ./sidecar-ran, ./main-ran, ./poststart-run but not
|
||||||
|
# the ./main-running, ./sidecar-running, or ./poststart-running files
|
||||||
|
|
||||||
|
job "service-lifecycle" {
|
||||||
|
|
||||||
|
datacenters = ["dc1"]
|
||||||
|
|
||||||
|
type = "service"
|
||||||
|
|
||||||
|
group "test" {
|
||||||
|
|
||||||
|
task "init" {
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
hook = "prestart"
|
||||||
|
}
|
||||||
|
|
||||||
|
driver = "docker"
|
||||||
|
|
||||||
|
config {
|
||||||
|
image = "busybox:1"
|
||||||
|
command = "/bin/sh"
|
||||||
|
args = ["local/prestart.sh"]
|
||||||
|
}
|
||||||
|
|
||||||
|
template {
|
||||||
|
data = <<EOT
|
||||||
|
#!/bin/sh
|
||||||
|
sleep 1
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/init-ran
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/init-running
|
||||||
|
if [ -f ${NOMAD_ALLOC_DIR}/main ]; then exit 7; fi
|
||||||
|
if [ -f ${NOMAD_ALLOC_DIR}/poststart ]; then exit 8; fi
|
||||||
|
rm ${NOMAD_ALLOC_DIR}/init-running
|
||||||
|
EOT
|
||||||
|
|
||||||
|
destination = "local/prestart.sh"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
resources {
|
||||||
|
cpu = 64
|
||||||
|
memory = 64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
task "sidecar" {
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
hook = "prestart"
|
||||||
|
sidecar = true
|
||||||
|
}
|
||||||
|
|
||||||
|
driver = "docker"
|
||||||
|
|
||||||
|
config {
|
||||||
|
image = "busybox:1"
|
||||||
|
command = "/bin/sh"
|
||||||
|
args = ["local/sidecar.sh"]
|
||||||
|
}
|
||||||
|
|
||||||
|
template {
|
||||||
|
data = <<EOT
|
||||||
|
#!/bin/sh
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/sidecar-ran
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/sidecar-running
|
||||||
|
sleep 5
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/main-running ]; then exit 9; fi
|
||||||
|
if [ -f ${NOMAD_ALLOC_DIR}/poststart-running ]; then exit 10; fi
|
||||||
|
sleep 300
|
||||||
|
EOT
|
||||||
|
|
||||||
|
destination = "local/sidecar.sh"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
resources {
|
||||||
|
cpu = 64
|
||||||
|
memory = 64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
task "main" {
|
||||||
|
|
||||||
|
driver = "docker"
|
||||||
|
|
||||||
|
config {
|
||||||
|
image = "busybox:1"
|
||||||
|
command = "/bin/sh"
|
||||||
|
args = ["local/main.sh"]
|
||||||
|
}
|
||||||
|
|
||||||
|
template {
|
||||||
|
data = <<EOT
|
||||||
|
#!/bin/sh
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/main-ran
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/main-running
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/main-started
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/init-ran ]; then exit 11; fi
|
||||||
|
if [ -f ${NOMAD_ALLOC_DIR}/init-running ]; then exit 12; fi
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/sidecar-ran ]; then exit 13; fi
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/sidecar-running ]; then exit 14; fi
|
||||||
|
sleep 2
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/poststart-started ]; then exit 15; fi
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/main-checked
|
||||||
|
sleep 300
|
||||||
|
EOT
|
||||||
|
|
||||||
|
destination = "local/main.sh"
|
||||||
|
}
|
||||||
|
|
||||||
|
resources {
|
||||||
|
cpu = 64
|
||||||
|
memory = 64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
task "poststart" {
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
hook = "poststart"
|
||||||
|
}
|
||||||
|
|
||||||
|
driver = "docker"
|
||||||
|
|
||||||
|
config {
|
||||||
|
image = "busybox:1"
|
||||||
|
command = "/bin/sh"
|
||||||
|
args = ["local/poststart.sh"]
|
||||||
|
}
|
||||||
|
|
||||||
|
template {
|
||||||
|
data = <<EOT
|
||||||
|
#!/bin/sh
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/poststart-ran
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/poststart-running
|
||||||
|
touch ${NOMAD_ALLOC_DIR}/poststart-started
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/init-ran ]; then exit 16; fi
|
||||||
|
if [ -f ${NOMAD_ALLOC_DIR}/init-running ]; then exit 17; fi
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/sidecar-ran ]; then exit 18; fi
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/sidecar-running ]; then exit 19; fi
|
||||||
|
if [ ! -f ${NOMAD_ALLOC_DIR}/main-started ]; then exit 20; fi
|
||||||
|
rm ${NOMAD_ALLOC_DIR}/poststart-running
|
||||||
|
EOT
|
||||||
|
|
||||||
|
destination = "local/poststart.sh"
|
||||||
|
}
|
||||||
|
|
||||||
|
resources {
|
||||||
|
cpu = 64
|
||||||
|
memory = 64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
144
e2e/lifecycle/lifecycle.go
Normal file
144
e2e/lifecycle/lifecycle.go
Normal file
|
@ -0,0 +1,144 @@
|
||||||
|
package lifecycle
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/hashicorp/nomad/api"
|
||||||
|
"github.com/hashicorp/nomad/e2e/e2eutil"
|
||||||
|
"github.com/hashicorp/nomad/e2e/framework"
|
||||||
|
"github.com/hashicorp/nomad/helper/uuid"
|
||||||
|
"github.com/hashicorp/nomad/nomad/structs"
|
||||||
|
"github.com/hashicorp/nomad/testutil"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
type LifecycleE2ETest struct {
|
||||||
|
framework.TC
|
||||||
|
jobIDs []string
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
framework.AddSuites(&framework.TestSuite{
|
||||||
|
Component: "Lifecycle",
|
||||||
|
CanRunLocal: true,
|
||||||
|
Cases: []framework.TestCase{new(LifecycleE2ETest)},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure cluster has leader and at least 1 client node
|
||||||
|
// in a ready state before running tests
|
||||||
|
func (tc *LifecycleE2ETest) BeforeAll(f *framework.F) {
|
||||||
|
e2eutil.WaitForLeader(f.T(), tc.Nomad())
|
||||||
|
e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestBatchJob runs a batch job with prestart and poststop hooks
|
||||||
|
func (tc *LifecycleE2ETest) TestBatchJob(f *framework.F) {
|
||||||
|
t := f.T()
|
||||||
|
require := require.New(t)
|
||||||
|
nomadClient := tc.Nomad()
|
||||||
|
uuid := uuid.Generate()
|
||||||
|
jobID := "lifecycle-" + uuid[0:8]
|
||||||
|
tc.jobIDs = append(tc.jobIDs, jobID)
|
||||||
|
|
||||||
|
allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), nomadClient, "lifecycle/inputs/batch.nomad", jobID, "")
|
||||||
|
require.Equal(1, len(allocs))
|
||||||
|
allocID := allocs[0].ID
|
||||||
|
|
||||||
|
// wait for the job to stop and assert we stopped successfully, not failed
|
||||||
|
e2eutil.WaitForAllocStopped(t, nomadClient, allocID)
|
||||||
|
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
|
||||||
|
require.NoError(err)
|
||||||
|
require.Equal(structs.AllocClientStatusComplete, alloc.ClientStatus)
|
||||||
|
|
||||||
|
// assert the files were written as expected
|
||||||
|
afi, _, err := nomadClient.AllocFS().List(alloc, "alloc", nil)
|
||||||
|
require.NoError(err)
|
||||||
|
expected := map[string]bool{
|
||||||
|
"init-ran": true, "main-ran": true, "poststart-ran": true,
|
||||||
|
"init-running": false, "main-running": false, "poststart-running": false}
|
||||||
|
got := checkFiles(expected, afi)
|
||||||
|
require.Equal(expected, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestServiceJob runs a service job with prestart and poststop hooks
|
||||||
|
func (tc *LifecycleE2ETest) TestServiceJob(f *framework.F) {
|
||||||
|
t := f.T()
|
||||||
|
require := require.New(t)
|
||||||
|
nomadClient := tc.Nomad()
|
||||||
|
uuid := uuid.Generate()
|
||||||
|
jobID := "lifecycle-" + uuid[0:8]
|
||||||
|
tc.jobIDs = append(tc.jobIDs, jobID)
|
||||||
|
|
||||||
|
allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), nomadClient, "lifecycle/inputs/service.nomad", jobID, "")
|
||||||
|
require.Equal(1, len(allocs))
|
||||||
|
allocID := allocs[0].ID
|
||||||
|
|
||||||
|
//e2eutil.WaitForAllocRunning(t, nomadClient, allocID)
|
||||||
|
testutil.WaitForResult(func() (bool, error) {
|
||||||
|
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if alloc.ClientStatus != structs.AllocClientStatusRunning {
|
||||||
|
return false, fmt.Errorf("expected status running, but was: %s", alloc.ClientStatus)
|
||||||
|
}
|
||||||
|
|
||||||
|
if alloc.TaskStates["poststart"].FinishedAt.IsZero() {
|
||||||
|
return false, fmt.Errorf("poststart task hasn't started")
|
||||||
|
}
|
||||||
|
|
||||||
|
afi, _, err := nomadClient.AllocFS().List(alloc, "alloc", nil)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
expected := map[string]bool{
|
||||||
|
"main-checked": true}
|
||||||
|
got := checkFiles(expected, afi)
|
||||||
|
if !got["main-checked"] {
|
||||||
|
return false, fmt.Errorf("main-checked file has not been written")
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
}, func(err error) {
|
||||||
|
t.Fatalf("failed to wait on alloc: %v", err)
|
||||||
|
})
|
||||||
|
|
||||||
|
alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
require.False(alloc.TaskStates["poststart"].Failed)
|
||||||
|
|
||||||
|
// stop the job
|
||||||
|
_, _, err = nomadClient.Jobs().Deregister(jobID, false, nil)
|
||||||
|
require.NoError(err)
|
||||||
|
e2eutil.WaitForAllocStopped(t, nomadClient, allocID)
|
||||||
|
|
||||||
|
// assert the files were written as expected
|
||||||
|
afi, _, err := nomadClient.AllocFS().List(alloc, "alloc", nil)
|
||||||
|
require.NoError(err)
|
||||||
|
expected := map[string]bool{
|
||||||
|
"init-ran": true, "sidecar-ran": true, "main-ran": true, "poststart-ran": true,
|
||||||
|
"poststart-started": true, "main-started": true,
|
||||||
|
"init-running": false, "poststart-running": false,
|
||||||
|
"main-checked": true}
|
||||||
|
got := checkFiles(expected, afi)
|
||||||
|
require.Equal(expected, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkFiles returns a map of whether the expected files were found
|
||||||
|
// in the file info response
|
||||||
|
func checkFiles(expected map[string]bool, got []*api.AllocFileInfo) map[string]bool {
|
||||||
|
results := map[string]bool{}
|
||||||
|
for expect := range expected {
|
||||||
|
results[expect] = false
|
||||||
|
}
|
||||||
|
for _, file := range got {
|
||||||
|
// there will be files unrelated to the test, so ignore those
|
||||||
|
if _, ok := results[file.Name]; ok {
|
||||||
|
results[file.Name] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results
|
||||||
|
}
|
|
@ -368,6 +368,7 @@ func VariableLifecycleJob(resources structs.Resources, main int, init int, side
|
||||||
job.Canonicalize()
|
job.Canonicalize()
|
||||||
return job
|
return job
|
||||||
}
|
}
|
||||||
|
|
||||||
func LifecycleJob() *structs.Job {
|
func LifecycleJob() *structs.Job {
|
||||||
job := &structs.Job{
|
job := &structs.Job{
|
||||||
Region: "global",
|
Region: "global",
|
||||||
|
@ -455,6 +456,7 @@ func LifecycleJob() *structs.Job {
|
||||||
job.Canonicalize()
|
job.Canonicalize()
|
||||||
return job
|
return job
|
||||||
}
|
}
|
||||||
|
|
||||||
func LifecycleAlloc() *structs.Allocation {
|
func LifecycleAlloc() *structs.Allocation {
|
||||||
alloc := &structs.Allocation{
|
alloc := &structs.Allocation{
|
||||||
ID: uuid.Generate(),
|
ID: uuid.Generate(),
|
||||||
|
|
|
@ -4900,7 +4900,8 @@ func (d *DispatchPayloadConfig) Validate() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
TaskLifecycleHookPrestart = "prestart"
|
TaskLifecycleHookPrestart = "prestart"
|
||||||
|
TaskLifecycleHookPoststart = "poststart"
|
||||||
)
|
)
|
||||||
|
|
||||||
type TaskLifecycleConfig struct {
|
type TaskLifecycleConfig struct {
|
||||||
|
@ -4924,6 +4925,7 @@ func (d *TaskLifecycleConfig) Validate() error {
|
||||||
|
|
||||||
switch d.Hook {
|
switch d.Hook {
|
||||||
case TaskLifecycleHookPrestart:
|
case TaskLifecycleHookPrestart:
|
||||||
|
case TaskLifecycleHookPoststart:
|
||||||
case "":
|
case "":
|
||||||
return fmt.Errorf("no lifecycle hook provided")
|
return fmt.Errorf("no lifecycle hook provided")
|
||||||
default:
|
default:
|
||||||
|
|
Loading…
Reference in a new issue