2018-06-11 20:33:18 +00:00
|
|
|
package allocrunner
|
2015-08-30 23:35:04 +00:00
|
|
|
|
|
|
|
import (
|
2016-02-04 03:58:39 +00:00
|
|
|
"fmt"
|
2016-09-16 00:24:09 +00:00
|
|
|
"io/ioutil"
|
2015-09-24 21:29:53 +00:00
|
|
|
"os"
|
2016-09-16 00:24:09 +00:00
|
|
|
"path/filepath"
|
2017-05-03 18:15:30 +00:00
|
|
|
"strings"
|
2015-08-30 23:35:04 +00:00
|
|
|
"testing"
|
2015-08-31 00:10:17 +00:00
|
|
|
"time"
|
2015-08-30 23:35:04 +00:00
|
|
|
|
2017-05-03 18:15:30 +00:00
|
|
|
"github.com/boltdb/bolt"
|
2017-07-04 19:24:27 +00:00
|
|
|
"github.com/hashicorp/consul/api"
|
2017-08-07 22:54:05 +00:00
|
|
|
"github.com/hashicorp/nomad/command/agent/consul"
|
2018-01-15 22:56:38 +00:00
|
|
|
"github.com/hashicorp/nomad/helper/testlog"
|
2017-09-29 16:58:48 +00:00
|
|
|
"github.com/hashicorp/nomad/helper/uuid"
|
2015-08-30 23:35:04 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2015-08-31 00:10:17 +00:00
|
|
|
"github.com/hashicorp/nomad/testutil"
|
2017-08-12 21:42:53 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
2015-09-23 00:10:03 +00:00
|
|
|
|
2018-10-04 22:45:46 +00:00
|
|
|
"github.com/hashicorp/nomad/client/allocrunnerdeprecated/taskrunner"
|
2018-06-11 20:33:18 +00:00
|
|
|
consulApi "github.com/hashicorp/nomad/client/consul"
|
|
|
|
"github.com/hashicorp/nomad/client/state"
|
2018-04-04 19:38:15 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
2015-08-30 23:35:04 +00:00
|
|
|
)
|
|
|
|
|
2017-09-26 22:26:33 +00:00
|
|
|
// allocationBucketExists checks if the allocation bucket was created.
|
|
|
|
func allocationBucketExists(tx *bolt.Tx, allocID string) bool {
|
2018-06-11 20:33:18 +00:00
|
|
|
bucket, err := state.GetAllocationBucket(tx, allocID)
|
|
|
|
return err == nil && bucket != nil
|
2016-08-25 00:40:11 +00:00
|
|
|
}
|
|
|
|
|
2015-08-30 23:35:04 +00:00
|
|
|
func TestAllocRunner_SimpleRun(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, false)
|
2015-08-31 00:10:17 +00:00
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
2015-08-30 23:35:04 +00:00
|
|
|
|
2015-08-31 00:10:17 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2016-02-04 03:58:39 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
2015-08-31 00:10:17 +00:00
|
|
|
}
|
2016-03-24 01:08:19 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
2016-02-04 03:58:39 +00:00
|
|
|
}
|
|
|
|
return true, nil
|
2015-08-31 00:10:17 +00:00
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
2015-08-30 23:35:04 +00:00
|
|
|
}
|
|
|
|
|
2018-04-04 19:38:15 +00:00
|
|
|
// Test that FinisheAt is set when the alloc is in a terminal state
|
|
|
|
func TestAllocRunner_FinishedAtSet(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
require := require.New(t)
|
2018-06-11 20:33:18 +00:00
|
|
|
_, ar := TestAllocRunner(t, false)
|
2018-04-04 19:38:15 +00:00
|
|
|
ar.allocClientStatus = structs.AllocClientStatusFailed
|
|
|
|
alloc := ar.Alloc()
|
|
|
|
taskFinishedAt := make(map[string]time.Time)
|
|
|
|
require.NotEmpty(alloc.TaskStates)
|
|
|
|
for name, s := range alloc.TaskStates {
|
|
|
|
require.False(s.FinishedAt.IsZero())
|
|
|
|
taskFinishedAt[name] = s.FinishedAt
|
|
|
|
}
|
|
|
|
|
|
|
|
// Verify that calling again should not mutate finishedAt
|
|
|
|
alloc2 := ar.Alloc()
|
|
|
|
for name, s := range alloc2.TaskStates {
|
|
|
|
require.Equal(taskFinishedAt[name], s.FinishedAt)
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test that FinisheAt is set when the alloc is in a terminal state
|
|
|
|
func TestAllocRunner_FinishedAtSet_TaskEvents(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
require := require.New(t)
|
2018-06-11 20:33:18 +00:00
|
|
|
_, ar := TestAllocRunner(t, false)
|
2018-04-04 19:38:15 +00:00
|
|
|
ar.taskStates[ar.alloc.Job.TaskGroups[0].Tasks[0].Name] = &structs.TaskState{State: structs.TaskStateDead, Failed: true}
|
|
|
|
|
|
|
|
alloc := ar.Alloc()
|
|
|
|
taskFinishedAt := make(map[string]time.Time)
|
|
|
|
require.NotEmpty(alloc.TaskStates)
|
|
|
|
for name, s := range alloc.TaskStates {
|
|
|
|
require.False(s.FinishedAt.IsZero())
|
|
|
|
taskFinishedAt[name] = s.FinishedAt
|
|
|
|
}
|
|
|
|
|
|
|
|
// Verify that calling again should not mutate finishedAt
|
|
|
|
alloc2 := ar.Alloc()
|
|
|
|
for name, s := range alloc2.TaskStates {
|
|
|
|
require.Equal(taskFinishedAt[name], s.FinishedAt)
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2017-07-04 19:24:27 +00:00
|
|
|
// Test that the watcher will mark the allocation as unhealthy.
|
|
|
|
func TestAllocRunner_DeploymentHealth_Unhealthy_BadStart(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2017-08-12 21:42:53 +00:00
|
|
|
assert := assert.New(t)
|
2017-07-04 19:24:27 +00:00
|
|
|
|
|
|
|
// Ensure the task fails and restarts
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, true)
|
2017-07-04 19:24:27 +00:00
|
|
|
|
|
|
|
// Make the task fail
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config["start_error"] = "test error"
|
|
|
|
|
|
|
|
// Make the alloc be part of a deployment
|
2017-09-29 16:58:48 +00:00
|
|
|
ar.alloc.DeploymentID = uuid.Generate()
|
2017-07-04 19:24:27 +00:00
|
|
|
ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
|
|
|
|
|
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
2018-03-28 23:49:56 +00:00
|
|
|
if !last.DeploymentStatus.HasHealth() {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
|
|
|
} else if *last.DeploymentStatus.Healthy {
|
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got healthy")
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
2017-08-12 21:42:53 +00:00
|
|
|
|
|
|
|
// Assert that we have an event explaining why we are unhealthy.
|
|
|
|
assert.Len(ar.taskStates, 1)
|
|
|
|
state := ar.taskStates[task.Name]
|
|
|
|
assert.NotNil(state)
|
|
|
|
assert.NotEmpty(state.Events)
|
|
|
|
last := state.Events[len(state.Events)-1]
|
2017-11-03 14:34:30 +00:00
|
|
|
assert.Equal(allocHealthEventSource, last.Type)
|
2017-08-12 21:42:53 +00:00
|
|
|
assert.Contains(last.Message, "failed task")
|
2017-07-04 19:24:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Test that the watcher will mark the allocation as unhealthy if it hits its
|
|
|
|
// deadline.
|
|
|
|
func TestAllocRunner_DeploymentHealth_Unhealthy_Deadline(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2017-07-04 19:24:27 +00:00
|
|
|
|
2018-04-18 00:14:59 +00:00
|
|
|
// Don't restart but force service job type
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, false)
|
2018-04-18 00:14:59 +00:00
|
|
|
ar.alloc.Job.Type = structs.JobTypeService
|
2017-07-04 19:24:27 +00:00
|
|
|
|
|
|
|
// Make the task block
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
2018-03-20 22:28:18 +00:00
|
|
|
task.Config["start_block_for"] = "4s"
|
2017-07-04 19:24:27 +00:00
|
|
|
task.Config["run_for"] = "10s"
|
|
|
|
|
|
|
|
// Make the alloc be part of a deployment
|
2017-09-29 16:58:48 +00:00
|
|
|
ar.alloc.DeploymentID = uuid.Generate()
|
2017-07-04 19:24:27 +00:00
|
|
|
ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.HealthyDeadline = 100 * time.Millisecond
|
|
|
|
|
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
2018-04-18 00:14:59 +00:00
|
|
|
|
|
|
|
// Assert alloc is unhealthy
|
2018-03-28 23:49:56 +00:00
|
|
|
if !last.DeploymentStatus.HasHealth() {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
|
|
|
} else if *last.DeploymentStatus.Healthy {
|
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got healthy")
|
|
|
|
}
|
2018-04-18 00:14:59 +00:00
|
|
|
|
|
|
|
// Assert there is a task event explaining why we are unhealthy.
|
|
|
|
state, ok := last.TaskStates[task.Name]
|
|
|
|
if !ok {
|
|
|
|
return false, fmt.Errorf("missing state for task %s", task.Name)
|
|
|
|
}
|
|
|
|
n := len(state.Events)
|
|
|
|
if n == 0 {
|
|
|
|
return false, fmt.Errorf("no task events")
|
|
|
|
}
|
|
|
|
lastEvent := state.Events[n-1]
|
|
|
|
if lastEvent.Type != allocHealthEventSource {
|
|
|
|
return false, fmt.Errorf("expected %q; found %q", allocHealthEventSource, lastEvent.Type)
|
|
|
|
}
|
|
|
|
if !strings.Contains(lastEvent.Message, "not running by deadline") {
|
|
|
|
return false, fmt.Errorf(`expected "not running by deadline" but found: %s`, lastEvent.Message)
|
|
|
|
}
|
|
|
|
|
2017-07-04 19:24:27 +00:00
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test that the watcher will mark the allocation as healthy.
|
|
|
|
func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2017-07-04 19:24:27 +00:00
|
|
|
|
|
|
|
// Ensure the task fails and restarts
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, true)
|
2017-07-04 19:24:27 +00:00
|
|
|
|
|
|
|
// Make the task run healthy
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config["run_for"] = "10s"
|
|
|
|
|
|
|
|
// Create a task that takes longer to become healthy
|
|
|
|
ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task.Copy())
|
|
|
|
task2 := ar.alloc.Job.TaskGroups[0].Tasks[1]
|
|
|
|
task2.Name = "task 2"
|
|
|
|
task2.Config["start_block_for"] = "500ms"
|
|
|
|
|
|
|
|
// Make the alloc be part of a deployment
|
2017-09-29 16:58:48 +00:00
|
|
|
ar.alloc.DeploymentID = uuid.Generate()
|
2017-07-04 19:24:27 +00:00
|
|
|
ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond
|
|
|
|
|
|
|
|
start := time.Now()
|
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
2018-03-28 23:49:56 +00:00
|
|
|
if !last.DeploymentStatus.HasHealth() {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
|
|
|
} else if !*last.DeploymentStatus.Healthy {
|
|
|
|
return false, fmt.Errorf("want deployment status healthy; got unhealthy")
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
if d := time.Now().Sub(start); d < 500*time.Millisecond {
|
|
|
|
t.Fatalf("didn't wait for second task group. Only took %v", d)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test that the watcher will mark the allocation as healthy with checks
|
|
|
|
func TestAllocRunner_DeploymentHealth_Healthy_Checks(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2017-07-04 19:24:27 +00:00
|
|
|
|
|
|
|
// Ensure the task fails and restarts
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, true)
|
2017-07-04 19:24:27 +00:00
|
|
|
|
|
|
|
// Make the task fail
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config["run_for"] = "10s"
|
|
|
|
|
|
|
|
// Create a task that has no checks
|
|
|
|
ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task.Copy())
|
|
|
|
task2 := ar.alloc.Job.TaskGroups[0].Tasks[1]
|
|
|
|
task2.Name = "task 2"
|
|
|
|
task2.Services = nil
|
|
|
|
|
|
|
|
// Make the alloc be part of a deployment
|
2017-09-29 16:58:48 +00:00
|
|
|
ar.alloc.DeploymentID = uuid.Generate()
|
2017-07-04 19:24:27 +00:00
|
|
|
ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond
|
|
|
|
|
|
|
|
checkHealthy := &api.AgentCheck{
|
2017-09-29 16:58:48 +00:00
|
|
|
CheckID: uuid.Generate(),
|
2017-07-04 19:24:27 +00:00
|
|
|
Status: api.HealthPassing,
|
|
|
|
}
|
|
|
|
checkUnhealthy := &api.AgentCheck{
|
|
|
|
CheckID: checkHealthy.CheckID,
|
|
|
|
Status: api.HealthWarning,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Only return the check as healthy after a duration
|
|
|
|
trigger := time.After(500 * time.Millisecond)
|
2018-06-11 20:33:18 +00:00
|
|
|
ar.consulClient.(*consulApi.MockConsulServiceClient).AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) {
|
2017-07-04 19:24:27 +00:00
|
|
|
select {
|
|
|
|
case <-trigger:
|
2017-08-07 22:54:05 +00:00
|
|
|
return &consul.AllocRegistration{
|
|
|
|
Tasks: map[string]*consul.TaskRegistration{
|
|
|
|
task.Name: {
|
|
|
|
Services: map[string]*consul.ServiceRegistration{
|
|
|
|
"123": {
|
2017-08-12 21:42:53 +00:00
|
|
|
Service: &api.AgentService{Service: "foo"},
|
|
|
|
Checks: []*api.AgentCheck{checkHealthy},
|
2017-08-07 22:54:05 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}, nil
|
2017-07-04 19:24:27 +00:00
|
|
|
default:
|
2017-08-07 22:54:05 +00:00
|
|
|
return &consul.AllocRegistration{
|
|
|
|
Tasks: map[string]*consul.TaskRegistration{
|
|
|
|
task.Name: {
|
|
|
|
Services: map[string]*consul.ServiceRegistration{
|
|
|
|
"123": {
|
2017-08-12 21:42:53 +00:00
|
|
|
Service: &api.AgentService{Service: "foo"},
|
|
|
|
Checks: []*api.AgentCheck{checkUnhealthy},
|
2017-08-07 22:54:05 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}, nil
|
2017-07-04 19:24:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
start := time.Now()
|
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
2018-03-28 23:49:56 +00:00
|
|
|
if !last.DeploymentStatus.HasHealth() {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
|
|
|
} else if !*last.DeploymentStatus.Healthy {
|
|
|
|
return false, fmt.Errorf("want deployment status healthy; got unhealthy")
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
if d := time.Now().Sub(start); d < 500*time.Millisecond {
|
|
|
|
t.Fatalf("didn't wait for second task group. Only took %v", d)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-12 21:42:53 +00:00
|
|
|
// Test that the watcher will mark the allocation as unhealthy with failing
|
|
|
|
// checks
|
|
|
|
func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
assert := assert.New(t)
|
|
|
|
|
|
|
|
// Ensure the task fails and restarts
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, true)
|
2017-08-12 21:42:53 +00:00
|
|
|
|
|
|
|
// Make the task fail
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config["run_for"] = "10s"
|
|
|
|
|
|
|
|
// Make the alloc be part of a deployment
|
2017-09-29 16:58:48 +00:00
|
|
|
ar.alloc.DeploymentID = uuid.Generate()
|
2017-08-12 21:42:53 +00:00
|
|
|
ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.HealthyDeadline = 1 * time.Second
|
|
|
|
|
|
|
|
checkUnhealthy := &api.AgentCheck{
|
2017-09-29 16:58:48 +00:00
|
|
|
CheckID: uuid.Generate(),
|
2017-08-12 21:42:53 +00:00
|
|
|
Status: api.HealthWarning,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Only return the check as healthy after a duration
|
2018-06-11 20:33:18 +00:00
|
|
|
ar.consulClient.(*consulApi.MockConsulServiceClient).AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) {
|
2017-08-12 21:42:53 +00:00
|
|
|
return &consul.AllocRegistration{
|
|
|
|
Tasks: map[string]*consul.TaskRegistration{
|
|
|
|
task.Name: {
|
|
|
|
Services: map[string]*consul.ServiceRegistration{
|
|
|
|
"123": {
|
|
|
|
Service: &api.AgentService{Service: "foo"},
|
|
|
|
Checks: []*api.AgentCheck{checkUnhealthy},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-08-12 21:42:53 +00:00
|
|
|
if last == nil {
|
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
2018-03-28 23:49:56 +00:00
|
|
|
if !last.DeploymentStatus.HasHealth() {
|
2017-08-12 21:42:53 +00:00
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
|
|
|
} else if *last.DeploymentStatus.Healthy {
|
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got healthy")
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Assert that we have an event explaining why we are unhealthy.
|
|
|
|
assert.Len(ar.taskStates, 1)
|
|
|
|
state := ar.taskStates[task.Name]
|
|
|
|
assert.NotNil(state)
|
|
|
|
assert.NotEmpty(state.Events)
|
|
|
|
last := state.Events[len(state.Events)-1]
|
2017-11-03 14:34:30 +00:00
|
|
|
assert.Equal(allocHealthEventSource, last.Type)
|
2017-08-12 21:42:53 +00:00
|
|
|
assert.Contains(last.Message, "Services not healthy by deadline")
|
|
|
|
}
|
|
|
|
|
2017-07-04 19:24:27 +00:00
|
|
|
// Test that the watcher will mark the allocation as healthy.
|
|
|
|
func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2017-07-04 19:24:27 +00:00
|
|
|
|
|
|
|
// Ensure the task fails and restarts
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, true)
|
2017-07-04 19:24:27 +00:00
|
|
|
|
|
|
|
// Make the task run healthy
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config["run_for"] = "30s"
|
|
|
|
|
|
|
|
// Make the alloc be part of a deployment
|
2017-09-29 16:58:48 +00:00
|
|
|
ar.alloc.DeploymentID = uuid.Generate()
|
2017-07-04 19:24:27 +00:00
|
|
|
ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
|
|
|
|
ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond
|
|
|
|
|
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
2018-03-28 23:49:56 +00:00
|
|
|
if !last.DeploymentStatus.HasHealth() {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
|
|
|
} else if !*last.DeploymentStatus.Healthy {
|
|
|
|
return false, fmt.Errorf("want deployment status healthy; got unhealthy")
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Mimick an update to a new deployment id
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-04 19:24:27 +00:00
|
|
|
last.DeploymentStatus = nil
|
2017-09-29 16:58:48 +00:00
|
|
|
last.DeploymentID = uuid.Generate()
|
2017-07-04 19:24:27 +00:00
|
|
|
ar.Update(last)
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2018-03-28 23:49:56 +00:00
|
|
|
if !last.DeploymentStatus.HasHealth() {
|
2017-07-04 19:24:27 +00:00
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
|
|
|
} else if !*last.DeploymentStatus.Healthy {
|
|
|
|
return false, fmt.Errorf("want deployment status healthy; got unhealthy")
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2018-03-28 00:22:04 +00:00
|
|
|
// Test that health is reported for services that got migrated; not just part
|
|
|
|
// of deployments.
|
|
|
|
func TestAllocRunner_DeploymentHealth_Healthy_Migration(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
// Ensure the task fails and restarts
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, true)
|
2018-03-28 00:22:04 +00:00
|
|
|
|
|
|
|
// Make the task run healthy
|
|
|
|
tg := ar.alloc.Job.TaskGroups[0]
|
|
|
|
task := tg.Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config["run_for"] = "30s"
|
|
|
|
|
|
|
|
// Shorten the default migration healthy time
|
|
|
|
tg.Migrate = structs.DefaultMigrateStrategy()
|
|
|
|
tg.Migrate.MinHealthyTime = 100 * time.Millisecond
|
|
|
|
tg.Migrate.HealthCheck = structs.MigrateStrategyHealthStates
|
|
|
|
|
|
|
|
// Ensure the alloc is *not* part of a deployment
|
|
|
|
ar.alloc.DeploymentID = ""
|
|
|
|
|
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2018-03-28 00:22:04 +00:00
|
|
|
if last == nil {
|
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
2018-03-28 23:49:56 +00:00
|
|
|
if !last.DeploymentStatus.HasHealth() {
|
2018-03-28 00:22:04 +00:00
|
|
|
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
|
|
|
} else if !*last.DeploymentStatus.Healthy {
|
|
|
|
return false, fmt.Errorf("want deployment status healthy; got unhealthy")
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test that health is *not* reported for batch jobs
|
|
|
|
func TestAllocRunner_DeploymentHealth_BatchDisabled(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
// Ensure the task fails and restarts
|
|
|
|
alloc := mock.BatchAlloc()
|
|
|
|
tg := alloc.Job.TaskGroups[0]
|
|
|
|
|
|
|
|
// This should not be possile as validation should prevent batch jobs
|
|
|
|
// from having a migration stanza!
|
|
|
|
tg.Migrate = structs.DefaultMigrateStrategy()
|
|
|
|
tg.Migrate.MinHealthyTime = 1 * time.Millisecond
|
|
|
|
tg.Migrate.HealthyDeadline = 2 * time.Millisecond
|
|
|
|
tg.Migrate.HealthCheck = structs.MigrateStrategyHealthStates
|
|
|
|
|
|
|
|
task := tg.Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config["run_for"] = "5s"
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunnerFromAlloc(t, alloc, false)
|
2018-03-28 00:22:04 +00:00
|
|
|
|
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
2018-03-29 00:16:22 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
last := upd.Last()
|
2018-03-28 00:22:04 +00:00
|
|
|
if last == nil {
|
2018-03-29 00:16:22 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
2018-03-28 00:22:04 +00:00
|
|
|
}
|
|
|
|
if last.DeploymentStatus != nil {
|
2018-03-29 00:16:22 +00:00
|
|
|
return false, fmt.Errorf("unexpected deployment health set: %v", last.DeploymentStatus.Healthy)
|
2018-03-28 00:22:04 +00:00
|
|
|
}
|
2018-03-29 00:16:22 +00:00
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
2018-03-28 00:22:04 +00:00
|
|
|
}
|
|
|
|
|
2016-08-25 00:40:11 +00:00
|
|
|
// TestAllocRuner_RetryArtifact ensures that if one task in a task group is
|
2016-10-11 19:31:40 +00:00
|
|
|
// retrying fetching an artifact, other tasks in the group should be able
|
2016-08-25 21:42:50 +00:00
|
|
|
// to proceed.
|
2016-08-25 00:40:11 +00:00
|
|
|
func TestAllocRunner_RetryArtifact(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2016-08-25 00:40:11 +00:00
|
|
|
|
|
|
|
alloc := mock.Alloc()
|
2016-08-25 21:42:50 +00:00
|
|
|
alloc.Job.Type = structs.JobTypeBatch
|
2016-10-21 00:27:16 +00:00
|
|
|
alloc.Job.TaskGroups[0].RestartPolicy.Mode = structs.RestartPolicyModeFail
|
2016-08-26 00:25:51 +00:00
|
|
|
alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1
|
|
|
|
alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second
|
2016-08-25 00:40:11 +00:00
|
|
|
|
2016-10-21 00:27:16 +00:00
|
|
|
task := alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"exit_code": "0",
|
|
|
|
"run_for": "1s",
|
|
|
|
}
|
|
|
|
|
2016-08-25 21:42:50 +00:00
|
|
|
// Create a new task with a bad artifact
|
2016-08-25 00:40:11 +00:00
|
|
|
badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
|
|
|
badtask.Name = "bad"
|
|
|
|
badtask.Artifacts = []*structs.TaskArtifact{
|
2017-07-23 02:04:36 +00:00
|
|
|
{GetterSource: "http://127.0.0.1:0/foo/bar/baz"},
|
2016-08-25 00:40:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask)
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunnerFromAlloc(t, alloc, true)
|
2016-08-25 00:40:11 +00:00
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
|
|
|
if last == nil {
|
|
|
|
return false, fmt.Errorf("No updates")
|
2016-08-25 00:40:11 +00:00
|
|
|
}
|
2016-08-25 23:05:19 +00:00
|
|
|
|
|
|
|
// web task should have completed successfully while bad task
|
2018-03-11 17:41:02 +00:00
|
|
|
// retries artifact fetching
|
2018-03-29 00:16:22 +00:00
|
|
|
webstate, ok := last.TaskStates["web"]
|
|
|
|
if !ok {
|
|
|
|
return false, fmt.Errorf("no task state for web")
|
|
|
|
}
|
2016-08-25 21:42:50 +00:00
|
|
|
if webstate.State != structs.TaskStateDead {
|
|
|
|
return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State)
|
|
|
|
}
|
|
|
|
if !webstate.Successful() {
|
|
|
|
return false, fmt.Errorf("expected web to have exited successfully")
|
2016-08-25 00:40:11 +00:00
|
|
|
}
|
2016-08-25 23:05:19 +00:00
|
|
|
|
|
|
|
// bad task should have failed
|
|
|
|
badstate := last.TaskStates["bad"]
|
|
|
|
if badstate.State != structs.TaskStateDead {
|
2016-10-21 00:27:16 +00:00
|
|
|
return false, fmt.Errorf("expected bad to be dead but found %q", badstate.State)
|
2016-08-25 23:05:19 +00:00
|
|
|
}
|
2016-10-21 00:27:16 +00:00
|
|
|
if !badstate.Failed {
|
|
|
|
return false, fmt.Errorf("expected bad to have failed: %#v", badstate.Events)
|
2016-08-25 00:40:11 +00:00
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2016-02-04 21:09:53 +00:00
|
|
|
func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, false)
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
// Ensure task takes some time
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
2017-05-25 16:28:10 +00:00
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config["run_for"] = "10s"
|
2016-02-04 21:09:53 +00:00
|
|
|
go ar.Run()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2016-02-04 21:09:53 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
2016-02-09 02:51:11 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusRunning {
|
2016-02-04 21:09:53 +00:00
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Update the alloc to be terminal which should cause the alloc runner to
|
|
|
|
// stop the tasks and wait for a destroy.
|
|
|
|
update := ar.alloc.Copy()
|
|
|
|
update.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
ar.Update(update)
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
|
|
|
return false, fmt.Errorf("No updates")
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the status has changed.
|
2016-03-24 01:08:19 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
2017-05-03 18:15:30 +00:00
|
|
|
// Check the allocation state still exists
|
|
|
|
if err := ar.stateDB.View(func(tx *bolt.Tx) error {
|
|
|
|
if !allocationBucketExists(tx, ar.Alloc().ID) {
|
|
|
|
return fmt.Errorf("no bucket for alloc")
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return false, fmt.Errorf("state destroyed")
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the alloc directory still exists
|
2016-12-03 01:04:07 +00:00
|
|
|
if _, err := os.Stat(ar.allocDir.AllocDir); err != nil {
|
|
|
|
return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
2016-02-04 22:19:27 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
2016-02-04 21:09:53 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
// Send the destroy signal and ensure the AllocRunner cleans up.
|
|
|
|
ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
|
|
|
return false, fmt.Errorf("No updates")
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the status has changed.
|
2016-03-24 01:08:19 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the state was cleaned
|
2017-05-03 18:15:30 +00:00
|
|
|
if err := ar.stateDB.View(func(tx *bolt.Tx) error {
|
|
|
|
if allocationBucketExists(tx, ar.Alloc().ID) {
|
|
|
|
return fmt.Errorf("bucket for alloc exists")
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return false, fmt.Errorf("state not destroyed")
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the alloc directory was cleaned
|
2016-12-03 01:04:07 +00:00
|
|
|
if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
|
|
|
|
return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
|
2016-02-04 21:09:53 +00:00
|
|
|
} else if !os.IsNotExist(err) {
|
|
|
|
return false, fmt.Errorf("stat err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
2016-02-04 22:19:27 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
2016-02-04 21:09:53 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2015-08-30 23:35:04 +00:00
|
|
|
// TestAllocRunner_Destroy asserts that destroying a running alloc runner
// stops its task, removes the allocation's bucket from the bolt state store,
// and deletes the allocation directory from disk.
func TestAllocRunner_Destroy(t *testing.T) {
	t.Parallel()
	upd, ar := TestAllocRunner(t, false)

	// Ensure task takes some time so Destroy interrupts a still-running task
	// rather than racing with normal completion.
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "10s"
	go ar.Run()
	start := time.Now()

	// Begin the tear down after giving the task a moment to start.
	go func() {
		time.Sleep(1 * time.Second)
		ar.Destroy()
	}()

	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		// Check the status has changed.
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Check the state was cleaned: the alloc's bucket must no longer exist.
		if err := ar.stateDB.View(func(tx *bolt.Tx) error {
			if allocationBucketExists(tx, ar.Alloc().ID) {
				return fmt.Errorf("bucket for alloc exists")
			}

			return nil
		}); err != nil {
			return false, fmt.Errorf("state not destroyed: %v", err)
		}

		// Check the alloc directory was cleaned
		if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
			return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
		} else if !os.IsNotExist(err) {
			return false, fmt.Errorf("stat err: %v", err)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Destroying a 10s task should complete well inside 20s; anything longer
	// indicates the kill path is hanging.
	if elapsed := time.Since(start); elapsed > 20*time.Second {
		t.Fatalf("took too long to terminate: %s", elapsed)
	}
}
|
|
|
|
|
|
|
|
// TestAllocRunner_Update asserts that Update replaces the runner's stored
// allocation with the newly supplied version.
func TestAllocRunner_Update(t *testing.T) {
	t.Parallel()
	_, ar := TestAllocRunner(t, false)

	// Deep copy the alloc to avoid races when updating
	newAlloc := ar.Alloc().Copy()

	// Ensure task takes some time so the runner is still alive when updated.
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "10s"
	go ar.Run()
	defer ar.Destroy()

	// Update the alloc definition; bumping AllocModifyIndex marks it as a
	// newer version than the one the runner currently holds.
	newAlloc.Name = "FOO"
	newAlloc.AllocModifyIndex++
	ar.Update(newAlloc)

	// Check the alloc runner stores the update allocation.
	testutil.WaitForResult(func() (bool, error) {
		return ar.Alloc().Name == "FOO", nil
	}, func(err error) {
		t.Fatalf("err: %v %#v", err, ar.Alloc())
	})
}
|
|
|
|
|
|
|
|
func TestAllocRunner_SaveRestoreState(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2016-09-16 00:24:09 +00:00
|
|
|
alloc := mock.Alloc()
|
|
|
|
task := alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"exit_code": "0",
|
|
|
|
"run_for": "10s",
|
|
|
|
}
|
2015-08-31 00:10:17 +00:00
|
|
|
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunnerFromAlloc(t, alloc, false)
|
2015-08-31 00:10:17 +00:00
|
|
|
go ar.Run()
|
2017-05-31 18:38:41 +00:00
|
|
|
defer ar.Destroy()
|
2015-08-31 00:10:17 +00:00
|
|
|
|
|
|
|
// Snapshot state
|
2016-01-21 22:52:41 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2017-07-21 21:25:51 +00:00
|
|
|
ar.taskLock.RLock()
|
|
|
|
defer ar.taskLock.RUnlock()
|
2016-01-21 22:52:41 +00:00
|
|
|
return len(ar.tasks) == 1, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("task never started: %v", err)
|
|
|
|
})
|
|
|
|
|
2015-08-31 00:10:17 +00:00
|
|
|
err := ar.SaveState()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a new alloc runner
|
2018-06-11 20:33:18 +00:00
|
|
|
l2 := testlog.WithPrefix(t, "----- ar2: ")
|
2017-08-11 17:27:21 +00:00
|
|
|
alloc2 := &structs.Allocation{ID: ar.alloc.ID}
|
2018-06-11 20:33:18 +00:00
|
|
|
prevAlloc := NewAllocWatcher(alloc2, ar, nil, ar.config, l2, "")
|
2017-05-03 18:15:30 +00:00
|
|
|
ar2 := NewAllocRunner(l2, ar.config, ar.stateDB, upd.Update,
|
2017-08-11 17:27:21 +00:00
|
|
|
alloc2, ar.vaultClient, ar.consulClient, prevAlloc)
|
2015-08-31 00:10:17 +00:00
|
|
|
err = ar2.RestoreState()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
go ar2.Run()
|
|
|
|
|
2016-09-16 00:24:09 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if len(ar2.tasks) != 1 {
|
|
|
|
return false, fmt.Errorf("Incorrect number of tasks")
|
|
|
|
}
|
|
|
|
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2016-09-16 00:24:09 +00:00
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return last.ClientStatus == structs.AllocClientStatusRunning, nil
|
|
|
|
}, func(err error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 20:43:32 +00:00
|
|
|
t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates["web"])
|
2016-09-16 00:24:09 +00:00
|
|
|
})
|
|
|
|
|
2015-08-31 00:10:17 +00:00
|
|
|
// Destroy and wait
|
|
|
|
ar2.Destroy()
|
|
|
|
start := time.Now()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2016-09-16 00:24:09 +00:00
|
|
|
alloc := ar2.Alloc()
|
|
|
|
if alloc.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("Bad client status; got %v; want %v", alloc.ClientStatus, structs.AllocClientStatusComplete)
|
2015-08-31 00:10:17 +00:00
|
|
|
}
|
2016-09-16 00:24:09 +00:00
|
|
|
return true, nil
|
2015-08-31 00:10:17 +00:00
|
|
|
}, func(err error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 20:43:32 +00:00
|
|
|
t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates)
|
2015-08-31 00:10:17 +00:00
|
|
|
})
|
|
|
|
|
2016-09-16 00:24:09 +00:00
|
|
|
if time.Since(start) > time.Duration(testutil.TestMultiplier()*5)*time.Second {
|
2015-08-31 00:10:17 +00:00
|
|
|
t.Fatalf("took too long to terminate")
|
|
|
|
}
|
2015-08-30 23:35:04 +00:00
|
|
|
}
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, false)
|
|
|
|
ar.logger = testlog.WithPrefix(t, "ar1: ")
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
// Ensure task takes some time
|
2016-09-05 02:09:08 +00:00
|
|
|
ar.alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
|
2016-02-04 21:09:53 +00:00
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
2016-09-05 02:09:08 +00:00
|
|
|
task.Config["run_for"] = "10s"
|
2016-02-04 21:09:53 +00:00
|
|
|
go ar.Run()
|
2017-05-31 18:38:41 +00:00
|
|
|
defer ar.Destroy()
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2016-02-04 21:09:53 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
2017-07-21 19:24:40 +00:00
|
|
|
|
2016-03-22 20:49:52 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusRunning {
|
2016-02-04 21:09:53 +00:00
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Update the alloc to be terminal which should cause the alloc runner to
|
|
|
|
// stop the tasks and wait for a destroy.
|
|
|
|
update := ar.alloc.Copy()
|
|
|
|
update.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
ar.Update(update)
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2017-07-21 21:00:21 +00:00
|
|
|
return ar.Alloc().DesiredStatus == structs.AllocDesiredStatusStop, nil
|
2016-02-04 21:09:53 +00:00
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
err := ar.SaveState()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2017-01-17 21:10:20 +00:00
|
|
|
// Ensure ar1 doesn't recreate the state file
|
2017-05-03 18:15:30 +00:00
|
|
|
ar.allocLock.Lock()
|
|
|
|
defer ar.allocLock.Unlock()
|
2017-01-17 21:10:20 +00:00
|
|
|
|
2016-02-04 21:09:53 +00:00
|
|
|
// Create a new alloc runner
|
2018-06-11 20:33:18 +00:00
|
|
|
l2 := testlog.WithPrefix(t, "ar2: ")
|
2017-08-11 17:27:21 +00:00
|
|
|
alloc2 := &structs.Allocation{ID: ar.alloc.ID}
|
2018-06-11 20:33:18 +00:00
|
|
|
prevAlloc := NewAllocWatcher(alloc2, ar, nil, ar.config, l2, "")
|
2017-08-11 17:27:21 +00:00
|
|
|
ar2 := NewAllocRunner(l2, ar.config, ar.stateDB, upd.Update,
|
|
|
|
alloc2, ar.vaultClient, ar.consulClient, prevAlloc)
|
2016-02-04 21:09:53 +00:00
|
|
|
err = ar2.RestoreState()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
2017-05-31 18:38:41 +00:00
|
|
|
ar2.logger.Println("[TESTING] running second alloc runner")
|
2016-02-04 21:09:53 +00:00
|
|
|
go ar2.Run()
|
2017-05-31 18:38:41 +00:00
|
|
|
defer ar2.Destroy() // Just-in-case of failure before Destroy below
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
// Check the state still exists
|
2017-05-03 18:15:30 +00:00
|
|
|
if err := ar.stateDB.View(func(tx *bolt.Tx) error {
|
|
|
|
if !allocationBucketExists(tx, ar2.Alloc().ID) {
|
|
|
|
return fmt.Errorf("no bucket for alloc")
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return false, fmt.Errorf("state destroyed")
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the alloc directory still exists
|
2016-12-03 01:04:07 +00:00
|
|
|
if _, err := os.Stat(ar.allocDir.AllocDir); err != nil {
|
|
|
|
return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 20:43:32 +00:00
|
|
|
t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates)
|
2016-02-04 21:09:53 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
// Send the destroy signal and ensure the AllocRunner cleans up.
|
2016-03-22 20:49:52 +00:00
|
|
|
ar2.logger.Println("[TESTING] destroying second alloc runner")
|
2016-02-04 21:09:53 +00:00
|
|
|
ar2.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
|
|
|
return false, fmt.Errorf("No updates")
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the status has changed.
|
2016-03-24 01:08:19 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the state was cleaned
|
2017-05-03 18:15:30 +00:00
|
|
|
if err := ar.stateDB.View(func(tx *bolt.Tx) error {
|
|
|
|
if allocationBucketExists(tx, ar2.Alloc().ID) {
|
|
|
|
return fmt.Errorf("bucket for alloc exists")
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return false, fmt.Errorf("state not destroyed")
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the alloc directory was cleaned
|
2016-12-03 01:04:07 +00:00
|
|
|
if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
|
|
|
|
return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
|
2016-02-04 21:09:53 +00:00
|
|
|
} else if !os.IsNotExist(err) {
|
|
|
|
return false, fmt.Errorf("stat err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
2016-02-04 22:19:27 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
2016-02-04 21:09:53 +00:00
|
|
|
})
|
|
|
|
}
|
2016-03-22 20:49:52 +00:00
|
|
|
|
|
|
|
func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, false)
|
2016-03-22 20:49:52 +00:00
|
|
|
|
|
|
|
// Create two tasks in the task group
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
2017-01-23 22:12:38 +00:00
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.KillTimeout = 10 * time.Millisecond
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"run_for": "10s",
|
|
|
|
}
|
2016-03-22 20:49:52 +00:00
|
|
|
|
|
|
|
task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
|
|
|
task2.Name = "task 2"
|
2017-01-23 22:12:38 +00:00
|
|
|
task2.Driver = "mock_driver"
|
|
|
|
task2.Config = map[string]interface{}{
|
|
|
|
"start_error": "fail task please",
|
|
|
|
}
|
2016-03-22 20:49:52 +00:00
|
|
|
ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
|
2018-10-03 16:47:18 +00:00
|
|
|
ar.alloc.AllocatedResources.Tasks[task2.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy()
|
2016-03-22 20:49:52 +00:00
|
|
|
go ar.Run()
|
2017-05-31 18:38:41 +00:00
|
|
|
defer ar.Destroy()
|
2016-03-22 20:49:52 +00:00
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2016-03-22 20:49:52 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
|
|
|
if last.ClientStatus != structs.AllocClientStatusFailed {
|
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Task One should be killed
|
|
|
|
state1 := last.TaskStates[task.Name]
|
|
|
|
if state1.State != structs.TaskStateDead {
|
|
|
|
return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
|
|
|
|
}
|
2017-01-23 22:12:38 +00:00
|
|
|
if len(state1.Events) < 2 {
|
|
|
|
// At least have a received and destroyed
|
2016-09-14 22:04:25 +00:00
|
|
|
return false, fmt.Errorf("Unexpected number of events")
|
|
|
|
}
|
2017-01-23 22:12:38 +00:00
|
|
|
|
|
|
|
found := false
|
|
|
|
for _, e := range state1.Events {
|
|
|
|
if e.Type != structs.TaskSiblingFailed {
|
|
|
|
found = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !found {
|
|
|
|
return false, fmt.Errorf("Did not find event %v", structs.TaskSiblingFailed)
|
2016-03-22 20:49:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Task Two should be failed
|
|
|
|
state2 := last.TaskStates[task2.Name]
|
|
|
|
if state2.State != structs.TaskStateDead {
|
|
|
|
return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
|
|
|
|
}
|
2016-10-21 00:27:16 +00:00
|
|
|
if !state2.Failed {
|
2016-03-22 20:49:52 +00:00
|
|
|
return false, fmt.Errorf("task2 should have failed")
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
}
|
2016-09-16 00:24:09 +00:00
|
|
|
|
2017-02-11 01:55:19 +00:00
|
|
|
func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunner(t, false)
|
2017-02-11 01:55:19 +00:00
|
|
|
|
|
|
|
// Create two tasks in the task group
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.KillTimeout = 10 * time.Millisecond
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"run_for": "10s",
|
|
|
|
}
|
|
|
|
|
|
|
|
task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
|
|
|
task2.Name = "task 2"
|
|
|
|
task2.Driver = "mock_driver"
|
|
|
|
task2.Leader = true
|
|
|
|
task2.Config = map[string]interface{}{
|
|
|
|
"run_for": "1s",
|
|
|
|
}
|
|
|
|
ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
|
2018-10-03 16:47:18 +00:00
|
|
|
ar.alloc.AllocatedResources.Tasks[task2.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy()
|
2017-02-11 01:55:19 +00:00
|
|
|
go ar.Run()
|
2017-07-21 19:24:40 +00:00
|
|
|
defer ar.Destroy()
|
2017-02-11 01:55:19 +00:00
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2017-02-11 01:55:19 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Task One should be killed
|
|
|
|
state1 := last.TaskStates[task.Name]
|
|
|
|
if state1.State != structs.TaskStateDead {
|
|
|
|
return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
|
|
|
|
}
|
2017-03-31 22:57:10 +00:00
|
|
|
if state1.FinishedAt.IsZero() || state1.StartedAt.IsZero() {
|
|
|
|
return false, fmt.Errorf("expected to have a start and finish time")
|
|
|
|
}
|
2017-02-11 01:55:19 +00:00
|
|
|
if len(state1.Events) < 2 {
|
|
|
|
// At least have a received and destroyed
|
|
|
|
return false, fmt.Errorf("Unexpected number of events")
|
|
|
|
}
|
|
|
|
|
|
|
|
found := false
|
|
|
|
for _, e := range state1.Events {
|
|
|
|
if e.Type != structs.TaskLeaderDead {
|
|
|
|
found = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !found {
|
|
|
|
return false, fmt.Errorf("Did not find event %v", structs.TaskLeaderDead)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Task Two should be dead
|
|
|
|
state2 := last.TaskStates[task2.Name]
|
|
|
|
if state2.State != structs.TaskStateDead {
|
|
|
|
return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
|
|
|
|
}
|
2017-03-31 22:57:10 +00:00
|
|
|
if state2.FinishedAt.IsZero() || state2.StartedAt.IsZero() {
|
|
|
|
return false, fmt.Errorf("expected to have a start and finish time")
|
|
|
|
}
|
2017-02-11 01:55:19 +00:00
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2017-06-29 00:12:11 +00:00
|
|
|
// TestAllocRunner_TaskLeader_StopTG asserts that when stopping a task group
// with a leader the leader is stopped before other tasks.
func TestAllocRunner_TaskLeader_StopTG(t *testing.T) {
	t.Parallel()
	upd, ar := TestAllocRunner(t, false)

	// Create 3 tasks in the task group: two followers and one leader, all
	// long-running so the stop ordering can be observed.
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "follower1"
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Millisecond
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "leader"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.KillTimeout = 10 * time.Millisecond
	task2.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task3 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task3.Name = "follower2"
	task3.Driver = "mock_driver"
	task3.KillTimeout = 10 * time.Millisecond
	task3.Config = map[string]interface{}{
		"run_for": "10s",
	}
	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2, task3)
	// Clone the original "web" task's resources for the renamed/added tasks.
	ar.alloc.AllocatedResources.Tasks[task.Name] = ar.alloc.AllocatedResources.Tasks["web"].Copy()
	ar.alloc.AllocatedResources.Tasks[task2.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy()
	ar.alloc.AllocatedResources.Tasks[task3.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy()
	defer ar.Destroy()

	go ar.Run()

	// Wait for tasks to start
	last := upd.Last()
	testutil.WaitForResult(func() (bool, error) {
		last = upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if n := len(last.TaskStates); n != 3 {
			return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n)
		}
		for name, state := range last.TaskStates {
			if state.State != structs.TaskStateRunning {
				return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Reset updates so the next wait only observes post-stop updates.
	upd.mu.Lock()
	upd.Allocs = upd.Allocs[:0]
	upd.mu.Unlock()

	// Stop alloc
	update := ar.Alloc()
	update.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(update)

	// Wait for tasks to stop and verify the leader's FinishedAt precedes both
	// followers', i.e. the leader was shut down first.
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() {
			return false, fmt.Errorf("expected leader to finish before follower1: %s >= %s",
				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt)
		}
		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() {
			return false, fmt.Errorf("expected leader to finish before follower2: %s >= %s",
				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt)
		}
		return true, nil
	}, func(err error) {
		last := upd.Last()
		for name, state := range last.TaskStates {
			t.Logf("%s: %s", name, state.State)
		}
		t.Fatalf("err: %v", err)
	})
}
|
|
|
|
|
2017-11-03 22:42:15 +00:00
|
|
|
// TestAllocRunner_TaskLeader_StopRestoredTG asserts that when stopping a
// restored task group with a leader that failed before restoring the leader is
// not stopped as it does not exist.
// See https://github.com/hashicorp/nomad/issues/3420#issuecomment-341666932
func TestAllocRunner_TaskLeader_StopRestoredTG(t *testing.T) {
	t.Skip("Skipping because the functionality being tested doesn't exist")
	t.Parallel()
	_, ar := TestAllocRunner(t, false)
	defer ar.Destroy()

	// Create a leader and follower task in the task group
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "follower1"
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Second
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "leader"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.KillTimeout = 10 * time.Millisecond
	task2.Config = map[string]interface{}{
		"run_for": "0s",
	}

	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
	// Clone the original "web" task's resources for the renamed/added tasks.
	ar.alloc.AllocatedResources.Tasks[task.Name] = ar.alloc.AllocatedResources.Tasks["web"].Copy()
	ar.alloc.AllocatedResources.Tasks[task2.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy()

	// Mimic Nomad exiting before the leader stopping is able to stop other tasks.
	// Task runners are built by hand and the leader is marked dead in the
	// persisted task states before snapshotting.
	ar.tasks = map[string]*taskrunner.TaskRunner{
		"leader": taskrunner.NewTaskRunner(ar.logger, ar.config, ar.stateDB, ar.setTaskState,
			ar.allocDir.NewTaskDir(task2.Name), ar.Alloc(), task2.Copy(),
			ar.vaultClient, ar.consulClient),
		"follower1": taskrunner.NewTaskRunner(ar.logger, ar.config, ar.stateDB, ar.setTaskState,
			ar.allocDir.NewTaskDir(task.Name), ar.Alloc(), task.Copy(),
			ar.vaultClient, ar.consulClient),
	}
	ar.taskStates = map[string]*structs.TaskState{
		"leader":    {State: structs.TaskStateDead},
		"follower1": {State: structs.TaskStateRunning},
	}
	if err := ar.SaveState(); err != nil {
		t.Fatalf("error saving state: %v", err)
	}

	// Create a new AllocRunner to test RestoreState and Run
	upd2 := &MockAllocStateUpdater{}
	ar2 := NewAllocRunner(ar.logger, ar.config, ar.stateDB, upd2.Update, ar.alloc,
		ar.vaultClient, ar.consulClient, ar.prevAlloc)
	defer ar2.Destroy()

	if err := ar2.RestoreState(); err != nil {
		t.Fatalf("error restoring state: %v", err)
	}
	go ar2.Run()

	// Wait for tasks to be stopped because leader is dead
	testutil.WaitForResult(func() (bool, error) {
		alloc := ar2.Alloc()
		for task, state := range alloc.TaskStates {
			if state.State != structs.TaskStateDead {
				return false, fmt.Errorf("Task %q should be dead: %v", task, state.State)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Make sure it GCs properly
	ar2.Destroy()

	select {
	case <-ar2.WaitCh():
		// exited as expected
	case <-time.After(10 * time.Second):
		t.Fatalf("timed out waiting for AR to GC")
	}
}
|
|
|
|
|
2017-08-11 17:27:21 +00:00
|
|
|
// TestAllocRunner_MoveAllocDir asserts that a file written to an alloc's
|
|
|
|
// local/ dir will be moved to a replacement alloc's local/ dir if sticky
|
|
|
|
// volumes is on.
|
2016-10-03 16:59:57 +00:00
|
|
|
func TestAllocRunner_MoveAllocDir(t *testing.T) {
|
2017-07-23 02:04:36 +00:00
|
|
|
t.Parallel()
|
2016-10-03 16:59:57 +00:00
|
|
|
// Create an alloc runner
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
task := alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"run_for": "1s",
|
|
|
|
}
|
2018-06-11 20:33:18 +00:00
|
|
|
upd, ar := TestAllocRunnerFromAlloc(t, alloc, false)
|
2016-10-03 16:59:57 +00:00
|
|
|
go ar.Run()
|
2017-05-31 18:38:41 +00:00
|
|
|
defer ar.Destroy()
|
2016-10-03 16:59:57 +00:00
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2016-10-03 16:59:57 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Write some data in data dir and task dir of the alloc
|
2016-12-03 01:04:07 +00:00
|
|
|
dataFile := filepath.Join(ar.allocDir.SharedDir, "data", "data_file")
|
2016-10-03 16:59:57 +00:00
|
|
|
ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm)
|
2016-12-03 01:04:07 +00:00
|
|
|
taskDir := ar.allocDir.TaskDirs[task.Name]
|
|
|
|
taskLocalFile := filepath.Join(taskDir.LocalDir, "local_file")
|
2016-10-03 16:59:57 +00:00
|
|
|
ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm)
|
|
|
|
|
|
|
|
// Create another alloc runner
|
2017-08-11 17:27:21 +00:00
|
|
|
alloc2 := mock.Alloc()
|
|
|
|
alloc2.PreviousAllocation = ar.allocID
|
|
|
|
alloc2.Job.TaskGroups[0].EphemeralDisk.Sticky = true
|
|
|
|
task = alloc2.Job.TaskGroups[0].Tasks[0]
|
2016-10-03 16:59:57 +00:00
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"run_for": "1s",
|
|
|
|
}
|
2018-06-11 20:33:18 +00:00
|
|
|
upd2, ar2 := TestAllocRunnerFromAlloc(t, alloc2, false)
|
2017-08-11 17:27:21 +00:00
|
|
|
|
|
|
|
// Set prevAlloc like Client does
|
2018-06-11 20:33:18 +00:00
|
|
|
ar2.prevAlloc = NewAllocWatcher(alloc2, ar, nil, ar2.config, ar2.logger, "")
|
2017-08-11 17:27:21 +00:00
|
|
|
|
|
|
|
go ar2.Run()
|
|
|
|
defer ar2.Destroy()
|
2016-10-03 16:59:57 +00:00
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2018-03-29 00:16:22 +00:00
|
|
|
last := upd2.Last()
|
2017-07-21 19:24:40 +00:00
|
|
|
if last == nil {
|
2016-10-03 16:59:57 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
2017-08-11 17:27:21 +00:00
|
|
|
// Ensure that data from ar was moved to ar2
|
|
|
|
taskDir = ar2.allocDir.TaskDirs[task.Name]
|
2016-12-03 01:04:07 +00:00
|
|
|
taskLocalFile = filepath.Join(taskDir.LocalDir, "local_file")
|
2016-10-03 16:59:57 +00:00
|
|
|
if fileInfo, _ := os.Stat(taskLocalFile); fileInfo == nil {
|
|
|
|
t.Fatalf("file %v not found", taskLocalFile)
|
|
|
|
}
|
|
|
|
|
2017-08-11 17:27:21 +00:00
|
|
|
dataFile = filepath.Join(ar2.allocDir.SharedDir, "data", "data_file")
|
2016-10-03 16:59:57 +00:00
|
|
|
if fileInfo, _ := os.Stat(dataFile); fileInfo == nil {
|
|
|
|
t.Fatalf("file %v not found", dataFile)
|
|
|
|
}
|
|
|
|
}
|