2015-08-30 23:35:04 +00:00
|
|
|
package client
|
|
|
|
|
|
|
|
import (
|
2016-02-04 03:58:39 +00:00
|
|
|
"fmt"
|
2016-09-16 00:24:09 +00:00
|
|
|
"io/ioutil"
|
2015-09-24 21:29:53 +00:00
|
|
|
"os"
|
2016-09-16 00:24:09 +00:00
|
|
|
"path/filepath"
|
2015-08-30 23:35:04 +00:00
|
|
|
"testing"
|
2017-01-05 19:51:03 +00:00
|
|
|
"text/template"
|
2015-08-31 00:10:17 +00:00
|
|
|
"time"
|
2015-08-30 23:35:04 +00:00
|
|
|
|
2017-01-05 19:51:03 +00:00
|
|
|
"github.com/hashicorp/go-multierror"
|
2015-08-30 23:35:04 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/mock"
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2015-08-31 00:10:17 +00:00
|
|
|
"github.com/hashicorp/nomad/testutil"
|
2015-09-23 00:10:03 +00:00
|
|
|
|
2016-06-01 08:22:39 +00:00
|
|
|
"github.com/hashicorp/nomad/client/config"
|
2015-09-23 01:48:42 +00:00
|
|
|
ctestutil "github.com/hashicorp/nomad/client/testutil"
|
2016-09-14 22:04:25 +00:00
|
|
|
"github.com/hashicorp/nomad/client/vaultclient"
|
2015-08-30 23:35:04 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type MockAllocStateUpdater struct {
|
|
|
|
Count int
|
|
|
|
Allocs []*structs.Allocation
|
|
|
|
}
|
|
|
|
|
2016-02-22 03:20:50 +00:00
|
|
|
func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) {
|
2015-08-30 23:35:04 +00:00
|
|
|
m.Count += 1
|
|
|
|
m.Allocs = append(m.Allocs, alloc)
|
|
|
|
}
|
|
|
|
|
2016-09-02 00:23:15 +00:00
|
|
|
func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
|
2015-08-30 23:35:04 +00:00
|
|
|
logger := testLogger()
|
2016-06-01 08:22:39 +00:00
|
|
|
conf := config.DefaultConfig()
|
2015-09-24 21:29:53 +00:00
|
|
|
conf.StateDir = os.TempDir()
|
|
|
|
conf.AllocDir = os.TempDir()
|
2015-08-30 23:35:04 +00:00
|
|
|
upd := &MockAllocStateUpdater{}
|
2015-11-14 06:07:13 +00:00
|
|
|
if !restarts {
|
2016-02-02 23:08:07 +00:00
|
|
|
*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
|
2016-02-02 23:35:25 +00:00
|
|
|
alloc.Job.Type = structs.JobTypeBatch
|
2015-11-14 06:07:13 +00:00
|
|
|
}
|
2016-09-16 00:24:09 +00:00
|
|
|
vclient := vaultclient.NewMockVaultClient()
|
2016-09-14 22:04:25 +00:00
|
|
|
ar := NewAllocRunner(logger, conf, upd.Update, alloc, vclient)
|
2015-08-30 23:35:04 +00:00
|
|
|
return upd, ar
|
|
|
|
}
|
|
|
|
|
2016-09-02 00:23:15 +00:00
|
|
|
func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
|
|
|
|
return testAllocRunnerFromAlloc(mock.Alloc(), restarts)
|
2016-08-25 00:40:11 +00:00
|
|
|
}
|
|
|
|
|
2015-08-30 23:35:04 +00:00
|
|
|
func TestAllocRunner_SimpleRun(t *testing.T) {
|
2015-09-23 01:48:42 +00:00
|
|
|
ctestutil.ExecCompatible(t)
|
2016-09-02 00:23:15 +00:00
|
|
|
upd, ar := testAllocRunner(false)
|
2015-08-31 00:10:17 +00:00
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
2015-08-30 23:35:04 +00:00
|
|
|
|
2015-08-31 00:10:17 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count == 0 {
|
2016-02-04 03:58:39 +00:00
|
|
|
return false, fmt.Errorf("No updates")
|
2015-08-31 00:10:17 +00:00
|
|
|
}
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
2016-03-24 01:08:19 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
2016-02-04 03:58:39 +00:00
|
|
|
}
|
|
|
|
return true, nil
|
2015-08-31 00:10:17 +00:00
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
2015-08-30 23:35:04 +00:00
|
|
|
}
|
|
|
|
|
2016-08-25 00:40:11 +00:00
|
|
|
// TestAllocRuner_RetryArtifact ensures that if one task in a task group is
|
2016-10-11 19:31:40 +00:00
|
|
|
// retrying fetching an artifact, other tasks in the group should be able
|
2016-08-25 21:42:50 +00:00
|
|
|
// to proceed.
|
2016-08-25 00:40:11 +00:00
|
|
|
func TestAllocRunner_RetryArtifact(t *testing.T) {
|
|
|
|
ctestutil.ExecCompatible(t)
|
|
|
|
|
|
|
|
alloc := mock.Alloc()
|
2016-08-25 21:42:50 +00:00
|
|
|
alloc.Job.Type = structs.JobTypeBatch
|
2016-10-21 00:27:16 +00:00
|
|
|
alloc.Job.TaskGroups[0].RestartPolicy.Mode = structs.RestartPolicyModeFail
|
2016-08-26 00:25:51 +00:00
|
|
|
alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1
|
|
|
|
alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second
|
2016-08-25 00:40:11 +00:00
|
|
|
|
2016-10-21 00:27:16 +00:00
|
|
|
task := alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"exit_code": "0",
|
|
|
|
"run_for": "1s",
|
|
|
|
}
|
|
|
|
|
2016-08-25 21:42:50 +00:00
|
|
|
// Create a new task with a bad artifact
|
2016-08-25 00:40:11 +00:00
|
|
|
badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
|
|
|
badtask.Name = "bad"
|
|
|
|
badtask.Artifacts = []*structs.TaskArtifact{
|
|
|
|
{GetterSource: "http://127.1.1.111:12315/foo/bar/baz"},
|
|
|
|
}
|
|
|
|
|
|
|
|
alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask)
|
2016-09-02 00:23:15 +00:00
|
|
|
upd, ar := testAllocRunnerFromAlloc(alloc, true)
|
2016-08-25 00:40:11 +00:00
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count < 6 {
|
|
|
|
return false, fmt.Errorf("Not enough updates")
|
|
|
|
}
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
2016-08-25 23:05:19 +00:00
|
|
|
|
|
|
|
// web task should have completed successfully while bad task
|
|
|
|
// retries artififact fetching
|
2016-08-25 21:42:50 +00:00
|
|
|
webstate := last.TaskStates["web"]
|
|
|
|
if webstate.State != structs.TaskStateDead {
|
|
|
|
return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State)
|
|
|
|
}
|
|
|
|
if !webstate.Successful() {
|
|
|
|
return false, fmt.Errorf("expected web to have exited successfully")
|
2016-08-25 00:40:11 +00:00
|
|
|
}
|
2016-08-25 23:05:19 +00:00
|
|
|
|
|
|
|
// bad task should have failed
|
|
|
|
badstate := last.TaskStates["bad"]
|
|
|
|
if badstate.State != structs.TaskStateDead {
|
2016-10-21 00:27:16 +00:00
|
|
|
return false, fmt.Errorf("expected bad to be dead but found %q", badstate.State)
|
2016-08-25 23:05:19 +00:00
|
|
|
}
|
2016-10-21 00:27:16 +00:00
|
|
|
if !badstate.Failed {
|
|
|
|
return false, fmt.Errorf("expected bad to have failed: %#v", badstate.Events)
|
2016-08-25 00:40:11 +00:00
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2016-02-04 21:09:53 +00:00
|
|
|
func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
|
|
|
|
ctestutil.ExecCompatible(t)
|
2016-09-02 00:23:15 +00:00
|
|
|
upd, ar := testAllocRunner(false)
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
// Ensure task takes some time
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Config["command"] = "/bin/sleep"
|
|
|
|
task.Config["args"] = []string{"10"}
|
|
|
|
go ar.Run()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count == 0 {
|
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
2016-02-09 02:51:11 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusRunning {
|
2016-02-04 21:09:53 +00:00
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Update the alloc to be terminal which should cause the alloc runner to
|
|
|
|
// stop the tasks and wait for a destroy.
|
|
|
|
update := ar.alloc.Copy()
|
|
|
|
update.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
ar.Update(update)
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count == 0 {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the status has changed.
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
2016-03-24 01:08:19 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the state still exists
|
|
|
|
if _, err := os.Stat(ar.stateFilePath()); err != nil {
|
|
|
|
return false, fmt.Errorf("state file destroyed: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the alloc directory still exists
|
2016-12-03 01:04:07 +00:00
|
|
|
if _, err := os.Stat(ar.allocDir.AllocDir); err != nil {
|
|
|
|
return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
2016-02-04 22:19:27 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
2016-02-04 21:09:53 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
// Send the destroy signal and ensure the AllocRunner cleans up.
|
|
|
|
ar.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count == 0 {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the status has changed.
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
2016-03-24 01:08:19 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the state was cleaned
|
|
|
|
if _, err := os.Stat(ar.stateFilePath()); err == nil {
|
|
|
|
return false, fmt.Errorf("state file still exists: %v", ar.stateFilePath())
|
|
|
|
} else if !os.IsNotExist(err) {
|
|
|
|
return false, fmt.Errorf("stat err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the alloc directory was cleaned
|
2016-12-03 01:04:07 +00:00
|
|
|
if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
|
|
|
|
return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
|
2016-02-04 21:09:53 +00:00
|
|
|
} else if !os.IsNotExist(err) {
|
|
|
|
return false, fmt.Errorf("stat err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
2016-02-04 22:19:27 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
2016-02-04 21:09:53 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2015-08-30 23:35:04 +00:00
|
|
|
func TestAllocRunner_Destroy(t *testing.T) {
|
2015-09-23 01:48:42 +00:00
|
|
|
ctestutil.ExecCompatible(t)
|
2016-09-02 00:23:15 +00:00
|
|
|
upd, ar := testAllocRunner(false)
|
2015-08-31 00:10:17 +00:00
|
|
|
|
|
|
|
// Ensure task takes some time
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Config["command"] = "/bin/sleep"
|
2015-11-18 23:16:42 +00:00
|
|
|
task.Config["args"] = []string{"10"}
|
2015-08-31 00:10:17 +00:00
|
|
|
go ar.Run()
|
|
|
|
start := time.Now()
|
|
|
|
|
|
|
|
// Begin the tear down
|
|
|
|
go func() {
|
2016-09-15 18:37:20 +00:00
|
|
|
time.Sleep(1 * time.Second)
|
2015-08-31 00:10:17 +00:00
|
|
|
ar.Destroy()
|
|
|
|
}()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count == 0 {
|
|
|
|
return false, nil
|
|
|
|
}
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
// Check the status has changed.
|
2015-08-31 00:10:17 +00:00
|
|
|
last := upd.Allocs[upd.Count-1]
|
2016-03-24 01:08:19 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the state was cleaned
|
|
|
|
if _, err := os.Stat(ar.stateFilePath()); err == nil {
|
|
|
|
return false, fmt.Errorf("state file still exists: %v", ar.stateFilePath())
|
|
|
|
} else if !os.IsNotExist(err) {
|
|
|
|
return false, fmt.Errorf("stat err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the alloc directory was cleaned
|
2016-12-03 01:04:07 +00:00
|
|
|
if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
|
|
|
|
return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
|
2016-02-04 21:09:53 +00:00
|
|
|
} else if !os.IsNotExist(err) {
|
|
|
|
return false, fmt.Errorf("stat err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
2015-08-31 00:10:17 +00:00
|
|
|
}, func(err error) {
|
2016-02-04 22:19:27 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
2015-08-31 00:10:17 +00:00
|
|
|
})
|
|
|
|
|
2016-11-30 00:18:28 +00:00
|
|
|
if elapsed := time.Since(start); elapsed > 20*time.Second {
|
|
|
|
t.Fatalf("took too long to terminate: %s", elapsed)
|
2015-08-31 00:10:17 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestAllocRunner_Update(t *testing.T) {
|
2015-09-23 01:48:42 +00:00
|
|
|
ctestutil.ExecCompatible(t)
|
2016-09-02 00:23:15 +00:00
|
|
|
_, ar := testAllocRunner(false)
|
2015-08-31 00:10:17 +00:00
|
|
|
|
|
|
|
// Ensure task takes some time
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Config["command"] = "/bin/sleep"
|
2015-11-18 23:16:42 +00:00
|
|
|
task.Config["args"] = []string{"10"}
|
2015-08-31 00:10:17 +00:00
|
|
|
go ar.Run()
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
// Update the alloc definition
|
|
|
|
newAlloc := new(structs.Allocation)
|
|
|
|
*newAlloc = *ar.alloc
|
2016-02-01 21:57:35 +00:00
|
|
|
newAlloc.Name = "FOO"
|
|
|
|
newAlloc.AllocModifyIndex++
|
2015-08-31 00:10:17 +00:00
|
|
|
ar.Update(newAlloc)
|
|
|
|
|
2016-02-01 21:57:35 +00:00
|
|
|
// Check the alloc runner stores the update allocation.
|
2015-08-31 00:10:17 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2016-02-01 21:57:35 +00:00
|
|
|
return ar.Alloc().Name == "FOO", nil
|
2015-08-31 00:10:17 +00:00
|
|
|
}, func(err error) {
|
2016-02-01 21:57:35 +00:00
|
|
|
t.Fatalf("err: %v %#v", err, ar.Alloc())
|
2015-08-31 00:10:17 +00:00
|
|
|
})
|
2015-08-30 23:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestAllocRunner_SaveRestoreState(t *testing.T) {
|
2016-09-16 00:24:09 +00:00
|
|
|
alloc := mock.Alloc()
|
|
|
|
task := alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"exit_code": "0",
|
|
|
|
"run_for": "10s",
|
|
|
|
}
|
2015-08-31 00:10:17 +00:00
|
|
|
|
2016-09-16 00:24:09 +00:00
|
|
|
upd, ar := testAllocRunnerFromAlloc(alloc, false)
|
2015-08-31 00:10:17 +00:00
|
|
|
go ar.Run()
|
|
|
|
|
|
|
|
// Snapshot state
|
2016-01-21 22:52:41 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
return len(ar.tasks) == 1, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("task never started: %v", err)
|
|
|
|
})
|
|
|
|
|
2015-08-31 00:10:17 +00:00
|
|
|
err := ar.SaveState()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a new alloc runner
|
|
|
|
ar2 := NewAllocRunner(ar.logger, ar.config, upd.Update,
|
2016-09-14 22:04:25 +00:00
|
|
|
&structs.Allocation{ID: ar.alloc.ID}, ar.vaultClient)
|
2015-08-31 00:10:17 +00:00
|
|
|
err = ar2.RestoreState()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
go ar2.Run()
|
|
|
|
|
2016-09-16 00:24:09 +00:00
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if len(ar2.tasks) != 1 {
|
|
|
|
return false, fmt.Errorf("Incorrect number of tasks")
|
|
|
|
}
|
|
|
|
|
|
|
|
if upd.Count == 0 {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
|
|
|
return last.ClientStatus == structs.AllocClientStatusRunning, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates)
|
|
|
|
})
|
|
|
|
|
2015-08-31 00:10:17 +00:00
|
|
|
// Destroy and wait
|
|
|
|
ar2.Destroy()
|
|
|
|
start := time.Now()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
2016-09-16 00:24:09 +00:00
|
|
|
alloc := ar2.Alloc()
|
|
|
|
if alloc.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("Bad client status; got %v; want %v", alloc.ClientStatus, structs.AllocClientStatusComplete)
|
2015-08-31 00:10:17 +00:00
|
|
|
}
|
2016-09-16 00:24:09 +00:00
|
|
|
return true, nil
|
2015-08-31 00:10:17 +00:00
|
|
|
}, func(err error) {
|
2015-11-14 06:07:13 +00:00
|
|
|
t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates)
|
2015-08-31 00:10:17 +00:00
|
|
|
})
|
|
|
|
|
2016-09-16 00:24:09 +00:00
|
|
|
if time.Since(start) > time.Duration(testutil.TestMultiplier()*5)*time.Second {
|
2015-08-31 00:10:17 +00:00
|
|
|
t.Fatalf("took too long to terminate")
|
|
|
|
}
|
2015-08-30 23:35:04 +00:00
|
|
|
}
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
|
|
|
|
ctestutil.ExecCompatible(t)
|
2016-09-02 00:23:15 +00:00
|
|
|
upd, ar := testAllocRunner(false)
|
2016-03-22 20:49:52 +00:00
|
|
|
ar.logger = prefixedTestLogger("ar1: ")
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
// Ensure task takes some time
|
2016-09-05 02:09:08 +00:00
|
|
|
|
|
|
|
ar.alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
|
2016-02-04 21:09:53 +00:00
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
2016-09-05 02:09:08 +00:00
|
|
|
task.Config["run_for"] = "10s"
|
2016-02-04 21:09:53 +00:00
|
|
|
go ar.Run()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count == 0 {
|
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
2016-03-22 20:49:52 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusRunning {
|
2016-02-04 21:09:53 +00:00
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Update the alloc to be terminal which should cause the alloc runner to
|
|
|
|
// stop the tasks and wait for a destroy.
|
|
|
|
update := ar.alloc.Copy()
|
|
|
|
update.DesiredStatus = structs.AllocDesiredStatusStop
|
|
|
|
ar.Update(update)
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
return ar.alloc.DesiredStatus == structs.AllocDesiredStatusStop, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
err := ar.SaveState()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
|
2017-01-17 21:10:20 +00:00
|
|
|
// Ensure ar1 doesn't recreate the state file
|
|
|
|
ar.persistLock.Lock()
|
|
|
|
defer ar.persistLock.Unlock()
|
|
|
|
|
2016-02-04 22:19:27 +00:00
|
|
|
// Ensure both alloc runners don't destroy
|
|
|
|
ar.destroy = true
|
|
|
|
|
2016-02-04 21:09:53 +00:00
|
|
|
// Create a new alloc runner
|
|
|
|
ar2 := NewAllocRunner(ar.logger, ar.config, upd.Update,
|
2016-09-14 22:04:25 +00:00
|
|
|
&structs.Allocation{ID: ar.alloc.ID}, ar.vaultClient)
|
2016-03-22 20:49:52 +00:00
|
|
|
ar2.logger = prefixedTestLogger("ar2: ")
|
2016-02-04 21:09:53 +00:00
|
|
|
err = ar2.RestoreState()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
}
|
|
|
|
go ar2.Run()
|
2016-03-22 20:49:52 +00:00
|
|
|
ar2.logger.Println("[TESTING] starting second alloc runner")
|
2016-02-04 21:09:53 +00:00
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
// Check the state still exists
|
|
|
|
if _, err := os.Stat(ar.stateFilePath()); err != nil {
|
|
|
|
return false, fmt.Errorf("state file destroyed: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the alloc directory still exists
|
2016-12-03 01:04:07 +00:00
|
|
|
if _, err := os.Stat(ar.allocDir.AllocDir); err != nil {
|
|
|
|
return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Send the destroy signal and ensure the AllocRunner cleans up.
|
2016-03-22 20:49:52 +00:00
|
|
|
ar2.logger.Println("[TESTING] destroying second alloc runner")
|
2016-02-04 21:09:53 +00:00
|
|
|
ar2.Destroy()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count == 0 {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the status has changed.
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
2016-03-24 01:08:19 +00:00
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
2016-02-04 21:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the state was cleaned
|
|
|
|
if _, err := os.Stat(ar.stateFilePath()); err == nil {
|
|
|
|
return false, fmt.Errorf("state file still exists: %v", ar.stateFilePath())
|
|
|
|
} else if !os.IsNotExist(err) {
|
|
|
|
return false, fmt.Errorf("stat err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the alloc directory was cleaned
|
2016-12-03 01:04:07 +00:00
|
|
|
if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
|
|
|
|
return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
|
2016-02-04 21:09:53 +00:00
|
|
|
} else if !os.IsNotExist(err) {
|
|
|
|
return false, fmt.Errorf("stat err: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
2016-02-04 22:19:27 +00:00
|
|
|
t.Fatalf("err: %v", err)
|
2016-02-04 21:09:53 +00:00
|
|
|
})
|
|
|
|
}
|
2016-03-22 20:49:52 +00:00
|
|
|
|
2017-01-05 19:51:03 +00:00
|
|
|
// Ensure pre-#2132 state files containing the Context struct are properly
|
|
|
|
// migrated to the new format.
|
|
|
|
//
|
|
|
|
// Old Context State:
|
|
|
|
//
|
|
|
|
// "Context": {
|
|
|
|
// "AllocDir": {
|
|
|
|
// "AllocDir": "/path/to/allocs/2a54fcff-fc44-8d4f-e025-53c48e9cbbbb",
|
|
|
|
// "SharedDir": "/path/to/allocs/2a54fcff-fc44-8d4f-e025-53c48e9cbbbb/alloc",
|
|
|
|
// "TaskDirs": {
|
|
|
|
// "echo1": "/path/to/allocs/2a54fcff-fc44-8d4f-e025-53c48e9cbbbb/echo1"
|
|
|
|
// }
|
|
|
|
// },
|
|
|
|
// "AllocID": "2a54fcff-fc44-8d4f-e025-53c48e9cbbbb"
|
|
|
|
// }
|
|
|
|
func TestAllocRunner_RestoreOldState(t *testing.T) {
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
task := alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"exit_code": "0",
|
|
|
|
"run_for": "10s",
|
|
|
|
}
|
|
|
|
|
|
|
|
logger := testLogger()
|
|
|
|
conf := config.DefaultConfig()
|
|
|
|
conf.StateDir = os.TempDir()
|
|
|
|
conf.AllocDir = os.TempDir()
|
|
|
|
|
|
|
|
if err := os.MkdirAll(filepath.Join(conf.StateDir, "alloc", alloc.ID), 0777); err != nil {
|
|
|
|
t.Fatalf("error creating state dir: %v", err)
|
|
|
|
}
|
|
|
|
statePath := filepath.Join(conf.StateDir, "alloc", alloc.ID, "state.json")
|
|
|
|
w, err := os.Create(statePath)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("error creating state file: %v", err)
|
|
|
|
}
|
|
|
|
tmplctx := &struct {
|
|
|
|
AllocID string
|
|
|
|
AllocDir string
|
|
|
|
}{alloc.ID, conf.AllocDir}
|
|
|
|
err = template.Must(template.New("test_state").Parse(`{
|
|
|
|
"Version": "0.5.1",
|
|
|
|
"Alloc": {
|
|
|
|
"ID": "{{ .AllocID }}",
|
|
|
|
"Name": "example",
|
|
|
|
"JobID": "example",
|
|
|
|
"Job": {
|
|
|
|
"ID": "example",
|
|
|
|
"Name": "example",
|
|
|
|
"Type": "batch",
|
|
|
|
"TaskGroups": [
|
|
|
|
{
|
|
|
|
"Name": "example",
|
|
|
|
"Tasks": [
|
|
|
|
{
|
|
|
|
"Name": "example",
|
|
|
|
"Driver": "mock",
|
|
|
|
"Config": {
|
|
|
|
"exit_code": "0",
|
|
|
|
"run_for": "10s"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
]
|
|
|
|
}
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"TaskGroup": "example",
|
|
|
|
"DesiredStatus": "run",
|
|
|
|
"ClientStatus": "running",
|
|
|
|
"TaskStates": {
|
|
|
|
"example": {
|
|
|
|
"State": "running",
|
|
|
|
"Failed": false,
|
|
|
|
"Events": []
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"Context": {
|
|
|
|
"AllocDir": {
|
|
|
|
"AllocDir": "{{ .AllocDir }}/{{ .AllocID }}",
|
|
|
|
"SharedDir": "{{ .AllocDir }}/{{ .AllocID }}/alloc",
|
|
|
|
"TaskDirs": {
|
|
|
|
"example": "{{ .AllocDir }}/{{ .AllocID }}/example"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"AllocID": "{{ .AllocID }}"
|
|
|
|
}
|
|
|
|
}`)).Execute(w, tmplctx)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("error writing state file: %v", err)
|
|
|
|
}
|
|
|
|
w.Close()
|
|
|
|
|
|
|
|
upd := &MockAllocStateUpdater{}
|
|
|
|
*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
|
|
|
|
alloc.Job.Type = structs.JobTypeBatch
|
|
|
|
vclient := vaultclient.NewMockVaultClient()
|
|
|
|
ar := NewAllocRunner(logger, conf, upd.Update, alloc, vclient)
|
|
|
|
defer ar.Destroy()
|
|
|
|
|
|
|
|
// RestoreState should fail on the task state since we only test the
|
|
|
|
// alloc state restoring.
|
|
|
|
err = ar.RestoreState()
|
|
|
|
if err == nil {
|
|
|
|
t.Fatal("expected error restoring Task state")
|
|
|
|
}
|
|
|
|
merr, ok := err.(*multierror.Error)
|
|
|
|
if !ok {
|
|
|
|
t.Fatalf("expected RestoreState to return a multierror but found: %T -> %v", err, err)
|
|
|
|
}
|
|
|
|
if len(merr.Errors) != 1 {
|
|
|
|
t.Fatalf("expected exactly 1 error from RestoreState but found: %d: %v", len(merr.Errors), err)
|
|
|
|
}
|
|
|
|
if expected := "task runner snapshot includes nil Task"; merr.Errors[0].Error() != expected {
|
|
|
|
t.Fatalf("expected %q but got: %q", merr.Errors[0].Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := ar.SaveState(); err != nil {
|
|
|
|
t.Fatalf("error saving new state: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-22 20:49:52 +00:00
|
|
|
func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
|
|
|
|
ctestutil.ExecCompatible(t)
|
2016-09-02 00:23:15 +00:00
|
|
|
upd, ar := testAllocRunner(false)
|
2016-03-22 20:49:52 +00:00
|
|
|
|
|
|
|
// Create two tasks in the task group
|
|
|
|
task := ar.alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Config["command"] = "/bin/sleep"
|
|
|
|
task.Config["args"] = []string{"1000"}
|
|
|
|
|
|
|
|
task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
|
|
|
task2.Name = "task 2"
|
|
|
|
task2.Config = map[string]interface{}{"command": "invalidBinaryToFail"}
|
|
|
|
ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
|
|
|
|
ar.alloc.TaskResources[task2.Name] = task2.Resources
|
|
|
|
//t.Logf("%#v", ar.alloc.Job.TaskGroups[0])
|
|
|
|
go ar.Run()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count == 0 {
|
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
|
|
|
if last.ClientStatus != structs.AllocClientStatusFailed {
|
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Task One should be killed
|
|
|
|
state1 := last.TaskStates[task.Name]
|
|
|
|
if state1.State != structs.TaskStateDead {
|
|
|
|
return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
|
|
|
|
}
|
2016-09-14 22:04:25 +00:00
|
|
|
if len(state1.Events) < 3 {
|
|
|
|
return false, fmt.Errorf("Unexpected number of events")
|
|
|
|
}
|
|
|
|
if lastE := state1.Events[len(state1.Events)-3]; lastE.Type != structs.TaskSiblingFailed {
|
2016-08-25 18:11:10 +00:00
|
|
|
return false, fmt.Errorf("got last event %v; want %v", lastE.Type, structs.TaskSiblingFailed)
|
2016-03-22 20:49:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Task Two should be failed
|
|
|
|
state2 := last.TaskStates[task2.Name]
|
|
|
|
if state2.State != structs.TaskStateDead {
|
|
|
|
return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
|
|
|
|
}
|
2016-10-21 00:27:16 +00:00
|
|
|
if !state2.Failed {
|
2016-03-22 20:49:52 +00:00
|
|
|
return false, fmt.Errorf("task2 should have failed")
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
}
|
2016-09-16 00:24:09 +00:00
|
|
|
|
2016-10-03 16:59:57 +00:00
|
|
|
func TestAllocRunner_MoveAllocDir(t *testing.T) {
|
|
|
|
// Create an alloc runner
|
|
|
|
alloc := mock.Alloc()
|
|
|
|
task := alloc.Job.TaskGroups[0].Tasks[0]
|
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"run_for": "1s",
|
|
|
|
}
|
|
|
|
upd, ar := testAllocRunnerFromAlloc(alloc, false)
|
|
|
|
go ar.Run()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd.Count == 0 {
|
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
|
|
|
last := upd.Allocs[upd.Count-1]
|
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Write some data in data dir and task dir of the alloc
|
2016-12-03 01:04:07 +00:00
|
|
|
dataFile := filepath.Join(ar.allocDir.SharedDir, "data", "data_file")
|
2016-10-03 16:59:57 +00:00
|
|
|
ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm)
|
2016-12-03 01:04:07 +00:00
|
|
|
taskDir := ar.allocDir.TaskDirs[task.Name]
|
|
|
|
taskLocalFile := filepath.Join(taskDir.LocalDir, "local_file")
|
2016-10-03 16:59:57 +00:00
|
|
|
ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm)
|
|
|
|
|
|
|
|
// Create another alloc runner
|
|
|
|
alloc1 := mock.Alloc()
|
2016-10-26 22:17:57 +00:00
|
|
|
task = alloc1.Job.TaskGroups[0].Tasks[0]
|
2016-10-03 16:59:57 +00:00
|
|
|
task.Driver = "mock_driver"
|
|
|
|
task.Config = map[string]interface{}{
|
|
|
|
"run_for": "1s",
|
|
|
|
}
|
|
|
|
upd1, ar1 := testAllocRunnerFromAlloc(alloc1, false)
|
2016-12-03 01:04:07 +00:00
|
|
|
ar1.SetPreviousAllocDir(ar.allocDir)
|
2016-10-03 16:59:57 +00:00
|
|
|
go ar1.Run()
|
|
|
|
|
|
|
|
testutil.WaitForResult(func() (bool, error) {
|
|
|
|
if upd1.Count == 0 {
|
|
|
|
return false, fmt.Errorf("No updates")
|
|
|
|
}
|
|
|
|
last := upd1.Allocs[upd1.Count-1]
|
|
|
|
if last.ClientStatus != structs.AllocClientStatusComplete {
|
|
|
|
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}, func(err error) {
|
|
|
|
t.Fatalf("err: %v", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Ensure that data from ar1 was moved to ar
|
2016-12-03 01:04:07 +00:00
|
|
|
taskDir = ar1.allocDir.TaskDirs[task.Name]
|
|
|
|
taskLocalFile = filepath.Join(taskDir.LocalDir, "local_file")
|
2016-10-03 16:59:57 +00:00
|
|
|
if fileInfo, _ := os.Stat(taskLocalFile); fileInfo == nil {
|
|
|
|
t.Fatalf("file %v not found", taskLocalFile)
|
|
|
|
}
|
|
|
|
|
2016-12-03 01:04:07 +00:00
|
|
|
dataFile = filepath.Join(ar1.allocDir.SharedDir, "data", "data_file")
|
2016-10-03 16:59:57 +00:00
|
|
|
if fileInfo, _ := os.Stat(dataFile); fileInfo == nil {
|
|
|
|
t.Fatalf("file %v not found", dataFile)
|
|
|
|
}
|
|
|
|
}
|