Merge pull request #1653 from hashicorp/b-fix-artifact-retry
Don't fail other tasks when retrying artifact get
This commit is contained in:
commit
d31f373a5b
|
@ -25,22 +25,24 @@ func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) {
|
|||
m.Allocs = append(m.Allocs, alloc)
|
||||
}
|
||||
|
||||
func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
|
||||
func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
|
||||
logger := testLogger()
|
||||
conf := config.DefaultConfig()
|
||||
conf.StateDir = os.TempDir()
|
||||
conf.AllocDir = os.TempDir()
|
||||
upd := &MockAllocStateUpdater{}
|
||||
alloc := mock.Alloc()
|
||||
if !restarts {
|
||||
*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
|
||||
alloc.Job.Type = structs.JobTypeBatch
|
||||
}
|
||||
|
||||
ar := NewAllocRunner(logger, conf, upd.Update, alloc)
|
||||
return upd, ar
|
||||
}
|
||||
|
||||
func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
|
||||
return testAllocRunnerFromAlloc(mock.Alloc(), restarts)
|
||||
}
|
||||
|
||||
func TestAllocRunner_SimpleRun(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
upd, ar := testAllocRunner(false)
|
||||
|
@ -61,6 +63,59 @@ func TestAllocRunner_SimpleRun(t *testing.T) {
|
|||
})
|
||||
}
|
||||
|
||||
// TestAllocRuner_RetryArtifact ensures that if one task in a task group is
|
||||
// retrying fetching an artifact, other tasks in the the group should be able
|
||||
// to proceed.
|
||||
func TestAllocRunner_RetryArtifact(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job.Type = structs.JobTypeBatch
|
||||
alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1
|
||||
alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second
|
||||
|
||||
// Create a new task with a bad artifact
|
||||
badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy()
|
||||
badtask.Name = "bad"
|
||||
badtask.Artifacts = []*structs.TaskArtifact{
|
||||
{GetterSource: "http://127.1.1.111:12315/foo/bar/baz"},
|
||||
}
|
||||
|
||||
alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask)
|
||||
upd, ar := testAllocRunnerFromAlloc(alloc, true)
|
||||
go ar.Run()
|
||||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count < 6 {
|
||||
return false, fmt.Errorf("Not enough updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
|
||||
// web task should have completed successfully while bad task
|
||||
// retries artififact fetching
|
||||
webstate := last.TaskStates["web"]
|
||||
if webstate.State != structs.TaskStateDead {
|
||||
return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State)
|
||||
}
|
||||
if !webstate.Successful() {
|
||||
return false, fmt.Errorf("expected web to have exited successfully")
|
||||
}
|
||||
|
||||
// bad task should have failed
|
||||
badstate := last.TaskStates["bad"]
|
||||
if badstate.State != structs.TaskStateDead {
|
||||
return false, fmt.Errorf("expected bad to be dead but found %q", last.TaskStates["web"].State)
|
||||
}
|
||||
if !badstate.Failed() {
|
||||
return false, fmt.Errorf("expected bad to have failed")
|
||||
}
|
||||
return true, nil
|
||||
}, func(err error) {
|
||||
t.Fatalf("err: %v", err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
|
||||
ctestutil.ExecCompatible(t)
|
||||
upd, ar := testAllocRunner(false)
|
||||
|
|
|
@ -318,7 +318,7 @@ func (r *TaskRunner) run() {
|
|||
|
||||
for _, artifact := range r.task.Artifacts {
|
||||
if err := getter.GetArtifact(r.taskEnv, artifact, taskDir); err != nil {
|
||||
r.setState(structs.TaskStateDead,
|
||||
r.setState(structs.TaskStatePending,
|
||||
structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(err))
|
||||
r.restartTracker.SetStartError(dstructs.NewRecoverableError(err, true))
|
||||
goto RESTART
|
||||
|
|
Loading…
Reference in a new issue