Fix TestAllocRunner_TaskLeader_StopTG
Also make alloc runner tests less racy. Basically every alloc runner test used to have races with `upd.{Count,Allocs}`
This commit is contained in:
parent
0fabb5902a
commit
6e80a8ee39
|
@ -6,6 +6,7 @@ import (
|
|||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"text/template"
|
||||
"time"
|
||||
|
@ -16,6 +17,7 @@ import (
|
|||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/kr/pretty"
|
||||
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
ctestutil "github.com/hashicorp/nomad/client/testutil"
|
||||
|
@ -23,13 +25,26 @@ import (
|
|||
)
|
||||
|
||||
type MockAllocStateUpdater struct {
|
||||
Count int
|
||||
Allocs []*structs.Allocation
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
// Update fulfills the TaskStateUpdater interface
|
||||
func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) {
|
||||
m.Count += 1
|
||||
m.mu.Lock()
|
||||
m.Allocs = append(m.Allocs, alloc)
|
||||
m.mu.Unlock()
|
||||
}
|
||||
|
||||
// Last returns the total number of updates and the last alloc (or nil)
|
||||
func (m *MockAllocStateUpdater) Last() (int, *structs.Allocation) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
n := len(m.Allocs)
|
||||
if n == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
return n, m.Allocs[n-1].Copy()
|
||||
}
|
||||
|
||||
func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
|
||||
|
@ -61,10 +76,10 @@ func TestAllocRunner_SimpleRun(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusComplete {
|
||||
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
||||
}
|
||||
|
@ -96,10 +111,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_BadStart(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
|
||||
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
||||
} else if *last.DeploymentStatus.Healthy {
|
||||
|
@ -136,10 +151,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Deadline(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
|
||||
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
||||
} else if *last.DeploymentStatus.Healthy {
|
||||
|
@ -181,10 +196,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
|
||||
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
||||
} else if !*last.DeploymentStatus.Healthy {
|
||||
|
@ -249,10 +264,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_Checks(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
|
||||
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
||||
} else if !*last.DeploymentStatus.Healthy {
|
||||
|
@ -291,10 +306,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
|
||||
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
||||
} else if !*last.DeploymentStatus.Healthy {
|
||||
|
@ -306,17 +321,16 @@ func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) {
|
|||
})
|
||||
|
||||
// Mimick an update to a new deployment id
|
||||
oldCount := upd.Count
|
||||
last := upd.Allocs[oldCount-1].Copy()
|
||||
oldCount, last := upd.Last()
|
||||
last.DeploymentStatus = nil
|
||||
last.DeploymentID = structs.GenerateUUID()
|
||||
ar.Update(last)
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count <= oldCount {
|
||||
newCount, last := upd.Last()
|
||||
if newCount <= oldCount {
|
||||
return false, fmt.Errorf("No new updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
|
||||
return false, fmt.Errorf("want deployment status unhealthy; got unset")
|
||||
} else if !*last.DeploymentStatus.Healthy {
|
||||
|
@ -360,10 +374,10 @@ func TestAllocRunner_RetryArtifact(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count < 6 {
|
||||
return false, fmt.Errorf("Not enough updates")
|
||||
count, last := upd.Last()
|
||||
if min := 6; count < min {
|
||||
return false, fmt.Errorf("Not enough updates (%d < %d)", count, min)
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
|
||||
// web task should have completed successfully while bad task
|
||||
// retries artififact fetching
|
||||
|
@ -399,10 +413,10 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
|
|||
go ar.Run()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusRunning {
|
||||
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
|
||||
}
|
||||
|
@ -418,12 +432,12 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
|
|||
ar.Update(update)
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
return false, nil
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
|
||||
// Check the status has changed.
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusComplete {
|
||||
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
||||
}
|
||||
|
@ -453,12 +467,12 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
|
|||
ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
return false, nil
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
|
||||
// Check the status has changed.
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusComplete {
|
||||
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
||||
}
|
||||
|
@ -504,12 +518,12 @@ func TestAllocRunner_Destroy(t *testing.T) {
|
|||
}()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
return false, nil
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
|
||||
// Check the status has changed.
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusComplete {
|
||||
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
||||
}
|
||||
|
@ -608,11 +622,11 @@ func TestAllocRunner_SaveRestoreState(t *testing.T) {
|
|||
return false, fmt.Errorf("Incorrect number of tasks")
|
||||
}
|
||||
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
return last.ClientStatus == structs.AllocClientStatusRunning, nil
|
||||
}, func(err error) {
|
||||
t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar2.alloc.TaskStates["web"])
|
||||
|
@ -649,10 +663,11 @@ func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
|
||||
if last.ClientStatus != structs.AllocClientStatusRunning {
|
||||
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
|
||||
}
|
||||
|
@ -721,12 +736,12 @@ func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
|
|||
ar2.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
return false, nil
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
|
||||
// Check the status has changed.
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusComplete {
|
||||
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
||||
}
|
||||
|
@ -775,10 +790,11 @@ func TestAllocRunner_SaveRestoreState_Upgrade(t *testing.T) {
|
|||
|
||||
// Snapshot state
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
|
||||
if last.ClientStatus != structs.AllocClientStatusRunning {
|
||||
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
|
||||
}
|
||||
|
@ -803,22 +819,19 @@ func TestAllocRunner_SaveRestoreState_Upgrade(t *testing.T) {
|
|||
defer ar2.Destroy() // Just-in-case of failure before Destroy below
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if len(ar2.tasks) != 1 {
|
||||
return false, fmt.Errorf("Incorrect number of tasks")
|
||||
count, last := upd.Last()
|
||||
if min := 3; count < min {
|
||||
return false, fmt.Errorf("expected at least %d updates but found %d", min, count)
|
||||
}
|
||||
|
||||
if upd.Count < 3 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
for _, ev := range ar2.taskStates["web"].Events {
|
||||
for _, ev := range last.TaskStates["web"].Events {
|
||||
if strings.HasSuffix(ev.RestartReason, pre06ScriptCheckReason) {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, fmt.Errorf("no restart with proper reason found")
|
||||
}, func(err error) {
|
||||
t.Fatalf("err: %v\nAllocs: %#v\nWeb State: %#v", err, upd.Allocs, ar2.taskStates["web"])
|
||||
count, last := upd.Last()
|
||||
t.Fatalf("err: %v\nAllocs: %d\nweb state: % #v", err, count, pretty.Formatter(last.TaskStates["web"]))
|
||||
})
|
||||
|
||||
// Destroy and wait
|
||||
|
@ -996,10 +1009,10 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusFailed {
|
||||
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed)
|
||||
}
|
||||
|
@ -1061,12 +1074,13 @@ func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
|
|||
ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
|
||||
ar.alloc.TaskResources[task2.Name] = task2.Resources
|
||||
go ar.Run()
|
||||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusComplete {
|
||||
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
||||
}
|
||||
|
@ -1142,30 +1156,53 @@ func TestAllocRunner_TaskLeader_StopTG(t *testing.T) {
|
|||
}
|
||||
ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2, task3)
|
||||
ar.alloc.TaskResources[task2.Name] = task2.Resources
|
||||
defer ar.Destroy()
|
||||
|
||||
// Destroy before running so it shuts down the alloc runner right after
|
||||
// starting all tasks
|
||||
ar.Destroy()
|
||||
go ar.Run()
|
||||
select {
|
||||
case <-ar.WaitCh():
|
||||
case <-time.After(8 * time.Second):
|
||||
t.Fatalf("timed out waiting for alloc runner to exit")
|
||||
}
|
||||
|
||||
if len(upd.Allocs) != 1 {
|
||||
t.Fatalf("expected 1 alloc update but found %d", len(upd.Allocs))
|
||||
}
|
||||
// Wait for tasks to start
|
||||
oldCount, last := upd.Last()
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
oldCount, last = upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
if n := len(last.TaskStates); n != 3 {
|
||||
return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n)
|
||||
}
|
||||
for name, state := range last.TaskStates {
|
||||
if state.State != structs.TaskStateRunning {
|
||||
return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State)
|
||||
}
|
||||
}
|
||||
return true, nil
|
||||
}, func(err error) {
|
||||
t.Fatalf("err: %v", err)
|
||||
})
|
||||
|
||||
a := upd.Allocs[0]
|
||||
if a.TaskStates["leader"].FinishedAt.UnixNano() >= a.TaskStates["follower1"].FinishedAt.UnixNano() {
|
||||
t.Fatalf("expected leader to finish before follower1: %s >= %s",
|
||||
a.TaskStates["leader"].FinishedAt, a.TaskStates["follower1"].FinishedAt)
|
||||
}
|
||||
if a.TaskStates["leader"].FinishedAt.UnixNano() >= a.TaskStates["follower2"].FinishedAt.UnixNano() {
|
||||
t.Fatalf("expected leader to finish before follower2: %s >= %s",
|
||||
a.TaskStates["leader"].FinishedAt, a.TaskStates["follower2"].FinishedAt)
|
||||
}
|
||||
// Stop alloc
|
||||
update := ar.Alloc()
|
||||
update.DesiredStatus = structs.AllocDesiredStatusStop
|
||||
ar.Update(update)
|
||||
|
||||
// Wait for tasks to stop
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
newCount, last := upd.Last()
|
||||
if newCount == oldCount {
|
||||
return false, fmt.Errorf("no new updates (count: %d)", newCount)
|
||||
}
|
||||
if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() {
|
||||
t.Fatalf("expected leader to finish before follower1: %s >= %s",
|
||||
last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt)
|
||||
}
|
||||
if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() {
|
||||
t.Fatalf("expected leader to finish before follower2: %s >= %s",
|
||||
last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt)
|
||||
}
|
||||
return true, nil
|
||||
}, func(err error) {
|
||||
t.Fatalf("err: %v", err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAllocRunner_MoveAllocDir(t *testing.T) {
|
||||
|
@ -1181,10 +1218,10 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) {
|
|||
defer ar.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd.Count == 0 {
|
||||
_, last := upd.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd.Allocs[upd.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusComplete {
|
||||
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
||||
}
|
||||
|
@ -1213,10 +1250,10 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) {
|
|||
defer ar1.Destroy()
|
||||
|
||||
testutil.WaitForResult(func() (bool, error) {
|
||||
if upd1.Count == 0 {
|
||||
_, last := upd1.Last()
|
||||
if last == nil {
|
||||
return false, fmt.Errorf("No updates")
|
||||
}
|
||||
last := upd1.Allocs[upd1.Count-1]
|
||||
if last.ClientStatus != structs.AllocClientStatusComplete {
|
||||
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue