Fix TestAllocRunner_TaskLeader_StopTG

Also make alloc runner tests less racy. Basically every alloc runner
test used to have races with `upd.{Count,Allocs}`
This commit is contained in:
Michael Schurter 2017-07-21 12:24:40 -07:00
parent 0fabb5902a
commit 6e80a8ee39
1 changed files with 115 additions and 78 deletions

View File

@ -6,6 +6,7 @@ import (
"os"
"path/filepath"
"strings"
"sync"
"testing"
"text/template"
"time"
@ -16,6 +17,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/kr/pretty"
"github.com/hashicorp/nomad/client/config"
ctestutil "github.com/hashicorp/nomad/client/testutil"
@ -23,13 +25,26 @@ import (
)
type MockAllocStateUpdater struct {
Count int
Allocs []*structs.Allocation
mu sync.Mutex
}
// Update fulfills the TaskStateUpdater interface
func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) {
m.Count += 1
m.mu.Lock()
m.Allocs = append(m.Allocs, alloc)
m.mu.Unlock()
}
// Last returns the total number of updates and the last alloc (or nil)
func (m *MockAllocStateUpdater) Last() (int, *structs.Allocation) {
m.mu.Lock()
defer m.mu.Unlock()
n := len(m.Allocs)
if n == 0 {
return 0, nil
}
return n, m.Allocs[n-1].Copy()
}
func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
@ -61,10 +76,10 @@ func TestAllocRunner_SimpleRun(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusComplete {
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
}
@ -96,10 +111,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_BadStart(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
return false, fmt.Errorf("want deployment status unhealthy; got unset")
} else if *last.DeploymentStatus.Healthy {
@ -136,10 +151,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Deadline(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
return false, fmt.Errorf("want deployment status unhealthy; got unset")
} else if *last.DeploymentStatus.Healthy {
@ -181,10 +196,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
return false, fmt.Errorf("want deployment status unhealthy; got unset")
} else if !*last.DeploymentStatus.Healthy {
@ -249,10 +264,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_Checks(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
return false, fmt.Errorf("want deployment status unhealthy; got unset")
} else if !*last.DeploymentStatus.Healthy {
@ -291,10 +306,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
return false, fmt.Errorf("want deployment status unhealthy; got unset")
} else if !*last.DeploymentStatus.Healthy {
@ -306,17 +321,16 @@ func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) {
})
// Mimick an update to a new deployment id
oldCount := upd.Count
last := upd.Allocs[oldCount-1].Copy()
oldCount, last := upd.Last()
last.DeploymentStatus = nil
last.DeploymentID = structs.GenerateUUID()
ar.Update(last)
testutil.WaitForResult(func() (bool, error) {
if upd.Count <= oldCount {
newCount, last := upd.Last()
if newCount <= oldCount {
return false, fmt.Errorf("No new updates")
}
last := upd.Allocs[upd.Count-1]
if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
return false, fmt.Errorf("want deployment status unhealthy; got unset")
} else if !*last.DeploymentStatus.Healthy {
@ -360,10 +374,10 @@ func TestAllocRunner_RetryArtifact(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count < 6 {
return false, fmt.Errorf("Not enough updates")
count, last := upd.Last()
if min := 6; count < min {
return false, fmt.Errorf("Not enough updates (%d < %d)", count, min)
}
last := upd.Allocs[upd.Count-1]
// web task should have completed successfully while bad task
// retries artififact fetching
@ -399,10 +413,10 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
go ar.Run()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusRunning {
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
}
@ -418,12 +432,12 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
ar.Update(update)
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
return false, nil
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
// Check the status has changed.
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusComplete {
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
}
@ -453,12 +467,12 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
return false, nil
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
// Check the status has changed.
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusComplete {
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
}
@ -504,12 +518,12 @@ func TestAllocRunner_Destroy(t *testing.T) {
}()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
return false, nil
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
// Check the status has changed.
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusComplete {
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
}
@ -608,11 +622,11 @@ func TestAllocRunner_SaveRestoreState(t *testing.T) {
return false, fmt.Errorf("Incorrect number of tasks")
}
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, nil
}
last := upd.Allocs[upd.Count-1]
return last.ClientStatus == structs.AllocClientStatusRunning, nil
}, func(err error) {
t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar2.alloc.TaskStates["web"])
@ -649,10 +663,11 @@ func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusRunning {
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
}
@ -721,12 +736,12 @@ func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
ar2.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
return false, nil
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
// Check the status has changed.
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusComplete {
return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
}
@ -775,10 +790,11 @@ func TestAllocRunner_SaveRestoreState_Upgrade(t *testing.T) {
// Snapshot state
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusRunning {
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
}
@ -803,22 +819,19 @@ func TestAllocRunner_SaveRestoreState_Upgrade(t *testing.T) {
defer ar2.Destroy() // Just-in-case of failure before Destroy below
testutil.WaitForResult(func() (bool, error) {
if len(ar2.tasks) != 1 {
return false, fmt.Errorf("Incorrect number of tasks")
count, last := upd.Last()
if min := 3; count < min {
return false, fmt.Errorf("expected at least %d updates but found %d", min, count)
}
if upd.Count < 3 {
return false, nil
}
for _, ev := range ar2.taskStates["web"].Events {
for _, ev := range last.TaskStates["web"].Events {
if strings.HasSuffix(ev.RestartReason, pre06ScriptCheckReason) {
return true, nil
}
}
return false, fmt.Errorf("no restart with proper reason found")
}, func(err error) {
t.Fatalf("err: %v\nAllocs: %#v\nWeb State: %#v", err, upd.Allocs, ar2.taskStates["web"])
count, last := upd.Last()
t.Fatalf("err: %v\nAllocs: %d\nweb state: % #v", err, count, pretty.Formatter(last.TaskStates["web"]))
})
// Destroy and wait
@ -996,10 +1009,10 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusFailed {
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed)
}
@ -1061,12 +1074,13 @@ func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
ar.alloc.TaskResources[task2.Name] = task2.Resources
go ar.Run()
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusComplete {
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
}
@ -1142,30 +1156,53 @@ func TestAllocRunner_TaskLeader_StopTG(t *testing.T) {
}
ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2, task3)
ar.alloc.TaskResources[task2.Name] = task2.Resources
defer ar.Destroy()
// Destroy before running so it shuts down the alloc runner right after
// starting all tasks
ar.Destroy()
go ar.Run()
select {
case <-ar.WaitCh():
case <-time.After(8 * time.Second):
t.Fatalf("timed out waiting for alloc runner to exit")
}
if len(upd.Allocs) != 1 {
t.Fatalf("expected 1 alloc update but found %d", len(upd.Allocs))
}
// Wait for tasks to start
oldCount, last := upd.Last()
testutil.WaitForResult(func() (bool, error) {
oldCount, last = upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
if n := len(last.TaskStates); n != 3 {
return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n)
}
for name, state := range last.TaskStates {
if state.State != structs.TaskStateRunning {
return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State)
}
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
a := upd.Allocs[0]
if a.TaskStates["leader"].FinishedAt.UnixNano() >= a.TaskStates["follower1"].FinishedAt.UnixNano() {
t.Fatalf("expected leader to finish before follower1: %s >= %s",
a.TaskStates["leader"].FinishedAt, a.TaskStates["follower1"].FinishedAt)
}
if a.TaskStates["leader"].FinishedAt.UnixNano() >= a.TaskStates["follower2"].FinishedAt.UnixNano() {
t.Fatalf("expected leader to finish before follower2: %s >= %s",
a.TaskStates["leader"].FinishedAt, a.TaskStates["follower2"].FinishedAt)
}
// Stop alloc
update := ar.Alloc()
update.DesiredStatus = structs.AllocDesiredStatusStop
ar.Update(update)
// Wait for tasks to stop
testutil.WaitForResult(func() (bool, error) {
newCount, last := upd.Last()
if newCount == oldCount {
return false, fmt.Errorf("no new updates (count: %d)", newCount)
}
if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() {
t.Fatalf("expected leader to finish before follower1: %s >= %s",
last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt)
}
if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() {
t.Fatalf("expected leader to finish before follower2: %s >= %s",
last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt)
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
}
func TestAllocRunner_MoveAllocDir(t *testing.T) {
@ -1181,10 +1218,10 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) {
defer ar.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd.Count == 0 {
_, last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd.Allocs[upd.Count-1]
if last.ClientStatus != structs.AllocClientStatusComplete {
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
}
@ -1213,10 +1250,10 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) {
defer ar1.Destroy()
testutil.WaitForResult(func() (bool, error) {
if upd1.Count == 0 {
_, last := upd1.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
last := upd1.Allocs[upd1.Count-1]
if last.ClientStatus != structs.AllocClientStatusComplete {
return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
}