Fix TestAllocRunner_TaskLeader_StopTG

Also make alloc runner tests less racy. Basically every alloc runner test used to have races with `upd.{Count,Allocs}`
2017-07-21 12:24:40 -07:00 · 2017-07-21 12:24:40 -07:00 · 6e80a8ee39
parent 0fabb5902a
commit 6e80a8ee39
1 changed files with 115 additions and 78 deletions
--- a/client/alloc_runner_test.go
+++ b/client/alloc_runner_test.go
@ -6,6 +6,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"sync"
 	"testing"
 	"text/template"
 	"time"
@ -16,6 +17,7 @@ import (
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/hashicorp/nomad/testutil"
+	"github.com/kr/pretty"

 	"github.com/hashicorp/nomad/client/config"
 	ctestutil "github.com/hashicorp/nomad/client/testutil"
@ -23,13 +25,26 @@ import (
 )

 type MockAllocStateUpdater struct {
-	Count  int
 	Allocs []*structs.Allocation
+	mu     sync.Mutex
 }

+// Update fulfills the TaskStateUpdater interface
 func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) {
-	m.Count += 1
+	m.mu.Lock()
 	m.Allocs = append(m.Allocs, alloc)
+	m.mu.Unlock()
+}
+
+// Last returns the total number of updates and the last alloc (or nil)
+func (m *MockAllocStateUpdater) Last() (int, *structs.Allocation) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	n := len(m.Allocs)
+	if n == 0 {
+		return 0, nil
+	}
+	return n, m.Allocs[n-1].Copy()
 }

 func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
@ -61,10 +76,10 @@ func TestAllocRunner_SimpleRun(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
 		}
@ -96,10 +111,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_BadStart(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
 			return false, fmt.Errorf("want deployment status unhealthy; got unset")
 		} else if *last.DeploymentStatus.Healthy {
@ -136,10 +151,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Deadline(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
 			return false, fmt.Errorf("want deployment status unhealthy; got unset")
 		} else if *last.DeploymentStatus.Healthy {
@ -181,10 +196,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
 			return false, fmt.Errorf("want deployment status unhealthy; got unset")
 		} else if !*last.DeploymentStatus.Healthy {
@ -249,10 +264,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_Checks(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
 			return false, fmt.Errorf("want deployment status unhealthy; got unset")
 		} else if !*last.DeploymentStatus.Healthy {
@ -291,10 +306,10 @@ func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
 			return false, fmt.Errorf("want deployment status unhealthy; got unset")
 		} else if !*last.DeploymentStatus.Healthy {
@ -306,17 +321,16 @@ func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) {
 	})

 	// Mimick an update to a new deployment id
-	oldCount := upd.Count
-	last := upd.Allocs[oldCount-1].Copy()
+	oldCount, last := upd.Last()
 	last.DeploymentStatus = nil
 	last.DeploymentID = structs.GenerateUUID()
 	ar.Update(last)

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count <= oldCount {
+		newCount, last := upd.Last()
+		if newCount <= oldCount {
 			return false, fmt.Errorf("No new updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
 			return false, fmt.Errorf("want deployment status unhealthy; got unset")
 		} else if !*last.DeploymentStatus.Healthy {
@ -360,10 +374,10 @@ func TestAllocRunner_RetryArtifact(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count < 6 {
-			return false, fmt.Errorf("Not enough updates")
+		count, last := upd.Last()
+		if min := 6; count < min {
+			return false, fmt.Errorf("Not enough updates (%d < %d)", count, min)
 		}
-		last := upd.Allocs[upd.Count-1]

 		// web task should have completed successfully while bad task
 		// retries artififact fetching
@ -399,10 +413,10 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
 	go ar.Run()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusRunning {
 			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
 		}
@ -418,12 +432,12 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
 	ar.Update(update)

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
-			return false, nil
+		_, last := upd.Last()
+		if last == nil {
+			return false, fmt.Errorf("No updates")
 		}

 		// Check the status has changed.
-		last := upd.Allocs[upd.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
 		}
@ -453,12 +467,12 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
 	ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
-			return false, nil
+		_, last := upd.Last()
+		if last == nil {
+			return false, fmt.Errorf("No updates")
 		}

 		// Check the status has changed.
-		last := upd.Allocs[upd.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
 		}
@ -504,12 +518,12 @@ func TestAllocRunner_Destroy(t *testing.T) {
 	}()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
-			return false, nil
+		_, last := upd.Last()
+		if last == nil {
+			return false, fmt.Errorf("No updates")
 		}

 		// Check the status has changed.
-		last := upd.Allocs[upd.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
 		}
@ -608,11 +622,11 @@ func TestAllocRunner_SaveRestoreState(t *testing.T) {
 			return false, fmt.Errorf("Incorrect number of tasks")
 		}

-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, nil
 		}

-		last := upd.Allocs[upd.Count-1]
 		return last.ClientStatus == structs.AllocClientStatusRunning, nil
 	}, func(err error) {
 		t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar2.alloc.TaskStates["web"])
@ -649,10 +663,11 @@ func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
+
 		if last.ClientStatus != structs.AllocClientStatusRunning {
 			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
 		}
@ -721,12 +736,12 @@ func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
 	ar2.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
-			return false, nil
+		_, last := upd.Last()
+		if last == nil {
+			return false, fmt.Errorf("No updates")
 		}

 		// Check the status has changed.
-		last := upd.Allocs[upd.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
 		}
@ -775,10 +790,11 @@ func TestAllocRunner_SaveRestoreState_Upgrade(t *testing.T) {

 	// Snapshot state
 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
+
 		if last.ClientStatus != structs.AllocClientStatusRunning {
 			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
 		}
@ -803,22 +819,19 @@ func TestAllocRunner_SaveRestoreState_Upgrade(t *testing.T) {
 	defer ar2.Destroy() // Just-in-case of failure before Destroy below

 	testutil.WaitForResult(func() (bool, error) {
-		if len(ar2.tasks) != 1 {
-			return false, fmt.Errorf("Incorrect number of tasks")
+		count, last := upd.Last()
+		if min := 3; count < min {
+			return false, fmt.Errorf("expected at least %d updates but found %d", min, count)
 		}
-
-		if upd.Count < 3 {
-			return false, nil
-		}
-
-		for _, ev := range ar2.taskStates["web"].Events {
+		for _, ev := range last.TaskStates["web"].Events {
 			if strings.HasSuffix(ev.RestartReason, pre06ScriptCheckReason) {
 				return true, nil
 			}
 		}
 		return false, fmt.Errorf("no restart with proper reason found")
 	}, func(err error) {
-		t.Fatalf("err: %v\nAllocs: %#v\nWeb State: %#v", err, upd.Allocs, ar2.taskStates["web"])
+		count, last := upd.Last()
+		t.Fatalf("err: %v\nAllocs: %d\nweb state: % #v", err, count, pretty.Formatter(last.TaskStates["web"]))
 	})

 	// Destroy and wait
@ -996,10 +1009,10 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusFailed {
 			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed)
 		}
@ -1061,12 +1074,13 @@ func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
 	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
 	ar.alloc.TaskResources[task2.Name] = task2.Resources
 	go ar.Run()
+	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
 		}
@ -1142,30 +1156,53 @@ func TestAllocRunner_TaskLeader_StopTG(t *testing.T) {
 	}
 	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2, task3)
 	ar.alloc.TaskResources[task2.Name] = task2.Resources
+	defer ar.Destroy()

-	// Destroy before running so it shuts down the alloc runner right after
-	// starting all tasks
-	ar.Destroy()
 	go ar.Run()
-	select {
-	case <-ar.WaitCh():
-	case <-time.After(8 * time.Second):
-		t.Fatalf("timed out waiting for alloc runner to exit")
-	}

-	if len(upd.Allocs) != 1 {
-		t.Fatalf("expected 1 alloc update but found %d", len(upd.Allocs))
-	}
+	// Wait for tasks to start
+	oldCount, last := upd.Last()
+	testutil.WaitForResult(func() (bool, error) {
+		oldCount, last = upd.Last()
+		if last == nil {
+			return false, fmt.Errorf("No updates")
+		}
+		if n := len(last.TaskStates); n != 3 {
+			return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n)
+		}
+		for name, state := range last.TaskStates {
+			if state.State != structs.TaskStateRunning {
+				return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State)
+			}
+		}
+		return true, nil
+	}, func(err error) {
+		t.Fatalf("err: %v", err)
+	})

-	a := upd.Allocs[0]
-	if a.TaskStates["leader"].FinishedAt.UnixNano() >= a.TaskStates["follower1"].FinishedAt.UnixNano() {
-		t.Fatalf("expected leader to finish before follower1: %s >= %s",
-			a.TaskStates["leader"].FinishedAt, a.TaskStates["follower1"].FinishedAt)
-	}
-	if a.TaskStates["leader"].FinishedAt.UnixNano() >= a.TaskStates["follower2"].FinishedAt.UnixNano() {
-		t.Fatalf("expected leader to finish before follower2: %s >= %s",
-			a.TaskStates["leader"].FinishedAt, a.TaskStates["follower2"].FinishedAt)
-	}
+	// Stop alloc
+	update := ar.Alloc()
+	update.DesiredStatus = structs.AllocDesiredStatusStop
+	ar.Update(update)
+
+	// Wait for tasks to stop
+	testutil.WaitForResult(func() (bool, error) {
+		newCount, last := upd.Last()
+		if newCount == oldCount {
+			return false, fmt.Errorf("no new updates (count: %d)", newCount)
+		}
+		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() {
+			t.Fatalf("expected leader to finish before follower1: %s >= %s",
+				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt)
+		}
+		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() {
+			t.Fatalf("expected leader to finish before follower2: %s >= %s",
+				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt)
+		}
+		return true, nil
+	}, func(err error) {
+		t.Fatalf("err: %v", err)
+	})
 }

 func TestAllocRunner_MoveAllocDir(t *testing.T) {
@ -1181,10 +1218,10 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) {
 	defer ar.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd.Count == 0 {
+		_, last := upd.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd.Allocs[upd.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
 		}
@ -1213,10 +1250,10 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) {
 	defer ar1.Destroy()

 	testutil.WaitForResult(func() (bool, error) {
-		if upd1.Count == 0 {
+		_, last := upd1.Last()
+		if last == nil {
 			return false, fmt.Errorf("No updates")
 		}
-		last := upd1.Allocs[upd1.Count-1]
 		if last.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
 		}