client: fix setting alloc unhealthy at deadline

During the 0.9 client refactor, the code that fails a deployment when the
deadline is reached was broken. This commit restores and tests that behavior.
Michael Schurter 2019-02-15 15:42:46 -08:00
parent b5d9d6a32d
commit 159042a1a3
3 changed files with 54 additions and 21 deletions
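
Root cause, in short: the pre-fix hook derived the health watcher's context
with context.WithDeadline, so hitting the deadline fired the same ctx.Done()
the graceful-shutdown path uses, and the watcher returned without ever marking
the allocation unhealthy. A minimal standalone sketch of that failure mode
(illustrative Go, not Nomad code):

package main

import (
	"context"
	"fmt"
	"time"
)

func main() {
	// Deriving the watcher context with a deadline conflates "time ran
	// out" with "we are shutting down": both close ctx.Done().
	ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(50*time.Millisecond))
	defer cancel()

	healthy := make(chan bool) // stand-in for the tracker's HealthyCh; never fires here

	select {
	case <-ctx.Done():
		// Deadline and shutdown both land here; returning silently from
		// this case is how the unhealthy-at-deadline signal got dropped.
		fmt.Println("done:", ctx.Err()) // prints "context deadline exceeded"
	case h := <-healthy:
		fmt.Println("health reported:", h)
	}
}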


@@ -300,8 +300,8 @@ func (t *Tracker) watchTaskEvents() {
 	}
 }
 
-// watchConsulEvents iis a long lived watcher that watches for the health of the
-// allocation's Consul checks.
+// watchConsulEvents is a long lived watcher for the health of the allocation's
+// Consul checks.
 func (t *Tracker) watchConsulEvents() {
 	// checkTicker is the ticker that triggers us to look at the checks in
 	// Consul


@@ -463,7 +463,6 @@ func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) {
 // TestAllocRunner_DeploymentHealth_Unhealthy_Checks asserts that the health
 // watcher will mark the allocation as unhealthy with failing checks.
 func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) {
-	t.Skip("FIXME(schmichael): fails and needs fixing")
 	t.Parallel()
 
 	alloc := mock.Alloc()
@@ -473,6 +472,23 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) {
 		"run_for": "10s",
 	}
 
+	// Set a service with check
+	task.Services = []*structs.Service{
+		{
+			Name:      "fakservice",
+			PortLabel: "http",
+			Checks: []*structs.ServiceCheck{
+				{
+					Name:     "fakecheck",
+					Type:     structs.ServiceCheckScript,
+					Command:  "true",
+					Interval: 30 * time.Second,
+					Timeout:  5 * time.Second,
+				},
+			},
+		},
+	}
+
 	// Make the alloc be part of a deployment
 	alloc.DeploymentID = uuid.Generate()
 	alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
@@ -497,7 +513,7 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) {
 			task.Name: {
 				Services: map[string]*consul.ServiceRegistration{
 					"123": {
-						Service: &api.AgentService{Service: "foo"},
+						Service: &api.AgentService{Service: "fakeservice"},
 						Checks:  []*api.AgentCheck{checkUnhealthy},
 					},
 				},
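
The registration above comes from a mocked Consul client, so the tracker sees
a failing check without a real Consul agent. checkUnhealthy is defined earlier
in the test and not shown in this diff; a hypothetical stand-in would be an
*api.AgentCheck in the critical state, along these lines:

// Hypothetical stand-in for the test's checkUnhealthy fixture (its real
// definition sits outside this hunk): a Consul check in the critical state.
var checkUnhealthy = &api.AgentCheck{
	CheckID: "123",
	Name:    "fakecheck",
	Status:  api.HealthCritical, // constant from github.com/hashicorp/consul/api
}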
@@ -509,7 +525,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) {
 	ar, err := NewAllocRunner(conf)
 	require.NoError(t, err)
 	go ar.Run()
-	defer ar.Destroy()
+	defer func() {
+		ar.Destroy()
+		<-ar.DestroyCh()
+	}()
 
 	var lastUpdate *structs.Allocation
 	upd := conf.StateUpdater.(*MockStateUpdater)
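
In the 0.9 runner, Destroy returns before teardown finishes, so a bare
deferred ar.Destroy() can let the test exit while the runner is still cleaning
up; blocking on DestroyCh serializes the teardown. A self-contained sketch of
the pattern, with fakeRunner as a hypothetical stand-in for the alloc runner:

package main

import (
	"fmt"
	"time"
)

// fakeRunner mimics the alloc runner's async teardown: Destroy returns
// immediately and destroyedCh is closed once cleanup actually finishes.
type fakeRunner struct{ destroyedCh chan struct{} }

func (r *fakeRunner) Destroy() {
	go func() {
		time.Sleep(10 * time.Millisecond) // pretend cleanup work
		close(r.destroyedCh)
	}()
}

func (r *fakeRunner) DestroyCh() <-chan struct{} { return r.destroyedCh }

func main() {
	r := &fakeRunner{destroyedCh: make(chan struct{})}
	r.Destroy()
	select {
	case <-r.DestroyCh():
		fmt.Println("teardown complete")
	case <-time.After(time.Second):
		fmt.Println("timed out waiting for teardown")
	}
}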
@@ -536,5 +555,5 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) {
 	require.NotEmpty(t, state.Events)
 	last := state.Events[len(state.Events)-1]
 	require.Equal(t, allochealth.AllocHealthEventSource, last.Type)
-	require.Contains(t, last.Message, "Services not healthy by deadline")
+	require.Contains(t, last.Message, "by deadline")
 }


@@ -104,6 +104,7 @@ func (h *allocHealthWatcherHook) Name() string {
 func (h *allocHealthWatcherHook) init() error {
 	// No need to watch health as it's already set
 	if h.healthSetter.HasHealth() {
+		h.logger.Trace("not watching; already has health set")
 		return nil
 	}
 
@@ -125,11 +126,11 @@ func (h *allocHealthWatcherHook) init() error {
 	// strategy.
 	deadline, useChecks, minHealthyTime := getHealthParams(time.Now(), tg, h.isDeploy)
 
-	// Create a context that is canceled when the tracker should shutdown
-	// or the deadline is reached.
+	// Create a context that is canceled when the tracker should shutdown.
 	ctx := context.Background()
-	ctx, h.cancelFn = context.WithDeadline(ctx, deadline)
+	ctx, h.cancelFn = context.WithCancel(ctx)
+	h.logger.Trace("watching", "deadline", deadline, "checks", useChecks, "min_healthy_time", minHealthyTime)
 
 	// Create a new tracker, start it, and watch for health results.
 	tracker := allochealth.NewTracker(ctx, h.logger, h.alloc,
 		h.listener, h.consul, minHealthyTime, useChecks)
@@ -137,7 +138,7 @@ func (h *allocHealthWatcherHook) init() error {
 	// Create a new done chan and start watching for health updates
 	h.watchDone = make(chan struct{})
-	go h.watchHealth(ctx, tracker, h.watchDone)
+	go h.watchHealth(ctx, deadline, tracker, h.watchDone)
 
 	return nil
 }
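
Threading the deadline into watchHealth as a plain time.Time, instead of
baking it into the context, is what lets the watcher distinguish "time ran
out" from "we are shutting down". One edge worth noting: time.After with a
non-positive duration fires immediately, so a deadline already in the past
still takes the unhealthy branch rather than blocking. A tiny standalone
check of that property:

package main

import (
	"fmt"
	"time"
)

func main() {
	deadline := time.Now().Add(-time.Second) // deadline already expired
	<-time.After(deadline.Sub(time.Now()))   // non-positive duration: fires immediately
	fmt.Println("deadline branch taken without blocking")
}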
@@ -196,28 +197,41 @@ func (h *allocHealthWatcherHook) Shutdown() {
 	h.Postrun()
 }
 
-// watchHealth watches alloc health until it is set, the alloc is stopped, or
-// the context is canceled. watchHealth will be canceled and restarted on
-// Updates so calls are serialized with a lock.
-func (h *allocHealthWatcherHook) watchHealth(ctx context.Context, tracker *allochealth.Tracker, done chan<- struct{}) {
+// watchHealth watches alloc health until it is set, the alloc is stopped, the
+// deadline is reached, or the context is canceled. watchHealth will be
+// canceled and restarted on Updates so calls are serialized with a lock.
+func (h *allocHealthWatcherHook) watchHealth(ctx context.Context, deadline time.Time, tracker *allochealth.Tracker, done chan<- struct{}) {
 	defer close(done)
 
+	// Default to unhealthy for the deadline reached case
+	healthy := false
+
 	select {
 	case <-ctx.Done():
+		// Graceful shutdown
 		return
 
 	case <-tracker.AllocStoppedCh():
+		// Allocation has stopped so no need to set health
 		return
 
-	case healthy := <-tracker.HealthyCh():
-		// If this is an unhealthy deployment emit events for tasks
-		var taskEvents map[string]*structs.TaskEvent
-		if !healthy && h.isDeploy {
-			taskEvents = tracker.TaskEvents()
-		}
-
-		h.healthSetter.SetHealth(healthy, h.isDeploy, taskEvents)
+	case <-time.After(deadline.Sub(time.Now())):
+		// Time is up! Fallthrough to set unhealthy.
+		h.logger.Trace("deadline reached; setting unhealthy", "deadline", deadline)
+
+	case healthy = <-tracker.HealthyCh():
+		// Health received. Fallthrough to set it.
 	}
+
+	h.logger.Trace("health set", "healthy", healthy)
+
+	// If this is an unhealthy deployment emit events for tasks
+	var taskEvents map[string]*structs.TaskEvent
+	if !healthy && h.isDeploy {
+		taskEvents = tracker.TaskEvents()
+	}
+
+	h.healthSetter.SetHealth(healthy, h.isDeploy, taskEvents)
 }
 
 // getHealthParams returns the health watcher parameters which vary based on
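
Putting the pieces together: the fixed select gives each outcome its own case,
and healthy defaults to false so the deadline path falls through to report
unhealthy while shutdown still returns silently. A condensed, standalone
sketch of the same pattern (names simplified; not the Nomad code):

package main

import (
	"context"
	"fmt"
	"time"
)

// watch mirrors the structure of watchHealth: shutdown returns without
// setting anything, while both the deadline and a real health report fall
// through to set health.
func watch(ctx context.Context, deadline time.Time, healthyCh <-chan bool) {
	healthy := false // default to unhealthy for the deadline case
	select {
	case <-ctx.Done():
		return // graceful shutdown: set nothing
	case <-time.After(time.Until(deadline)):
		// deadline reached: fall through with healthy == false
	case healthy = <-healthyCh:
		// health reported: fall through with the reported value
	}
	fmt.Println("set health:", healthy)
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	watch(ctx, time.Now().Add(50*time.Millisecond), make(chan bool)) // prints "set health: false"
}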