From 159042a1a3285ccb66f2fe5d1a6ce3ae1da3066e Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 15 Feb 2019 15:42:46 -0800 Subject: [PATCH] client: fix setting alloc unhealthy at deadline During the 0.9 client refactor the code to fail a deployment when the deadline was reached was broken. This restores and tests that behavior. --- client/allochealth/tracker.go | 4 +-- client/allocrunner/alloc_runner_test.go | 27 ++++++++++++--- client/allocrunner/health_hook.go | 44 ++++++++++++++++--------- 3 files changed, 54 insertions(+), 21 deletions(-) diff --git a/client/allochealth/tracker.go b/client/allochealth/tracker.go index 47e62cf92..d9f943ccc 100644 --- a/client/allochealth/tracker.go +++ b/client/allochealth/tracker.go @@ -300,8 +300,8 @@ func (t *Tracker) watchTaskEvents() { } } -// watchConsulEvents iis a long lived watcher that watches for the health of the -// allocation's Consul checks. +// watchConsulEvents is a long lived watcher for the health of the allocation's +// Consul checks. func (t *Tracker) watchConsulEvents() { // checkTicker is the ticker that triggers us to look at the checks in // Consul diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index a15657f0d..d34c9ce9a 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -463,7 +463,6 @@ func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) { // TestAllocRunner_DeploymentHealth_Unhealthy_Checks asserts that the health // watcher will mark the allocation as unhealthy with failing checks. func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) { - t.Skip("FIXME(schmichael): fails and needs fixing") t.Parallel() alloc := mock.Alloc() @@ -473,6 +472,23 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) { "run_for": "10s", } + // Set a service with check + task.Services = []*structs.Service{ + { + Name: "fakservice", + PortLabel: "http", + Checks: []*structs.ServiceCheck{ + { + Name: "fakecheck", + Type: structs.ServiceCheckScript, + Command: "true", + Interval: 30 * time.Second, + Timeout: 5 * time.Second, + }, + }, + }, + } + // Make the alloc be part of a deployment alloc.DeploymentID = uuid.Generate() alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() @@ -497,7 +513,7 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) { task.Name: { Services: map[string]*consul.ServiceRegistration{ "123": { - Service: &api.AgentService{Service: "foo"}, + Service: &api.AgentService{Service: "fakeservice"}, Checks: []*api.AgentCheck{checkUnhealthy}, }, }, @@ -509,7 +525,10 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) { ar, err := NewAllocRunner(conf) require.NoError(t, err) go ar.Run() - defer ar.Destroy() + defer func() { + ar.Destroy() + <-ar.DestroyCh() + }() var lastUpdate *structs.Allocation upd := conf.StateUpdater.(*MockStateUpdater) @@ -536,5 +555,5 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) { require.NotEmpty(t, state.Events) last := state.Events[len(state.Events)-1] require.Equal(t, allochealth.AllocHealthEventSource, last.Type) - require.Contains(t, last.Message, "Services not healthy by deadline") + require.Contains(t, last.Message, "by deadline") } diff --git a/client/allocrunner/health_hook.go b/client/allocrunner/health_hook.go index 698548d4e..70d43ef49 100644 --- a/client/allocrunner/health_hook.go +++ b/client/allocrunner/health_hook.go @@ -104,6 +104,7 @@ func (h *allocHealthWatcherHook) Name() string { func (h *allocHealthWatcherHook) init() error { // No need to watch health as it's already set if h.healthSetter.HasHealth() { + h.logger.Trace("not watching; already has health set") return nil } @@ -125,11 +126,11 @@ func (h *allocHealthWatcherHook) init() error { // strategy. deadline, useChecks, minHealthyTime := getHealthParams(time.Now(), tg, h.isDeploy) - // Create a context that is canceled when the tracker should shutdown - // or the deadline is reached. + // Create a context that is canceled when the tracker should shutdown. ctx := context.Background() - ctx, h.cancelFn = context.WithDeadline(ctx, deadline) + ctx, h.cancelFn = context.WithCancel(ctx) + h.logger.Trace("watching", "deadline", deadline, "checks", useChecks, "min_healthy_time", minHealthyTime) // Create a new tracker, start it, and watch for health results. tracker := allochealth.NewTracker(ctx, h.logger, h.alloc, h.listener, h.consul, minHealthyTime, useChecks) @@ -137,7 +138,7 @@ func (h *allocHealthWatcherHook) init() error { // Create a new done chan and start watching for health updates h.watchDone = make(chan struct{}) - go h.watchHealth(ctx, tracker, h.watchDone) + go h.watchHealth(ctx, deadline, tracker, h.watchDone) return nil } @@ -196,28 +197,41 @@ func (h *allocHealthWatcherHook) Shutdown() { h.Postrun() } -// watchHealth watches alloc health until it is set, the alloc is stopped, or -// the context is canceled. watchHealth will be canceled and restarted on -// Updates so calls are serialized with a lock. -func (h *allocHealthWatcherHook) watchHealth(ctx context.Context, tracker *allochealth.Tracker, done chan<- struct{}) { +// watchHealth watches alloc health until it is set, the alloc is stopped, the +// deadline is reached, or the context is canceled. watchHealth will be +// canceled and restarted on Updates so calls are serialized with a lock. +func (h *allocHealthWatcherHook) watchHealth(ctx context.Context, deadline time.Time, tracker *allochealth.Tracker, done chan<- struct{}) { defer close(done) + // Default to unhealthy for the deadline reached case + healthy := false + select { case <-ctx.Done(): + // Graceful shutdown return case <-tracker.AllocStoppedCh(): + // Allocation has stopped so no need to set health return - case healthy := <-tracker.HealthyCh(): - // If this is an unhealthy deployment emit events for tasks - var taskEvents map[string]*structs.TaskEvent - if !healthy && h.isDeploy { - taskEvents = tracker.TaskEvents() - } + case <-time.After(deadline.Sub(time.Now())): + // Time is up! Fallthrough to set unhealthy. + h.logger.Trace("deadline reached; setting unhealthy", "deadline", deadline) - h.healthSetter.SetHealth(healthy, h.isDeploy, taskEvents) + case healthy = <-tracker.HealthyCh(): + // Health received. Fallthrough to set it. } + + h.logger.Trace("health set", "healthy", healthy) + + // If this is an unhealthy deployment emit events for tasks + var taskEvents map[string]*structs.TaskEvent + if !healthy && h.isDeploy { + taskEvents = tracker.TaskEvents() + } + + h.healthSetter.SetHealth(healthy, h.isDeploy, taskEvents) } // getHealthParams returns the health watcher parameters which vary based on