Merge pull request #7944 from hashicorp/b-health-checks-after-task-health
Allocs are healthy if service checks get healthy before task health
This commit is contained in:
commit
2622b88b00
|
@ -210,7 +210,8 @@ func (t *Tracker) setTaskHealth(healthy, terminal bool) {
|
|||
}
|
||||
|
||||
// setCheckHealth is used to mark the checks as either healthy or unhealthy.
|
||||
func (t *Tracker) setCheckHealth(healthy bool) {
|
||||
// returns true if health is propagated and no more health monitoring is needed
|
||||
func (t *Tracker) setCheckHealth(healthy bool) bool {
|
||||
t.l.Lock()
|
||||
defer t.l.Unlock()
|
||||
|
||||
|
@ -220,7 +221,7 @@ func (t *Tracker) setCheckHealth(healthy bool) {
|
|||
|
||||
// Only signal if we are healthy and so is the tasks
|
||||
if !t.checksHealthy {
|
||||
return
|
||||
return false
|
||||
}
|
||||
|
||||
select {
|
||||
|
@ -230,6 +231,7 @@ func (t *Tracker) setCheckHealth(healthy bool) {
|
|||
|
||||
// Shutdown the tracker
|
||||
t.cancelFn()
|
||||
return true
|
||||
}
|
||||
|
||||
// markAllocStopped is used to mark the allocation as having stopped.
|
||||
|
@ -379,7 +381,12 @@ OUTER:
|
|||
allocReg = newAllocReg
|
||||
}
|
||||
case <-healthyTimer.C:
|
||||
t.setCheckHealth(true)
|
||||
if t.setCheckHealth(true) {
|
||||
// final health set and propagated
|
||||
return
|
||||
}
|
||||
// tasks are unhealthy, reset and wait until all is healthy
|
||||
primed = false
|
||||
}
|
||||
|
||||
if allocReg == nil {
|
||||
|
|
|
@ -227,3 +227,112 @@ func TestTracker_Healthy_IfBothTasksAndConsulChecksAreHealthy(t *testing.T) {
|
|||
require.Fail(t, "expected a health status")
|
||||
}
|
||||
}
|
||||
|
||||
// TestTracker_Checks_Healthy_Before_TaskHealth asserts that we mark an alloc
|
||||
// healthy, if the checks pass before task health pass
|
||||
func TestTracker_Checks_Healthy_Before_TaskHealth(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
|
||||
task := alloc.Job.TaskGroups[0].Tasks[0]
|
||||
|
||||
// new task starting unhealthy, without services
|
||||
task2 := task.Copy()
|
||||
task2.Name = task2.Name + "2"
|
||||
task2.Services = nil
|
||||
alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
|
||||
|
||||
// Synthesize running alloc and tasks
|
||||
alloc.ClientStatus = structs.AllocClientStatusRunning
|
||||
alloc.TaskStates = map[string]*structs.TaskState{
|
||||
task.Name: {
|
||||
State: structs.TaskStateRunning,
|
||||
StartedAt: time.Now(),
|
||||
},
|
||||
task2.Name: {
|
||||
State: structs.TaskStatePending,
|
||||
},
|
||||
}
|
||||
|
||||
// Make Consul response
|
||||
check := &consulapi.AgentCheck{
|
||||
Name: task.Services[0].Checks[0].Name,
|
||||
Status: consulapi.HealthPassing,
|
||||
}
|
||||
taskRegs := map[string]*agentconsul.ServiceRegistrations{
|
||||
task.Name: {
|
||||
Services: map[string]*agentconsul.ServiceRegistration{
|
||||
task.Services[0].Name: {
|
||||
Service: &consulapi.AgentService{
|
||||
ID: "foo",
|
||||
Service: task.Services[0].Name,
|
||||
},
|
||||
Checks: []*consulapi.AgentCheck{check},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
logger := testlog.HCLogger(t)
|
||||
b := cstructs.NewAllocBroadcaster(logger)
|
||||
defer b.Close()
|
||||
|
||||
// Don't reply on the first call
|
||||
var called uint64
|
||||
consul := consul.NewMockConsulServiceClient(t, logger)
|
||||
consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
|
||||
if atomic.AddUint64(&called, 1) == 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
reg := &agentconsul.AllocRegistration{
|
||||
Tasks: taskRegs,
|
||||
}
|
||||
|
||||
return reg, nil
|
||||
}
|
||||
|
||||
ctx, cancelFn := context.WithCancel(context.Background())
|
||||
defer cancelFn()
|
||||
|
||||
checkInterval := 10 * time.Millisecond
|
||||
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
|
||||
time.Millisecond, true)
|
||||
tracker.checkLookupInterval = checkInterval
|
||||
tracker.Start()
|
||||
|
||||
// assert that we don't get marked healthy
|
||||
select {
|
||||
case <-time.After(4 * checkInterval):
|
||||
// still unhealthy, good
|
||||
case h := <-tracker.HealthyCh():
|
||||
require.Fail(t, "unexpected health event", h)
|
||||
}
|
||||
require.False(t, tracker.tasksHealthy)
|
||||
require.False(t, tracker.checksHealthy)
|
||||
|
||||
// now set task to healthy
|
||||
runningAlloc := alloc.Copy()
|
||||
runningAlloc.TaskStates = map[string]*structs.TaskState{
|
||||
task.Name: {
|
||||
State: structs.TaskStateRunning,
|
||||
StartedAt: time.Now(),
|
||||
},
|
||||
task2.Name: {
|
||||
State: structs.TaskStateRunning,
|
||||
StartedAt: time.Now(),
|
||||
},
|
||||
}
|
||||
err := b.Send(runningAlloc)
|
||||
require.NoError(t, err)
|
||||
|
||||
// eventually, it is marked as healthy
|
||||
select {
|
||||
case <-time.After(4 * checkInterval):
|
||||
require.Fail(t, "timed out while waiting for health")
|
||||
case h := <-tracker.HealthyCh():
|
||||
require.True(t, h)
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue