Vet and small improvement on watcher failure detection
This commit is contained in:
parent
45712c6ca3
commit
bf97a2455c
|
@ -497,6 +497,17 @@ func (r *AllocRunner) Alloc() *structs.Allocation {
|
|||
alloc.ClientStatus = getClientStatus(r.taskStates)
|
||||
r.taskStatusLock.RUnlock()
|
||||
|
||||
// If the client status is failed and we are part of a deployment, mark the
|
||||
// alloc as unhealthy. This guards against the watcher not being started.
|
||||
r.allocLock.Lock()
|
||||
if alloc.ClientStatus == structs.AllocClientStatusFailed &&
|
||||
alloc.DeploymentID != "" && !alloc.DeploymentStatus.IsUnhealthy() {
|
||||
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
|
||||
Healthy: helper.BoolToPtr(false),
|
||||
}
|
||||
}
|
||||
r.allocLock.Unlock()
|
||||
|
||||
return alloc
|
||||
}
|
||||
|
||||
|
@ -670,10 +681,6 @@ func (r *AllocRunner) Run() {
|
|||
defer close(r.waitCh)
|
||||
go r.dirtySyncState()
|
||||
|
||||
// Start the watcher
|
||||
wCtx, watcherCancel := context.WithCancel(r.ctx)
|
||||
go r.watchHealth(wCtx)
|
||||
|
||||
// Find the task group to run in the allocation
|
||||
alloc := r.Alloc()
|
||||
tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
|
||||
|
@ -716,6 +723,10 @@ func (r *AllocRunner) Run() {
|
|||
return
|
||||
}
|
||||
|
||||
// Start the watcher
|
||||
wCtx, watcherCancel := context.WithCancel(r.ctx)
|
||||
go r.watchHealth(wCtx)
|
||||
|
||||
// Start the task runners
|
||||
r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.alloc.ID)
|
||||
r.taskLock.Lock()
|
||||
|
@ -787,6 +798,10 @@ OUTER:
|
|||
|
||||
// Block until we should destroy the state of the alloc
|
||||
r.handleDestroy()
|
||||
|
||||
// Free up the context. It has likely exited already
|
||||
watcherCancel()
|
||||
|
||||
r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.alloc.ID)
|
||||
}
|
||||
|
||||
|
|
|
@ -133,6 +133,14 @@ OUTER:
|
|||
return
|
||||
}
|
||||
|
||||
// If the alloc is marked as failed by the client, set the status to
|
||||
// unhealthy
|
||||
if alloc.ClientStatus == structs.AllocClientStatusFailed {
|
||||
r.logger.Printf("[TRACE] client.alloc_watcher: client status failed for alloc %q", alloc.ID)
|
||||
setHealth(false)
|
||||
return
|
||||
}
|
||||
|
||||
if len(alloc.TaskStates) != len(tg.Tasks) {
|
||||
r.logger.Printf("[TRACE] client.alloc_watcher: all task runners haven't started")
|
||||
continue OUTER
|
||||
|
|
Loading…
Reference in a new issue