Merge pull request #3296 from hashicorp/ensure_registration_race
Fix race condition between removing a service and adding a check for …
This commit is contained in:
commit
efae3cccc0
|
@ -1790,7 +1790,11 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
|
|||
}
|
||||
|
||||
// Add to the local state for anti-entropy
|
||||
a.state.AddCheck(check, token)
|
||||
err := a.state.AddCheck(check, token)
|
||||
if err != nil {
|
||||
a.cancelCheckMonitors(check.CheckID)
|
||||
return err
|
||||
}
|
||||
|
||||
// Persist the check
|
||||
if persist && !a.config.DevMode {
|
||||
|
@ -1814,6 +1818,21 @@ func (a *Agent) RemoveCheck(checkID types.CheckID, persist bool) error {
|
|||
a.checkLock.Lock()
|
||||
defer a.checkLock.Unlock()
|
||||
|
||||
a.cancelCheckMonitors(checkID)
|
||||
|
||||
if persist {
|
||||
if err := a.purgeCheck(checkID); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := a.purgeCheckState(checkID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
a.logger.Printf("[DEBUG] agent: removed check %q", checkID)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) cancelCheckMonitors(checkID types.CheckID) {
|
||||
// Stop any monitors
|
||||
delete(a.checkReapAfter, checkID)
|
||||
if check, ok := a.checkMonitors[checkID]; ok {
|
||||
|
@ -1836,16 +1855,6 @@ func (a *Agent) RemoveCheck(checkID types.CheckID, persist bool) error {
|
|||
check.Stop()
|
||||
delete(a.checkDockers, checkID)
|
||||
}
|
||||
if persist {
|
||||
if err := a.purgeCheck(checkID); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := a.purgeCheckState(checkID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
a.logger.Printf("[DEBUG] agent: removed check %q", checkID)
|
||||
return nil
|
||||
}
|
||||
|
||||
// updateTTLCheck is used to update the status of a TTL check via the Agent API.
|
||||
|
|
|
@ -247,18 +247,25 @@ func (l *localState) checkToken(checkID types.CheckID) string {
|
|||
// AddCheck is used to add a health check to the local state.
|
||||
// This entry is persistent and the agent will make a best effort to
|
||||
// ensure it is registered
|
||||
func (l *localState) AddCheck(check *structs.HealthCheck, token string) {
|
||||
func (l *localState) AddCheck(check *structs.HealthCheck, token string) error {
|
||||
// Set the node name
|
||||
check.Node = l.config.NodeName
|
||||
|
||||
l.Lock()
|
||||
defer l.Unlock()
|
||||
|
||||
// if there is a serviceID associated with the check, make sure it exists before adding it
|
||||
// NOTE - This logic may be moved to be handled within the Agent's Addcheck method after a refactor
|
||||
if check.ServiceID != "" && l.services[check.ServiceID] == nil {
|
||||
return fmt.Errorf("ServiceID %q does not exist", check.ServiceID)
|
||||
}
|
||||
|
||||
l.checks[check.CheckID] = check
|
||||
l.checkStatus[check.CheckID] = syncStatus{}
|
||||
l.checkTokens[check.CheckID] = token
|
||||
delete(l.checkCriticalTime, check.CheckID)
|
||||
l.changeMade()
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemoveCheck is used to remove a health check from the local state.
|
||||
|
|
|
@ -1475,6 +1475,9 @@ func TestAgent_checkCriticalTime(t *testing.T) {
|
|||
cfg := TestConfig()
|
||||
l := NewLocalState(cfg, nil)
|
||||
|
||||
svc := &structs.NodeService{ID: "redis", Service: "redis", Port: 8000}
|
||||
l.AddService(svc, "")
|
||||
|
||||
// Add a passing check and make sure it's not critical.
|
||||
checkID := types.CheckID("redis:1")
|
||||
chk := &structs.HealthCheck{
|
||||
|
|
Loading…
Reference in New Issue