Merge pull request #3296 from hashicorp/ensure_registration_race
Fix race condition between removing a service and adding a check for …
This commit is contained in:
commit
efae3cccc0
|
@ -1790,7 +1790,11 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add to the local state for anti-entropy
|
// Add to the local state for anti-entropy
|
||||||
a.state.AddCheck(check, token)
|
err := a.state.AddCheck(check, token)
|
||||||
|
if err != nil {
|
||||||
|
a.cancelCheckMonitors(check.CheckID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Persist the check
|
// Persist the check
|
||||||
if persist && !a.config.DevMode {
|
if persist && !a.config.DevMode {
|
||||||
|
@ -1814,6 +1818,21 @@ func (a *Agent) RemoveCheck(checkID types.CheckID, persist bool) error {
|
||||||
a.checkLock.Lock()
|
a.checkLock.Lock()
|
||||||
defer a.checkLock.Unlock()
|
defer a.checkLock.Unlock()
|
||||||
|
|
||||||
|
a.cancelCheckMonitors(checkID)
|
||||||
|
|
||||||
|
if persist {
|
||||||
|
if err := a.purgeCheck(checkID); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := a.purgeCheckState(checkID); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
a.logger.Printf("[DEBUG] agent: removed check %q", checkID)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Agent) cancelCheckMonitors(checkID types.CheckID) {
|
||||||
// Stop any monitors
|
// Stop any monitors
|
||||||
delete(a.checkReapAfter, checkID)
|
delete(a.checkReapAfter, checkID)
|
||||||
if check, ok := a.checkMonitors[checkID]; ok {
|
if check, ok := a.checkMonitors[checkID]; ok {
|
||||||
|
@ -1836,16 +1855,6 @@ func (a *Agent) RemoveCheck(checkID types.CheckID, persist bool) error {
|
||||||
check.Stop()
|
check.Stop()
|
||||||
delete(a.checkDockers, checkID)
|
delete(a.checkDockers, checkID)
|
||||||
}
|
}
|
||||||
if persist {
|
|
||||||
if err := a.purgeCheck(checkID); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := a.purgeCheckState(checkID); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
a.logger.Printf("[DEBUG] agent: removed check %q", checkID)
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateTTLCheck is used to update the status of a TTL check via the Agent API.
|
// updateTTLCheck is used to update the status of a TTL check via the Agent API.
|
||||||
|
|
|
@ -247,18 +247,25 @@ func (l *localState) checkToken(checkID types.CheckID) string {
|
||||||
// AddCheck is used to add a health check to the local state.
|
// AddCheck is used to add a health check to the local state.
|
||||||
// This entry is persistent and the agent will make a best effort to
|
// This entry is persistent and the agent will make a best effort to
|
||||||
// ensure it is registered
|
// ensure it is registered
|
||||||
func (l *localState) AddCheck(check *structs.HealthCheck, token string) {
|
func (l *localState) AddCheck(check *structs.HealthCheck, token string) error {
|
||||||
// Set the node name
|
// Set the node name
|
||||||
check.Node = l.config.NodeName
|
check.Node = l.config.NodeName
|
||||||
|
|
||||||
l.Lock()
|
l.Lock()
|
||||||
defer l.Unlock()
|
defer l.Unlock()
|
||||||
|
|
||||||
|
// if there is a serviceID associated with the check, make sure it exists before adding it
|
||||||
|
// NOTE - This logic may be moved to be handled within the Agent's Addcheck method after a refactor
|
||||||
|
if check.ServiceID != "" && l.services[check.ServiceID] == nil {
|
||||||
|
return fmt.Errorf("ServiceID %q does not exist", check.ServiceID)
|
||||||
|
}
|
||||||
|
|
||||||
l.checks[check.CheckID] = check
|
l.checks[check.CheckID] = check
|
||||||
l.checkStatus[check.CheckID] = syncStatus{}
|
l.checkStatus[check.CheckID] = syncStatus{}
|
||||||
l.checkTokens[check.CheckID] = token
|
l.checkTokens[check.CheckID] = token
|
||||||
delete(l.checkCriticalTime, check.CheckID)
|
delete(l.checkCriticalTime, check.CheckID)
|
||||||
l.changeMade()
|
l.changeMade()
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// RemoveCheck is used to remove a health check from the local state.
|
// RemoveCheck is used to remove a health check from the local state.
|
||||||
|
|
|
@ -1475,6 +1475,9 @@ func TestAgent_checkCriticalTime(t *testing.T) {
|
||||||
cfg := TestConfig()
|
cfg := TestConfig()
|
||||||
l := NewLocalState(cfg, nil)
|
l := NewLocalState(cfg, nil)
|
||||||
|
|
||||||
|
svc := &structs.NodeService{ID: "redis", Service: "redis", Port: 8000}
|
||||||
|
l.AddService(svc, "")
|
||||||
|
|
||||||
// Add a passing check and make sure it's not critical.
|
// Add a passing check and make sure it's not critical.
|
||||||
checkID := types.CheckID("redis:1")
|
checkID := types.CheckID("redis:1")
|
||||||
chk := &structs.HealthCheck{
|
chk := &structs.HealthCheck{
|
||||||
|
|
Loading…
Reference in New Issue