diff --git a/agent/agent.go b/agent/agent.go index 83de1b357..391fbc21b 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1790,7 +1790,11 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, } // Add to the local state for anti-entropy - a.state.AddCheck(check, token) + err := a.state.AddCheck(check, token) + if err != nil { + a.cancelCheckMonitors(check.CheckID) + return err + } // Persist the check if persist && !a.config.DevMode { @@ -1814,6 +1818,21 @@ func (a *Agent) RemoveCheck(checkID types.CheckID, persist bool) error { a.checkLock.Lock() defer a.checkLock.Unlock() + a.cancelCheckMonitors(checkID) + + if persist { + if err := a.purgeCheck(checkID); err != nil { + return err + } + if err := a.purgeCheckState(checkID); err != nil { + return err + } + } + a.logger.Printf("[DEBUG] agent: removed check %q", checkID) + return nil +} + +func (a *Agent) cancelCheckMonitors(checkID types.CheckID) { // Stop any monitors delete(a.checkReapAfter, checkID) if check, ok := a.checkMonitors[checkID]; ok { @@ -1836,16 +1855,6 @@ func (a *Agent) RemoveCheck(checkID types.CheckID, persist bool) error { check.Stop() delete(a.checkDockers, checkID) } - if persist { - if err := a.purgeCheck(checkID); err != nil { - return err - } - if err := a.purgeCheckState(checkID); err != nil { - return err - } - } - a.logger.Printf("[DEBUG] agent: removed check %q", checkID) - return nil } // updateTTLCheck is used to update the status of a TTL check via the Agent API. diff --git a/agent/local.go b/agent/local.go index d71ca2211..decf7acdd 100644 --- a/agent/local.go +++ b/agent/local.go @@ -247,18 +247,25 @@ func (l *localState) checkToken(checkID types.CheckID) string { // AddCheck is used to add a health check to the local state. // This entry is persistent and the agent will make a best effort to // ensure it is registered -func (l *localState) AddCheck(check *structs.HealthCheck, token string) { +func (l *localState) AddCheck(check *structs.HealthCheck, token string) error { // Set the node name check.Node = l.config.NodeName l.Lock() defer l.Unlock() + // if there is a serviceID associated with the check, make sure it exists before adding it + // NOTE - This logic may be moved to be handled within the Agent's Addcheck method after a refactor + if check.ServiceID != "" && l.services[check.ServiceID] == nil { + return fmt.Errorf("ServiceID %q does not exist", check.ServiceID) + } + l.checks[check.CheckID] = check l.checkStatus[check.CheckID] = syncStatus{} l.checkTokens[check.CheckID] = token delete(l.checkCriticalTime, check.CheckID) l.changeMade() + return nil } // RemoveCheck is used to remove a health check from the local state. diff --git a/agent/local_test.go b/agent/local_test.go index de66480df..e1ae506c0 100644 --- a/agent/local_test.go +++ b/agent/local_test.go @@ -1475,6 +1475,9 @@ func TestAgent_checkCriticalTime(t *testing.T) { cfg := TestConfig() l := NewLocalState(cfg, nil) + svc := &structs.NodeService{ID: "redis", Service: "redis", Port: 8000} + l.AddService(svc, "") + // Add a passing check and make sure it's not critical. checkID := types.CheckID("redis:1") chk := &structs.HealthCheck{