Fix alias check leak

Previously, when an alias check was removed it would not be stopped nor
cleaned up from the associated checkAliases map.

This means that any time an alias check was deregistered we would
leak a goroutine for CheckAlias.run() because the stopCh would never
be closed.

This issue mostly affects service mesh deployments on platforms where
the client agent is largely static but proxy services come and go
regularly, since by default sidecars are registered with an alias check.
This commit is contained in:
freddygv 2022-10-10 16:42:29 -06:00
parent a8695c88d4
commit 9f0ab69aef
2 changed files with 35 additions and 22 deletions

View File

@ -3263,7 +3263,10 @@ func (a *Agent) cancelCheckMonitors(checkID structs.CheckID) {
check.Stop()
delete(a.checkH2PINGs, checkID)
}
if check, ok := a.checkAliases[checkID]; ok {
check.Stop()
delete(a.checkAliases, checkID)
}
}
// updateTTLCheck is used to update the status of a TTL check via the Agent API.

View File

@ -1912,7 +1912,7 @@ node_name = "` + a.Config.NodeName + `"
}
}
func TestAgent_AddCheck_Alias(t *testing.T) {
func TestAgent_Alias_AddRemove(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")
}
@ -1922,29 +1922,39 @@ func TestAgent_AddCheck_Alias(t *testing.T) {
a := NewTestAgent(t, "")
defer a.Shutdown()
health := &structs.HealthCheck{
Node: "foo",
CheckID: "aliashealth",
Name: "Alias health check",
Status: api.HealthCritical,
}
chk := &structs.CheckType{
AliasService: "foo",
}
err := a.AddCheck(health, chk, false, "", ConfigSourceLocal)
require.NoError(t, err)
cid := structs.NewCheckID("aliashealth", nil)
// Ensure we have a check mapping
sChk := requireCheckExists(t, a, "aliashealth")
require.Equal(t, api.HealthCritical, sChk.Status)
testutil.RunStep(t, "add check", func(t *testing.T) {
health := &structs.HealthCheck{
Node: "foo",
CheckID: cid.ID,
Name: "Alias health check",
Status: api.HealthCritical,
}
chk := &structs.CheckType{
AliasService: "foo",
}
err := a.AddCheck(health, chk, false, "", ConfigSourceLocal)
require.NoError(t, err)
chkImpl, ok := a.checkAliases[structs.NewCheckID("aliashealth", nil)]
require.True(t, ok, "missing aliashealth check")
require.Equal(t, "", chkImpl.RPCReq.Token)
sChk := requireCheckExists(t, a, cid.ID)
require.Equal(t, api.HealthCritical, sChk.Status)
cs := a.State.CheckState(structs.NewCheckID("aliashealth", nil))
require.NotNil(t, cs)
require.Equal(t, "", cs.Token)
chkImpl, ok := a.checkAliases[cid]
require.True(t, ok, "missing aliashealth check")
require.Equal(t, "", chkImpl.RPCReq.Token)
cs := a.State.CheckState(cid)
require.NotNil(t, cs)
require.Equal(t, "", cs.Token)
})
testutil.RunStep(t, "remove check", func(t *testing.T) {
require.NoError(t, a.RemoveCheck(cid, false))
requireCheckMissing(t, a, cid.ID)
requireCheckMissingMap(t, a.checkAliases, cid.ID)
})
}
func TestAgent_AddCheck_Alias_setToken(t *testing.T) {