diff --git a/.changelog/16097.txt b/.changelog/16097.txt new file mode 100644 index 000000000..e25bf8961 --- /dev/null +++ b/.changelog/16097.txt @@ -0,0 +1,3 @@ +```release-note:bug-fix +agent: Only use the `agent` token for internal deregistration of checks and services during anti-entropy syncing. The service token specified in the `token` field of the check or service definition is no longer used for deregistration. This fixes an issue where the agent failed to ever deregister a service or check if the service token was deleted. +``` diff --git a/agent/local/state.go b/agent/local/state.go index d6ff091ad..d3ffa1e02 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -1293,7 +1293,12 @@ func (l *State) deleteService(key structs.ServiceID) error { return fmt.Errorf("ServiceID missing") } - st := l.aclTokenForServiceSync(key, l.tokens.AgentToken) + // Always use the agent token to delete without trying the service token. + // This works because the agent token really must have node:write + // permission and node:write allows deregistration of services/checks on + // that node. Because the service token may have been deleted, using the + // agent token without fallback logic is a bit faster, simpler, and safer. + st := l.tokens.AgentToken() req := structs.DeregisterRequest{ Datacenter: l.config.Datacenter, Node: l.config.NodeName, @@ -1344,7 +1349,9 @@ func (l *State) deleteCheck(key structs.CheckID) error { return fmt.Errorf("CheckID missing") } - ct := l.aclTokenForCheckSync(key, l.tokens.AgentToken) + // Always use the agent token for deletion. Refer to deleteService() for + // an explanation. + ct := l.tokens.AgentToken() req := structs.DeregisterRequest{ Datacenter: l.config.Datacenter, Node: l.config.NodeName, diff --git a/agent/local/state_test.go b/agent/local/state_test.go index 63256f848..6052fa478 100644 --- a/agent/local/state_test.go +++ b/agent/local/state_test.go @@ -829,10 +829,6 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { } var testRegisterRules = ` - node "" { - policy = "write" - } - service "api" { policy = "write" } @@ -863,6 +859,9 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { defer a.Shutdown() testrpc.WaitForLeader(t, a.RPC, "dc1") + // The agent token is the only token used for deleteService. + setAgentToken(t, a) + token := createToken(t, a, testRegisterRules) // Create service (disallowed) @@ -1153,15 +1152,19 @@ type RPC interface { func createToken(t *testing.T, rpc RPC, policyRules string) string { t.Helper() + uniqueId, err := uuid.GenerateUUID() + require.NoError(t, err) + policyName := "the-policy-" + uniqueId + reqPolicy := structs.ACLPolicySetRequest{ Datacenter: "dc1", Policy: structs.ACLPolicy{ - Name: "the-policy", + Name: policyName, Rules: policyRules, }, WriteRequest: structs.WriteRequest{Token: "root"}, } - err := rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{}) + err = rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{}) require.NoError(t, err) token, err := uuid.GenerateUUID() @@ -1171,7 +1174,7 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string { Datacenter: "dc1", ACLToken: structs.ACLToken{ SecretID: token, - Policies: []structs.ACLTokenPolicyLink{{Name: "the-policy"}}, + Policies: []structs.ACLTokenPolicyLink{{Name: policyName}}, }, WriteRequest: structs.WriteRequest{Token: "root"}, } @@ -1180,6 +1183,27 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string { return token } +// setAgentToken sets the 'agent' token for this agent. It creates a new token +// with node:write for the agent's node name, and service:write for any +// service. +func setAgentToken(t *testing.T, a *agent.TestAgent) { + var policy = fmt.Sprintf(` + node "%s" { + policy = "write" + } + service_prefix "" { + policy = "read" + } + `, a.Config.NodeName) + + token := createToken(t, a, policy) + + _, err := a.Client().Agent().UpdateAgentACLToken(token, &api.WriteOptions{Token: "root"}) + if err != nil { + t.Fatalf("setting agent token: %v", err) + } +} + func TestAgentAntiEntropy_Checks(t *testing.T) { if testing.Short() { t.Skip("too slow for testing.Short") @@ -1481,6 +1505,9 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, dc) + // The agent token is the only token used for deleteCheck. + setAgentToken(t, a) + token := createToken(t, a, testRegisterRules) // Create services using the root token