From 6ca1c9f15c1a1439b0778119f014b033df8968bf Mon Sep 17 00:00:00 2001
From: Andrew Stucki
Date: Fri, 3 Mar 2023 09:56:57 -0500
Subject: [PATCH] Fix issue where terminating gateway service resolvers weren't properly cleaned up (#16498)

* Fix issue where terminating gateway service resolvers weren't properly cleaned up

* Add integration test for cleaning up resolvers

* Add changelog entry

* Use state test and drop integration test
---
 .changelog/16498.txt                        |  3 +++
 agent/proxycfg/state_test.go                | 22 +++++++++++++++++++++
 agent/proxycfg/terminating_gateway.go       |  7 ++++++-
 test/integration/connect/envoy/helpers.bash | 18 +++++++++++++++--
 4 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 .changelog/16498.txt

diff --git a/.changelog/16498.txt b/.changelog/16498.txt
new file mode 100644
index 000000000..cdb045d67
--- /dev/null
+++ b/.changelog/16498.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+proxycfg: fix a bug where terminating gateways were not cleaning up deleted service resolvers for their referenced services
+```
diff --git a/agent/proxycfg/state_test.go b/agent/proxycfg/state_test.go
index b3cf47a42..da4de7960 100644
--- a/agent/proxycfg/state_test.go
+++ b/agent/proxycfg/state_test.go
@@ -2093,6 +2093,28 @@ func TestState_WatchesAndUpdates(t *testing.T) {
 					require.Equal(t, dbResolver.Entry, snap.TerminatingGateway.ServiceResolvers[db])
 				},
 			},
+			{
+				requiredWatches: map[string]verifyWatchRequest{
+					"service-resolver:" + db.String(): genVerifyResolverWatch("db", "dc1", structs.ServiceResolver),
+				},
+				events: []UpdateEvent{
+					{
+						CorrelationID: "service-resolver:" + db.String(),
+						Result: &structs.ConfigEntryResponse{
+							Entry: nil,
+						},
+						Err: nil,
+					},
+				},
+				verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) {
+					require.True(t, snap.Valid(), "gateway with service list is valid")
+					// Finally ensure we cleaned up the resolver
+					require.Equal(t, []structs.ServiceName{db}, snap.TerminatingGateway.ValidServices())
+
+					require.False(t, snap.TerminatingGateway.ServiceResolversSet[db])
+					require.Nil(t, snap.TerminatingGateway.ServiceResolvers[db])
+				},
+			},
 			{
 				events: []UpdateEvent{
 					{
diff --git a/agent/proxycfg/terminating_gateway.go b/agent/proxycfg/terminating_gateway.go
index cb371ae2b..483c79f91 100644
--- a/agent/proxycfg/terminating_gateway.go
+++ b/agent/proxycfg/terminating_gateway.go
@@ -354,8 +354,13 @@ func (s *handlerTerminatingGateway) handleUpdate(ctx context.Context, u UpdateEv
 		// There should only ever be one entry for a service resolver within a namespace
 		if resolver, ok := resp.Entry.(*structs.ServiceResolverConfigEntry); ok {
 			snap.TerminatingGateway.ServiceResolvers[sn] = resolver
+			snap.TerminatingGateway.ServiceResolversSet[sn] = true
+		} else {
+			// we likely have a deleted service resolver, and our cast is a nil
+			// cast, so clear this out
+			delete(snap.TerminatingGateway.ServiceResolvers, sn)
+			snap.TerminatingGateway.ServiceResolversSet[sn] = false
 		}
-		snap.TerminatingGateway.ServiceResolversSet[sn] = true
 
 	case strings.HasPrefix(u.CorrelationID, serviceIntentionsIDPrefix):
 		resp, ok := u.Result.(structs.Intentions)
diff --git a/test/integration/connect/envoy/helpers.bash b/test/integration/connect/envoy/helpers.bash
index c7746d926..65bbe3b00 100755
--- a/test/integration/connect/envoy/helpers.bash
+++ b/test/integration/connect/envoy/helpers.bash
@@ -383,15 +383,27 @@ function assert_upstream_has_endpoints_in_status_once {
 
   GOT_COUNT=$(get_upstream_endpoint_in_status_count $HOSTPORT $CLUSTER_NAME $HEALTH_STATUS)
 
+  echo "GOT: $GOT_COUNT"
   [ "$GOT_COUNT" -eq $EXPECT_COUNT ]
 }
 
+function assert_upstream_missing_once {
+  local HOSTPORT=$1
+  local CLUSTER_NAME=$2
+
+  run get_upstream_endpoint $HOSTPORT $CLUSTER_NAME
+  [ "$status" -eq 0 ]
+  echo "$output"
+  [ "" == "$output" ]
+}
+
 function assert_upstream_missing {
   local HOSTPORT=$1
   local CLUSTER_NAME=$2
-  run retry_default get_upstream_endpoint $HOSTPORT $CLUSTER_NAME
+  run retry_long assert_upstream_missing_once $HOSTPORT $CLUSTER_NAME
   echo "OUTPUT: $output $status"
-  [ "" == "$output" ]
+
+  [ "$status" -eq 0 ]
 }
 
 function assert_upstream_has_endpoints_in_status {
@@ -400,6 +412,8 @@ function assert_upstream_has_endpoints_in_status {
   local HEALTH_STATUS=$3
   local EXPECT_COUNT=$4
   run retry_long assert_upstream_has_endpoints_in_status_once $HOSTPORT $CLUSTER_NAME $HEALTH_STATUS $EXPECT_COUNT
+  echo "$output"
+  [ "$status" -eq 0 ]
 }
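Reviewer note: the core of the Go change is easier to see outside the diff. The sketch below is a minimal, self-contained illustration of the pattern the patch applies, using hypothetical stand-in types rather than Consul's real proxycfg/structs types. When the config-entry watch fires for a deleted resolver, the entry arrives as nil, the comma-ok type assertion fails, and the snapshot entry has to be deleted and its "set" flag cleared, rather than unconditionally marking the watch as set the way the removed line did.

```go
package main

import "fmt"

// Simplified stand-ins for illustration only; the real code uses
// structs.ConfigEntry, structs.ServiceResolverConfigEntry, and
// snap.TerminatingGateway in agent/proxycfg.
type configEntry interface{ Kind() string }

type serviceResolver struct{ name string }

func (s *serviceResolver) Kind() string { return "service-resolver" }

type snapshot struct {
	resolvers    map[string]*serviceResolver
	resolversSet map[string]bool
}

// handleResolverUpdate mirrors the shape of the fix: a deleted config entry
// shows up as a nil entry, so the type assertion fails and the stale resolver
// is removed from the snapshot instead of being left behind.
func handleResolverUpdate(snap *snapshot, sn string, entry configEntry) {
	if resolver, ok := entry.(*serviceResolver); ok {
		snap.resolvers[sn] = resolver
		snap.resolversSet[sn] = true
	} else {
		// The watch fired for a deleted entry; clear it out.
		delete(snap.resolvers, sn)
		snap.resolversSet[sn] = false
	}
}

func main() {
	snap := &snapshot{
		resolvers:    map[string]*serviceResolver{},
		resolversSet: map[string]bool{},
	}

	// A resolver is created, then deleted (the watch delivers a nil entry).
	handleResolverUpdate(snap, "db", &serviceResolver{name: "db"})
	handleResolverUpdate(snap, "db", nil)

	fmt.Println(snap.resolvers["db"], snap.resolversSet["db"]) // <nil> false
}
```

The added state_test.go stage exercises exactly this path: it delivers a ConfigEntryResponse with a nil Entry and asserts that both ServiceResolvers and ServiceResolversSet are cleared for the service.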