From 04e6e79b09fe915b35e6cfa2ae0119390f6db7d5 Mon Sep 17 00:00:00 2001 From: Freddy Date: Mon, 3 Apr 2023 12:07:45 -0600 Subject: [PATCH] Allow dialer to re-establish terminated peering (#16776) Currently, if an acceptor peer deletes a peering the dialer's peering will eventually get to a "terminated" state. If the two clusters need to be re-peered the acceptor will re-generate the token but the dialer will encounter this error on the call to establish: "failed to get addresses to dial peer: failed to refresh peer server addresses, will continue to use initial addresses: there is no active peering for "<<>>"" This is because in `exchangeSecret().GetDialAddresses()` we will get an error if fetching addresses for an inactive peering. The peering shows up as inactive at this point because of the existing terminated state. Rather than checking whether a peering is active we can instead check whether it was deleted. This way users do not need to delete terminated peerings in the dialing cluster before re-establishing them. --- .changelog/16776.txt | 3 +++ agent/consul/leader_peering_test.go | 24 ++++++++++++++++++++++++ agent/consul/peering_backend.go | 7 +++++-- agent/consul/peering_backend_test.go | 23 +++++++++++++++++++++-- 4 files changed, 53 insertions(+), 4 deletions(-) create mode 100644 .changelog/16776.txt diff --git a/.changelog/16776.txt b/.changelog/16776.txt new file mode 100644 index 000000000..0159aee85 --- /dev/null +++ b/.changelog/16776.txt @@ -0,0 +1,3 @@ +```release-note:improvement +peering: allow re-establishing terminated peering from new token without deleting existing peering first. +``` \ No newline at end of file diff --git a/agent/consul/leader_peering_test.go b/agent/consul/leader_peering_test.go index 7ac8b387d..55c595fd5 100644 --- a/agent/consul/leader_peering_test.go +++ b/agent/consul/leader_peering_test.go @@ -472,6 +472,30 @@ func TestLeader_PeeringSync_Lifecycle_ServerDeletion(t *testing.T) { require.NoError(r, err) require.Equal(r, pbpeering.PeeringState_TERMINATED, peering.State) }) + + // Re-establishing a peering terminated by the acceptor should be possible + // without needing to delete the terminated peering first. + ctx, cancel = context.WithTimeout(context.Background(), 3*time.Second) + t.Cleanup(cancel) + + req = pbpeering.GenerateTokenRequest{ + PeerName: "my-peer-dialer", + } + resp, err = peeringClient.GenerateToken(ctx, &req) + require.NoError(t, err) + + tokenJSON, err = base64.StdEncoding.DecodeString(resp.PeeringToken) + require.NoError(t, err) + + token = structs.PeeringToken{} + require.NoError(t, json.Unmarshal(tokenJSON, &token)) + + establishReq = pbpeering.EstablishRequest{ + PeerName: "my-peer-acceptor", + PeeringToken: resp.PeeringToken, + } + _, err = dialerClient.Establish(ctx, &establishReq) + require.NoError(t, err) } func TestLeader_PeeringSync_FailsForTLSError(t *testing.T) { diff --git a/agent/consul/peering_backend.go b/agent/consul/peering_backend.go index 182d926c7..1771be10f 100644 --- a/agent/consul/peering_backend.go +++ b/agent/consul/peering_backend.go @@ -150,8 +150,11 @@ func (b *PeeringBackend) fetchPeerServerAddresses(ws memdb.WatchSet, peerID stri if err != nil { return nil, fmt.Errorf("failed to fetch peer %q: %w", peerID, err) } - if !peering.IsActive() { - return nil, fmt.Errorf("there is no active peering for %q", peerID) + if peering == nil { + return nil, fmt.Errorf("unknown peering %q", peerID) + } + if peering.DeletedAt != nil && !structs.IsZeroProtoTime(peering.DeletedAt) { + return nil, fmt.Errorf("peering %q was deleted", peerID) } return bufferFromAddresses(peering.GetAddressesToDial()) } diff --git a/agent/consul/peering_backend_test.go b/agent/consul/peering_backend_test.go index 648873f94..648052b7a 100644 --- a/agent/consul/peering_backend_test.go +++ b/agent/consul/peering_backend_test.go @@ -256,7 +256,7 @@ func TestPeeringBackend_GetDialAddresses(t *testing.T) { }, peerID: acceptorPeerID, expect: expectation{ - err: fmt.Sprintf(`there is no active peering for %q`, acceptorPeerID), + err: fmt.Sprintf(`unknown peering %q`, acceptorPeerID), }, }, { @@ -387,6 +387,25 @@ func TestPeeringBackend_GetDialAddresses(t *testing.T) { gatewayAddrs: []string{"5.6.7.8:8443", "6.7.8.9:8443"}, }, }, + { + name: "addresses are returned if the peering is marked as terminated", + setup: func(store *state.Store) { + require.NoError(t, store.PeeringWrite(5, &pbpeering.PeeringWriteRequest{ + Peering: &pbpeering.Peering{ + Name: "dialer", + ID: dialerPeerID, + PeerServerAddresses: []string{"1.2.3.4:8502", "2.3.4.5:8503"}, + State: pbpeering.PeeringState_TERMINATED, + }, + })) + }, + peerID: dialerPeerID, + expect: expectation{ + // Gateways come first, and we use their LAN addresses since this is for outbound communication. + addrs: []string{"5.6.7.8:8443", "6.7.8.9:8443", "1.2.3.4:8502", "2.3.4.5:8503"}, + gatewayAddrs: []string{"5.6.7.8:8443", "6.7.8.9:8443"}, + }, + }, { name: "addresses are not returned if the peering is deleted", setup: func(store *state.Store) { @@ -404,7 +423,7 @@ func TestPeeringBackend_GetDialAddresses(t *testing.T) { }, peerID: dialerPeerID, expect: expectation{ - err: fmt.Sprintf(`there is no active peering for %q`, dialerPeerID), + err: fmt.Sprintf(`peering %q was deleted`, dialerPeerID), }, }, }