xds: prevent LDS flaps in mesh gateways due to unstable datacenter lists (#9651)

Also fix a similar issue in Terminating Gateways that was masked by an overzealous test.
This commit is contained in:
R.B. Boyer 2021-02-08 10:19:57 -06:00 committed by GitHub
parent df0f93f850
commit 77424e179a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 14 deletions

3
.changelog/9651.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
xds: prevent LDS flaps in mesh gateways due to unstable datacenter lists; also prevent some flaps in terminating gateways as well
```

View File

@ -3,6 +3,7 @@ package proxycfg
import ( import (
"context" "context"
"fmt" "fmt"
"sort"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/mitchellh/copystructure" "github.com/mitchellh/copystructure"
@ -245,6 +246,10 @@ func (c *configSnapshotMeshGateway) Datacenters() []string {
dcs = append(dcs, dc) dcs = append(dcs, dc)
} }
} }
// Always sort the results to ensure we generate deterministic things over
// xDS, such as mesh-gateway listener filter chains.
sort.Strings(dcs)
return dcs return dcs
} }

View File

@ -777,6 +777,16 @@ func (s *Server) makeTerminatingGatewayListener(
} }
} }
// Before we add the fallback, sort these chains by the matched name. All
// of these filter chains are independent, but envoy requires them to be in
// some order. If we put them in a random order then every xDS iteration
// envoy will force the listener to be replaced. Sorting these has no
// effect on how they operate, but it does mean that we won't churn
// listeners at idle.
sort.Slice(l.FilterChains, func(i, j int) bool {
return l.FilterChains[i].FilterChainMatch.ServerNames[0] < l.FilterChains[j].FilterChainMatch.ServerNames[0]
})
// This fallback catch-all filter ensures a listener will be present for health checks to pass // This fallback catch-all filter ensures a listener will be present for health checks to pass
// Envoy will reset these connections since known endpoints are caught by filter chain matches above // Envoy will reset these connections since known endpoints are caught by filter chain matches above
tcpProxy, err := makeTCPProxyFilter(name, "", "terminating_gateway.") tcpProxy, err := makeTCPProxyFilter(name, "", "terminating_gateway.")

View File

@ -525,24 +525,14 @@ func TestListenersFromSnapshot(t *testing.T) {
ProxyFeatures: sf, ProxyFeatures: sf,
} }
listeners, err := s.listenersFromSnapshot(cInfo, snap) listeners, err := s.listenersFromSnapshot(cInfo, snap)
require.NoError(err)
// The order of listeners returned via LDS isn't relevant, so it's safe
// to sort these for the purposes of test comparisons.
sort.Slice(listeners, func(i, j int) bool { sort.Slice(listeners, func(i, j int) bool {
return listeners[i].(*envoy.Listener).Name < listeners[j].(*envoy.Listener).Name return listeners[i].(*envoy.Listener).Name < listeners[j].(*envoy.Listener).Name
}) })
// For terminating gateways we create filter chain matches for services/subsets from the ServiceGroups map
for i := 0; i < len(listeners); i++ {
l := listeners[i].(*envoy.Listener)
if l.FilterChains != nil {
// Sort chains by the matched name with the exception of the last one
// The last chain is a fallback and does not have a FilterChainMatch
sort.Slice(l.FilterChains[:len(l.FilterChains)-1], func(i, j int) bool {
return l.FilterChains[i].FilterChainMatch.ServerNames[0] < l.FilterChains[j].FilterChainMatch.ServerNames[0]
})
}
}
require.NoError(err)
r, err := createResponse(ListenerType, "00000001", "00000001", listeners) r, err := createResponse(ListenerType, "00000001", "00000001", listeners)
require.NoError(err) require.NoError(err)