diff --git a/.changelog/14373.txt b/.changelog/14373.txt new file mode 100644 index 000000000..d9531b09e --- /dev/null +++ b/.changelog/14373.txt @@ -0,0 +1,3 @@ +```release-note:improvement +xds: Set `max_ejection_percent` on Envoy's outlier detection to 100% for peered services. +``` diff --git a/agent/xds/clusters.go b/agent/xds/clusters.go index adde810d3..c3ac71847 100644 --- a/agent/xds/clusters.go +++ b/agent/xds/clusters.go @@ -772,6 +772,13 @@ func (s *ResourceGenerator) makeUpstreamClusterForPeerService( clusterName := generatePeeredClusterName(uid, tbs) + outlierDetection := ToOutlierDetection(cfg.PassiveHealthCheck) + // We can't rely on health checks for services on cluster peers because they + // don't take into account service resolvers, splitters and routers. Setting + // MaxEjectionPercent too 100% gives outlier detection the power to eject the + // entire cluster. + outlierDetection.MaxEjectionPercent = &wrappers.UInt32Value{Value: 100} + s.Logger.Trace("generating cluster for", "cluster", clusterName) if c == nil { c = &envoy_cluster_v3.Cluster{ @@ -785,7 +792,7 @@ func (s *ResourceGenerator) makeUpstreamClusterForPeerService( CircuitBreakers: &envoy_cluster_v3.CircuitBreakers{ Thresholds: makeThresholdsIfNeeded(cfg.Limits), }, - OutlierDetection: ToOutlierDetection(cfg.PassiveHealthCheck), + OutlierDetection: outlierDetection, } if cfg.Protocol == "http2" || cfg.Protocol == "grpc" { if err := s.setHttp2ProtocolOptions(c); err != nil { diff --git a/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden b/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden index d7c23515f..29059b143 100644 --- a/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden +++ b/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden @@ -58,7 +58,7 @@ "dnsRefreshRate": "10s", "dnsLookupFamily": "V4_ONLY", "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": { @@ -115,7 +115,7 @@ }, "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": { diff --git a/agent/xds/testdata/clusters/transparent-proxy-with-peered-upstreams.latest.golden b/agent/xds/testdata/clusters/transparent-proxy-with-peered-upstreams.latest.golden index 0dbbf4277..d1f6d0bb0 100644 --- a/agent/xds/testdata/clusters/transparent-proxy-with-peered-upstreams.latest.golden +++ b/agent/xds/testdata/clusters/transparent-proxy-with-peered-upstreams.latest.golden @@ -18,7 +18,7 @@ }, "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": { @@ -75,7 +75,7 @@ }, "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": { @@ -157,7 +157,7 @@ }, "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": {