diff --git a/agent/xds/clusters.go b/agent/xds/clusters.go index 5cb2bff43..f74b6e5dd 100644 --- a/agent/xds/clusters.go +++ b/agent/xds/clusters.go @@ -360,8 +360,7 @@ func (s *Server) makeUpstreamClusterForPreparedQuery(upstream structs.Upstream, CircuitBreakers: &envoycluster.CircuitBreakers{ Thresholds: makeThresholdsIfNeeded(cfg.Limits), }, - // Having an empty config enables outlier detection with default config. - OutlierDetection: &envoycluster.OutlierDetection{}, + OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(), } if cfg.Protocol == "http2" || cfg.Protocol == "grpc" { c.Http2ProtocolOptions = &envoycore.Http2ProtocolOptions{} @@ -462,8 +461,7 @@ func (s *Server) makeUpstreamClustersForDiscoveryChain( CircuitBreakers: &envoycluster.CircuitBreakers{ Thresholds: makeThresholdsIfNeeded(cfg.Limits), }, - // Having an empty config enables outlier detection with default config. - OutlierDetection: &envoycluster.OutlierDetection{}, + OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(), } proto := cfg.Protocol diff --git a/agent/xds/config.go b/agent/xds/config.go index a97161e4d..20f07ae63 100644 --- a/agent/xds/config.go +++ b/agent/xds/config.go @@ -1,10 +1,13 @@ package xds import ( - "github.com/hashicorp/consul/lib" "strings" + "time" + envoycluster "github.com/envoyproxy/go-control-plane/envoy/api/v2/cluster" + "github.com/gogo/protobuf/types" "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/lib" "github.com/mitchellh/mapstructure" ) @@ -160,6 +163,32 @@ type UpstreamConfig struct { // Limits are the set of limits that are applied to the proxy for a specific upstream of a // service instance. Limits UpstreamLimits `mapstructure:"limits"` + + // PassiveHealthCheck configuration + PassiveHealthCheck PassiveHealthCheck `mapstructure:"passive_health_check"` +} + +type PassiveHealthCheck struct { + // Interval between health check analysis sweeps. Each sweep may remove + // hosts or return hosts to the pool. 
+ Interval time.Duration + // MaxFailures is the count of consecutive failures that results in a host + // being removed from the pool. + MaxFailures uint32 `mapstructure:"max_failures"` +} + +// AsOutlierDetection returns an envoy.OutlierDetection populated by the values from this struct. +// If all values are zero, a default empty OutlierDetection will be returned to +// enable outlier detection with default values. +func (p PassiveHealthCheck) AsOutlierDetection() *envoycluster.OutlierDetection { + od := &envoycluster.OutlierDetection{} + if p.Interval != 0 { + od.Interval = types.DurationProto(p.Interval) + } + if p.MaxFailures != 0 { + od.Consecutive_5Xx = &types.UInt32Value{Value: p.MaxFailures} + } + return od } func ParseUpstreamConfigNoDefaults(m map[string]interface{}) (UpstreamConfig, error) { diff --git a/agent/xds/config_test.go b/agent/xds/config_test.go index 0957e6d6e..dea869f3d 100644 --- a/agent/xds/config_test.go +++ b/agent/xds/config_test.go @@ -1,9 +1,10 @@ package xds import ( - "github.com/hashicorp/consul/agent/structs" "testing" + "time" + "github.com/hashicorp/consul/agent/structs" "github.com/stretchr/testify/require" ) @@ -244,6 +245,23 @@ func TestParseUpstreamConfig(t *testing.T) { }, }, }, + { + name: "passive health check map", + input: map[string]interface{}{ + "passive_health_check": map[string]interface{}{ + "interval": 22 * time.Second, + "max_failures": 7, + }, + }, + want: UpstreamConfig{ + ConnectTimeoutMs: 5000, + Protocol: "tcp", + PassiveHealthCheck: PassiveHealthCheck{ + Interval: 22 * time.Second, + MaxFailures: 7, + }, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/website/pages/docs/connect/proxies/envoy.mdx b/website/pages/docs/connect/proxies/envoy.mdx index a028fb3ba..8209ea7ea 100644 --- a/website/pages/docs/connect/proxies/envoy.mdx +++ b/website/pages/docs/connect/proxies/envoy.mdx @@ -277,6 +277,17 @@ definition](/docs/connect/registration/service-registration) or since HTTP/2 has many 
requests per connection. For this configuration to be respected, a L7 protocol must be defined in the `protocol` field. +- `passive_health_check` - Passive health checks are used to remove hosts from + the upstream cluster which are unreachable or are returning errors. + + - `interval` - The time between checks. Each check will cause hosts which + have exceeded `max_failures` to be removed from the load balancer, and + any hosts which have passed their ejection time to be returned to the + load balancer. + - `max_failures` - The number of consecutive failures which cause a host to be + removed from the load balancer. + + ### Gateway Options These fields may also be overridden explicitly in the [proxy service