Merge pull request #7713 from hashicorp/dnephin/connect-proxy-passive-healthcheck
xds: Add passive health check config for upstreams (aka envoy outlier detection)
This commit is contained in:
commit
a8adcf2a96
|
@ -360,8 +360,7 @@ func (s *Server) makeUpstreamClusterForPreparedQuery(upstream structs.Upstream,
|
|||
CircuitBreakers: &envoycluster.CircuitBreakers{
|
||||
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
|
||||
},
|
||||
// Having an empty config enables outlier detection with default config.
|
||||
OutlierDetection: &envoycluster.OutlierDetection{},
|
||||
OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(),
|
||||
}
|
||||
if cfg.Protocol == "http2" || cfg.Protocol == "grpc" {
|
||||
c.Http2ProtocolOptions = &envoycore.Http2ProtocolOptions{}
|
||||
|
@ -462,8 +461,7 @@ func (s *Server) makeUpstreamClustersForDiscoveryChain(
|
|||
CircuitBreakers: &envoycluster.CircuitBreakers{
|
||||
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
|
||||
},
|
||||
// Having an empty config enables outlier detection with default config.
|
||||
OutlierDetection: &envoycluster.OutlierDetection{},
|
||||
OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(),
|
||||
}
|
||||
|
||||
proto := cfg.Protocol
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
package xds
|
||||
|
||||
import (
|
||||
"github.com/hashicorp/consul/lib"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
envoycluster "github.com/envoyproxy/go-control-plane/envoy/api/v2/cluster"
|
||||
"github.com/gogo/protobuf/types"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/lib"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
)
|
||||
|
||||
|
@ -160,11 +163,48 @@ type UpstreamConfig struct {
|
|||
// Limits are the set of limits that are applied to the proxy for a specific upstream of a
|
||||
// service instance.
|
||||
Limits UpstreamLimits `mapstructure:"limits"`
|
||||
|
||||
// PassiveHealthCheck configuration
|
||||
PassiveHealthCheck PassiveHealthCheck `mapstructure:"passive_health_check"`
|
||||
}
|
||||
|
||||
type PassiveHealthCheck struct {
|
||||
// Interval between health check analysis sweeps. Each sweep may remove
|
||||
// hosts or return hosts to the pool.
|
||||
Interval time.Duration
|
||||
// MaxFailures is the count of consecutive failures that results in a host
|
||||
// being removed from the pool.
|
||||
MaxFailures uint32 `mapstructure:"max_failures"`
|
||||
}
|
||||
|
||||
// Return an envoy.OutlierDetection populated by the values from this struct.
|
||||
// If all values are zero a default empty OutlierDetection will be returned to
|
||||
// enable outlier detection with default values.
|
||||
func (p PassiveHealthCheck) AsOutlierDetection() *envoycluster.OutlierDetection {
|
||||
od := &envoycluster.OutlierDetection{}
|
||||
if p.Interval != 0 {
|
||||
od.Interval = types.DurationProto(p.Interval)
|
||||
}
|
||||
if p.MaxFailures != 0 {
|
||||
od.Consecutive_5Xx = &types.UInt32Value{Value: p.MaxFailures}
|
||||
}
|
||||
return od
|
||||
}
|
||||
|
||||
func ParseUpstreamConfigNoDefaults(m map[string]interface{}) (UpstreamConfig, error) {
|
||||
var cfg UpstreamConfig
|
||||
err := mapstructure.WeakDecode(m, &cfg)
|
||||
config := &mapstructure.DecoderConfig{
|
||||
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
|
||||
Result: &cfg,
|
||||
WeaklyTypedInput: true,
|
||||
}
|
||||
|
||||
decoder, err := mapstructure.NewDecoder(config)
|
||||
if err != nil {
|
||||
return cfg, err
|
||||
}
|
||||
|
||||
err = decoder.Decode(m)
|
||||
return cfg, err
|
||||
}
|
||||
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
package xds
|
||||
|
||||
import (
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
|
@ -244,6 +245,23 @@ func TestParseUpstreamConfig(t *testing.T) {
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "passive health check map",
|
||||
input: map[string]interface{}{
|
||||
"passive_health_check": map[string]interface{}{
|
||||
"interval": "22s",
|
||||
"max_failures": 7,
|
||||
},
|
||||
},
|
||||
want: UpstreamConfig{
|
||||
ConnectTimeoutMs: 5000,
|
||||
Protocol: "tcp",
|
||||
PassiveHealthCheck: PassiveHealthCheck{
|
||||
Interval: 22 * time.Second,
|
||||
MaxFailures: 7,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
|
|
@ -13,4 +13,4 @@ services {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,6 +14,10 @@ services {
|
|||
max_pending_requests = 4
|
||||
max_concurrent_requests = 5
|
||||
}
|
||||
passive_health_check {
|
||||
interval = "22s"
|
||||
max_failures = 4
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
|
@ -27,7 +27,7 @@ load helpers
|
|||
}
|
||||
|
||||
@test "s1 proxy should have been configured with max_connections on the cluster" {
|
||||
CLUSTER_THRESHOLD=$(get_envoy_cluster_threshold localhost:19000 s2.default.primary)
|
||||
CLUSTER_THRESHOLD=$(get_envoy_cluster_config localhost:19000 s2.default.primary | jq '.circuit_breakers.thresholds[0]')
|
||||
echo $CLUSTER_THRESHOLD
|
||||
|
||||
MAX_CONNS=$(echo $CLUSTER_THRESHOLD | jq --raw-output '.max_connections')
|
||||
|
@ -42,3 +42,11 @@ load helpers
|
|||
[ "$MAX_PENDING_REQS" = "4" ]
|
||||
[ "$MAX_REQS" = "5" ]
|
||||
}
|
||||
|
||||
@test "s1 proxy should have been configured with passive_health_check" {
|
||||
CLUSTER_CONFIG=$(get_envoy_cluster_config localhost:19000 s2.default.primary)
|
||||
echo $CLUSTER_CONFIG
|
||||
|
||||
[ "$(echo $CLUSTER_CONFIG | jq --raw-output '.outlier_detection.consecutive_5xx')" = "4" ]
|
||||
[ "$(echo $CLUSTER_CONFIG | jq --raw-output '.outlier_detection.interval')" = "22s" ]
|
||||
}
|
|
@ -156,7 +156,7 @@ function get_envoy_listener_filters {
|
|||
echo "$output" | jq --raw-output "$QUERY"
|
||||
}
|
||||
|
||||
function get_envoy_cluster_threshold {
|
||||
function get_envoy_cluster_config {
|
||||
local HOSTPORT=$1
|
||||
local CLUSTER_NAME=$2
|
||||
run retry_default curl -s -f $HOSTPORT/config_dump
|
||||
|
@ -164,7 +164,7 @@ function get_envoy_cluster_threshold {
|
|||
echo "$output" | jq --raw-output "
|
||||
.configs[1].dynamic_active_clusters[]
|
||||
| select(.cluster.name|startswith(\"${CLUSTER_NAME}\"))
|
||||
| .cluster.circuit_breakers.thresholds[0]
|
||||
| .cluster
|
||||
"
|
||||
}
|
||||
|
||||
|
|
|
@ -277,6 +277,17 @@ definition](/docs/connect/registration/service-registration) or
|
|||
since HTTP/2 has many requests per connection. For this configuration to be
|
||||
respected, a L7 protocol must be defined in the `protocol` field.
|
||||
|
||||
- `passive_health_check` - Passive health checks are used to remove hosts from
|
||||
the upstream cluster which are unreachable or are returning errors.
|
||||
|
||||
- `interval` - The time between checks. Each check will cause hosts which
|
||||
have exceeded `max_failures` to be removed from the load balancer, and
|
||||
any hosts which have passed their ejection time to be returned to the
|
||||
load balancer.
|
||||
- `max_failures` - The number of consecutive failures which cause a host to be
|
||||
removed from the load balancer.
|
||||
|
||||
|
||||
### Gateway Options
|
||||
|
||||
These fields may also be overridden explicitly in the [proxy service
|
||||
|
|
Loading…
Reference in New Issue