Merge pull request #7713 from hashicorp/dnephin/connect-proxy-passive-healthcheck

xds: Add passive health check config for upstreams (aka envoy outlier detection)
This commit is contained in:
Daniel Nephin 2020-05-08 15:48:50 -04:00 committed by GitHub
commit a8adcf2a96
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 90 additions and 11 deletions

View File

@ -360,8 +360,7 @@ func (s *Server) makeUpstreamClusterForPreparedQuery(upstream structs.Upstream,
CircuitBreakers: &envoycluster.CircuitBreakers{ CircuitBreakers: &envoycluster.CircuitBreakers{
Thresholds: makeThresholdsIfNeeded(cfg.Limits), Thresholds: makeThresholdsIfNeeded(cfg.Limits),
}, },
// Having an empty config enables outlier detection with default config. OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(),
OutlierDetection: &envoycluster.OutlierDetection{},
} }
if cfg.Protocol == "http2" || cfg.Protocol == "grpc" { if cfg.Protocol == "http2" || cfg.Protocol == "grpc" {
c.Http2ProtocolOptions = &envoycore.Http2ProtocolOptions{} c.Http2ProtocolOptions = &envoycore.Http2ProtocolOptions{}
@ -462,8 +461,7 @@ func (s *Server) makeUpstreamClustersForDiscoveryChain(
CircuitBreakers: &envoycluster.CircuitBreakers{ CircuitBreakers: &envoycluster.CircuitBreakers{
Thresholds: makeThresholdsIfNeeded(cfg.Limits), Thresholds: makeThresholdsIfNeeded(cfg.Limits),
}, },
// Having an empty config enables outlier detection with default config. OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(),
OutlierDetection: &envoycluster.OutlierDetection{},
} }
proto := cfg.Protocol proto := cfg.Protocol

View File

@ -1,10 +1,13 @@
package xds package xds
import ( import (
"github.com/hashicorp/consul/lib"
"strings" "strings"
"time"
envoycluster "github.com/envoyproxy/go-control-plane/envoy/api/v2/cluster"
"github.com/gogo/protobuf/types"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/mitchellh/mapstructure" "github.com/mitchellh/mapstructure"
) )
@ -160,11 +163,48 @@ type UpstreamConfig struct {
// Limits are the set of limits that are applied to the proxy for a specific upstream of a // Limits are the set of limits that are applied to the proxy for a specific upstream of a
// service instance. // service instance.
Limits UpstreamLimits `mapstructure:"limits"` Limits UpstreamLimits `mapstructure:"limits"`
// PassiveHealthCheck configuration
PassiveHealthCheck PassiveHealthCheck `mapstructure:"passive_health_check"`
}
// PassiveHealthCheck holds the passive health check settings for an upstream.
// AsOutlierDetection translates these values into an envoy OutlierDetection
// config; fields left at their zero value are not set on the result.
type PassiveHealthCheck struct {
	// Interval between health check analysis sweeps. Each sweep may remove
	// hosts or return hosts to the pool.
	Interval time.Duration `mapstructure:"interval"`

	// MaxFailures is the count of consecutive failures that results in a host
	// being removed from the pool.
	MaxFailures uint32 `mapstructure:"max_failures"`
}
// Return an envoy.OutlierDetection populated by the values from this struct.
// If all values are zero a default empty OutlierDetection will be returned to
// enable outlier detection with default values.
func (p PassiveHealthCheck) AsOutlierDetection() *envoycluster.OutlierDetection {
od := &envoycluster.OutlierDetection{}
if p.Interval != 0 {
od.Interval = types.DurationProto(p.Interval)
}
if p.MaxFailures != 0 {
od.Consecutive_5Xx = &types.UInt32Value{Value: p.MaxFailures}
}
return od
} }
func ParseUpstreamConfigNoDefaults(m map[string]interface{}) (UpstreamConfig, error) { func ParseUpstreamConfigNoDefaults(m map[string]interface{}) (UpstreamConfig, error) {
var cfg UpstreamConfig var cfg UpstreamConfig
err := mapstructure.WeakDecode(m, &cfg) config := &mapstructure.DecoderConfig{
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
Result: &cfg,
WeaklyTypedInput: true,
}
decoder, err := mapstructure.NewDecoder(config)
if err != nil {
return cfg, err
}
err = decoder.Decode(m)
return cfg, err return cfg, err
} }

View File

@ -1,9 +1,10 @@
package xds package xds
import ( import (
"github.com/hashicorp/consul/agent/structs"
"testing" "testing"
"time"
"github.com/hashicorp/consul/agent/structs"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@ -244,6 +245,23 @@ func TestParseUpstreamConfig(t *testing.T) {
}, },
}, },
}, },
{
name: "passive health check map",
input: map[string]interface{}{
"passive_health_check": map[string]interface{}{
"interval": "22s",
"max_failures": 7,
},
},
want: UpstreamConfig{
ConnectTimeoutMs: 5000,
Protocol: "tcp",
PassiveHealthCheck: PassiveHealthCheck{
Interval: 22 * time.Second,
MaxFailures: 7,
},
},
},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {

View File

@ -14,6 +14,10 @@ services {
max_pending_requests = 4 max_pending_requests = 4
max_concurrent_requests = 5 max_concurrent_requests = 5
} }
passive_health_check {
interval = "22s"
max_failures = 4
}
} }
} }
] ]

View File

@ -27,7 +27,7 @@ load helpers
} }
@test "s1 proxy should have been configured with max_connections on the cluster" { @test "s1 proxy should have been configured with max_connections on the cluster" {
CLUSTER_THRESHOLD=$(get_envoy_cluster_threshold localhost:19000 s2.default.primary) CLUSTER_THRESHOLD=$(get_envoy_cluster_config localhost:19000 s2.default.primary | jq '.circuit_breakers.thresholds[0]')
echo $CLUSTER_THRESHOLD echo $CLUSTER_THRESHOLD
MAX_CONNS=$(echo $CLUSTER_THRESHOLD | jq --raw-output '.max_connections') MAX_CONNS=$(echo $CLUSTER_THRESHOLD | jq --raw-output '.max_connections')
@ -42,3 +42,11 @@ load helpers
[ "$MAX_PENDING_REQS" = "4" ] [ "$MAX_PENDING_REQS" = "4" ]
[ "$MAX_REQS" = "5" ] [ "$MAX_REQS" = "5" ]
} }
@test "s1 proxy should have been configured with passive_health_check" {
  # Fetch the config of the s2 upstream cluster from the proxy at localhost:19000.
  CLUSTER_CONFIG=$(get_envoy_cluster_config localhost:19000 s2.default.primary)
  # Quote every expansion so the JSON is printed and piped verbatim, without
  # word splitting or pathname expansion.
  echo "$CLUSTER_CONFIG"

  [ "$(echo "$CLUSTER_CONFIG" | jq --raw-output '.outlier_detection.consecutive_5xx')" = "4" ]
  [ "$(echo "$CLUSTER_CONFIG" | jq --raw-output '.outlier_detection.interval')" = "22s" ]
}

View File

@ -156,7 +156,7 @@ function get_envoy_listener_filters {
echo "$output" | jq --raw-output "$QUERY" echo "$output" | jq --raw-output "$QUERY"
} }
function get_envoy_cluster_threshold { function get_envoy_cluster_config {
local HOSTPORT=$1 local HOSTPORT=$1
local CLUSTER_NAME=$2 local CLUSTER_NAME=$2
run retry_default curl -s -f $HOSTPORT/config_dump run retry_default curl -s -f $HOSTPORT/config_dump
@ -164,7 +164,7 @@ function get_envoy_cluster_threshold {
echo "$output" | jq --raw-output " echo "$output" | jq --raw-output "
.configs[1].dynamic_active_clusters[] .configs[1].dynamic_active_clusters[]
| select(.cluster.name|startswith(\"${CLUSTER_NAME}\")) | select(.cluster.name|startswith(\"${CLUSTER_NAME}\"))
| .cluster.circuit_breakers.thresholds[0] | .cluster
" "
} }

View File

@ -277,6 +277,17 @@ definition](/docs/connect/registration/service-registration) or
since HTTP/2 has many requests per connection. For this configuration to be since HTTP/2 has many requests per connection. For this configuration to be
respected, a L7 protocol must be defined in the `protocol` field. respected, a L7 protocol must be defined in the `protocol` field.
- `passive_health_check` - Passive health checks are used to remove hosts that
are unreachable or are returning errors from the upstream cluster.
- `interval` - The time between checks. Each check will cause hosts which
have exceeded `max_failures` to be removed from the load balancer, and
any hosts which have passed their ejection time to be returned to the
load balancer.
- `max_failures` - The number of consecutive failures which cause a host to be
removed from the load balancer.
### Gateway Options ### Gateway Options
These fields may also be overridden explicitly in the [proxy service These fields may also be overridden explicitly in the [proxy service