Merge pull request #7713 from hashicorp/dnephin/connect-proxy-passive-healthcheck
xds: Add passive health check config for upstreams (aka envoy outlier detection)
This commit is contained in:
commit
a8adcf2a96
|
@ -360,8 +360,7 @@ func (s *Server) makeUpstreamClusterForPreparedQuery(upstream structs.Upstream,
|
||||||
CircuitBreakers: &envoycluster.CircuitBreakers{
|
CircuitBreakers: &envoycluster.CircuitBreakers{
|
||||||
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
|
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
|
||||||
},
|
},
|
||||||
// Having an empty config enables outlier detection with default config.
|
OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(),
|
||||||
OutlierDetection: &envoycluster.OutlierDetection{},
|
|
||||||
}
|
}
|
||||||
if cfg.Protocol == "http2" || cfg.Protocol == "grpc" {
|
if cfg.Protocol == "http2" || cfg.Protocol == "grpc" {
|
||||||
c.Http2ProtocolOptions = &envoycore.Http2ProtocolOptions{}
|
c.Http2ProtocolOptions = &envoycore.Http2ProtocolOptions{}
|
||||||
|
@ -462,8 +461,7 @@ func (s *Server) makeUpstreamClustersForDiscoveryChain(
|
||||||
CircuitBreakers: &envoycluster.CircuitBreakers{
|
CircuitBreakers: &envoycluster.CircuitBreakers{
|
||||||
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
|
Thresholds: makeThresholdsIfNeeded(cfg.Limits),
|
||||||
},
|
},
|
||||||
// Having an empty config enables outlier detection with default config.
|
OutlierDetection: cfg.PassiveHealthCheck.AsOutlierDetection(),
|
||||||
OutlierDetection: &envoycluster.OutlierDetection{},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
proto := cfg.Protocol
|
proto := cfg.Protocol
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
package xds
|
package xds
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/hashicorp/consul/lib"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
envoycluster "github.com/envoyproxy/go-control-plane/envoy/api/v2/cluster"
|
||||||
|
"github.com/gogo/protobuf/types"
|
||||||
"github.com/hashicorp/consul/agent/structs"
|
"github.com/hashicorp/consul/agent/structs"
|
||||||
|
"github.com/hashicorp/consul/lib"
|
||||||
"github.com/mitchellh/mapstructure"
|
"github.com/mitchellh/mapstructure"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -160,11 +163,48 @@ type UpstreamConfig struct {
|
||||||
// Limits are the set of limits that are applied to the proxy for a specific upstream of a
|
// Limits are the set of limits that are applied to the proxy for a specific upstream of a
|
||||||
// service instance.
|
// service instance.
|
||||||
Limits UpstreamLimits `mapstructure:"limits"`
|
Limits UpstreamLimits `mapstructure:"limits"`
|
||||||
|
|
||||||
|
// PassiveHealthCheck configures passive health checking (Envoy outlier detection) for this upstream.
|
||||||
|
PassiveHealthCheck PassiveHealthCheck `mapstructure:"passive_health_check"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type PassiveHealthCheck struct {
|
||||||
|
// Interval between health check analysis sweeps. Each sweep may remove
|
||||||
|
// hosts or return hosts to the pool.
|
||||||
|
Interval time.Duration
|
||||||
|
// MaxFailures is the count of consecutive failures that results in a host
|
||||||
|
// being removed from the pool.
|
||||||
|
MaxFailures uint32 `mapstructure:"max_failures"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// AsOutlierDetection returns an Envoy OutlierDetection populated by the values from this struct.
|
||||||
|
// If all values are zero a default empty OutlierDetection will be returned to
|
||||||
|
// enable outlier detection with default values.
|
||||||
|
func (p PassiveHealthCheck) AsOutlierDetection() *envoycluster.OutlierDetection {
|
||||||
|
od := &envoycluster.OutlierDetection{}
|
||||||
|
if p.Interval != 0 {
|
||||||
|
od.Interval = types.DurationProto(p.Interval)
|
||||||
|
}
|
||||||
|
if p.MaxFailures != 0 {
|
||||||
|
od.Consecutive_5Xx = &types.UInt32Value{Value: p.MaxFailures}
|
||||||
|
}
|
||||||
|
return od
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseUpstreamConfigNoDefaults(m map[string]interface{}) (UpstreamConfig, error) {
|
func ParseUpstreamConfigNoDefaults(m map[string]interface{}) (UpstreamConfig, error) {
|
||||||
var cfg UpstreamConfig
|
var cfg UpstreamConfig
|
||||||
err := mapstructure.WeakDecode(m, &cfg)
|
config := &mapstructure.DecoderConfig{
|
||||||
|
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
|
||||||
|
Result: &cfg,
|
||||||
|
WeaklyTypedInput: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
decoder, err := mapstructure.NewDecoder(config)
|
||||||
|
if err != nil {
|
||||||
|
return cfg, err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = decoder.Decode(m)
|
||||||
return cfg, err
|
return cfg, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
package xds
|
package xds
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/hashicorp/consul/agent/structs"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/consul/agent/structs"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -244,6 +245,23 @@ func TestParseUpstreamConfig(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "passive health check map",
|
||||||
|
input: map[string]interface{}{
|
||||||
|
"passive_health_check": map[string]interface{}{
|
||||||
|
"interval": "22s",
|
||||||
|
"max_failures": 7,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: UpstreamConfig{
|
||||||
|
ConnectTimeoutMs: 5000,
|
||||||
|
Protocol: "tcp",
|
||||||
|
PassiveHealthCheck: PassiveHealthCheck{
|
||||||
|
Interval: 22 * time.Second,
|
||||||
|
MaxFailures: 7,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
|
|
@ -13,4 +13,4 @@ services {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,10 @@ services {
|
||||||
max_pending_requests = 4
|
max_pending_requests = 4
|
||||||
max_concurrent_requests = 5
|
max_concurrent_requests = 5
|
||||||
}
|
}
|
||||||
|
passive_health_check {
|
||||||
|
interval = "22s"
|
||||||
|
max_failures = 4
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -27,7 +27,7 @@ load helpers
|
||||||
}
|
}
|
||||||
|
|
||||||
@test "s1 proxy should have been configured with max_connections on the cluster" {
|
@test "s1 proxy should have been configured with max_connections on the cluster" {
|
||||||
CLUSTER_THRESHOLD=$(get_envoy_cluster_threshold localhost:19000 s2.default.primary)
|
CLUSTER_THRESHOLD=$(get_envoy_cluster_config localhost:19000 s2.default.primary | jq '.circuit_breakers.thresholds[0]')
|
||||||
echo $CLUSTER_THRESHOLD
|
echo $CLUSTER_THRESHOLD
|
||||||
|
|
||||||
MAX_CONNS=$(echo $CLUSTER_THRESHOLD | jq --raw-output '.max_connections')
|
MAX_CONNS=$(echo $CLUSTER_THRESHOLD | jq --raw-output '.max_connections')
|
||||||
|
@ -42,3 +42,11 @@ load helpers
|
||||||
[ "$MAX_PENDING_REQS" = "4" ]
|
[ "$MAX_PENDING_REQS" = "4" ]
|
||||||
[ "$MAX_REQS" = "5" ]
|
[ "$MAX_REQS" = "5" ]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@test "s1 proxy should have been configured with passive_health_check" {
|
||||||
|
CLUSTER_CONFIG=$(get_envoy_cluster_config localhost:19000 s2.default.primary)
|
||||||
|
echo $CLUSTER_CONFIG
|
||||||
|
|
||||||
|
[ "$(echo $CLUSTER_CONFIG | jq --raw-output '.outlier_detection.consecutive_5xx')" = "4" ]
|
||||||
|
[ "$(echo $CLUSTER_CONFIG | jq --raw-output '.outlier_detection.interval')" = "22s" ]
|
||||||
|
}
|
|
@ -156,7 +156,7 @@ function get_envoy_listener_filters {
|
||||||
echo "$output" | jq --raw-output "$QUERY"
|
echo "$output" | jq --raw-output "$QUERY"
|
||||||
}
|
}
|
||||||
|
|
||||||
function get_envoy_cluster_threshold {
|
function get_envoy_cluster_config {
|
||||||
local HOSTPORT=$1
|
local HOSTPORT=$1
|
||||||
local CLUSTER_NAME=$2
|
local CLUSTER_NAME=$2
|
||||||
run retry_default curl -s -f $HOSTPORT/config_dump
|
run retry_default curl -s -f $HOSTPORT/config_dump
|
||||||
|
@ -164,7 +164,7 @@ function get_envoy_cluster_threshold {
|
||||||
echo "$output" | jq --raw-output "
|
echo "$output" | jq --raw-output "
|
||||||
.configs[1].dynamic_active_clusters[]
|
.configs[1].dynamic_active_clusters[]
|
||||||
| select(.cluster.name|startswith(\"${CLUSTER_NAME}\"))
|
| select(.cluster.name|startswith(\"${CLUSTER_NAME}\"))
|
||||||
| .cluster.circuit_breakers.thresholds[0]
|
| .cluster
|
||||||
"
|
"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -277,6 +277,17 @@ definition](/docs/connect/registration/service-registration) or
|
||||||
since HTTP/2 has many requests per connection. For this configuration to be
|
since HTTP/2 has many requests per connection. For this configuration to be
|
||||||
respected, a L7 protocol must be defined in the `protocol` field.
|
respected, a L7 protocol must be defined in the `protocol` field.
|
||||||
|
|
||||||
|
- `passive_health_check` - Passive health checks remove hosts that are
|
||||||
|
unreachable or are returning errors from the upstream cluster.
|
||||||
|
|
||||||
|
- `interval` - The time between checks. Each check will cause hosts which
|
||||||
|
have exceeded `max_failures` to be removed from the load balancer, and
|
||||||
|
any hosts which have passed their ejection time to be returned to the
|
||||||
|
load balancer.
|
||||||
|
- `max_failures` - The number of consecutive failures which cause a host to be
|
||||||
|
removed from the load balancer.
|
||||||
|
|
||||||
|
|
||||||
### Gateway Options
|
### Gateway Options
|
||||||
|
|
||||||
These fields may also be overridden explicitly in the [proxy service
|
These fields may also be overridden explicitly in the [proxy service
|
||||||
|
|
Loading…
Reference in New Issue