diff --git a/agent/proxycfg/testing_upstreams.go b/agent/proxycfg/testing_upstreams.go index afb310c75..25679c692 100644 --- a/agent/proxycfg/testing_upstreams.go +++ b/agent/proxycfg/testing_upstreams.go @@ -692,6 +692,16 @@ func setupTestVariationDiscoveryChain( RetryOnConnectFailure: true, }, }, + { + Match: httpMatch(&structs.ServiceRouteHTTPMatch{ + PathPrefix: "/retry-reset", + }), + Destination: &structs.ServiceRouteDestination{ + Service: "retry-reset", + NumRetries: 15, + RetryOn: []string{"reset"}, + }, + }, { Match: httpMatch(&structs.ServiceRouteHTTPMatch{ PathPrefix: "/retry-codes", @@ -704,11 +714,12 @@ func setupTestVariationDiscoveryChain( }, { Match: httpMatch(&structs.ServiceRouteHTTPMatch{ - PathPrefix: "/retry-both", + PathPrefix: "/retry-all", }), Destination: &structs.ServiceRouteDestination{ - Service: "retry-both", + Service: "retry-all", RetryOnConnectFailure: true, + RetryOn: []string{"5xx", "gateway-error", "reset", "connect-failure", "envoy-ratelimited", "retriable-4xx", "refused-stream", "cancelled", "deadline-exceeded", "internal", "resource-exhausted", "unavailable"}, RetryOnStatusCodes: []uint32{401, 409, 451}, }, }, diff --git a/agent/structs/config_entry_discoverychain.go b/agent/structs/config_entry_discoverychain.go index 7867602ca..69800d993 100644 --- a/agent/structs/config_entry_discoverychain.go +++ b/agent/structs/config_entry_discoverychain.go @@ -228,6 +228,12 @@ func (e *ServiceRouterConfigEntry) Validate() error { if route.Destination.PrefixRewrite != "" && !eligibleForPrefixRewrite { return fmt.Errorf("Route[%d] cannot make use of PrefixRewrite without configuring either PathExact or PathPrefix", i) } + + for _, r := range route.Destination.RetryOn { + if !isValidRetryCondition(r) { + return fmt.Errorf("Route[%d] contains an invalid retry condition: %q", i, r) + } + } } } @@ -251,6 +257,26 @@ func isValidHTTPMethod(method string) bool { } } +func isValidRetryCondition(retryOn string) bool { + switch retryOn { + case "5xx", + "gateway-error", + "reset", + "connect-failure", + "envoy-ratelimited", + "retriable-4xx", + "refused-stream", + "cancelled", + "deadline-exceeded", + "internal", + "resource-exhausted", + "unavailable": + return true + default: + return false + } +} + func (e *ServiceRouterConfigEntry) CanRead(authz acl.Authorizer) error { return canReadDiscoveryChain(e, authz) } @@ -409,6 +435,10 @@ type ServiceRouteDestination struct { // 4 failure bubbling up to layer 7. RetryOnConnectFailure bool `json:",omitempty" alias:"retry_on_connect_failure"` + // RetryOn allows setting envoy specific conditions when a request should + // be automatically retried. + RetryOn []string `json:",omitempty" alias:"retry_on"` + // RetryOnStatusCodes is a flat list of http response status codes that are // eligible for retry. This again should be feasible in any reasonable proxy. RetryOnStatusCodes []uint32 `json:",omitempty" alias:"retry_on_status_codes"` @@ -455,7 +485,7 @@ func (e *ServiceRouteDestination) UnmarshalJSON(data []byte) error { } func (d *ServiceRouteDestination) HasRetryFeatures() bool { - return d.NumRetries > 0 || d.RetryOnConnectFailure || len(d.RetryOnStatusCodes) > 0 + return d.NumRetries > 0 || d.RetryOnConnectFailure || len(d.RetryOnStatusCodes) > 0 || len(d.RetryOn) > 0 } // ServiceSplitterConfigEntry defines how incoming requests are split across diff --git a/agent/structs/config_entry_discoverychain_test.go b/agent/structs/config_entry_discoverychain_test.go index 0d6691fa0..dbd766744 100644 --- a/agent/structs/config_entry_discoverychain_test.go +++ b/agent/structs/config_entry_discoverychain_test.go @@ -2054,6 +2054,43 @@ func TestServiceRouterConfigEntry(t *testing.T) { }))), validateErr: "Methods contains \"GET\" more than once", }, + //////////////// + { + name: "route with no match with retry condition", + entry: makerouter(ServiceRoute{ + Match: nil, + Destination: &ServiceRouteDestination{ + Service: "other", + RetryOn: []string{ + "5xx", + "gateway-error", + "reset", + "connect-failure", + "envoy-ratelimited", + "retriable-4xx", + "refused-stream", + "cancelled", + "deadline-exceeded", + "internal", + "resource-exhausted", + "unavailable", + }, + }, + }), + }, + { + name: "route with no match with invalid retry condition", + entry: makerouter(ServiceRoute{ + Match: nil, + Destination: &ServiceRouteDestination{ + Service: "other", + RetryOn: []string{ + "invalid-retry-condition", + }, + }, + }), + validateErr: "contains an invalid retry condition: \"invalid-retry-condition\"", + }, } for _, tc := range cases { @@ -2122,3 +2159,22 @@ func TestIsProtocolHTTPLike(t *testing.T) { assert.True(t, IsProtocolHTTPLike("http2")) assert.True(t, IsProtocolHTTPLike("grpc")) } + +func TestIsValidRetryCondition(t *testing.T) { + assert.False(t, isValidRetryCondition("")) + assert.False(t, isValidRetryCondition("retriable-headers")) + assert.False(t, isValidRetryCondition("retriable-status-codes")) + + assert.True(t, isValidRetryCondition("5xx")) + assert.True(t, isValidRetryCondition("gateway-error")) + assert.True(t, isValidRetryCondition("reset")) + assert.True(t, isValidRetryCondition("connect-failure")) + assert.True(t, isValidRetryCondition("envoy-ratelimited")) + assert.True(t, isValidRetryCondition("retriable-4xx")) + assert.True(t, isValidRetryCondition("refused-stream")) + assert.True(t, isValidRetryCondition("cancelled")) + assert.True(t, isValidRetryCondition("deadline-exceeded")) + assert.True(t, isValidRetryCondition("internal")) + assert.True(t, isValidRetryCondition("resource-exhausted")) + assert.True(t, isValidRetryCondition("unavailable")) +} diff --git a/agent/xds/routes.go b/agent/xds/routes.go index 9bbf3ff71..0ca73f6fd 100644 --- a/agent/xds/routes.go +++ b/agent/xds/routes.go @@ -578,25 +578,7 @@ func (s *ResourceGenerator) makeUpstreamRouteForDiscoveryChain( } if destination.HasRetryFeatures() { - retryPolicy := &envoy_route_v3.RetryPolicy{} - if destination.NumRetries > 0 { - retryPolicy.NumRetries = makeUint32Value(int(destination.NumRetries)) - } - - // The RetryOn magic values come from: https://www.envoyproxy.io/docs/envoy/v1.10.0/configuration/http_filters/router_filter#config-http-filters-router-x-envoy-retry-on - if destination.RetryOnConnectFailure { - retryPolicy.RetryOn = "connect-failure" - } - if len(destination.RetryOnStatusCodes) > 0 { - if retryPolicy.RetryOn != "" { - retryPolicy.RetryOn = retryPolicy.RetryOn + ",retriable-status-codes" - } else { - retryPolicy.RetryOn = "retriable-status-codes" - } - retryPolicy.RetriableStatusCodes = destination.RetryOnStatusCodes - } - - routeAction.Route.RetryPolicy = retryPolicy + routeAction.Route.RetryPolicy = getRetryPolicyForDestination(destination) } if err := injectHeaderManipToRoute(destination, route); err != nil { @@ -663,6 +645,43 @@ func (s *ResourceGenerator) makeUpstreamRouteForDiscoveryChain( return host, nil } +func getRetryPolicyForDestination(destination *structs.ServiceRouteDestination) *envoy_route_v3.RetryPolicy { + retryPolicy := &envoy_route_v3.RetryPolicy{} + if destination.NumRetries > 0 { + retryPolicy.NumRetries = makeUint32Value(int(destination.NumRetries)) + } + + // The RetryOn magic values come from: https://www.envoyproxy.io/docs/envoy/v1.10.0/configuration/http_filters/router_filter#config-http-filters-router-x-envoy-retry-on + var retryStrings []string + + if len(destination.RetryOn) > 0 { + retryStrings = append(retryStrings, destination.RetryOn...) + } + + if destination.RetryOnConnectFailure { + // connect-failure can be enabled by either adding connect-failure to the RetryOn list or by using the legacy RetryOnConnectFailure option + // Check that it's not already in the RetryOn list, so we don't set it twice + connectFailureExists := false + for _, r := range retryStrings { + if r == "connect-failure" { + connectFailureExists = true + } + } + if !connectFailureExists { + retryStrings = append(retryStrings, "connect-failure") + } + } + + if len(destination.RetryOnStatusCodes) > 0 { + retryStrings = append(retryStrings, "retriable-status-codes") + retryPolicy.RetriableStatusCodes = destination.RetryOnStatusCodes + } + + retryPolicy.RetryOn = strings.Join(retryStrings, ",") + + return retryPolicy +} + func makeRouteMatchForDiscoveryRoute(discoveryRoute *structs.DiscoveryRoute) *envoy_route_v3.RouteMatch { match := discoveryRoute.Definition.Match if match == nil || match.IsEmpty() { diff --git a/agent/xds/testdata/routes/connect-proxy-with-chain-and-router.latest.golden b/agent/xds/testdata/routes/connect-proxy-with-chain-and-router.latest.golden index 5f48cd972..adc661135 100644 --- a/agent/xds/testdata/routes/connect-proxy-with-chain-and-router.latest.golden +++ b/agent/xds/testdata/routes/connect-proxy-with-chain-and-router.latest.golden @@ -286,6 +286,18 @@ } } }, + { + "match": { + "prefix": "/retry-reset" + }, + "route": { + "cluster": "retry-reset.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "retryPolicy": { + "retryOn": "reset", + "numRetries": 15 + } + } + }, { "match": { "prefix": "/retry-codes" @@ -305,12 +317,12 @@ }, { "match": { - "prefix": "/retry-both" + "prefix": "/retry-all" }, "route": { - "cluster": "retry-both.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "cluster": "retry-all.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", "retryPolicy": { - "retryOn": "connect-failure,retriable-status-codes", + "retryOn": "5xx,gateway-error,reset,connect-failure,envoy-ratelimited,retriable-4xx,refused-stream,cancelled,deadline-exceeded,internal,resource-exhausted,unavailable,retriable-status-codes", "retriableStatusCodes": [ 401, 409, diff --git a/agent/xds/testdata/routes/ingress-with-chain-and-router-header-manip.latest.golden b/agent/xds/testdata/routes/ingress-with-chain-and-router-header-manip.latest.golden index 4f4ade54c..eaca45d8e 100644 --- a/agent/xds/testdata/routes/ingress-with-chain-and-router-header-manip.latest.golden +++ b/agent/xds/testdata/routes/ingress-with-chain-and-router-header-manip.latest.golden @@ -287,6 +287,18 @@ } } }, + { + "match": { + "prefix": "/retry-reset" + }, + "route": { + "cluster": "retry-reset.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "retryPolicy": { + "retryOn": "reset", + "numRetries": 15 + } + } + }, { "match": { "prefix": "/retry-codes" @@ -306,12 +318,12 @@ }, { "match": { - "prefix": "/retry-both" + "prefix": "/retry-all" }, "route": { - "cluster": "retry-both.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "cluster": "retry-all.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", "retryPolicy": { - "retryOn": "connect-failure,retriable-status-codes", + "retryOn": "5xx,gateway-error,reset,connect-failure,envoy-ratelimited,retriable-4xx,refused-stream,cancelled,deadline-exceeded,internal,resource-exhausted,unavailable,retriable-status-codes", "retriableStatusCodes": [ 401, 409, diff --git a/agent/xds/testdata/routes/ingress-with-chain-and-router.latest.golden b/agent/xds/testdata/routes/ingress-with-chain-and-router.latest.golden index 06a8dcc84..a34ea1f68 100644 --- a/agent/xds/testdata/routes/ingress-with-chain-and-router.latest.golden +++ b/agent/xds/testdata/routes/ingress-with-chain-and-router.latest.golden @@ -287,6 +287,18 @@ } } }, + { + "match": { + "prefix": "/retry-reset" + }, + "route": { + "cluster": "retry-reset.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "retryPolicy": { + "retryOn": "reset", + "numRetries": 15 + } + } + }, { "match": { "prefix": "/retry-codes" @@ -306,12 +318,12 @@ }, { "match": { - "prefix": "/retry-both" + "prefix": "/retry-all" }, "route": { - "cluster": "retry-both.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "cluster": "retry-all.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", "retryPolicy": { - "retryOn": "connect-failure,retriable-status-codes", + "retryOn": "5xx,gateway-error,reset,connect-failure,envoy-ratelimited,retriable-4xx,refused-stream,cancelled,deadline-exceeded,internal,resource-exhausted,unavailable,retriable-status-codes", "retriableStatusCodes": [ 401, 409, diff --git a/api/config_entry_discoverychain.go b/api/config_entry_discoverychain.go index f827708ee..734f9454e 100644 --- a/api/config_entry_discoverychain.go +++ b/api/config_entry_discoverychain.go @@ -72,6 +72,7 @@ type ServiceRouteDestination struct { NumRetries uint32 `json:",omitempty" alias:"num_retries"` RetryOnConnectFailure bool `json:",omitempty" alias:"retry_on_connect_failure"` RetryOnStatusCodes []uint32 `json:",omitempty" alias:"retry_on_status_codes"` + RetryOn []string `json:",omitempty" alias:"retry_on"` RequestHeaders *HTTPHeaderModifiers `json:",omitempty" alias:"request_headers"` ResponseHeaders *HTTPHeaderModifiers `json:",omitempty" alias:"response_headers"` } diff --git a/api/config_entry_discoverychain_test.go b/api/config_entry_discoverychain_test.go index 8facb72e1..521494f17 100644 --- a/api/config_entry_discoverychain_test.go +++ b/api/config_entry_discoverychain_test.go @@ -272,6 +272,18 @@ func TestAPI_ConfigEntry_DiscoveryChain(t *testing.T) { NumRetries: 5, RetryOnConnectFailure: true, RetryOnStatusCodes: []uint32{500, 503, 401}, + RetryOn: []string{ + "gateway-error", + "reset", + "envoy-ratelimited", + "retriable-4xx", + "refused-stream", + "cancelled", + "deadline-exceeded", + "internal", + "resource-exhausted", + "unavailable", + }, RequestHeaders: &HTTPHeaderModifiers{ Set: map[string]string{ "x-foo": "bar", diff --git a/test/integration/connect/envoy/case-cfg-router-features/config_entries.hcl b/test/integration/connect/envoy/case-cfg-router-features/config_entries.hcl index 5eaafeda9..800988207 100644 --- a/test/integration/connect/envoy/case-cfg-router-features/config_entries.hcl +++ b/test/integration/connect/envoy/case-cfg-router-features/config_entries.hcl @@ -68,6 +68,7 @@ config_entries { destination { service_subset = "v2" retry_on_connect_failure = true # TODO: test + retry_on = ["reset"] # TODO: test retry_on_status_codes = [500, 512] # TODO: test } }, diff --git a/website/content/docs/connect/config-entries/service-router.mdx b/website/content/docs/connect/config-entries/service-router.mdx index 29ea6f046..5f3722f51 100644 --- a/website/content/docs/connect/config-entries/service-router.mdx +++ b/website/content/docs/connect/config-entries/service-router.mdx @@ -303,7 +303,7 @@ Routes = [ Match{ HTTP { PathPrefix = "/coffees" - } + } } Destination { @@ -311,13 +311,14 @@ Routes = [ RequestTimeout = "10s" NumRetries = 3 RetryOnConnectFailure = true + RetryOn = ["reset"] } }, { Match{ HTTP { PathPrefix = "/orders" - } + } } Destination { @@ -325,6 +326,7 @@ Routes = [ RequestTimeout = "10s" NumRetries = 3 RetryOnConnectFailure = true + RetryOn = ["reset"] } } ] @@ -345,6 +347,7 @@ spec: requestTimeout: 10s numRetries: 3 retryOnConnectFailure: true + retryOn: ['reset'] - match: http: pathExact: /orders @@ -353,7 +356,7 @@ spec: requestTimeout: 10s numRetries: 3 retryOnConnectFailure: true - + retryOn: ['reset'] ``` @@ -372,6 +375,7 @@ spec: "NumRetries": 3, "RequestTimeout": "10s", "RetryOnConnectFailure": true, + "RetryOn": ["reset"], "Service": "procurement" } }, @@ -385,6 +389,7 @@ spec: "NumRetries": 3, "RequestTimeout": "10s", "RetryOnConnectFailure": true, + "RetryOn": ["reset"], "Service": "procurement" } } @@ -702,6 +707,13 @@ spec: type: 'bool: false', description: 'Allows for connection failure errors to trigger a retry.', }, + { + name: 'RetryOn', + type: 'array', + description: `Allows Consul to retry requests when they meet one of the following sets of conditions: + \`5xx\`, \`gateway-error\`, \`reset\`, \`connect-failure\`, \`envoy-ratelimited\`, \`retriable-4xx\`, + \`refused-stream\`, \`cancelled\`, \`deadline-exceeded\`, \`internal\`, \`resource-exhausted\`, or \`unavailable\``, + }, { name: 'RetryOnStatusCodes', type: 'array',